From ec958de64904d0f9bbb11dd90ad0f05e3f3cddc6 Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Fri, 20 Mar 2026 11:55:37 +1000 Subject: [PATCH] ASM generation fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Many comment fixes, label renames and other non-functional changes. Bug Fixes x86_64 (aes_xts_asm.S/.asm) - Removed a spurious movl %edx, %eax that was clobbering a register, then fixed two comparisons to use %edx instead of the now-stale %eax. This was a functional bug in AES-XTS key-rounds selection. x86_64 (fe_x25519_asm.S) - Changed xor %rbx, %rbx → xorq %rbx, %rbx (explicit 64-bit operand size suffix; the assembler already infers the size from the register operands, so the encoding is unchanged). ARM32 (sp_arm32.c, sp_cortexm.c) - Fixed typo in assembly label names: sub_in_pkace → sub_in_place (both label definitions and branch targets). Affected 2048-bit and 3072-bit SP functions. - Fixed wrong source register in multiply/accumulate sequences: r11 → r7 and r3 → r4 (functional register-use bugs). ARM32 ChaCha (armv8-32-chacha-asm.S/_c.c) - Fixed label typo: same_keyb_ytes → same_key_bytes - Fixed NEON instruction syntax: vrev32.i16 → vrev32.16 (invalid mnemonic → correct ARM NEON form, affects multiple sites) ARM32 SHA3 (armv8-32-sha3-asm_c.c, .S) - Fixed symbol name typo: L_sha3_arm2_neon_rt / L_sha3_arm2_rt → L_sha3_arm32_neon_rt / L_sha3_arm32_rt ARM32 AES (armv8-32-aes-asm_c.c, thumb2-aes-asm_c.c, .S variants) - Fixed #endif comment: WOLFSSL_ARMASM_AES_BLOCK_INLINE → !WOLFSSL_ARMASM_AES_BLOCK_INLINE (the negation was missing from the comment) ARM64 ChaCha (armv8-chacha-asm_c.c/.S) - Fixed label typo: arm64loop_lt_8 → arm64_loop_lt_8 ARM32 ML-KEM (armv8-32-mlkem-asm.S/_c.c) - Fixed #endif comment typo: WOLFSLS_ARM_ARCH → WOLFSSL_ARM_ARCH (across many occurrences) SHA-512 (sha512_asm.S) - Corrected the index ranges in comments: msg_sched done: 0-3 → 0-1, 2-5 → 2-3, etc. 
(only 2 entries scheduled per block, not 4) --- wolfcrypt/src/aes_xts_asm.S | 5 +- wolfcrypt/src/aes_xts_asm.asm | 5 +- wolfcrypt/src/fe_x25519_asm.S | 2 +- wolfcrypt/src/port/arm/armv8-32-aes-asm.S | 6 +- wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c | 6 +- wolfcrypt/src/port/arm/armv8-32-chacha-asm.S | 28 +-- .../src/port/arm/armv8-32-chacha-asm_c.c | 28 +-- wolfcrypt/src/port/arm/armv8-32-mlkem-asm.S | 184 +++++++++--------- wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c | 184 +++++++++--------- wolfcrypt/src/port/arm/armv8-32-sha3-asm.S | 16 +- wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c | 26 +-- wolfcrypt/src/port/arm/armv8-chacha-asm.S | 4 +- wolfcrypt/src/port/arm/armv8-chacha-asm_c.c | 4 +- wolfcrypt/src/port/arm/thumb2-aes-asm.S | 6 +- wolfcrypt/src/port/arm/thumb2-aes-asm_c.c | 6 +- wolfcrypt/src/port/arm/thumb2-chacha-asm.S | 6 +- wolfcrypt/src/port/arm/thumb2-chacha-asm_c.c | 10 +- wolfcrypt/src/sha512_asm.S | 96 ++++----- wolfcrypt/src/sp_arm32.c | 182 +++++++++-------- wolfcrypt/src/sp_cortexm.c | 136 +++++++------ wolfcrypt/src/sp_x86_64_asm.S | 10 +- wolfcrypt/src/sp_x86_64_asm.asm | 10 +- 22 files changed, 493 insertions(+), 467 deletions(-) diff --git a/wolfcrypt/src/aes_xts_asm.S b/wolfcrypt/src/aes_xts_asm.S index bb876466947..ee646203023 100644 --- a/wolfcrypt/src/aes_xts_asm.S +++ b/wolfcrypt/src/aes_xts_asm.S @@ -1444,7 +1444,6 @@ AES_XTS_init_avx1: .p2align 4 _AES_XTS_init_avx1: #endif /* __APPLE__ */ - movl %edx, %eax vmovdqu (%rdi), %xmm0 # aes_enc_block vpxor (%rsi), %xmm0, %xmm0 @@ -1466,13 +1465,13 @@ _AES_XTS_init_avx1: vaesenc %xmm2, %xmm0, %xmm0 vmovdqu 144(%rsi), %xmm2 vaesenc %xmm2, %xmm0, %xmm0 - cmpl $11, %eax + cmpl $11, %edx vmovdqu 160(%rsi), %xmm2 jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last vaesenc %xmm2, %xmm0, %xmm0 vmovdqu 176(%rsi), %xmm3 vaesenc %xmm3, %xmm0, %xmm0 - cmpl $13, %eax + cmpl $13, %edx vmovdqu 192(%rsi), %xmm2 jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last vaesenc %xmm2, %xmm0, %xmm0 diff --git 
a/wolfcrypt/src/aes_xts_asm.asm b/wolfcrypt/src/aes_xts_asm.asm index 6d573ce3bd1..c28cb2c9ad2 100644 --- a/wolfcrypt/src/aes_xts_asm.asm +++ b/wolfcrypt/src/aes_xts_asm.asm @@ -1456,7 +1456,6 @@ _text ENDS IFDEF HAVE_INTEL_AVX1 _text SEGMENT READONLY PARA AES_XTS_init_avx1 PROC - mov eax, r8d vmovdqu xmm0, OWORD PTR [rcx] ; aes_enc_block vpxor xmm0, xmm0, [rdx] @@ -1478,13 +1477,13 @@ AES_XTS_init_avx1 PROC vaesenc xmm0, xmm0, xmm2 vmovdqu xmm2, OWORD PTR [rdx+144] vaesenc xmm0, xmm0, xmm2 - cmp eax, 11 + cmp r8d, 11 vmovdqu xmm2, OWORD PTR [rdx+160] jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last vaesenc xmm0, xmm0, xmm2 vmovdqu xmm3, OWORD PTR [rdx+176] vaesenc xmm0, xmm0, xmm3 - cmp eax, 13 + cmp r8d, 13 vmovdqu xmm2, OWORD PTR [rdx+192] jl L_AES_XTS_init_avx1_tweak_aes_enc_block_last vaesenc xmm0, xmm0, xmm2 diff --git a/wolfcrypt/src/fe_x25519_asm.S b/wolfcrypt/src/fe_x25519_asm.S index 1a5a397e331..f4cdf343c0a 100644 --- a/wolfcrypt/src/fe_x25519_asm.S +++ b/wolfcrypt/src/fe_x25519_asm.S @@ -12372,7 +12372,7 @@ _fe_cmov_table_avx2: pushq %r15 pushq %rbx movq %rdx, %rcx - xor %rbx, %rbx + xorq %rbx, %rbx movsbq %cl, %rax cdq xorb %dl, %al diff --git a/wolfcrypt/src/port/arm/armv8-32-aes-asm.S b/wolfcrypt/src/port/arm/armv8-32-aes-asm.S index 94e9e37dcd4..88882b3a486 100644 --- a/wolfcrypt/src/port/arm/armv8-32-aes-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-aes-asm.S @@ -12555,7 +12555,7 @@ L_AES_CBC_encrypt_block_nr_256: eor r5, r5, r9 eor r6, r6, r10 eor r7, r7, r11 -#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */ +#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */ pop {r1, r2, lr} ldr r3, [sp] #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) @@ -13269,7 +13269,7 @@ L_AES_CBC_encrypt_block_nr_192: eor r5, r5, r9 eor r6, r6, r10 eor r7, r7, r11 -#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */ +#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */ pop {r1, r2, lr} ldr r3, [sp] #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) @@ -13983,7 +13983,7 @@ 
L_AES_CBC_encrypt_block_nr_128: eor r5, r5, r9 eor r6, r6, r10 eor r7, r7, r11 -#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */ +#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */ pop {r1, r2, lr} ldr r3, [sp] #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) diff --git a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c index bfcd4ff83d3..360e0fc5981 100644 --- a/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-aes-asm_c.c @@ -12930,7 +12930,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, "eor r5, r5, r9\n\t" "eor r6, r6, r10\n\t" "eor r7, r7, r11\n\t" -#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */ +#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */ "pop {r1, %[len], lr}\n\t" "ldr %[ks], [sp]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) @@ -13647,7 +13647,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, "eor r5, r5, r9\n\t" "eor r6, r6, r10\n\t" "eor r7, r7, r11\n\t" -#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */ +#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */ "pop {r1, %[len], lr}\n\t" "ldr %[ks], [sp]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) @@ -14364,7 +14364,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, "eor r5, r5, r9\n\t" "eor r6, r6, r10\n\t" "eor r7, r7, r11\n\t" -#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */ +#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */ "pop {r1, %[len], lr}\n\t" "ldr %[ks], [sp]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) diff --git a/wolfcrypt/src/port/arm/armv8-32-chacha-asm.S b/wolfcrypt/src/port/arm/armv8-32-chacha-asm.S index 6c71cded644..c740134a16e 100644 --- a/wolfcrypt/src/port/arm/armv8-32-chacha-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-chacha-asm.S @@ -89,14 +89,14 @@ wc_chacha_setkey: #endif /* BIG_ENDIAN_ORDER */ stm r0!, {r4, r5, r12, lr} # Next 16 bytes of key. 
- beq L_chacha_arm32_setkey_same_keyb_ytes + beq L_chacha_arm32_setkey_same_key_bytes # Update key pointer for next 16 bytes. add r1, r1, r2 ldr r4, [r1] ldr r5, [r1, #4] ldr r12, [r1, #8] ldr lr, [r1, #12] -L_chacha_arm32_setkey_same_keyb_ytes: +L_chacha_arm32_setkey_same_key_bytes: stm r0, {r4, r5, r12, lr} pop {r4, r5, pc} .size wc_chacha_setkey,.-wc_chacha_setkey @@ -572,11 +572,11 @@ L_chacha_crypt_bytes_arm32_round_start_256: ror r11, r11, #16 veor q7, q7, q4 add r8, r8, r10 - vrev32.i16 q15, q15 + vrev32.16 q15, q15 add r9, r9, r11 - vrev32.i16 q3, q3 + vrev32.16 q3, q3 eor r4, r4, r8 - vrev32.i16 q7, q7 + vrev32.16 q7, q7 eor r5, r5, r9 # c += d; b ^= c; b <<<= 12; vadd.i32 q14, q14, q15 @@ -685,11 +685,11 @@ L_chacha_crypt_bytes_arm32_round_start_256: ror r10, r10, #16 veor q7, q7, q4 add r8, r8, r11 - vrev32.i16 q15, q15 + vrev32.16 q15, q15 add r9, r9, r10 - vrev32.i16 q3, q3 + vrev32.16 q3, q3 eor r5, r5, r8 - vrev32.i16 q7, q7 + vrev32.16 q7, q7 eor r6, r6, r9 # c += d; b ^= c; b <<<= 12; vadd.i32 q14, q14, q15 @@ -891,8 +891,8 @@ L_chacha_crypt_bytes_arm32_round_start_128: vadd.i32 q4, q4, q5 veor q3, q3, q0 veor q7, q7, q4 - vrev32.i16 q3, q3 - vrev32.i16 q7, q7 + vrev32.16 q3, q3 + vrev32.16 q7, q7 # c += d; b ^= c; b <<<= 12; vadd.i32 q2, q2, q3 vadd.i32 q6, q6, q7 @@ -932,8 +932,8 @@ L_chacha_crypt_bytes_arm32_round_start_128: vadd.i32 q4, q4, q5 veor q3, q3, q0 veor q7, q7, q4 - vrev32.i16 q3, q3 - vrev32.i16 q7, q7 + vrev32.16 q3, q3 + vrev32.16 q7, q7 # c += d; b ^= c; b <<<= 12; vadd.i32 q2, q2, q3 vadd.i32 q6, q6, q7 @@ -1178,13 +1178,13 @@ wc_chacha_setkey: vldm r3, {q0} vld1.8 {q1}, [r1]! 
#ifdef BIG_ENDIAN_ORDER - vrev32.i16 q1, q1 + vrev32.16 q1, q1 #endif /* BIG_ENDIAN_ORDER */ vstm r0!, {q0, q1} beq L_chacha_setkey_arm32_done vld1.8 {q1}, [r1] #ifdef BIG_ENDIAN_ORDER - vrev32.i16 q1, q1 + vrev32.16 q1, q1 #endif /* BIG_ENDIAN_ORDER */ L_chacha_setkey_arm32_done: vstm r0, {q1} diff --git a/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c index 6ece843ba41..323e93ff329 100644 --- a/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-chacha-asm_c.c @@ -134,7 +134,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x, const byte* key, #endif /* BIG_ENDIAN_ORDER */ "stm %[x]!, {r4, r5, r12, lr}\n\t" /* Next 16 bytes of key. */ - "beq L_chacha_arm32_setkey_same_keyb_ytes_%=\n\t" + "beq L_chacha_arm32_setkey_same_key_bytes_%=\n\t" /* Update key pointer for next 16 bytes. */ "add %[key], %[key], %[keySz]\n\t" "ldr r4, [%[key]]\n\t" @@ -142,7 +142,7 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x, const byte* key, "ldr r12, [%[key], #8]\n\t" "ldr lr, [%[key], #12]\n\t" "\n" - "L_chacha_arm32_setkey_same_keyb_ytes_%=: \n\t" + "L_chacha_arm32_setkey_same_key_bytes_%=: \n\t" "stm %[x], {r4, r5, r12, lr}\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [x] "+r" (x), [key] "+r" (key), [keySz] "+r" (keySz), @@ -693,11 +693,11 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "ror r11, r11, #16\n\t" "veor q7, q7, q4\n\t" "add r8, r8, r10\n\t" - "vrev32.i16 q15, q15\n\t" + "vrev32.16 q15, q15\n\t" "add r9, r9, r11\n\t" - "vrev32.i16 q3, q3\n\t" + "vrev32.16 q3, q3\n\t" "eor r4, r4, r8\n\t" - "vrev32.i16 q7, q7\n\t" + "vrev32.16 q7, q7\n\t" "eor r5, r5, r9\n\t" /* c += d; b ^= c; b <<<= 12; */ "vadd.i32 q14, q14, q15\n\t" @@ -806,11 +806,11 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "ror r10, r10, #16\n\t" "veor q7, q7, q4\n\t" "add r8, r8, r11\n\t" - "vrev32.i16 q15, q15\n\t" + "vrev32.16 q15, q15\n\t" "add r9, r9, r10\n\t" 
- "vrev32.i16 q3, q3\n\t" + "vrev32.16 q3, q3\n\t" "eor r5, r5, r8\n\t" - "vrev32.i16 q7, q7\n\t" + "vrev32.16 q7, q7\n\t" "eor r6, r6, r9\n\t" /* c += d; b ^= c; b <<<= 12; */ "vadd.i32 q14, q14, q15\n\t" @@ -1014,8 +1014,8 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "vadd.i32 q4, q4, q5\n\t" "veor q3, q3, q0\n\t" "veor q7, q7, q4\n\t" - "vrev32.i16 q3, q3\n\t" - "vrev32.i16 q7, q7\n\t" + "vrev32.16 q3, q3\n\t" + "vrev32.16 q7, q7\n\t" /* c += d; b ^= c; b <<<= 12; */ "vadd.i32 q2, q2, q3\n\t" "vadd.i32 q6, q6, q7\n\t" @@ -1055,8 +1055,8 @@ WC_OMIT_FRAME_POINTER void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, "vadd.i32 q4, q4, q5\n\t" "veor q3, q3, q0\n\t" "veor q7, q7, q4\n\t" - "vrev32.i16 q3, q3\n\t" - "vrev32.i16 q7, q7\n\t" + "vrev32.16 q3, q3\n\t" + "vrev32.16 q7, q7\n\t" /* c += d; b ^= c; b <<<= 12; */ "vadd.i32 q2, q2, q3\n\t" "vadd.i32 q6, q6, q7\n\t" @@ -1329,13 +1329,13 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x, const byte* key, "vldm r3, {q0}\n\t" "vld1.8 {q1}, [%[key]]!\n\t" #ifdef BIG_ENDIAN_ORDER - "vrev32.i16 q1, q1\n\t" + "vrev32.16 q1, q1\n\t" #endif /* BIG_ENDIAN_ORDER */ "vstm %[x]!, {q0-q1}\n\t" "beq L_chacha_setkey_arm32_done_%=\n\t" "vld1.8 {q1}, [%[key]]\n\t" #ifdef BIG_ENDIAN_ORDER - "vrev32.i16 q1, q1\n\t" + "vrev32.16 q1, q1\n\t" #endif /* BIG_ENDIAN_ORDER */ "\n" "L_chacha_setkey_arm32_done_%=: \n\t" diff --git a/wolfcrypt/src/port/arm/armv8-32-mlkem-asm.S b/wolfcrypt/src/port/arm/armv8-32-mlkem-asm.S index cec2df3f1b9..90a275c2326 100644 --- a/wolfcrypt/src/port/arm/armv8-32-mlkem-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-mlkem-asm.S @@ -185,7 +185,7 @@ mlkem_arm32_ntt: #else movt r10, #0xcff #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ mov r2, #16 L_mlkem_arm32_ntt_loop_123: str r2, [sp] @@ -288,7 +288,7 @@ L_mlkem_arm32_ntt_loop_123: #else bfi r2, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && 
WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulbb r12, r11, r7 smulbt r7, r11, r7 @@ -379,7 +379,7 @@ L_mlkem_arm32_ntt_loop_123: #else bfi r3, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulbb r12, r11, r8 smulbt r8, r11, r8 @@ -470,7 +470,7 @@ L_mlkem_arm32_ntt_loop_123: #else bfi r4, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulbb r12, r11, r9 smulbt r9, r11, r9 @@ -561,7 +561,7 @@ L_mlkem_arm32_ntt_loop_123: #else bfi r5, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ ldr r11, [r1, #4] #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulbb r12, r11, r4 @@ -653,7 +653,7 @@ L_mlkem_arm32_ntt_loop_123: #else bfi r2, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulbb r12, r11, r5 smulbt r5, r11, r5 @@ -744,7 +744,7 @@ L_mlkem_arm32_ntt_loop_123: #else bfi r3, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smultb r12, r11, r8 smultt r8, r11, r8 @@ -834,7 +834,7 @@ L_mlkem_arm32_ntt_loop_123: #else bfi r6, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smultb r12, r11, r9 smultt r9, r11, r9 @@ -924,7 +924,7 @@ L_mlkem_arm32_ntt_loop_123: #else bfi r7, r12, #0, #16 #endif 
-#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ ldr r11, [r1, #8] #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulbb r12, r11, r3 @@ -1016,7 +1016,7 @@ L_mlkem_arm32_ntt_loop_123: #else bfi r2, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smultb r12, r11, r5 smultt r5, r11, r5 @@ -1106,7 +1106,7 @@ L_mlkem_arm32_ntt_loop_123: #else bfi r4, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ ldr r11, [r1, #12] #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulbb r12, r11, r7 @@ -1198,7 +1198,7 @@ L_mlkem_arm32_ntt_loop_123: #else bfi r6, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smultb r12, r11, r9 smultt r9, r11, r9 @@ -1288,7 +1288,7 @@ L_mlkem_arm32_ntt_loop_123: #else bfi r8, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ str r2, [r0] str r3, [r0, #64] str r4, [r0, #128] @@ -1408,7 +1408,7 @@ L_mlkem_arm32_ntt_loop_4_i: #else bfi r2, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulbb r12, r11, r5 smulbt r5, r11, r5 @@ -1499,7 +1499,7 @@ L_mlkem_arm32_ntt_loop_4_i: #else bfi r3, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smultb r12, r11, r8 smultt r8, r11, r8 @@ -1589,7 +1589,7 @@ L_mlkem_arm32_ntt_loop_4_i: #else bfi r6, r12, #0, #16 #endif 
-#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smultb r12, r11, r9 smultt r9, r11, r9 @@ -1679,7 +1679,7 @@ L_mlkem_arm32_ntt_loop_4_i: #else bfi r7, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ str r2, [r0] str r3, [r0, #16] str r4, [r0, #32] @@ -1804,7 +1804,7 @@ L_mlkem_arm32_ntt_loop_567: #else bfi r2, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulbb r12, r11, r7 smulbt r7, r11, r7 @@ -1895,7 +1895,7 @@ L_mlkem_arm32_ntt_loop_567: #else bfi r3, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulbb r12, r11, r8 smulbt r8, r11, r8 @@ -1986,7 +1986,7 @@ L_mlkem_arm32_ntt_loop_567: #else bfi r4, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulbb r12, r11, r9 smulbt r9, r11, r9 @@ -2077,7 +2077,7 @@ L_mlkem_arm32_ntt_loop_567: #else bfi r5, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ ldr r11, [sp, #4] add r11, r1, r11, lsr #2 ldr r11, [r11, #64] @@ -2171,7 +2171,7 @@ L_mlkem_arm32_ntt_loop_567: #else bfi r2, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulbb r12, r11, r5 smulbt r5, r11, r5 @@ -2262,7 +2262,7 @@ L_mlkem_arm32_ntt_loop_567: #else bfi r3, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && 
WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smultb r12, r11, r8 smultt r8, r11, r8 @@ -2352,7 +2352,7 @@ L_mlkem_arm32_ntt_loop_567: #else bfi r6, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smultb r12, r11, r9 smultt r9, r11, r9 @@ -2442,7 +2442,7 @@ L_mlkem_arm32_ntt_loop_567: #else bfi r7, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ ldr r11, [sp, #4] add r11, r1, r11, lsr #1 ldr r11, [r11, #128] @@ -2536,7 +2536,7 @@ L_mlkem_arm32_ntt_loop_567: #else bfi r2, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smultb r12, r11, r5 smultt r5, r11, r5 @@ -2626,7 +2626,7 @@ L_mlkem_arm32_ntt_loop_567: #else bfi r4, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ ldr r11, [sp, #4] add r11, r1, r11, lsr #1 ldr r11, [r11, #132] @@ -2720,7 +2720,7 @@ L_mlkem_arm32_ntt_loop_567: #else bfi r6, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smultb r12, r11, r9 smultt r9, r11, r9 @@ -2810,7 +2810,7 @@ L_mlkem_arm32_ntt_loop_567: #else bfi r8, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r11, #0xc0 @@ -2836,7 +2836,7 @@ L_mlkem_arm32_ntt_loop_567: #else mov r10, #0xd01 #endif -#endif /* 
WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulwb r12, r11, r2 smulwt lr, r11, r2 @@ -2872,7 +2872,7 @@ L_mlkem_arm32_ntt_loop_567: #else bfi r2, lr, #16, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulwb r12, r11, r3 smulwt lr, r11, r3 @@ -2908,7 +2908,7 @@ L_mlkem_arm32_ntt_loop_567: #else bfi r3, lr, #16, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulwb r12, r11, r4 smulwt lr, r11, r4 @@ -2944,7 +2944,7 @@ L_mlkem_arm32_ntt_loop_567: #else bfi r4, lr, #16, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulwb r12, r11, r5 smulwt lr, r11, r5 @@ -2980,7 +2980,7 @@ L_mlkem_arm32_ntt_loop_567: #else bfi r5, lr, #16, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulwb r12, r11, r6 smulwt lr, r11, r6 @@ -3016,7 +3016,7 @@ L_mlkem_arm32_ntt_loop_567: #else bfi r6, lr, #16, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulwb r12, r11, r7 smulwt lr, r11, r7 @@ -3052,7 +3052,7 @@ L_mlkem_arm32_ntt_loop_567: #else bfi r7, lr, #16, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulwb r12, r11, r8 smulwt lr, r11, r8 @@ -3088,7 +3088,7 @@ L_mlkem_arm32_ntt_loop_567: 
#else bfi r8, lr, #16, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulwb r12, r11, r9 smulwt lr, r11, r9 @@ -3124,7 +3124,7 @@ L_mlkem_arm32_ntt_loop_567: #else bfi r9, lr, #16, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r10, #0x1 @@ -3138,7 +3138,7 @@ L_mlkem_arm32_ntt_loop_567: #else movt r10, #0xcff #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ str r2, [r0] str r3, [r0, #4] str r4, [r0, #8] @@ -3309,7 +3309,7 @@ mlkem_arm32_invntt: #else movt r10, #0xcff #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ mov r3, #0 L_mlkem_invntt_loop_765: add r11, r1, r3, lsr #1 @@ -3439,7 +3439,7 @@ L_mlkem_invntt_loop_765: #else bfi r3, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) ssub16 r12, r4, r5 sadd16 r4, r4, r5 @@ -3555,7 +3555,7 @@ L_mlkem_invntt_loop_765: #else bfi r5, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ ldr r11, [sp, #4] add r11, r1, r11, lsr #1 ldr r11, [r11, #4] @@ -3675,7 +3675,7 @@ L_mlkem_invntt_loop_765: #else bfi r7, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) ssub16 r12, r8, r9 sadd16 r8, r8, r9 @@ -3791,7 +3791,7 @@ L_mlkem_invntt_loop_765: #else bfi r9, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && 
WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ ldr r11, [sp, #4] add r11, r1, r11, lsr #2 ldr r11, [r11, #128] @@ -3911,7 +3911,7 @@ L_mlkem_invntt_loop_765: #else bfi r4, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) ssub16 r12, r3, r5 sadd16 r3, r3, r5 @@ -4028,7 +4028,7 @@ L_mlkem_invntt_loop_765: #else bfi r5, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) ssub16 r12, r6, r8 sadd16 r6, r6, r8 @@ -4144,7 +4144,7 @@ L_mlkem_invntt_loop_765: #else bfi r8, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) ssub16 r12, r7, r9 sadd16 r7, r7, r9 @@ -4260,7 +4260,7 @@ L_mlkem_invntt_loop_765: #else bfi r9, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ ldr r11, [sp, #4] add r11, r1, r11, lsr #3 ldr r11, [r11, #192] @@ -4380,7 +4380,7 @@ L_mlkem_invntt_loop_765: #else bfi r6, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) ssub16 r12, r3, r7 sadd16 r3, r3, r7 @@ -4497,7 +4497,7 @@ L_mlkem_invntt_loop_765: #else bfi r7, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) ssub16 r12, r4, r8 sadd16 r4, r4, r8 @@ -4614,7 +4614,7 @@ L_mlkem_invntt_loop_765: #else bfi r8, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* 
WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) ssub16 r12, r5, r9 sadd16 r5, r5, r9 @@ -4731,7 +4731,7 @@ L_mlkem_invntt_loop_765: #else bfi r9, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r11, #0xc0 @@ -4751,7 +4751,7 @@ L_mlkem_invntt_loop_765: #else mov r11, #0x4ebf #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulwb r12, r11, r2 smulwt lr, r11, r2 @@ -4787,7 +4787,7 @@ L_mlkem_invntt_loop_765: #else bfi r2, lr, #16, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulwb r12, r11, r3 smulwt lr, r11, r3 @@ -4823,7 +4823,7 @@ L_mlkem_invntt_loop_765: #else bfi r3, lr, #16, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulwb r12, r11, r4 smulwt lr, r11, r4 @@ -4859,7 +4859,7 @@ L_mlkem_invntt_loop_765: #else bfi r4, lr, #16, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulwb r12, r11, r5 smulwt lr, r11, r5 @@ -4895,7 +4895,7 @@ L_mlkem_invntt_loop_765: #else bfi r5, lr, #16, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ str r2, [r0] str r3, [r0, #4] str r4, [r0, #8] @@ -5042,7 +5042,7 @@ L_mlkem_invntt_loop_4_i: #else bfi r4, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* 
WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) ssub16 r12, r3, r5 sadd16 r3, r3, r5 @@ -5159,7 +5159,7 @@ L_mlkem_invntt_loop_4_i: #else bfi r5, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) ssub16 r12, r6, r8 sadd16 r6, r6, r8 @@ -5275,7 +5275,7 @@ L_mlkem_invntt_loop_4_i: #else bfi r8, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) ssub16 r12, r7, r9 sadd16 r7, r7, r9 @@ -5391,7 +5391,7 @@ L_mlkem_invntt_loop_4_i: #else bfi r9, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ str r2, [r0] str r3, [r0, #16] str r4, [r0, #32] @@ -5542,7 +5542,7 @@ L_mlkem_invntt_loop_321: #else bfi r3, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) ssub16 r12, r4, r5 sadd16 r4, r4, r5 @@ -5658,7 +5658,7 @@ L_mlkem_invntt_loop_321: #else bfi r5, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ ldr r11, [r1, #244] #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) ssub16 r12, r6, r7 @@ -5776,7 +5776,7 @@ L_mlkem_invntt_loop_321: #else bfi r7, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) ssub16 r12, r8, r9 sadd16 r8, r8, r9 @@ -5892,7 +5892,7 @@ L_mlkem_invntt_loop_321: #else bfi r9, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && 
WOLFSSL_ARM_ARCH >= 6 */ ldr r11, [r1, #248] #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) ssub16 r12, r2, r4 @@ -6010,7 +6010,7 @@ L_mlkem_invntt_loop_321: #else bfi r4, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) ssub16 r12, r3, r5 sadd16 r3, r3, r5 @@ -6127,7 +6127,7 @@ L_mlkem_invntt_loop_321: #else bfi r5, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) ssub16 r12, r6, r8 sadd16 r6, r6, r8 @@ -6243,7 +6243,7 @@ L_mlkem_invntt_loop_321: #else bfi r8, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) ssub16 r12, r7, r9 sadd16 r7, r7, r9 @@ -6359,7 +6359,7 @@ L_mlkem_invntt_loop_321: #else bfi r9, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) mov r11, #0xc0 @@ -6379,7 +6379,7 @@ L_mlkem_invntt_loop_321: #else mov r11, #0x4ebf #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulwb r12, r11, r2 smulwt lr, r11, r2 @@ -6415,7 +6415,7 @@ L_mlkem_invntt_loop_321: #else bfi r2, lr, #16, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulwb r12, r11, r3 smulwt lr, r11, r3 @@ -6451,7 +6451,7 @@ L_mlkem_invntt_loop_321: #else bfi r3, lr, #16, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && 
WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulwb r12, r11, r4 smulwt lr, r11, r4 @@ -6487,7 +6487,7 @@ L_mlkem_invntt_loop_321: #else bfi r4, lr, #16, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulwb r12, r11, r5 smulwt lr, r11, r5 @@ -6523,7 +6523,7 @@ L_mlkem_invntt_loop_321: #else bfi r5, lr, #16, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ ldr r11, [r1, #252] #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) ssub16 r12, r2, r6 @@ -6641,7 +6641,7 @@ L_mlkem_invntt_loop_321: #else bfi r6, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) ssub16 r12, r3, r7 sadd16 r3, r3, r7 @@ -6758,7 +6758,7 @@ L_mlkem_invntt_loop_321: #else bfi r7, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) ssub16 r12, r4, r8 sadd16 r4, r4, r8 @@ -6875,7 +6875,7 @@ L_mlkem_invntt_loop_321: #else bfi r8, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) ssub16 r12, r5, r9 sadd16 r5, r5, r9 @@ -6992,7 +6992,7 @@ L_mlkem_invntt_loop_321: #else bfi r9, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ ldr r11, [r1, #254] #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulbb r12, r11, r2 @@ -7076,7 +7076,7 @@ L_mlkem_invntt_loop_321: #else bfi r2, r12, #0, #16 #endif -#endif /* 
WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulbb r12, r11, r3 smulbt r3, r11, r3 @@ -7159,7 +7159,7 @@ L_mlkem_invntt_loop_321: #else bfi r3, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulbb r12, r11, r4 smulbt r4, r11, r4 @@ -7242,7 +7242,7 @@ L_mlkem_invntt_loop_321: #else bfi r4, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulbb r12, r11, r5 smulbt r5, r11, r5 @@ -7325,7 +7325,7 @@ L_mlkem_invntt_loop_321: #else bfi r5, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulbb r12, r11, r6 smulbt r6, r11, r6 @@ -7408,7 +7408,7 @@ L_mlkem_invntt_loop_321: #else bfi r6, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulbb r12, r11, r7 smulbt r7, r11, r7 @@ -7491,7 +7491,7 @@ L_mlkem_invntt_loop_321: #else bfi r7, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulbb r12, r11, r8 smulbt r8, r11, r8 @@ -7574,7 +7574,7 @@ L_mlkem_invntt_loop_321: #else bfi r8, r12, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) smulbb r12, r11, r9 smulbt r9, r11, r9 @@ -7657,7 +7657,7 @@ L_mlkem_invntt_loop_321: #else bfi r9, r12, 
#0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ str r2, [r0] str r3, [r0, #64] str r4, [r0, #128] @@ -7827,7 +7827,7 @@ mlkem_arm32_basemul_mont: #else movt r12, #0xcff #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ mov r8, #0 L_mlkem_basemul_mont_loop: ldm r1!, {r4, r5} @@ -8078,7 +8078,7 @@ L_mlkem_basemul_mont_loop: #endif orr r4, r9, r8, lsr #16 orr r5, r11, r10, lsr #16 -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ stm r0!, {r4, r5} pop {r8} bne L_mlkem_basemul_mont_loop @@ -8105,7 +8105,7 @@ mlkem_arm32_basemul_mont_add: #else movt r12, #0xcff #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ mov r8, #0 L_mlkem_arm32_basemul_mont_add_loop: ldm r1!, {r4, r5} @@ -8390,7 +8390,7 @@ L_mlkem_arm32_basemul_mont_add_loop: #else bfi r5, r10, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ stm r0!, {r4, r5} pop {r8} bne L_mlkem_arm32_basemul_mont_add_loop @@ -8421,7 +8421,7 @@ mlkem_arm32_csubq: #else movt lr, #0xd01 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ mov r11, #0x8000 #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) orr r11, r11, #0x80000000 @@ -8561,7 +8561,7 @@ L_mlkem_arm32_csubq_loop: #else bfi r5, r10, #0, #16 #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ stm r0!, {r2, r3, r4, r5} subs r1, r1, #8 bne L_mlkem_arm32_csubq_loop diff --git a/wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c index 0b80ee75316..721fc2b3ba2 100644 --- a/wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c +++ 
b/wolfcrypt/src/port/arm/armv8-32-mlkem-asm_c.c @@ -118,7 +118,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "movt r10, #0xcff\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "mov r2, #16\n\t" "\n" "L_mlkem_arm32_ntt_loop_123_%=: \n\t" @@ -222,7 +222,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r2, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulbb r12, r11, r7\n\t" "smulbt r7, r11, r7\n\t" @@ -313,7 +313,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r3, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulbb r12, r11, r8\n\t" "smulbt r8, r11, r8\n\t" @@ -404,7 +404,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r4, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulbb r12, r11, r9\n\t" "smulbt r9, r11, r9\n\t" @@ -495,7 +495,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r5, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "ldr r11, [r1, #4]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulbb r12, r11, r4\n\t" @@ -587,7 +587,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r2, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulbb r12, r11, r5\n\t" "smulbt r5, r11, 
r5\n\t" @@ -678,7 +678,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r3, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smultb r12, r11, r8\n\t" "smultt r8, r11, r8\n\t" @@ -768,7 +768,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r6, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smultb r12, r11, r9\n\t" "smultt r9, r11, r9\n\t" @@ -858,7 +858,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r7, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "ldr r11, [r1, #8]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulbb r12, r11, r3\n\t" @@ -950,7 +950,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r2, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smultb r12, r11, r5\n\t" "smultt r5, r11, r5\n\t" @@ -1040,7 +1040,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r4, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "ldr r11, [r1, #12]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulbb r12, r11, r7\n\t" @@ -1132,7 +1132,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r6, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smultb r12, r11, r9\n\t" 
"smultt r9, r11, r9\n\t" @@ -1222,7 +1222,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r8, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "str r2, [%[r]]\n\t" "str r3, [%[r], #64]\n\t" "str r4, [%[r], #128]\n\t" @@ -1344,7 +1344,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r2, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulbb r12, r11, r5\n\t" "smulbt r5, r11, r5\n\t" @@ -1435,7 +1435,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r3, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smultb r12, r11, r8\n\t" "smultt r8, r11, r8\n\t" @@ -1525,7 +1525,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r6, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smultb r12, r11, r9\n\t" "smultt r9, r11, r9\n\t" @@ -1615,7 +1615,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r7, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "str r2, [%[r]]\n\t" "str r3, [%[r], #16]\n\t" "str r4, [%[r], #32]\n\t" @@ -1741,7 +1741,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r2, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulbb r12, r11, r7\n\t" "smulbt r7, r11, r7\n\t" @@ -1832,7 +1832,7 @@ 
WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r3, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulbb r12, r11, r8\n\t" "smulbt r8, r11, r8\n\t" @@ -1923,7 +1923,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r4, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulbb r12, r11, r9\n\t" "smulbt r9, r11, r9\n\t" @@ -2014,7 +2014,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r5, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "ldr r11, [sp, #4]\n\t" "add r11, r1, r11, lsr #2\n\t" "ldr r11, [r11, #64]\n\t" @@ -2108,7 +2108,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r2, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulbb r12, r11, r5\n\t" "smulbt r5, r11, r5\n\t" @@ -2199,7 +2199,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r3, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smultb r12, r11, r8\n\t" "smultt r8, r11, r8\n\t" @@ -2289,7 +2289,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r6, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smultb r12, r11, r9\n\t" "smultt r9, r11, r9\n\t" @@ -2379,7 +2379,7 @@ 
WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r7, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "ldr r11, [sp, #4]\n\t" "add r11, r1, r11, lsr #1\n\t" "ldr r11, [r11, #128]\n\t" @@ -2473,7 +2473,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r2, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smultb r12, r11, r5\n\t" "smultt r5, r11, r5\n\t" @@ -2563,7 +2563,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r4, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "ldr r11, [sp, #4]\n\t" "add r11, r1, r11, lsr #1\n\t" "ldr r11, [r11, #132]\n\t" @@ -2657,7 +2657,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r6, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smultb r12, r11, r9\n\t" "smultt r9, r11, r9\n\t" @@ -2747,7 +2747,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r8, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r11, #0xc0\n\t" @@ -2773,7 +2773,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "mov r10, #0xd01\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulwb r12, r11, r2\n\t" "smulwt lr, r11, r2\n\t" @@ -2809,7 +2809,7 @@ 
WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r2, lr, #16, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulwb r12, r11, r3\n\t" "smulwt lr, r11, r3\n\t" @@ -2845,7 +2845,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r3, lr, #16, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulwb r12, r11, r4\n\t" "smulwt lr, r11, r4\n\t" @@ -2881,7 +2881,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r4, lr, #16, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulwb r12, r11, r5\n\t" "smulwt lr, r11, r5\n\t" @@ -2917,7 +2917,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r5, lr, #16, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulwb r12, r11, r6\n\t" "smulwt lr, r11, r6\n\t" @@ -2953,7 +2953,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r6, lr, #16, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulwb r12, r11, r7\n\t" "smulwt lr, r11, r7\n\t" @@ -2989,7 +2989,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r7, lr, #16, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulwb r12, r11, r8\n\t" "smulwt lr, r11, r8\n\t" 
@@ -3025,7 +3025,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r8, lr, #16, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulwb r12, r11, r9\n\t" "smulwt lr, r11, r9\n\t" @@ -3061,7 +3061,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "bfi r9, lr, #16, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r10, #0x1\n\t" @@ -3075,7 +3075,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_ntt(sword16* r) #else "movt r10, #0xcff\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "str r2, [%[r]]\n\t" "str r3, [%[r], #4]\n\t" "str r4, [%[r], #8]\n\t" @@ -3170,7 +3170,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "movt r10, #0xcff\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "mov r3, #0\n\t" "\n" "L_mlkem_invntt_loop_765_%=: \n\t" @@ -3301,7 +3301,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r3, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r12, r4, r5\n\t" "sadd16 r4, r4, r5\n\t" @@ -3417,7 +3417,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r5, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "ldr r11, [sp, #4]\n\t" "add r11, r1, r11, lsr #1\n\t" "ldr r11, [r11, #4]\n\t" @@ -3537,7 +3537,7 @@ WC_OMIT_FRAME_POINTER void 
mlkem_arm32_invntt(sword16* r) #else "bfi r7, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r12, r8, r9\n\t" "sadd16 r8, r8, r9\n\t" @@ -3653,7 +3653,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r9, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "ldr r11, [sp, #4]\n\t" "add r11, r1, r11, lsr #2\n\t" "ldr r11, [r11, #128]\n\t" @@ -3773,7 +3773,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r4, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r12, r3, r5\n\t" "sadd16 r3, r3, r5\n\t" @@ -3890,7 +3890,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r5, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r12, r6, r8\n\t" "sadd16 r6, r6, r8\n\t" @@ -4006,7 +4006,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r8, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r12, r7, r9\n\t" "sadd16 r7, r7, r9\n\t" @@ -4122,7 +4122,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r9, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "ldr r11, [sp, #4]\n\t" "add r11, r1, r11, lsr #3\n\t" "ldr r11, [r11, #192]\n\t" @@ -4242,7 +4242,7 @@ WC_OMIT_FRAME_POINTER void 
mlkem_arm32_invntt(sword16* r) #else "bfi r6, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r12, r3, r7\n\t" "sadd16 r3, r3, r7\n\t" @@ -4359,7 +4359,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r7, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r12, r4, r8\n\t" "sadd16 r4, r4, r8\n\t" @@ -4476,7 +4476,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r8, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r12, r5, r9\n\t" "sadd16 r5, r5, r9\n\t" @@ -4593,7 +4593,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r9, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r11, #0xc0\n\t" @@ -4613,7 +4613,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "mov r11, #0x4ebf\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulwb r12, r11, r2\n\t" "smulwt lr, r11, r2\n\t" @@ -4649,7 +4649,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r2, lr, #16, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulwb r12, r11, r3\n\t" "smulwt lr, r11, 
r3\n\t" @@ -4685,7 +4685,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r3, lr, #16, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulwb r12, r11, r4\n\t" "smulwt lr, r11, r4\n\t" @@ -4721,7 +4721,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r4, lr, #16, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulwb r12, r11, r5\n\t" "smulwt lr, r11, r5\n\t" @@ -4757,7 +4757,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r5, lr, #16, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "str r2, [%[r]]\n\t" "str r3, [%[r], #4]\n\t" "str r4, [%[r], #8]\n\t" @@ -4906,7 +4906,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r4, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r12, r3, r5\n\t" "sadd16 r3, r3, r5\n\t" @@ -5023,7 +5023,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r5, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r12, r6, r8\n\t" "sadd16 r6, r6, r8\n\t" @@ -5139,7 +5139,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r8, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r12, r7, r9\n\t" "sadd16 r7, r7, 
r9\n\t" @@ -5255,7 +5255,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r9, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "str r2, [%[r]]\n\t" "str r3, [%[r], #16]\n\t" "str r4, [%[r], #32]\n\t" @@ -5407,7 +5407,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r3, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r12, r4, r5\n\t" "sadd16 r4, r4, r5\n\t" @@ -5523,7 +5523,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r5, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "ldr r11, [r1, #244]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r12, r6, r7\n\t" @@ -5641,7 +5641,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r7, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r12, r8, r9\n\t" "sadd16 r8, r8, r9\n\t" @@ -5757,7 +5757,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r9, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "ldr r11, [r1, #248]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r12, r2, r4\n\t" @@ -5875,7 +5875,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r4, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r12, r3, r5\n\t" "sadd16 r3, r3, 
r5\n\t" @@ -5992,7 +5992,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r5, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r12, r6, r8\n\t" "sadd16 r6, r6, r8\n\t" @@ -6108,7 +6108,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r8, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r12, r7, r9\n\t" "sadd16 r7, r7, r9\n\t" @@ -6224,7 +6224,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r9, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "mov r11, #0xc0\n\t" @@ -6244,7 +6244,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "mov r11, #0x4ebf\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulwb r12, r11, r2\n\t" "smulwt lr, r11, r2\n\t" @@ -6280,7 +6280,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r2, lr, #16, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulwb r12, r11, r3\n\t" "smulwt lr, r11, r3\n\t" @@ -6316,7 +6316,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r3, lr, #16, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && 
(WOLFSSL_ARM_ARCH >= 6) "smulwb r12, r11, r4\n\t" "smulwt lr, r11, r4\n\t" @@ -6352,7 +6352,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r4, lr, #16, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulwb r12, r11, r5\n\t" "smulwt lr, r11, r5\n\t" @@ -6388,7 +6388,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r5, lr, #16, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "ldr r11, [r1, #252]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r12, r2, r6\n\t" @@ -6506,7 +6506,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r6, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r12, r3, r7\n\t" "sadd16 r3, r3, r7\n\t" @@ -6623,7 +6623,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r7, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r12, r4, r8\n\t" "sadd16 r4, r4, r8\n\t" @@ -6740,7 +6740,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r8, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "ssub16 r12, r5, r9\n\t" "sadd16 r5, r5, r9\n\t" @@ -6857,7 +6857,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r9, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 
*/ "ldr r11, [r1, #254]\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulbb r12, r11, r2\n\t" @@ -6941,7 +6941,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r2, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulbb r12, r11, r3\n\t" "smulbt r3, r11, r3\n\t" @@ -7024,7 +7024,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r3, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulbb r12, r11, r4\n\t" "smulbt r4, r11, r4\n\t" @@ -7107,7 +7107,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r4, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulbb r12, r11, r5\n\t" "smulbt r5, r11, r5\n\t" @@ -7190,7 +7190,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r5, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulbb r12, r11, r6\n\t" "smulbt r6, r11, r6\n\t" @@ -7273,7 +7273,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r6, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulbb r12, r11, r7\n\t" "smulbt r7, r11, r7\n\t" @@ -7356,7 +7356,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r7, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* 
WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulbb r12, r11, r8\n\t" "smulbt r8, r11, r8\n\t" @@ -7439,7 +7439,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r8, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH >= 6) "smulbb r12, r11, r9\n\t" "smulbt r9, r11, r9\n\t" @@ -7522,7 +7522,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_invntt(sword16* r) #else "bfi r9, r12, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "str r2, [%[r]]\n\t" "str r3, [%[r], #64]\n\t" "str r4, [%[r], #128]\n\t" @@ -7620,7 +7620,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_basemul_mont(sword16* r, #else "movt r12, #0xcff\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "mov r8, #0\n\t" "\n" "L_mlkem_basemul_mont_loop_%=: \n\t" @@ -7872,7 +7872,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_basemul_mont(sword16* r, #endif "orr r4, r9, r8, lsr #16\n\t" "orr r5, r11, r10, lsr #16\n\t" -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "stm %[r]!, {r4, r5}\n\t" "pop {r8}\n\t" "bne L_mlkem_basemul_mont_loop_%=\n\t" @@ -7925,7 +7925,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_basemul_mont_add(sword16* r, #else "movt r12, #0xcff\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "mov r8, #0\n\t" "\n" "L_mlkem_arm32_basemul_mont_add_loop_%=: \n\t" @@ -8211,7 +8211,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_basemul_mont_add(sword16* r, #else "bfi r5, r10, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "stm %[r]!, 
{r4, r5}\n\t" "pop {r8}\n\t" "bne L_mlkem_arm32_basemul_mont_add_loop_%=\n\t" @@ -8264,7 +8264,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_csubq(sword16* p) #else "movt lr, #0xd01\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "mov r11, #0x8000\n\t" #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 7) "orr r11, r11, #0x80000000\n\t" @@ -8405,7 +8405,7 @@ WC_OMIT_FRAME_POINTER void mlkem_arm32_csubq(sword16* p) #else "bfi r5, r10, #0, #16\n\t" #endif -#endif /* WOLFSLS_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ +#endif /* WOLFSSL_ARM_ARCH && WOLFSSL_ARM_ARCH >= 6 */ "stm %[p]!, {r2, r3, r4, r5}\n\t" "subs r1, r1, #8\n\t" "bne L_mlkem_arm32_csubq_loop_%=\n\t" diff --git a/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S b/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S index 9a7e11b4be1..701adb24136 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S +++ b/wolfcrypt/src/port/arm/armv8-32-sha3-asm.S @@ -33,10 +33,10 @@ #ifdef WOLFSSL_SHA3 #ifndef WOLFSSL_ARMASM_NO_NEON .text - .type L_sha3_arm2_neon_rt, %object - .size L_sha3_arm2_neon_rt, 192 + .type L_sha3_arm32_neon_rt, %object + .size L_sha3_arm32_neon_rt, 192 .align 4 -L_sha3_arm2_neon_rt: +L_sha3_arm32_neon_rt: .word 0x1 .word 0x0 .word 0x8082 @@ -92,7 +92,7 @@ L_sha3_arm2_neon_rt: BlockSha3: vpush {d8-d15} sub sp, sp, #16 - adr r1, L_sha3_arm2_neon_rt + adr r1, L_sha3_arm32_neon_rt mov r2, #24 mov r3, sp vld1.8 {d0-d3}, [r0]! 
@@ -354,10 +354,10 @@ L_sha3_arm32_neon_begin: #endif /* WOLFSSL_ARMASM_NO_NEON */ #ifdef WOLFSSL_ARMASM_NO_NEON .text - .type L_sha3_arm2_rt, %object - .size L_sha3_arm2_rt, 192 + .type L_sha3_arm32_rt, %object + .size L_sha3_arm32_rt, 192 .align 4 -L_sha3_arm2_rt: +L_sha3_arm32_rt: .word 0x1 .word 0x0 .word 0x8082 @@ -413,7 +413,7 @@ L_sha3_arm2_rt: BlockSha3: push {r4, r5, r6, r7, r8, r9, r10, r11, lr} sub sp, sp, #0xcc - adr r1, L_sha3_arm2_rt + adr r1, L_sha3_arm32_rt mov r2, #12 L_sha3_arm32_begin: str r2, [sp, #200] diff --git a/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c b/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c index fb6fa39cfb9..dd191f26e1f 100644 --- a/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-sha3-asm_c.c @@ -51,7 +51,7 @@ #ifdef WOLFSSL_SHA3 #ifndef WOLFSSL_ARMASM_NO_NEON -static const word64 L_sha3_arm2_neon_rt[] = { +static const word64 L_sha3_arm32_neon_rt[] = { 0x0000000000000001UL, 0x0000000000008082UL, 0x800000000000808aUL, 0x8000000080008000UL, 0x000000000000808bUL, 0x0000000080000001UL, @@ -76,15 +76,15 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state) { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG register word64* state asm ("r0") = (word64*)state_p; - register word64* L_sha3_arm2_neon_rt_c asm ("r1") = - (word64*)&L_sha3_arm2_neon_rt; + register word64* L_sha3_arm32_neon_rt_c asm ("r1") = + (word64*)&L_sha3_arm32_neon_rt; #else - register word64* L_sha3_arm2_neon_rt_c = (word64*)&L_sha3_arm2_neon_rt; + register word64* L_sha3_arm32_neon_rt_c = (word64*)&L_sha3_arm32_neon_rt; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #16\n\t" - "mov r1, %[L_sha3_arm2_neon_rt]\n\t" + "mov r1, %[L_sha3_arm32_neon_rt]\n\t" "mov r2, #24\n\t" "mov r3, sp\n\t" "vld1.8 {d0-d3}, [%[state]]!\n\t" @@ -343,12 +343,12 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state) "add sp, sp, #16\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [state] "+r" (state), - [L_sha3_arm2_neon_rt] "+r" 
(L_sha3_arm2_neon_rt_c) + [L_sha3_arm32_neon_rt] "+r" (L_sha3_arm32_neon_rt_c) : #else : : [state] "r" (state), - [L_sha3_arm2_neon_rt] "r" (L_sha3_arm2_neon_rt_c) + [L_sha3_arm32_neon_rt] "r" (L_sha3_arm32_neon_rt_c) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", "d16", @@ -359,7 +359,7 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state) #endif /* WOLFSSL_ARMASM_NO_NEON */ #ifdef WOLFSSL_ARMASM_NO_NEON -static const word64 L_sha3_arm2_rt[] = { +static const word64 L_sha3_arm32_rt[] = { 0x0000000000000001UL, 0x0000000000008082UL, 0x800000000000808aUL, 0x8000000080008000UL, 0x000000000000808bUL, 0x0000000080000001UL, @@ -384,14 +384,14 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state) { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG register word64* state asm ("r0") = (word64*)state_p; - register word64* L_sha3_arm2_rt_c asm ("r1") = (word64*)&L_sha3_arm2_rt; + register word64* L_sha3_arm32_rt_c asm ("r1") = (word64*)&L_sha3_arm32_rt; #else - register word64* L_sha3_arm2_rt_c = (word64*)&L_sha3_arm2_rt; + register word64* L_sha3_arm32_rt_c = (word64*)&L_sha3_arm32_rt; #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ __asm__ __volatile__ ( "sub sp, sp, #0xcc\n\t" - "mov r1, %[L_sha3_arm2_rt]\n\t" + "mov r1, %[L_sha3_arm32_rt]\n\t" "mov r2, #12\n\t" "\n" "L_sha3_arm32_begin_%=: \n\t" @@ -2355,11 +2355,11 @@ WC_OMIT_FRAME_POINTER void BlockSha3(word64* state) "bne L_sha3_arm32_begin_%=\n\t" "add sp, sp, #0xcc\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG - : [state] "+r" (state), [L_sha3_arm2_rt] "+r" (L_sha3_arm2_rt_c) + : [state] "+r" (state), [L_sha3_arm32_rt] "+r" (L_sha3_arm32_rt_c) : #else : - : [state] "r" (state), [L_sha3_arm2_rt] "r" (L_sha3_arm2_rt_c) + : [state] "r" (state), [L_sha3_arm32_rt] "r" (L_sha3_arm32_rt_c) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ : "memory", "cc", "r2", "r3", "r12", "lr", "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11" diff --git 
a/wolfcrypt/src/port/arm/armv8-chacha-asm.S b/wolfcrypt/src/port/arm/armv8-chacha-asm.S index f100572a171..1bc3a294e8d 100644 --- a/wolfcrypt/src/port/arm/armv8-chacha-asm.S +++ b/wolfcrypt/src/port/arm/armv8-chacha-asm.S @@ -961,14 +961,14 @@ L_chacha_crypt_bytes_arm64_lt_16: beq L_chacha_crypt_bytes_arm64_done L_chacha_crypt_bytes_arm64_lt_8: mov x5, v0.d[0] -L_chacha_crypt_bytes_arm64loop_lt_8: +L_chacha_crypt_bytes_arm64_loop_lt_8: # Encipher 1 byte at a time ldrb w6, [x2], #1 eor w6, w6, w5 strb w6, [x1], #1 subs x3, x3, #1 lsr x5, x5, #8 - bgt L_chacha_crypt_bytes_arm64loop_lt_8 + bgt L_chacha_crypt_bytes_arm64_loop_lt_8 L_chacha_crypt_bytes_arm64_done: L_chacha_crypt_bytes_arm64_done_all: st1 {v16.4s, v17.4s, v18.4s, v19.4s}, [x0] diff --git a/wolfcrypt/src/port/arm/armv8-chacha-asm_c.c b/wolfcrypt/src/port/arm/armv8-chacha-asm_c.c index 80d35cba214..e9720680f27 100644 --- a/wolfcrypt/src/port/arm/armv8-chacha-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-chacha-asm_c.c @@ -918,14 +918,14 @@ void wc_chacha_crypt_bytes(ChaCha* ctx, byte* c, const byte* m, word32 len) "L_chacha_crypt_bytes_arm64_lt_8_%=: \n\t" "mov %[rol8], v0.d[0]\n\t" "\n" - "L_chacha_crypt_bytes_arm64loop_lt_8_%=: \n\t" + "L_chacha_crypt_bytes_arm64_loop_lt_8_%=: \n\t" /* Encipher 1 byte at a time */ "ldrb %w[ctr], [%x[m]], #1\n\t" "eor %w[ctr], %w[ctr], %w[rol8]\n\t" "strb %w[ctr], [%x[c]], #1\n\t" "subs %w[len], %w[len], #1\n\t" "lsr %[rol8], %[rol8], #8\n\t" - "b.gt L_chacha_crypt_bytes_arm64loop_lt_8_%=\n\t" + "b.gt L_chacha_crypt_bytes_arm64_loop_lt_8_%=\n\t" "\n" "L_chacha_crypt_bytes_arm64_done_%=: \n\t" "\n" diff --git a/wolfcrypt/src/port/arm/thumb2-aes-asm.S b/wolfcrypt/src/port/arm/thumb2-aes-asm.S index a13c9ee6700..ceea6793caf 100644 --- a/wolfcrypt/src/port/arm/thumb2-aes-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-aes-asm.S @@ -2188,7 +2188,7 @@ L_AES_CBC_encrypt_block_nr_256: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 -#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */ +#endif /* 
!WOLFSSL_ARMASM_AES_BLOCK_INLINE */ POP {r1, r2, lr} LDR r3, [sp] REV r4, r4 @@ -2444,7 +2444,7 @@ L_AES_CBC_encrypt_block_nr_192: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 -#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */ +#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */ POP {r1, r2, lr} LDR r3, [sp] REV r4, r4 @@ -2700,7 +2700,7 @@ L_AES_CBC_encrypt_block_nr_128: EOR r5, r5, r9 EOR r6, r6, r10 EOR r7, r7, r11 -#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */ +#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */ POP {r1, r2, lr} LDR r3, [sp] REV r4, r4 diff --git a/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c b/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c index cbd9833d21d..fd89d3973da 100644 --- a/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-aes-asm_c.c @@ -2062,7 +2062,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" -#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */ +#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */ "POP {r1, %[len], lr}\n\t" "LDR %[ks], [sp]\n\t" "REV r4, r4\n\t" @@ -2339,7 +2339,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" -#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */ +#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */ "POP {r1, %[len], lr}\n\t" "LDR %[ks], [sp]\n\t" "REV r4, r4\n\t" @@ -2616,7 +2616,7 @@ WC_OMIT_FRAME_POINTER void AES_CBC_encrypt(const unsigned char* in, "EOR r5, r5, r9\n\t" "EOR r6, r6, r10\n\t" "EOR r7, r7, r11\n\t" -#endif /* WOLFSSL_ARMASM_AES_BLOCK_INLINE */ +#endif /* !WOLFSSL_ARMASM_AES_BLOCK_INLINE */ "POP {r1, %[len], lr}\n\t" "LDR %[ks], [sp]\n\t" "REV r4, r4\n\t" diff --git a/wolfcrypt/src/port/arm/thumb2-chacha-asm.S b/wolfcrypt/src/port/arm/thumb2-chacha-asm.S index ca07434d138..be046d02bb2 100644 --- a/wolfcrypt/src/port/arm/thumb2-chacha-asm.S +++ b/wolfcrypt/src/port/arm/thumb2-chacha-asm.S @@ -92,9 +92,9 @@ wc_chacha_setkey: STM 
r0!, {r3, r4, r5, r6} /* Next 16 bytes of key. */ #if defined(__GNUC__) || defined(__ICCARM__) || defined(__IAR_SYSTEMS_ICC__) - BEQ L_chacha_thumb2_setkey_same_keyb_ytes + BEQ L_chacha_thumb2_setkey_same_key_bytes #else - BEQ.N L_chacha_thumb2_setkey_same_keyb_ytes + BEQ.N L_chacha_thumb2_setkey_same_key_bytes #endif /* Update key pointer for next 16 bytes. */ ADD r1, r1, r2 @@ -102,7 +102,7 @@ wc_chacha_setkey: LDR r4, [r1, #4] LDR r5, [r1, #8] LDR r6, [r1, #12] -L_chacha_thumb2_setkey_same_keyb_ytes: +L_chacha_thumb2_setkey_same_key_bytes: STM r0, {r3, r4, r5, r6} POP {r4, r5, r6, r7, pc} /* Cycle Count = 60 */ diff --git a/wolfcrypt/src/port/arm/thumb2-chacha-asm_c.c b/wolfcrypt/src/port/arm/thumb2-chacha-asm_c.c index 147c07d593a..4d2627524bc 100644 --- a/wolfcrypt/src/port/arm/thumb2-chacha-asm_c.c +++ b/wolfcrypt/src/port/arm/thumb2-chacha-asm_c.c @@ -124,11 +124,11 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x, const byte* key, "STM %[x]!, {r3, r4, r5, r6}\n\t" /* Next 16 bytes of key. */ #if defined(__GNUC__) - "BEQ L_chacha_thumb2_setkey_same_keyb_ytes_%=\n\t" + "BEQ L_chacha_thumb2_setkey_same_key_bytes_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BEQ.N L_chacha_thumb2_setkey_same_keyb_ytes\n\t" + "BEQ.N L_chacha_thumb2_setkey_same_key_bytes\n\t" #else - "BEQ.N L_chacha_thumb2_setkey_same_keyb_ytes_%=\n\t" + "BEQ.N L_chacha_thumb2_setkey_same_key_bytes_%=\n\t" #endif /* Update key pointer for next 16 bytes. 
*/ "ADD %[key], %[key], %[keySz]\n\t" @@ -138,9 +138,9 @@ WC_OMIT_FRAME_POINTER void wc_chacha_setkey(word32* x, const byte* key, "LDR r6, [%[key], #12]\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_chacha_thumb2_setkey_same_keyb_ytes:\n\t" + "L_chacha_thumb2_setkey_same_key_bytes:\n\t" #else - "L_chacha_thumb2_setkey_same_keyb_ytes_%=:\n\t" + "L_chacha_thumb2_setkey_same_key_bytes_%=:\n\t" #endif "STM %[x], {r3, r4, r5, r6}\n\t" : [x] "+r" (x), [key] "+r" (key), [keySz] "+r" (keySz), diff --git a/wolfcrypt/src/sha512_asm.S b/wolfcrypt/src/sha512_asm.S index 4e05e40770c..00042f95c33 100644 --- a/wolfcrypt/src/sha512_asm.S +++ b/wolfcrypt/src/sha512_asm.S @@ -269,7 +269,7 @@ L_transform_sha512_avx1_start: movq %r10, %rax addq %rcx, %r14 vpaddq %xmm0, %xmm8, %xmm0 - # msg_sched done: 0-3 + # msg_sched done: 0-1 # msg_sched: 2-3 # rnd_0: 0 - 0 rorq $23, %rax @@ -360,7 +360,7 @@ L_transform_sha512_avx1_start: movq %r8, %rax addq %rcx, %r12 vpaddq %xmm1, %xmm8, %xmm1 - # msg_sched done: 2-5 + # msg_sched done: 2-3 # msg_sched: 4-5 # rnd_0: 0 - 0 rorq $23, %rax @@ -451,7 +451,7 @@ L_transform_sha512_avx1_start: movq %r14, %rax addq %rcx, %r10 vpaddq %xmm2, %xmm8, %xmm2 - # msg_sched done: 4-7 + # msg_sched done: 4-5 # msg_sched: 6-7 # rnd_0: 0 - 0 rorq $23, %rax @@ -542,7 +542,7 @@ L_transform_sha512_avx1_start: movq %r12, %rax addq %rcx, %r8 vpaddq %xmm3, %xmm8, %xmm3 - # msg_sched done: 6-9 + # msg_sched done: 6-7 # msg_sched: 8-9 # rnd_0: 0 - 0 rorq $23, %rax @@ -633,7 +633,7 @@ L_transform_sha512_avx1_start: movq %r10, %rax addq %rcx, %r14 vpaddq %xmm4, %xmm8, %xmm4 - # msg_sched done: 8-11 + # msg_sched done: 8-9 # msg_sched: 10-11 # rnd_0: 0 - 0 rorq $23, %rax @@ -724,7 +724,7 @@ L_transform_sha512_avx1_start: movq %r8, %rax addq %rcx, %r12 vpaddq %xmm5, %xmm8, %xmm5 - # msg_sched done: 10-13 + # msg_sched done: 10-11 # msg_sched: 12-13 # rnd_0: 0 - 0 rorq $23, %rax @@ -815,7 +815,7 @@ L_transform_sha512_avx1_start: movq %r14, %rax addq 
%rcx, %r10 vpaddq %xmm6, %xmm8, %xmm6 - # msg_sched done: 12-15 + # msg_sched done: 12-13 # msg_sched: 14-15 # rnd_0: 0 - 0 rorq $23, %rax @@ -906,7 +906,7 @@ L_transform_sha512_avx1_start: movq %r12, %rax addq %rcx, %r8 vpaddq %xmm7, %xmm8, %xmm7 - # msg_sched done: 14-17 + # msg_sched done: 14-15 subl $0x01, 128(%rsp) jne L_transform_sha512_avx1_start vpaddq (%rsi), %xmm0, %xmm8 @@ -1547,7 +1547,7 @@ L_sha512_len_avx1_start: movq %r10, %rax addq %rcx, %r14 vpaddq %xmm0, %xmm8, %xmm0 - # msg_sched done: 0-3 + # msg_sched done: 0-1 # msg_sched: 2-3 # rnd_0: 0 - 0 rorq $23, %rax @@ -1638,7 +1638,7 @@ L_sha512_len_avx1_start: movq %r8, %rax addq %rcx, %r12 vpaddq %xmm1, %xmm8, %xmm1 - # msg_sched done: 2-5 + # msg_sched done: 2-3 # msg_sched: 4-5 # rnd_0: 0 - 0 rorq $23, %rax @@ -1729,7 +1729,7 @@ L_sha512_len_avx1_start: movq %r14, %rax addq %rcx, %r10 vpaddq %xmm2, %xmm8, %xmm2 - # msg_sched done: 4-7 + # msg_sched done: 4-5 # msg_sched: 6-7 # rnd_0: 0 - 0 rorq $23, %rax @@ -1820,7 +1820,7 @@ L_sha512_len_avx1_start: movq %r12, %rax addq %rcx, %r8 vpaddq %xmm3, %xmm8, %xmm3 - # msg_sched done: 6-9 + # msg_sched done: 6-7 # msg_sched: 8-9 # rnd_0: 0 - 0 rorq $23, %rax @@ -1911,7 +1911,7 @@ L_sha512_len_avx1_start: movq %r10, %rax addq %rcx, %r14 vpaddq %xmm4, %xmm8, %xmm4 - # msg_sched done: 8-11 + # msg_sched done: 8-9 # msg_sched: 10-11 # rnd_0: 0 - 0 rorq $23, %rax @@ -2002,7 +2002,7 @@ L_sha512_len_avx1_start: movq %r8, %rax addq %rcx, %r12 vpaddq %xmm5, %xmm8, %xmm5 - # msg_sched done: 10-13 + # msg_sched done: 10-11 # msg_sched: 12-13 # rnd_0: 0 - 0 rorq $23, %rax @@ -2093,7 +2093,7 @@ L_sha512_len_avx1_start: movq %r14, %rax addq %rcx, %r10 vpaddq %xmm6, %xmm8, %xmm6 - # msg_sched done: 12-15 + # msg_sched done: 12-13 # msg_sched: 14-15 # rnd_0: 0 - 0 rorq $23, %rax @@ -2184,7 +2184,7 @@ L_sha512_len_avx1_start: movq %r12, %rax addq %rcx, %r8 vpaddq %xmm7, %xmm8, %xmm7 - # msg_sched done: 14-17 + # msg_sched done: 14-15 movq 136(%rsp), %rdx vpaddq (%rdx), 
%xmm0, %xmm8 vpaddq 16(%rdx), %xmm1, %xmm9 @@ -2892,7 +2892,7 @@ L_transform_sha512_avx1_rorx_start: addq %rax, %r14 xorq %r8, %rdx vpaddq %xmm0, %xmm8, %xmm0 - # msg_sched done: 0-3 + # msg_sched done: 0-1 # msg_sched: 2-3 # rnd_0: 0 - 0 rorxq $14, %r10, %rax @@ -2978,7 +2978,7 @@ L_transform_sha512_avx1_rorx_start: addq %rax, %r12 xorq %r14, %rdx vpaddq %xmm1, %xmm8, %xmm1 - # msg_sched done: 2-5 + # msg_sched done: 2-3 # msg_sched: 4-5 # rnd_0: 0 - 0 rorxq $14, %r8, %rax @@ -3064,7 +3064,7 @@ L_transform_sha512_avx1_rorx_start: addq %rax, %r10 xorq %r12, %rdx vpaddq %xmm2, %xmm8, %xmm2 - # msg_sched done: 4-7 + # msg_sched done: 4-5 # msg_sched: 6-7 # rnd_0: 0 - 0 rorxq $14, %r14, %rax @@ -3150,7 +3150,7 @@ L_transform_sha512_avx1_rorx_start: addq %rax, %r8 xorq %r10, %rdx vpaddq %xmm3, %xmm8, %xmm3 - # msg_sched done: 6-9 + # msg_sched done: 6-7 # msg_sched: 8-9 # rnd_0: 0 - 0 rorxq $14, %r12, %rax @@ -3236,7 +3236,7 @@ L_transform_sha512_avx1_rorx_start: addq %rax, %r14 xorq %r8, %rdx vpaddq %xmm4, %xmm8, %xmm4 - # msg_sched done: 8-11 + # msg_sched done: 8-9 # msg_sched: 10-11 # rnd_0: 0 - 0 rorxq $14, %r10, %rax @@ -3322,7 +3322,7 @@ L_transform_sha512_avx1_rorx_start: addq %rax, %r12 xorq %r14, %rdx vpaddq %xmm5, %xmm8, %xmm5 - # msg_sched done: 10-13 + # msg_sched done: 10-11 # msg_sched: 12-13 # rnd_0: 0 - 0 rorxq $14, %r8, %rax @@ -3408,7 +3408,7 @@ L_transform_sha512_avx1_rorx_start: addq %rax, %r10 xorq %r12, %rdx vpaddq %xmm6, %xmm8, %xmm6 - # msg_sched done: 12-15 + # msg_sched done: 12-13 # msg_sched: 14-15 # rnd_0: 0 - 0 rorxq $14, %r14, %rax @@ -3494,7 +3494,7 @@ L_transform_sha512_avx1_rorx_start: addq %rax, %r8 xorq %r10, %rdx vpaddq %xmm7, %xmm8, %xmm7 - # msg_sched done: 14-17 + # msg_sched done: 14-15 vpaddq (%rsi), %xmm0, %xmm8 vpaddq 16(%rsi), %xmm1, %xmm9 vmovdqu %xmm8, (%rsp) @@ -4099,7 +4099,7 @@ L_sha512_len_avx1_rorx_start: addq %rax, %r14 xorq %r8, %rdx vpaddq %xmm0, %xmm8, %xmm0 - # msg_sched done: 0-3 + # msg_sched done: 0-1 # 
msg_sched: 2-3 # rnd_0: 0 - 0 rorxq $14, %r10, %rax @@ -4185,7 +4185,7 @@ L_sha512_len_avx1_rorx_start: addq %rax, %r12 xorq %r14, %rdx vpaddq %xmm1, %xmm8, %xmm1 - # msg_sched done: 2-5 + # msg_sched done: 2-3 # msg_sched: 4-5 # rnd_0: 0 - 0 rorxq $14, %r8, %rax @@ -4271,7 +4271,7 @@ L_sha512_len_avx1_rorx_start: addq %rax, %r10 xorq %r12, %rdx vpaddq %xmm2, %xmm8, %xmm2 - # msg_sched done: 4-7 + # msg_sched done: 4-5 # msg_sched: 6-7 # rnd_0: 0 - 0 rorxq $14, %r14, %rax @@ -4357,7 +4357,7 @@ L_sha512_len_avx1_rorx_start: addq %rax, %r8 xorq %r10, %rdx vpaddq %xmm3, %xmm8, %xmm3 - # msg_sched done: 6-9 + # msg_sched done: 6-7 # msg_sched: 8-9 # rnd_0: 0 - 0 rorxq $14, %r12, %rax @@ -4443,7 +4443,7 @@ L_sha512_len_avx1_rorx_start: addq %rax, %r14 xorq %r8, %rdx vpaddq %xmm4, %xmm8, %xmm4 - # msg_sched done: 8-11 + # msg_sched done: 8-9 # msg_sched: 10-11 # rnd_0: 0 - 0 rorxq $14, %r10, %rax @@ -4529,7 +4529,7 @@ L_sha512_len_avx1_rorx_start: addq %rax, %r12 xorq %r14, %rdx vpaddq %xmm5, %xmm8, %xmm5 - # msg_sched done: 10-13 + # msg_sched done: 10-11 # msg_sched: 12-13 # rnd_0: 0 - 0 rorxq $14, %r8, %rax @@ -4615,7 +4615,7 @@ L_sha512_len_avx1_rorx_start: addq %rax, %r10 xorq %r12, %rdx vpaddq %xmm6, %xmm8, %xmm6 - # msg_sched done: 12-15 + # msg_sched done: 12-13 # msg_sched: 14-15 # rnd_0: 0 - 0 rorxq $14, %r14, %rax @@ -4701,7 +4701,7 @@ L_sha512_len_avx1_rorx_start: addq %rax, %r8 xorq %r10, %rdx vpaddq %xmm7, %xmm8, %xmm7 - # msg_sched done: 14-17 + # msg_sched done: 14-15 movq 136(%rsp), %rcx vpaddq (%rcx), %xmm0, %xmm8 vpaddq 16(%rcx), %xmm1, %xmm9 @@ -6585,7 +6585,7 @@ L_sha512_len_avx2_start: movq %r10, %rax addq %rcx, %r14 vpaddq %ymm0, %ymm8, %ymm0 - # msg_sched done: 0-3 + # msg_sched done: 0-1 # msg_sched: 4-5 rorq $23, %rax vpalignr $8, %ymm1, %ymm2, %ymm12 @@ -6662,7 +6662,7 @@ L_sha512_len_avx2_start: movq %r8, %rax addq %rcx, %r12 vpaddq %ymm1, %ymm8, %ymm1 - # msg_sched done: 4-7 + # msg_sched done: 4-5 # msg_sched: 8-9 rorq $23, %rax vpalignr $8, 
%ymm2, %ymm3, %ymm12 @@ -6739,7 +6739,7 @@ L_sha512_len_avx2_start: movq %r14, %rax addq %rcx, %r10 vpaddq %ymm2, %ymm8, %ymm2 - # msg_sched done: 8-11 + # msg_sched done: 8-9 # msg_sched: 12-13 rorq $23, %rax vpalignr $8, %ymm3, %ymm4, %ymm12 @@ -6816,7 +6816,7 @@ L_sha512_len_avx2_start: movq %r12, %rax addq %rcx, %r8 vpaddq %ymm3, %ymm8, %ymm3 - # msg_sched done: 12-15 + # msg_sched done: 12-13 # msg_sched: 16-17 rorq $23, %rax vpalignr $8, %ymm4, %ymm5, %ymm12 @@ -6893,7 +6893,7 @@ L_sha512_len_avx2_start: movq %r10, %rax addq %rcx, %r14 vpaddq %ymm4, %ymm8, %ymm4 - # msg_sched done: 16-19 + # msg_sched done: 16-17 # msg_sched: 20-21 rorq $23, %rax vpalignr $8, %ymm5, %ymm6, %ymm12 @@ -6970,7 +6970,7 @@ L_sha512_len_avx2_start: movq %r8, %rax addq %rcx, %r12 vpaddq %ymm5, %ymm8, %ymm5 - # msg_sched done: 20-23 + # msg_sched done: 20-21 # msg_sched: 24-25 rorq $23, %rax vpalignr $8, %ymm6, %ymm7, %ymm12 @@ -7047,7 +7047,7 @@ L_sha512_len_avx2_start: movq %r14, %rax addq %rcx, %r10 vpaddq %ymm6, %ymm8, %ymm6 - # msg_sched done: 24-27 + # msg_sched done: 24-25 # msg_sched: 28-29 rorq $23, %rax vpalignr $8, %ymm7, %ymm0, %ymm12 @@ -7124,7 +7124,7 @@ L_sha512_len_avx2_start: movq %r12, %rax addq %rcx, %r8 vpaddq %ymm7, %ymm8, %ymm7 - # msg_sched done: 28-31 + # msg_sched done: 28-29 addq $0x100, %rsi addq $0x100, %rbp cmpq L_avx2_sha512_k_2_end(%rip), %rsi @@ -9381,7 +9381,7 @@ L_sha512_len_avx2_rorx_start: addq %rax, %r14 xorq %r8, %rdx vpaddq %ymm0, %ymm8, %ymm0 - # msg_sched done: 0-3 + # msg_sched done: 0-1 # msg_sched: 4-5 rorxq $14, %r10, %rax rorxq $18, %r10, %rcx @@ -9454,7 +9454,7 @@ L_sha512_len_avx2_rorx_start: addq %rax, %r12 xorq %r14, %rdx vpaddq %ymm1, %ymm8, %ymm1 - # msg_sched done: 4-7 + # msg_sched done: 4-5 # msg_sched: 8-9 rorxq $14, %r8, %rax rorxq $18, %r8, %rcx @@ -9527,7 +9527,7 @@ L_sha512_len_avx2_rorx_start: addq %rax, %r10 xorq %r12, %rdx vpaddq %ymm2, %ymm8, %ymm2 - # msg_sched done: 8-11 + # msg_sched done: 8-9 # msg_sched: 12-13 rorxq 
$14, %r14, %rax rorxq $18, %r14, %rcx @@ -9600,7 +9600,7 @@ L_sha512_len_avx2_rorx_start: addq %rax, %r8 xorq %r10, %rdx vpaddq %ymm3, %ymm8, %ymm3 - # msg_sched done: 12-15 + # msg_sched done: 12-13 # msg_sched: 16-17 rorxq $14, %r12, %rax rorxq $18, %r12, %rcx @@ -9673,7 +9673,7 @@ L_sha512_len_avx2_rorx_start: addq %rax, %r14 xorq %r8, %rdx vpaddq %ymm4, %ymm8, %ymm4 - # msg_sched done: 16-19 + # msg_sched done: 16-17 # msg_sched: 20-21 rorxq $14, %r10, %rax rorxq $18, %r10, %rcx @@ -9746,7 +9746,7 @@ L_sha512_len_avx2_rorx_start: addq %rax, %r12 xorq %r14, %rdx vpaddq %ymm5, %ymm8, %ymm5 - # msg_sched done: 20-23 + # msg_sched done: 20-21 # msg_sched: 24-25 rorxq $14, %r8, %rax rorxq $18, %r8, %rcx @@ -9819,7 +9819,7 @@ L_sha512_len_avx2_rorx_start: addq %rax, %r10 xorq %r12, %rdx vpaddq %ymm6, %ymm8, %ymm6 - # msg_sched done: 24-27 + # msg_sched done: 24-25 # msg_sched: 28-29 rorxq $14, %r14, %rax rorxq $18, %r14, %rcx @@ -9892,7 +9892,7 @@ L_sha512_len_avx2_rorx_start: addq %rax, %r8 xorq %r10, %rdx vpaddq %ymm7, %ymm8, %ymm7 - # msg_sched done: 28-31 + # msg_sched done: 28-29 addq $0x100, %rbp addq $0x100, %rsi cmpq L_avx2_rorx_sha512_k_2_end(%rip), %rbp diff --git a/wolfcrypt/src/sp_arm32.c b/wolfcrypt/src/sp_arm32.c index f2c5b5586a0..c6450f75b64 100644 --- a/wolfcrypt/src/sp_arm32.c +++ b/wolfcrypt/src/sp_arm32.c @@ -5656,7 +5656,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_64(sp_digit* a, "mov r12, #0\n\t" "add lr, %[a], #0x100\n\t" "\n" - "L_sp_2048_sub_in_pkace_64_word_%=: \n\t" + "L_sp_2048_sub_in_place_64_word_%=: \n\t" "rsbs r12, r12, #0\n\t" "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -5667,7 +5667,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_64(sp_digit* a, "stm %[a]!, {r2, r3, r4, r5}\n\t" "sbc r12, r12, r12\n\t" "cmp %[a], lr\n\t" - "bne L_sp_2048_sub_in_pkace_64_word_%=\n\t" + "bne L_sp_2048_sub_in_place_64_word_%=\n\t" "mov %[a], r12\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] 
"+r" (a), [b] "+r" (b) @@ -6162,7 +6162,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_32(sp_digit* a, "mov r12, #0\n\t" "add lr, %[a], #0x80\n\t" "\n" - "L_sp_2048_sub_in_pkace_32_word_%=: \n\t" + "L_sp_2048_sub_in_place_32_word_%=: \n\t" "rsbs r12, r12, #0\n\t" "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -6173,7 +6173,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_32(sp_digit* a, "stm %[a]!, {r2, r3, r4, r5}\n\t" "sbc r12, r12, r12\n\t" "cmp %[a], lr\n\t" - "bne L_sp_2048_sub_in_pkace_32_word_%=\n\t" + "bne L_sp_2048_sub_in_place_32_word_%=\n\t" "mov %[a], r12\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) @@ -9943,15 +9943,15 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_32( #endif "ldr r10, [%[a], #124]\n\t" "lsl r6, r8, #16\n\t" - "lsl r7, r11, #16\n\t" + "lsl r7, r7, #16\n\t" "lsr r6, r6, #16\n\t" "lsr r7, r7, #16\n\t" "mul r7, r6, r7\n\t" "adds r5, r5, r7\n\t" - "adcs r4, r3, #0\n\t" + "adcs r4, r4, #0\n\t" "mov r3, #0\n\t" "adc r3, r3, r3\n\t" - "lsr r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" "mul r6, r7, r6\n\t" "lsr r7, r6, #16\n\t" "lsl r6, r6, #16\n\t" @@ -9959,11 +9959,11 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_32( "adcs r4, r4, r7\n\t" "adc r3, r3, #0\n\t" "mov r6, r8\n\t" - "lsr r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" "lsr r6, r6, #16\n\t" "mul r7, r6, r7\n\t" "adds r4, r4, r7\n\t" - "lsl r7, r11, #16\n\t" + "lsl r7, r7, #16\n\t" "adc r3, r3, #0\n\t" "lsr r7, r7, #16\n\t" "mul r6, r7, r6\n\t" @@ -14909,15 +14909,15 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_64( #endif "ldr r10, [%[a], #252]\n\t" "lsl r6, r8, #16\n\t" - "lsl r7, r11, #16\n\t" + "lsl r7, r7, #16\n\t" "lsr r6, r6, #16\n\t" "lsr r7, r7, #16\n\t" "mul r7, r6, r7\n\t" "adds r5, r5, r7\n\t" - "adcs r4, r3, #0\n\t" + "adcs r4, r4, #0\n\t" "mov r3, #0\n\t" "adc r3, r3, r3\n\t" - "lsr r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" "mul r6, 
r7, r6\n\t" "lsr r7, r6, #16\n\t" "lsl r6, r6, #16\n\t" @@ -14925,11 +14925,11 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_2048_mont_reduce_64( "adcs r4, r4, r7\n\t" "adc r3, r3, #0\n\t" "mov r6, r8\n\t" - "lsr r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" "lsr r6, r6, #16\n\t" "mul r7, r6, r7\n\t" "adds r4, r4, r7\n\t" - "lsl r7, r11, #16\n\t" + "lsl r7, r7, #16\n\t" "adc r3, r3, #0\n\t" "lsr r7, r7, #16\n\t" "mul r6, r7, r6\n\t" @@ -29071,7 +29071,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_96(sp_digit* a, "mov r12, #0\n\t" "add lr, %[a], #0x180\n\t" "\n" - "L_sp_3072_sub_in_pkace_96_word_%=: \n\t" + "L_sp_3072_sub_in_place_96_word_%=: \n\t" "rsbs r12, r12, #0\n\t" "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -29082,7 +29082,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_96(sp_digit* a, "stm %[a]!, {r2, r3, r4, r5}\n\t" "sbc r12, r12, r12\n\t" "cmp %[a], lr\n\t" - "bne L_sp_3072_sub_in_pkace_96_word_%=\n\t" + "bne L_sp_3072_sub_in_place_96_word_%=\n\t" "mov %[a], r12\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) @@ -29577,7 +29577,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_48(sp_digit* a, "mov r12, #0\n\t" "add lr, %[a], #0xc0\n\t" "\n" - "L_sp_3072_sub_in_pkace_48_word_%=: \n\t" + "L_sp_3072_sub_in_place_48_word_%=: \n\t" "rsbs r12, r12, #0\n\t" "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -29588,7 +29588,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_48(sp_digit* a, "stm %[a]!, {r2, r3, r4, r5}\n\t" "sbc r12, r12, r12\n\t" "cmp %[a], lr\n\t" - "bne L_sp_3072_sub_in_pkace_48_word_%=\n\t" + "bne L_sp_3072_sub_in_place_48_word_%=\n\t" "mov %[a], r12\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) @@ -34902,15 +34902,15 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_48( #endif "ldr r10, [%[a], #188]\n\t" "lsl r6, r8, #16\n\t" - "lsl r7, r11, #16\n\t" + "lsl r7, r7, #16\n\t" 
"lsr r6, r6, #16\n\t" "lsr r7, r7, #16\n\t" "mul r7, r6, r7\n\t" "adds r5, r5, r7\n\t" - "adcs r4, r3, #0\n\t" + "adcs r4, r4, #0\n\t" "mov r3, #0\n\t" "adc r3, r3, r3\n\t" - "lsr r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" "mul r6, r7, r6\n\t" "lsr r7, r6, #16\n\t" "lsl r6, r6, #16\n\t" @@ -34918,11 +34918,11 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_48( "adcs r4, r4, r7\n\t" "adc r3, r3, #0\n\t" "mov r6, r8\n\t" - "lsr r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" "lsr r6, r6, #16\n\t" "mul r7, r6, r7\n\t" "adds r4, r4, r7\n\t" - "lsl r7, r11, #16\n\t" + "lsl r7, r7, #16\n\t" "adc r3, r3, #0\n\t" "lsr r7, r7, #16\n\t" "mul r6, r7, r6\n\t" @@ -41804,15 +41804,15 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_96( #endif "ldr r10, [%[a], #380]\n\t" "lsl r6, r8, #16\n\t" - "lsl r7, r11, #16\n\t" + "lsl r7, r7, #16\n\t" "lsr r6, r6, #16\n\t" "lsr r7, r7, #16\n\t" "mul r7, r6, r7\n\t" "adds r5, r5, r7\n\t" - "adcs r4, r3, #0\n\t" + "adcs r4, r4, #0\n\t" "mov r3, #0\n\t" "adc r3, r3, r3\n\t" - "lsr r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" "mul r6, r7, r6\n\t" "lsr r7, r6, #16\n\t" "lsl r6, r6, #16\n\t" @@ -41820,11 +41820,11 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_3072_mont_reduce_96( "adcs r4, r4, r7\n\t" "adc r3, r3, #0\n\t" "mov r6, r8\n\t" - "lsr r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" "lsr r6, r6, #16\n\t" "mul r7, r6, r7\n\t" "adds r4, r4, r7\n\t" - "lsl r7, r11, #16\n\t" + "lsl r7, r7, #16\n\t" "adc r3, r3, #0\n\t" "lsr r7, r7, #16\n\t" "mul r6, r7, r6\n\t" @@ -47639,7 +47639,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_sub_in_place_128(sp_digit* a, "mov r12, #0\n\t" "add lr, %[a], #0x200\n\t" "\n" - "L_sp_4096_sub_in_pkace_128_word_%=: \n\t" + "L_sp_4096_sub_in_place_128_word_%=: \n\t" "rsbs r12, r12, #0\n\t" "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -47650,7 +47650,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_sub_in_place_128(sp_digit* a, "stm %[a]!, {r2, r3, r4, 
r5}\n\t" "sbc r12, r12, r12\n\t" "cmp %[a], lr\n\t" - "bne L_sp_4096_sub_in_pkace_128_word_%=\n\t" + "bne L_sp_4096_sub_in_place_128_word_%=\n\t" "mov %[a], r12\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) @@ -56588,15 +56588,15 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mont_reduce_128( #endif "ldr r10, [%[a], #508]\n\t" "lsl r6, r8, #16\n\t" - "lsl r7, r11, #16\n\t" + "lsl r7, r7, #16\n\t" "lsr r6, r6, #16\n\t" "lsr r7, r7, #16\n\t" "mul r7, r6, r7\n\t" "adds r5, r5, r7\n\t" - "adcs r4, r3, #0\n\t" + "adcs r4, r4, #0\n\t" "mov r3, #0\n\t" "adc r3, r3, r3\n\t" - "lsr r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" "mul r6, r7, r6\n\t" "lsr r7, r6, #16\n\t" "lsl r6, r6, #16\n\t" @@ -56604,11 +56604,11 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_4096_mont_reduce_128( "adcs r4, r4, r7\n\t" "adc r3, r3, #0\n\t" "mov r6, r8\n\t" - "lsr r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" "lsr r6, r6, #16\n\t" "mul r7, r6, r7\n\t" "adds r4, r4, r7\n\t" - "lsl r7, r11, #16\n\t" + "lsl r7, r7, #16\n\t" "adc r3, r3, #0\n\t" "lsr r7, r7, #16\n\t" "mul r6, r7, r6\n\t" @@ -73172,15 +73172,15 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a, #endif "ldr r10, [%[a], #28]\n\t" "lsl r6, r8, #16\n\t" - "lsl r7, r11, #16\n\t" + "lsl r7, r7, #16\n\t" "lsr r6, r6, #16\n\t" "lsr r7, r7, #16\n\t" "mul r7, r6, r7\n\t" "adds r5, r5, r7\n\t" - "adcs r4, r3, #0\n\t" + "adcs r4, r4, #0\n\t" "mov r3, #0\n\t" "adc r3, r3, r3\n\t" - "lsr r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" "mul r6, r7, r6\n\t" "lsr r7, r6, #16\n\t" "lsl r6, r6, #16\n\t" @@ -73188,11 +73188,11 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_8(sp_digit* a, "adcs r4, r4, r7\n\t" "adc r3, r3, #0\n\t" "mov r6, r8\n\t" - "lsr r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" "lsr r6, r6, #16\n\t" "mul r7, r6, r7\n\t" "adds r4, r4, r7\n\t" - "lsl r7, r11, #16\n\t" + "lsl r7, r7, #16\n\t" "adc r3, r3, #0\n\t" "lsr r7, r7, #16\n\t" "mul r6, r7, r6\n\t" @@ 
-73860,15 +73860,15 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_order_8( #endif "ldr r10, [%[a], #28]\n\t" "lsl r6, r8, #16\n\t" - "lsl r7, r11, #16\n\t" + "lsl r7, r7, #16\n\t" "lsr r6, r6, #16\n\t" "lsr r7, r7, #16\n\t" "mul r7, r6, r7\n\t" "adds r5, r5, r7\n\t" - "adcs r4, r3, #0\n\t" + "adcs r4, r4, #0\n\t" "mov r3, #0\n\t" "adc r3, r3, r3\n\t" - "lsr r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" "mul r6, r7, r6\n\t" "lsr r7, r6, #16\n\t" "lsl r6, r6, #16\n\t" @@ -73876,11 +73876,11 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_256_mont_reduce_order_8( "adcs r4, r4, r7\n\t" "adc r3, r3, #0\n\t" "mov r6, r8\n\t" - "lsr r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" "lsr r6, r6, #16\n\t" "mul r7, r6, r7\n\t" "adds r4, r4, r7\n\t" - "lsl r7, r11, #16\n\t" + "lsl r7, r7, #16\n\t" "adc r3, r3, #0\n\t" "lsr r7, r7, #16\n\t" "mul r6, r7, r6\n\t" @@ -78284,7 +78284,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_sub_in_place_8(sp_digit* a, "mov r12, #0\n\t" "add lr, %[a], #32\n\t" "\n" - "L_sp_256_sub_in_pkace_8_word_%=: \n\t" + "L_sp_256_sub_in_place_8_word_%=: \n\t" "rsbs r12, r12, #0\n\t" "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -78295,7 +78295,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_sub_in_place_8(sp_digit* a, "stm %[a]!, {r2, r3, r4, r5}\n\t" "sbc r12, r12, r12\n\t" "cmp %[a], lr\n\t" - "bne L_sp_256_sub_in_pkace_8_word_%=\n\t" + "bne L_sp_256_sub_in_place_8_word_%=\n\t" "mov %[a], r12\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) @@ -91143,15 +91143,15 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_reduce_12(sp_digit* a, #endif "ldr r10, [%[a], #44]\n\t" "lsl r6, r8, #16\n\t" - "lsl r7, r11, #16\n\t" + "lsl r7, r7, #16\n\t" "lsr r6, r6, #16\n\t" "lsr r7, r7, #16\n\t" "mul r7, r6, r7\n\t" "adds r5, r5, r7\n\t" - "adcs r4, r3, #0\n\t" + "adcs r4, r4, #0\n\t" "mov r3, #0\n\t" "adc r3, r3, r3\n\t" - "lsr r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" "mul r6, r7, r6\n\t" "lsr 
r7, r6, #16\n\t" "lsl r6, r6, #16\n\t" @@ -91159,11 +91159,11 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_384_mont_reduce_12(sp_digit* a, "adcs r4, r4, r7\n\t" "adc r3, r3, #0\n\t" "mov r6, r8\n\t" - "lsr r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" "lsr r6, r6, #16\n\t" "mul r7, r6, r7\n\t" "adds r4, r4, r7\n\t" - "lsl r7, r11, #16\n\t" + "lsl r7, r7, #16\n\t" "adc r3, r3, #0\n\t" "lsr r7, r7, #16\n\t" "mul r6, r7, r6\n\t" @@ -96341,7 +96341,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_sub_in_place_12(sp_digit* a, "mov r12, #0\n\t" "add lr, %[a], #48\n\t" "\n" - "L_sp_384_sub_in_pkace_12_word_%=: \n\t" + "L_sp_384_sub_in_place_12_word_%=: \n\t" "rsbs r12, r12, #0\n\t" "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -96352,7 +96352,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_sub_in_place_12(sp_digit* a, "stm %[a]!, {r2, r3, r4, r5}\n\t" "sbc r12, r12, r12\n\t" "cmp %[a], lr\n\t" - "bne L_sp_384_sub_in_pkace_12_word_%=\n\t" + "bne L_sp_384_sub_in_place_12_word_%=\n\t" "mov %[a], r12\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) @@ -118074,15 +118074,15 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_order_17( #endif "ldr r10, [%[a], #64]\n\t" "lsl r6, r8, #16\n\t" - "lsl r7, r11, #16\n\t" + "lsl r7, r7, #16\n\t" "lsr r6, r6, #16\n\t" "lsr r7, r7, #16\n\t" "mul r7, r6, r7\n\t" "adds r4, r4, r7\n\t" - "adcs r5, r3, #0\n\t" + "adcs r5, r5, #0\n\t" "mov r3, #0\n\t" "adc r3, r3, r3\n\t" - "lsr r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" "mul r6, r7, r6\n\t" "lsr r7, r6, #16\n\t" "lsl r6, r6, #16\n\t" @@ -118090,11 +118090,11 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_521_mont_reduce_order_17( "adcs r5, r5, r7\n\t" "adc r3, r3, #0\n\t" "mov r6, r8\n\t" - "lsr r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" "lsr r6, r6, #16\n\t" "mul r7, r6, r7\n\t" "adds r5, r5, r7\n\t" - "lsl r7, r11, #16\n\t" + "lsl r7, r7, #16\n\t" "adc r3, r3, #0\n\t" "lsr r7, r7, #16\n\t" "mul r6, r7, r6\n\t" @@ 
-119416,9 +119416,23 @@ WC_OMIT_FRAME_POINTER static void sp_521_mont_tpl_17(sp_digit* r, "sub %[r], %[r], #0x44\n\t" "ldm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "adds r4, r4, r2\n\t" - "adcs r4, r4, #0\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, r8, #0\n\t" + "adcs r9, r9, #0\n\t" + "adcs r10, r10, #0\n\t" + "adcs r11, r11, #0\n\t" "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "ldm %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "adcs r4, r4, #0\n\t" + "adcs r5, r5, #0\n\t" + "adcs r6, r6, #0\n\t" + "adcs r7, r7, #0\n\t" + "adcs r8, r8, #0\n\t" + "adcs r9, r9, #0\n\t" + "adcs r10, r10, #0\n\t" + "adcs r11, r11, #0\n\t" "stm %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "ldm %[r], {r4}\n\t" "adcs r4, r4, #0\n\t" @@ -124063,77 +124077,77 @@ WC_OMIT_FRAME_POINTER static void sp_521_rshift_17(sp_digit* r, "lsr r5, r5, %[n]\n\t" "orr r4, r4, r3\n\t" "ldr r6, [%[a], #8]\n\t" - "str r4, [%[a]]\n\t" + "str r4, [%[r]]\n\t" "lsl r3, r6, r12\n\t" "lsr r6, r6, %[n]\n\t" "orr r5, r5, r3\n\t" "ldr r4, [%[a], #12]\n\t" - "str r5, [%[a], #4]\n\t" + "str r5, [%[r], #4]\n\t" "lsl r3, r4, r12\n\t" "lsr r4, r4, %[n]\n\t" "orr r6, r6, r3\n\t" "ldr r5, [%[a], #16]\n\t" - "str r6, [%[a], #8]\n\t" + "str r6, [%[r], #8]\n\t" "lsl r3, r5, r12\n\t" "lsr r5, r5, %[n]\n\t" "orr r4, r4, r3\n\t" "ldr r6, [%[a], #20]\n\t" - "str r4, [%[a], #12]\n\t" + "str r4, [%[r], #12]\n\t" "lsl r3, r6, r12\n\t" "lsr r6, r6, %[n]\n\t" "orr r5, r5, r3\n\t" "ldr r4, [%[a], #24]\n\t" - "str r5, [%[a], #16]\n\t" + "str r5, [%[r], #16]\n\t" "lsl r3, r4, r12\n\t" "lsr r4, r4, %[n]\n\t" "orr r6, r6, r3\n\t" "ldr r5, [%[a], #28]\n\t" - "str r6, [%[a], #20]\n\t" + "str r6, [%[r], #20]\n\t" "lsl r3, r5, r12\n\t" "lsr r5, r5, %[n]\n\t" "orr r4, r4, r3\n\t" "ldr r6, [%[a], #32]\n\t" - "str r4, [%[a], #24]\n\t" + "str r4, [%[r], #24]\n\t" "lsl r3, r6, r12\n\t" "lsr r6, r6, %[n]\n\t" "orr r5, r5, r3\n\t" "ldr r4, [%[a], #36]\n\t" - "str r5, [%[a], #28]\n\t" + "str 
r5, [%[r], #28]\n\t" "lsl r3, r4, r12\n\t" "lsr r4, r4, %[n]\n\t" "orr r6, r6, r3\n\t" "ldr r5, [%[a], #40]\n\t" - "str r6, [%[a], #32]\n\t" + "str r6, [%[r], #32]\n\t" "lsl r3, r5, r12\n\t" "lsr r5, r5, %[n]\n\t" "orr r4, r4, r3\n\t" "ldr r6, [%[a], #44]\n\t" - "str r4, [%[a], #36]\n\t" + "str r4, [%[r], #36]\n\t" "lsl r3, r6, r12\n\t" "lsr r6, r6, %[n]\n\t" "orr r5, r5, r3\n\t" "ldr r4, [%[a], #48]\n\t" - "str r5, [%[a], #40]\n\t" + "str r5, [%[r], #40]\n\t" "lsl r3, r4, r12\n\t" "lsr r4, r4, %[n]\n\t" "orr r6, r6, r3\n\t" "ldr r5, [%[a], #52]\n\t" - "str r6, [%[a], #44]\n\t" + "str r6, [%[r], #44]\n\t" "lsl r3, r5, r12\n\t" "lsr r5, r5, %[n]\n\t" "orr r4, r4, r3\n\t" "ldr r6, [%[a], #56]\n\t" - "str r4, [%[a], #48]\n\t" + "str r4, [%[r], #48]\n\t" "lsl r3, r6, r12\n\t" "lsr r6, r6, %[n]\n\t" "orr r5, r5, r3\n\t" "ldr r4, [%[a], #60]\n\t" - "str r5, [%[a], #52]\n\t" + "str r5, [%[r], #52]\n\t" "lsl r3, r4, r12\n\t" "lsr r4, r4, %[n]\n\t" "orr r6, r6, r3\n\t" "ldr r5, [%[a], #64]\n\t" - "str r6, [%[a], #56]\n\t" + "str r6, [%[r], #56]\n\t" "lsl r3, r5, r12\n\t" "lsr r5, r5, %[n]\n\t" "orr r4, r4, r3\n\t" @@ -124541,7 +124555,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_sub_in_place_17(sp_digit* a, "mov r12, #0\n\t" "add lr, %[a], #0x40\n\t" "\n" - "L_sp_521_sub_in_pkace_17_word_%=: \n\t" + "L_sp_521_sub_in_place_17_word_%=: \n\t" "rsbs r12, r12, #0\n\t" "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -124552,7 +124566,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_sub_in_place_17(sp_digit* a, "stm %[a]!, {r2, r3, r4, r5}\n\t" "sbc r12, r12, r12\n\t" "cmp %[a], lr\n\t" - "bne L_sp_521_sub_in_pkace_17_word_%=\n\t" + "bne L_sp_521_sub_in_place_17_word_%=\n\t" "rsbs r12, r12, #0\n\t" "ldm %[a], {r2}\n\t" "ldm %[b]!, {r6}\n\t" @@ -144329,7 +144343,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_sub_in_place_32(sp_digit* a, "mov r12, #0\n\t" "add lr, %[a], #0x80\n\t" "\n" - "L_sp_1024_sub_in_pkace_32_word_%=: \n\t" + 
"L_sp_1024_sub_in_place_32_word_%=: \n\t" "rsbs r12, r12, #0\n\t" "ldm %[a], {r2, r3, r4, r5}\n\t" "ldm %[b]!, {r6, r7, r8, r9}\n\t" @@ -144340,7 +144354,7 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_sub_in_place_32(sp_digit* a, "stm %[a]!, {r2, r3, r4, r5}\n\t" "sbc r12, r12, r12\n\t" "cmp %[a], lr\n\t" - "bne L_sp_1024_sub_in_pkace_32_word_%=\n\t" + "bne L_sp_1024_sub_in_place_32_word_%=\n\t" "mov %[a], r12\n\t" #ifndef WOLFSSL_NO_VAR_ASSIGN_REG : [a] "+r" (a), [b] "+r" (b) @@ -147711,15 +147725,15 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_reduce_32( #endif "ldr r10, [%[a], #124]\n\t" "lsl r6, r8, #16\n\t" - "lsl r7, r11, #16\n\t" + "lsl r7, r7, #16\n\t" "lsr r6, r6, #16\n\t" "lsr r7, r7, #16\n\t" "mul r7, r6, r7\n\t" "adds r5, r5, r7\n\t" - "adcs r4, r3, #0\n\t" + "adcs r4, r4, #0\n\t" "mov r3, #0\n\t" "adc r3, r3, r3\n\t" - "lsr r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" "mul r6, r7, r6\n\t" "lsr r7, r6, #16\n\t" "lsl r6, r6, #16\n\t" @@ -147727,11 +147741,11 @@ WC_OMIT_FRAME_POINTER static SP_NOINLINE void sp_1024_mont_reduce_32( "adcs r4, r4, r7\n\t" "adc r3, r3, #0\n\t" "mov r6, r8\n\t" - "lsr r7, r11, #16\n\t" + "lsr r7, r7, #16\n\t" "lsr r6, r6, #16\n\t" "mul r7, r6, r7\n\t" "adds r4, r4, r7\n\t" - "lsl r7, r11, #16\n\t" + "lsl r7, r7, #16\n\t" "adc r3, r3, #0\n\t" "lsr r7, r7, #16\n\t" "mul r6, r7, r6\n\t" diff --git a/wolfcrypt/src/sp_cortexm.c b/wolfcrypt/src/sp_cortexm.c index 8de63d514b1..4ed704a64d4 100644 --- a/wolfcrypt/src/sp_cortexm.c +++ b/wolfcrypt/src/sp_cortexm.c @@ -2357,9 +2357,9 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_64(sp_digit* a, "ADD r11, %[a], #0x100\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_sp_2048_sub_in_pkace_64_word:\n\t" + "L_sp_2048_sub_in_place_64_word:\n\t" #else - "L_sp_2048_sub_in_pkace_64_word_%=:\n\t" + "L_sp_2048_sub_in_place_64_word_%=:\n\t" #endif "RSBS r10, r10, #0x0\n\t" "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -2372,11 +2372,11 @@ 
WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_64(sp_digit* a, "SBC r10, r10, r10\n\t" "CMP %[a], r11\n\t" #if defined(__GNUC__) - "BNE L_sp_2048_sub_in_pkace_64_word_%=\n\t" + "BNE L_sp_2048_sub_in_place_64_word_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BNE.N L_sp_2048_sub_in_pkace_64_word\n\t" + "BNE.N L_sp_2048_sub_in_place_64_word\n\t" #else - "BNE.N L_sp_2048_sub_in_pkace_64_word_%=\n\t" + "BNE.N L_sp_2048_sub_in_place_64_word_%=\n\t" #endif "MOV %[a], r10\n\t" : [a] "+r" (a), [b] "+r" (b) @@ -2737,9 +2737,9 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_32(sp_digit* a, "ADD r11, %[a], #0x80\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_sp_2048_sub_in_pkace_32_word:\n\t" + "L_sp_2048_sub_in_place_32_word:\n\t" #else - "L_sp_2048_sub_in_pkace_32_word_%=:\n\t" + "L_sp_2048_sub_in_place_32_word_%=:\n\t" #endif "RSBS r10, r10, #0x0\n\t" "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -2752,11 +2752,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_2048_sub_in_place_32(sp_digit* a, "SBC r10, r10, r10\n\t" "CMP %[a], r11\n\t" #if defined(__GNUC__) - "BNE L_sp_2048_sub_in_pkace_32_word_%=\n\t" + "BNE L_sp_2048_sub_in_place_32_word_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BNE.N L_sp_2048_sub_in_pkace_32_word\n\t" + "BNE.N L_sp_2048_sub_in_place_32_word\n\t" #else - "BNE.N L_sp_2048_sub_in_pkace_32_word_%=\n\t" + "BNE.N L_sp_2048_sub_in_place_32_word_%=\n\t" #endif "MOV %[a], r10\n\t" : [a] "+r" (a), [b] "+r" (b) @@ -13347,9 +13347,9 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_96(sp_digit* a, "ADD r11, %[a], #0x180\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_sp_3072_sub_in_pkace_96_word:\n\t" + "L_sp_3072_sub_in_place_96_word:\n\t" #else - "L_sp_3072_sub_in_pkace_96_word_%=:\n\t" + "L_sp_3072_sub_in_place_96_word_%=:\n\t" #endif "RSBS r10, r10, #0x0\n\t" "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -13362,11 +13362,11 @@ WC_OMIT_FRAME_POINTER 
static sp_digit sp_3072_sub_in_place_96(sp_digit* a, "SBC r10, r10, r10\n\t" "CMP %[a], r11\n\t" #if defined(__GNUC__) - "BNE L_sp_3072_sub_in_pkace_96_word_%=\n\t" + "BNE L_sp_3072_sub_in_place_96_word_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BNE.N L_sp_3072_sub_in_pkace_96_word\n\t" + "BNE.N L_sp_3072_sub_in_place_96_word\n\t" #else - "BNE.N L_sp_3072_sub_in_pkace_96_word_%=\n\t" + "BNE.N L_sp_3072_sub_in_place_96_word_%=\n\t" #endif "MOV %[a], r10\n\t" : [a] "+r" (a), [b] "+r" (b) @@ -13727,9 +13727,9 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_48(sp_digit* a, "ADD r11, %[a], #0xc0\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_sp_3072_sub_in_pkace_48_word:\n\t" + "L_sp_3072_sub_in_place_48_word:\n\t" #else - "L_sp_3072_sub_in_pkace_48_word_%=:\n\t" + "L_sp_3072_sub_in_place_48_word_%=:\n\t" #endif "RSBS r10, r10, #0x0\n\t" "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -13742,11 +13742,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_3072_sub_in_place_48(sp_digit* a, "SBC r10, r10, r10\n\t" "CMP %[a], r11\n\t" #if defined(__GNUC__) - "BNE L_sp_3072_sub_in_pkace_48_word_%=\n\t" + "BNE L_sp_3072_sub_in_place_48_word_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BNE.N L_sp_3072_sub_in_pkace_48_word\n\t" + "BNE.N L_sp_3072_sub_in_place_48_word\n\t" #else - "BNE.N L_sp_3072_sub_in_pkace_48_word_%=\n\t" + "BNE.N L_sp_3072_sub_in_place_48_word_%=\n\t" #endif "MOV %[a], r10\n\t" : [a] "+r" (a), [b] "+r" (b) @@ -23690,9 +23690,9 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_4096_sub_in_place_128(sp_digit* a, "ADD r11, %[a], #0x200\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_sp_4096_sub_in_pkace_128_word:\n\t" + "L_sp_4096_sub_in_place_128_word:\n\t" #else - "L_sp_4096_sub_in_pkace_128_word_%=:\n\t" + "L_sp_4096_sub_in_place_128_word_%=:\n\t" #endif "RSBS r10, r10, #0x0\n\t" "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -23705,11 +23705,11 @@ WC_OMIT_FRAME_POINTER static 
sp_digit sp_4096_sub_in_place_128(sp_digit* a, "SBC r10, r10, r10\n\t" "CMP %[a], r11\n\t" #if defined(__GNUC__) - "BNE L_sp_4096_sub_in_pkace_128_word_%=\n\t" + "BNE L_sp_4096_sub_in_place_128_word_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BNE.N L_sp_4096_sub_in_pkace_128_word\n\t" + "BNE.N L_sp_4096_sub_in_place_128_word\n\t" #else - "BNE.N L_sp_4096_sub_in_pkace_128_word_%=\n\t" + "BNE.N L_sp_4096_sub_in_place_128_word_%=\n\t" #endif "MOV %[a], r10\n\t" : [a] "+r" (a), [b] "+r" (b) @@ -39666,9 +39666,9 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_sub_in_place_8(sp_digit* a, "ADD r11, %[a], #0x20\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_sp_256_sub_in_pkace_8_word:\n\t" + "L_sp_256_sub_in_place_8_word:\n\t" #else - "L_sp_256_sub_in_pkace_8_word_%=:\n\t" + "L_sp_256_sub_in_place_8_word_%=:\n\t" #endif "RSBS r10, r10, #0x0\n\t" "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -39681,11 +39681,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_256_sub_in_place_8(sp_digit* a, "SBC r10, r10, r10\n\t" "CMP %[a], r11\n\t" #if defined(__GNUC__) - "BNE L_sp_256_sub_in_pkace_8_word_%=\n\t" + "BNE L_sp_256_sub_in_place_8_word_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BNE.N L_sp_256_sub_in_pkace_8_word\n\t" + "BNE.N L_sp_256_sub_in_place_8_word\n\t" #else - "BNE.N L_sp_256_sub_in_pkace_8_word_%=\n\t" + "BNE.N L_sp_256_sub_in_place_8_word_%=\n\t" #endif "MOV %[a], r10\n\t" : [a] "+r" (a), [b] "+r" (b) @@ -49643,9 +49643,9 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_sub_in_place_12(sp_digit* a, "ADD r11, %[a], #0x30\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_sp_384_sub_in_pkace_12_word:\n\t" + "L_sp_384_sub_in_place_12_word:\n\t" #else - "L_sp_384_sub_in_pkace_12_word_%=:\n\t" + "L_sp_384_sub_in_place_12_word_%=:\n\t" #endif "RSBS r10, r10, #0x0\n\t" "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -49658,11 +49658,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_384_sub_in_place_12(sp_digit* 
a, "SBC r10, r10, r10\n\t" "CMP %[a], r11\n\t" #if defined(__GNUC__) - "BNE L_sp_384_sub_in_pkace_12_word_%=\n\t" + "BNE L_sp_384_sub_in_place_12_word_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BNE.N L_sp_384_sub_in_pkace_12_word\n\t" + "BNE.N L_sp_384_sub_in_place_12_word\n\t" #else - "BNE.N L_sp_384_sub_in_pkace_12_word_%=\n\t" + "BNE.N L_sp_384_sub_in_place_12_word_%=\n\t" #endif "MOV %[a], r10\n\t" : [a] "+r" (a), [b] "+r" (b) @@ -57454,9 +57454,23 @@ WC_OMIT_FRAME_POINTER SP_NOINLINE static void sp_521_mont_tpl_17(sp_digit* r, "SUB %[r], %[r], #0x44\n\t" "LDM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "ADDS r4, r4, r2\n\t" - "ADCS r4, r4, #0x0\n\t" + "ADCS r5, r5, #0x0\n\t" + "ADCS r6, r6, #0x0\n\t" + "ADCS r7, r7, #0x0\n\t" + "ADCS r8, r8, #0x0\n\t" + "ADCS r9, r9, #0x0\n\t" + "ADCS r10, r10, #0x0\n\t" + "ADCS r11, r11, #0x0\n\t" "STM %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "LDM %[r], {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" + "ADCS r4, r4, #0x0\n\t" + "ADCS r5, r5, #0x0\n\t" + "ADCS r6, r6, #0x0\n\t" + "ADCS r7, r7, #0x0\n\t" + "ADCS r8, r8, #0x0\n\t" + "ADCS r9, r9, #0x0\n\t" + "ADCS r10, r10, #0x0\n\t" + "ADCS r11, r11, #0x0\n\t" "STM %[r]!, {r4, r5, r6, r7, r8, r9, r10, r11}\n\t" "LDM %[r], {r4}\n\t" "ADCS r4, r4, #0x0\n\t" @@ -62068,77 +62082,77 @@ WC_OMIT_FRAME_POINTER static void sp_521_rshift_17(sp_digit* r, "LSR r5, r5, %[n]\n\t" "ORR r4, r4, r3\n\t" "LDR r6, [%[a], #8]\n\t" - "STR r4, [%[a]]\n\t" + "STR r4, [%[r]]\n\t" "LSL r3, r6, r7\n\t" "LSR r6, r6, %[n]\n\t" "ORR r5, r5, r3\n\t" "LDR r4, [%[a], #12]\n\t" - "STR r5, [%[a], #4]\n\t" + "STR r5, [%[r], #4]\n\t" "LSL r3, r4, r7\n\t" "LSR r4, r4, %[n]\n\t" "ORR r6, r6, r3\n\t" "LDR r5, [%[a], #16]\n\t" - "STR r6, [%[a], #8]\n\t" + "STR r6, [%[r], #8]\n\t" "LSL r3, r5, r7\n\t" "LSR r5, r5, %[n]\n\t" "ORR r4, r4, r3\n\t" "LDR r6, [%[a], #20]\n\t" - "STR r4, [%[a], #12]\n\t" + "STR r4, [%[r], #12]\n\t" "LSL r3, r6, r7\n\t" "LSR r6, r6, %[n]\n\t" "ORR r5, r5, r3\n\t" 
"LDR r4, [%[a], #24]\n\t" - "STR r5, [%[a], #16]\n\t" + "STR r5, [%[r], #16]\n\t" "LSL r3, r4, r7\n\t" "LSR r4, r4, %[n]\n\t" "ORR r6, r6, r3\n\t" "LDR r5, [%[a], #28]\n\t" - "STR r6, [%[a], #20]\n\t" + "STR r6, [%[r], #20]\n\t" "LSL r3, r5, r7\n\t" "LSR r5, r5, %[n]\n\t" "ORR r4, r4, r3\n\t" "LDR r6, [%[a], #32]\n\t" - "STR r4, [%[a], #24]\n\t" + "STR r4, [%[r], #24]\n\t" "LSL r3, r6, r7\n\t" "LSR r6, r6, %[n]\n\t" "ORR r5, r5, r3\n\t" "LDR r4, [%[a], #36]\n\t" - "STR r5, [%[a], #28]\n\t" + "STR r5, [%[r], #28]\n\t" "LSL r3, r4, r7\n\t" "LSR r4, r4, %[n]\n\t" "ORR r6, r6, r3\n\t" "LDR r5, [%[a], #40]\n\t" - "STR r6, [%[a], #32]\n\t" + "STR r6, [%[r], #32]\n\t" "LSL r3, r5, r7\n\t" "LSR r5, r5, %[n]\n\t" "ORR r4, r4, r3\n\t" "LDR r6, [%[a], #44]\n\t" - "STR r4, [%[a], #36]\n\t" + "STR r4, [%[r], #36]\n\t" "LSL r3, r6, r7\n\t" "LSR r6, r6, %[n]\n\t" "ORR r5, r5, r3\n\t" "LDR r4, [%[a], #48]\n\t" - "STR r5, [%[a], #40]\n\t" + "STR r5, [%[r], #40]\n\t" "LSL r3, r4, r7\n\t" "LSR r4, r4, %[n]\n\t" "ORR r6, r6, r3\n\t" "LDR r5, [%[a], #52]\n\t" - "STR r6, [%[a], #44]\n\t" + "STR r6, [%[r], #44]\n\t" "LSL r3, r5, r7\n\t" "LSR r5, r5, %[n]\n\t" "ORR r4, r4, r3\n\t" "LDR r6, [%[a], #56]\n\t" - "STR r4, [%[a], #48]\n\t" + "STR r4, [%[r], #48]\n\t" "LSL r3, r6, r7\n\t" "LSR r6, r6, %[n]\n\t" "ORR r5, r5, r3\n\t" "LDR r4, [%[a], #60]\n\t" - "STR r5, [%[a], #52]\n\t" + "STR r5, [%[r], #52]\n\t" "LSL r3, r4, r7\n\t" "LSR r4, r4, %[n]\n\t" "ORR r6, r6, r3\n\t" "LDR r5, [%[a], #64]\n\t" - "STR r6, [%[a], #56]\n\t" + "STR r6, [%[r], #56]\n\t" "LSL r3, r5, r7\n\t" "LSR r5, r5, %[n]\n\t" "ORR r4, r4, r3\n\t" @@ -62527,9 +62541,9 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_sub_in_place_17(sp_digit* a, "ADD r11, %[a], #0x40\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_sp_521_sub_in_pkace_17_word:\n\t" + "L_sp_521_sub_in_place_17_word:\n\t" #else - "L_sp_521_sub_in_pkace_17_word_%=:\n\t" + "L_sp_521_sub_in_place_17_word_%=:\n\t" #endif "RSBS r10, r10, 
#0x0\n\t" "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -62542,11 +62556,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_521_sub_in_place_17(sp_digit* a, "SBC r10, r10, r10\n\t" "CMP %[a], r11\n\t" #if defined(__GNUC__) - "BNE L_sp_521_sub_in_pkace_17_word_%=\n\t" + "BNE L_sp_521_sub_in_place_17_word_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BNE.N L_sp_521_sub_in_pkace_17_word\n\t" + "BNE.N L_sp_521_sub_in_place_17_word\n\t" #else - "BNE.N L_sp_521_sub_in_pkace_17_word_%=\n\t" + "BNE.N L_sp_521_sub_in_place_17_word_%=\n\t" #endif "RSBS r10, r10, #0x0\n\t" "LDM %[a], {r2}\n\t" @@ -68852,9 +68866,9 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_sub_in_place_32(sp_digit* a, "ADD r11, %[a], #0x80\n\t" "\n" #if defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "L_sp_1024_sub_in_pkace_32_word:\n\t" + "L_sp_1024_sub_in_place_32_word:\n\t" #else - "L_sp_1024_sub_in_pkace_32_word_%=:\n\t" + "L_sp_1024_sub_in_place_32_word_%=:\n\t" #endif "RSBS r10, r10, #0x0\n\t" "LDM %[a], {r2, r3, r4, r5}\n\t" @@ -68867,11 +68881,11 @@ WC_OMIT_FRAME_POINTER static sp_digit sp_1024_sub_in_place_32(sp_digit* a, "SBC r10, r10, r10\n\t" "CMP %[a], r11\n\t" #if defined(__GNUC__) - "BNE L_sp_1024_sub_in_pkace_32_word_%=\n\t" + "BNE L_sp_1024_sub_in_place_32_word_%=\n\t" #elif defined(__IAR_SYSTEMS_ICC__) && (__VER__ < 9000000) - "BNE.N L_sp_1024_sub_in_pkace_32_word\n\t" + "BNE.N L_sp_1024_sub_in_place_32_word\n\t" #else - "BNE.N L_sp_1024_sub_in_pkace_32_word_%=\n\t" + "BNE.N L_sp_1024_sub_in_place_32_word_%=\n\t" #endif "MOV %[a], r10\n\t" : [a] "+r" (a), [b] "+r" (b) diff --git a/wolfcrypt/src/sp_x86_64_asm.S b/wolfcrypt/src/sp_x86_64_asm.S index 4dfc307ffa3..1e907aee460 100644 --- a/wolfcrypt/src/sp_x86_64_asm.S +++ b/wolfcrypt/src/sp_x86_64_asm.S @@ -66434,7 +66434,7 @@ _sp_521_mont_add_9: /* Double a Montgomery form number (r = a + a % m). * * r Result of addition. - * a Number to souble in Montgomery form. + * a Number to double in Montgomery form. 
* m Modulus (prime). */ #ifndef __APPLE__ @@ -76656,7 +76656,7 @@ _sp_1024_mont_add_16: /* Double a Montgomery form number (r = a + a % m). * * r Result of addition. - * a Number to souble in Montgomery form. + * a Number to double in Montgomery form. * m Modulus (prime). */ #ifndef __APPLE__ @@ -76830,7 +76830,7 @@ _sp_1024_mont_dbl_16: /* Triple a Montgomery form number (r = a + a + a % m). * * r Result of addition. - * a Number to souble in Montgomery form. + * a Number to triple in Montgomery form. * m Modulus (prime). */ #ifndef __APPLE__ @@ -77984,7 +77984,7 @@ _sp_1024_mont_add_avx2_16: /* Double a Montgomery form number (r = a + a % m). * * r Result of addition. - * a Number to souble in Montgomery form. + * a Number to double in Montgomery form. * m Modulus (prime). */ #ifndef __APPLE__ @@ -78142,7 +78142,7 @@ _sp_1024_mont_dbl_avx2_16: /* Triple a Montgomery form number (r = a + a + a % m). * * r Result of addition. - * a Number to souble in Montgomery form. + * a Number to triple in Montgomery form. * m Modulus (prime). */ #ifndef __APPLE__ diff --git a/wolfcrypt/src/sp_x86_64_asm.asm b/wolfcrypt/src/sp_x86_64_asm.asm index 3f04d445c26..c91ccfa48b5 100644 --- a/wolfcrypt/src/sp_x86_64_asm.asm +++ b/wolfcrypt/src/sp_x86_64_asm.asm @@ -64790,7 +64790,7 @@ _text ENDS ; /* Double a Montgomery form number (r = a + a % m). ; * ; * r Result of addition. -; * a Number to souble in Montgomery form. +; * a Number to double in Montgomery form. ; * m Modulus (prime). ; */ _text SEGMENT READONLY PARA @@ -74754,7 +74754,7 @@ _text ENDS ; /* Double a Montgomery form number (r = a + a % m). ; * ; * r Result of addition. -; * a Number to souble in Montgomery form. +; * a Number to double in Montgomery form. ; * m Modulus (prime). ; */ _text SEGMENT READONLY PARA @@ -74919,7 +74919,7 @@ _text ENDS ; /* Triple a Montgomery form number (r = a + a + a % m). ; * ; * r Result of addition. -; * a Number to souble in Montgomery form. +; * a Number to triple in Montgomery form.
; * m Modulus (prime). ; */ _text SEGMENT READONLY PARA @@ -76037,7 +76037,7 @@ IFDEF HAVE_INTEL_AVX2 ; /* Double a Montgomery form number (r = a + a % m). ; * ; * r Result of addition. -; * a Number to souble in Montgomery form. +; * a Number to double in Montgomery form. ; * m Modulus (prime). ; */ _text SEGMENT READONLY PARA @@ -76186,7 +76186,7 @@ IFDEF HAVE_INTEL_AVX2 ; /* Triple a Montgomery form number (r = a + a + a % m). ; * ; * r Result of addition. -; * a Number to souble in Montgomery form. +; * a Number to triple in Montgomery form. ; * m Modulus (prime). ; */ _text SEGMENT READONLY PARA