diff --git a/.wolfssl_known_macro_extras b/.wolfssl_known_macro_extras index 144ea2ff02b..b672888ac0d 100644 --- a/.wolfssl_known_macro_extras +++ b/.wolfssl_known_macro_extras @@ -1,4 +1,8 @@ +AES_CR_CCFC AES_GCM_GMULT_NCT +AES_ICR_CCF +AES_ISR_CCF +AES_SR_CCF AFX_RESOURCE_DLL AFX_TARG_ENU ALLOW_BINARY_MISMATCH_INTROSPECTION @@ -266,7 +270,11 @@ HARDWARE_CACHE_COHERENCY HASH_AlgoMode_HASH HASH_AlgoMode_HMAC HASH_BYTE_SWAP +HASH_CR_ALGO_1 +HASH_CR_DATATYPE_0 +HASH_CR_DATATYPE_1 HASH_CR_LKEY +HASH_CR_MODE HASH_DIGEST HASH_DataType_8b HASH_IMR_DCIE @@ -496,6 +504,14 @@ PTHREAD_STACK_MIN QAT_ENABLE_HASH QAT_ENABLE_RNG QAT_USE_POLLING_CHECK +RCC_AHB1ENR_PKAEN +RCC_AHB2ENR1_AESEN +RCC_AHB2ENR1_HASHEN +RCC_AHB2ENR1_PKAEN +RCC_AHB2ENR_HASHEN +RCC_AHB2ENR_PKAEN +RCC_AHB2ENR_SAESEN +RCC_AHB3ENR_AESEN RC_NO_RNG REDIRECTION_IN3_KEYELMID REDIRECTION_IN3_KEYID @@ -678,6 +694,11 @@ WC_SLHDSA_KERNEL_ASM WC_SLHDSA_NO_ASM WC_SLHDSA_VERBOSE_DEBUG WC_SSIZE_TYPE +WC_STM32_AES_CLK_ENABLE_INST +WC_STM32_AES_INST +WC_STM32_HAS_DHUK +WC_STM32_SAES_CLK_DISABLE +WC_STM32_SAES_CLK_ENABLE WC_STRICT_SIG WC_USE_PIE_FENCEPOSTS_FOR_FIPS WC_WANT_FLAG_DONT_USE_VECTOR_OPS @@ -739,6 +760,9 @@ WOLFSSL_CLANG_TIDY WOLFSSL_CLIENT_EXAMPLE WOLFSSL_CONTIKI WOLFSSL_CRL_ALLOW_MISSING_CDP +WOLFSSL_DHUK +WOLFSSL_DHUK_DEVID +WOLFSSL_DHUK_WRAPPED_DEVID WOLFSSL_DILITHIUM_ASSIGN_KEY WOLFSSL_DILITHIUM_NO_ASN1 WOLFSSL_DILITHIUM_NO_CHECK_KEY @@ -895,6 +919,7 @@ WOLFSSL_RNG_USE_FULL_SEED WOLFSSL_RSA_CHECK_D_ON_DECRYPT WOLFSSL_RSA_DECRYPT_TO_0_LEN WOLFSSL_RW_THREADED +WOLFSSL_SAES_DEVID WOLFSSL_SAKKE_SMALL WOLFSSL_SAKKE_SMALL_MODEXP WOLFSSL_SE050_AUTO_ERASE @@ -920,8 +945,11 @@ WOLFSSL_SP_ARM32_UDIV WOLFSSL_SP_FAST_NCT_EXPTMOD WOLFSSL_SP_INT_SQR_VOLATILE WOLFSSL_STACK_CHECK +WOLFSSL_STM32C5 WOLFSSL_STM32F427_RNG WOLFSSL_STM32U5_DHUK +WOLFSSL_STM32_BARE +WOLFSSL_STM32_USE_SAES WOLFSSL_STRONGEST_HASH_SIG WOLFSSL_STSAFE_TAKES_SLOT WOLFSSL_TELIT_M2MB diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index 
8fb72e55982..118fd5d0d52 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -227,6 +227,10 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits static WARN_UNUSED_RESULT int wc_AesEncrypt( Aes* aes, const byte* inBlock, byte* outBlock) { + #ifdef WOLFSSL_STM32_BARE + /* Bare-metal driver handles mutex, clock and key/IV internally. */ + return wc_Stm32_Aes_Ecb(aes, outBlock, inBlock, WC_AES_BLOCK_SIZE, 1); + #else int ret = 0; #ifdef WOLFSSL_STM32_CUBEMX CRYP_HandleTypeDef hcryp; @@ -241,13 +245,13 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits return ret; #endif - #ifdef WOLFSSL_STM32U5_DHUK + #ifdef WOLFSSL_DHUK ret = wolfSSL_CryptHwMutexLock(); if (ret != 0) return ret; /* Handle making use of wrapped key */ - if (aes->devId == WOLFSSL_STM32U5_DHUK_WRAPPED_DEVID) { + if (aes->devId == WOLFSSL_DHUK_WRAPPED_DEVID) { CRYP_ConfigTypeDef Config = {0}; ret = wc_Stm32_Aes_UnWrap(aes, &hcryp, (const byte*)aes->key, @@ -367,6 +371,7 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits wc_Stm32_Aes_Cleanup(); return ret; + #endif /* !WOLFSSL_STM32_BARE */ } #endif /* WOLFSSL_AES_DIRECT || HAVE_AESGCM || HAVE_AESCCM */ @@ -375,6 +380,9 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits static WARN_UNUSED_RESULT int wc_AesDecrypt( Aes* aes, const byte* inBlock, byte* outBlock) { + #ifdef WOLFSSL_STM32_BARE + return wc_Stm32_Aes_Ecb(aes, outBlock, inBlock, WC_AES_BLOCK_SIZE, 0); + #else int ret = 0; #ifdef WOLFSSL_STM32_CUBEMX CRYP_HandleTypeDef hcryp; @@ -389,13 +397,13 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits return ret; #endif - #ifdef WOLFSSL_STM32U5_DHUK + #ifdef WOLFSSL_DHUK ret = wolfSSL_CryptHwMutexLock(); if (ret != 0) return ret; /* Handle making use of wrapped key */ - if (aes->devId == WOLFSSL_STM32U5_DHUK_WRAPPED_DEVID) { + if (aes->devId == WOLFSSL_DHUK_WRAPPED_DEVID) { 
CRYP_ConfigTypeDef Config; XMEMSET(&Config, 0, sizeof(Config)); @@ -521,6 +529,7 @@ block cipher mechanism that uses n-bit binary string parameter key with 128-bits wc_Stm32_Aes_Cleanup(); return ret; + #endif /* !WOLFSSL_STM32_BARE */ } #endif /* WOLFSSL_AES_DIRECT */ #endif /* HAVE_AES_DECRYPT */ @@ -5576,7 +5585,34 @@ int wc_AesSetIV(Aes* aes, const byte* iv) #ifdef HAVE_AES_CBC #if defined(STM32_CRYPTO) -#ifdef WOLFSSL_STM32U5_DHUK +#ifdef WOLFSSL_STM32_BARE + int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + #ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS + if (sz % WC_AES_BLOCK_SIZE) { + return BAD_LENGTH_E; + } + #endif + if (sz == 0) { + return 0; + } + return wc_Stm32_Aes_Cbc(aes, out, in, sz, 1); + } + #ifdef HAVE_AES_DECRYPT + int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + #ifdef WOLFSSL_AES_CBC_LENGTH_CHECKS + if (sz % WC_AES_BLOCK_SIZE) { + return BAD_LENGTH_E; + } + #endif + if (sz == 0) { + return 0; + } + return wc_Stm32_Aes_Cbc(aes, out, in, sz, 0); + } + #endif /* HAVE_AES_DECRYPT */ +#elif defined(WOLFSSL_DHUK) int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) { int ret = 0; @@ -5596,7 +5632,7 @@ int wc_AesSetIV(Aes* aes, const byte* iv) return ret; } - if (aes->devId == WOLFSSL_STM32U5_DHUK_WRAPPED_DEVID) { + if (aes->devId == WOLFSSL_DHUK_WRAPPED_DEVID) { CRYP_ConfigTypeDef Config; XMEMSET(&Config, 0, sizeof(Config)); @@ -5662,7 +5698,7 @@ int wc_AesSetIV(Aes* aes, const byte* iv) return ret; } - if (aes->devId == WOLFSSL_STM32U5_DHUK_WRAPPED_DEVID) { + if (aes->devId == WOLFSSL_DHUK_WRAPPED_DEVID) { CRYP_ConfigTypeDef Config; XMEMSET(&Config, 0, sizeof(Config)); @@ -6956,6 +6992,11 @@ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) int wc_AesCtrEncryptBlock(Aes* aes, byte* out, const byte* in) { + #ifdef WOLFSSL_STM32_BARE + /* CTR per-block transform: ECB-encrypt the counter (passed in + * 'in'); aes.c handles counter increment and XOR with plaintext. 
*/ + return wc_Stm32_Aes_Ecb(aes, out, in, WC_AES_BLOCK_SIZE, 1); + #else int ret = 0; #ifdef WOLFSSL_STM32_CUBEMX CRYP_HandleTypeDef hcryp; @@ -7066,6 +7107,7 @@ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) wolfSSL_CryptHwMutexUnLock(); wc_Stm32_Aes_Cleanup(); return ret; + #endif /* !WOLFSSL_STM32_BARE */ } @@ -10142,6 +10184,15 @@ int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, authIn, authInSz); #endif +#if defined(WOLFSSL_STM32_BARE) && defined(STM32_CRYPTO) + ret = wc_Stm32_Aes_Gcm(aes, out, in, sz, iv, ivSz, + authTag, authTagSz, + authIn, authInSz, 1 /* enc */); + if (ret != WC_NO_ERR_TRACE(CRYPTOCB_UNAVAILABLE)) + return ret; + /* fall through to SW GCM (still uses HW AES via wc_AesEncrypt) */ +#endif /* WOLFSSL_STM32_BARE && STM32_CRYPTO */ + #ifdef STM32_CRYPTO_AES_GCM return wc_AesGcmEncrypt_STM32( aes, out, in, sz, iv, ivSz, @@ -10871,6 +10922,10 @@ int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, #endif + /* BARE: GCM decrypt always uses SW path (with HW AES blocks via + * wc_AesEncrypt). Encrypt is HW-accelerated above; decrypt + tag + * verification stays in well-tested SW for now. 
*/ + #ifdef STM32_CRYPTO_AES_GCM /* The STM standard peripheral library API's doesn't support partial blocks */ return wc_AesGcmDecrypt_STM32( @@ -13695,7 +13750,7 @@ int wc_AesInit(Aes* aes, void* heap, int devId) aes->heap = heap; -#if defined(WOLF_CRYPTO_CB) || defined(WOLFSSL_STM32U5_DHUK) +#if defined(WOLF_CRYPTO_CB) || defined(WOLFSSL_DHUK) aes->devId = devId; aes->devCtx = NULL; #else diff --git a/wolfcrypt/src/ecc.c b/wolfcrypt/src/ecc.c index 8cdfd6f8066..a2625698469 100644 --- a/wolfcrypt/src/ecc.c +++ b/wolfcrypt/src/ecc.c @@ -286,8 +286,12 @@ ECC Curve Sizes: #if !defined(WOLFSSL_ATECC508A) && !defined(WOLFSSL_ATECC608A) && \ !defined(WOLFSSL_CRYPTOCELL) && !defined(WOLFSSL_SILABS_SE_ACCEL) && \ !defined(WOLFSSL_KCAPI_ECC) && !defined(WOLFSSL_SE050) && \ - !defined(WOLFSSL_XILINX_CRYPT_VERSAL) && !defined(WOLFSSL_STM32_PKA) && \ + !defined(WOLFSSL_XILINX_CRYPT_VERSAL) && \ + !(defined(WOLFSSL_STM32_PKA) && !defined(WOLFSSL_STM32_BARE)) && \ !defined(WOLFSSL_PSOC6_CRYPTO) + /* WOLFSSL_STM32_BARE+PKA still uses the SW ECDSA helper paths + * (sign/verify) since the bare-metal driver only implements ECCMul + * HW; the SP-less SW ECDSA fallback then drives that HW. */ #undef HAVE_ECC_VERIFY_HELPER #define HAVE_ECC_VERIFY_HELPER #endif @@ -6947,7 +6951,12 @@ static int deterministic_sign_helper(const byte* in, word32 inlen, ecc_key* key) #endif /* WOLFSSL_ECDSA_DETERMINISTIC_K || WOLFSSL_ECDSA_DETERMINISTIC_K_VARIANT */ -#if defined(WOLFSSL_STM32_PKA) +/* Under WOLFSSL_STM32_BARE the bare-metal PKA driver implements only + * ECCMul HW (the building block used by ECDH and the SP-less SW ECDSA + * path). HW ECDSA sign/verify is intentionally not wired up in v1 of + * the bare driver -- fall back to the standard SW ECDSA which itself + * calls wc_ecc_mulmod_ex2() (HW-accelerated). 
*/ +#if defined(WOLFSSL_STM32_PKA) && !defined(WOLFSSL_STM32_BARE) int wc_ecc_sign_hash_ex(const byte* in, word32 inlen, WC_RNG* rng, ecc_key* key, mp_int *r, mp_int *s) { @@ -8763,7 +8772,8 @@ int wc_ecc_verify_hash(const byte* sig, word32 siglen, const byte* hash, #ifndef WOLF_CRYPTO_CB_ONLY_ECC -#if !defined(WOLFSSL_STM32_PKA) && !defined(WOLFSSL_PSOC6_CRYPTO) && \ +#if !(defined(WOLFSSL_STM32_PKA) && !defined(WOLFSSL_STM32_BARE)) && \ + !defined(WOLFSSL_PSOC6_CRYPTO) && \ !defined(WOLF_CRYPTO_CB_ONLY_ECC) static int wc_ecc_check_r_s_range(ecc_key* key, mp_int* r, mp_int* s) { @@ -9279,7 +9289,10 @@ static int ecc_verify_hash(mp_int *r, mp_int *s, const byte* hash, int wc_ecc_verify_hash_ex(mp_int *r, mp_int *s, const byte* hash, word32 hashlen, int* res, ecc_key* key) { -#if defined(WOLFSSL_STM32_PKA) +#if defined(WOLFSSL_STM32_PKA) && !defined(WOLFSSL_STM32_BARE) + /* See comment above wc_ecc_sign_hash_ex(): BARE uses SW ECDSA + * verify which internally accelerates the scalar muls via the + * bare-metal HW wc_ecc_mulmod_ex2(). */ return stm32_ecc_verify_hash_ex(r, s, hash, hashlen, res, key); #elif defined(WOLFSSL_PSOC6_CRYPTO) return psoc6_ecc_verify_hash_ex(r, s, hash, hashlen, res, key); diff --git a/wolfcrypt/src/port/st/stm32.c b/wolfcrypt/src/port/st/stm32.c index 644b85634f7..6547735ba48 100644 --- a/wolfcrypt/src/port/st/stm32.c +++ b/wolfcrypt/src/port/st/stm32.c @@ -46,6 +46,13 @@ #ifdef WOLFSSL_STM32_PKA #include +#ifdef WOLFSSL_STM32_BARE +/* Bare-metal: CMSIS device header is pulled in by settings.h. The + * PKA_HandleTypeDef and the PKA_ECC / PKA_ECDSA IO typedefs are + * provided by above. The HAL_PKA_* + * entry points are implemented further down in this file under the + * matching guard. 
*/ +#else #if defined(WOLFSSL_STM32L5) #include #include @@ -76,7 +83,101 @@ #else #error Please add the hal_pk.h include #endif +#endif /* !WOLFSSL_STM32_BARE */ + +#if defined(WOLFSSL_STM32_BARE) && defined(WOLFSSL_STM32_PKA) + +#include + +/* Bare-metal stand-ins for the slice of HAL surface that wc_ecc_*() and + * the local HAL_PKA_* shims reference. Kept private to this translation + * unit so they don't collide with ST HAL headers in projects that include + * those for non-crypto code. */ +typedef enum { + HAL_OK = 0x00U, + HAL_ERROR = 0x01U, + HAL_BUSY = 0x02U, + HAL_TIMEOUT = 0x03U +} HAL_StatusTypeDef; + +#ifndef HAL_MAX_DELAY +#define HAL_MAX_DELAY 0xFFFFFFFFU +#endif + +typedef struct { + PKA_TypeDef *Instance; +} PKA_HandleTypeDef; + +typedef struct { + uint32_t modulusSize; + uint32_t coefSign; + const uint8_t *coefA; + const uint8_t *coefB; /* V2 only */ + const uint8_t *modulus; + const uint8_t *primeOrder; /* V2 only */ + uint32_t scalarMulSize; + const uint8_t *scalarMul; + const uint8_t *pointX; + const uint8_t *pointY; +} PKA_ECCMulInTypeDef; + +typedef struct { + uint8_t *ptX; + uint8_t *ptY; +} PKA_ECCMulOutTypeDef; + +typedef struct { + uint32_t primeOrderSize; + uint32_t modulusSize; + uint32_t coefSign; + const uint8_t *coef; + const uint8_t *coefB; /* V2 only */ + const uint8_t *modulus; + const uint8_t *basePointX; + const uint8_t *basePointY; + const uint8_t *primeOrder; + const uint8_t *pPubKeyCurvePtX; + const uint8_t *pPubKeyCurvePtY; + const uint8_t *RSign; + const uint8_t *SSign; + const uint8_t *hash; +} PKA_ECDSAVerifInTypeDef; + +typedef struct { + uint32_t primeOrderSize; + uint32_t modulusSize; + uint32_t coefSign; + const uint8_t *coef; + const uint8_t *coefB; /* V2 only */ + const uint8_t *modulus; + const uint8_t *basePointX; + const uint8_t *basePointY; + const uint8_t *primeOrder; + const uint8_t *hash; + const uint8_t *integer; + const uint8_t *privateKey; +} PKA_ECDSASignInTypeDef; + +typedef struct { + uint8_t *RSign; + 
uint8_t *SSign; +} PKA_ECDSASignOutTypeDef; + +typedef struct { + uint8_t *ptX; + uint8_t *ptY; +} PKA_ECDSASignOutExtParamTypeDef; + +#endif /* WOLFSSL_STM32_BARE && WOLFSSL_STM32_PKA */ + +#ifdef WOLFSSL_STM32_BARE +/* Provide the global PKA handle that the wc_ecc_mulmod_ex2() and + * stm32_ecc_*_hash_ex() paths reference via &hpka. Under HAL builds, + * the application supplies this; under BARE we own it (file-local). */ +static PKA_HandleTypeDef hpka = { 0 }; +#else extern PKA_HandleTypeDef hpka; +#endif #if !defined(WOLFSSL_STM32_PKA_V2) && defined(PKA_ECC_SCALAR_MUL_IN_B_COEFF) /* PKA hardware like in U5 added coefB and primeOrder */ @@ -92,6 +193,508 @@ extern PKA_HandleTypeDef hpka; #define WOLFSSL_HAVE_ECC_KEY_GET_PRIV #endif #endif /* HAVE_ECC */ + +/* ------------------------------------------------------------------------ + * Bare-metal HAL_PKA_* shims + * + * These provide just the slice of the ST HAL surface that the existing + * wolfssl PKA path (below) calls into. Direct register access only; no + * HAL/StdPeriph dependency. Reference: STM32WBxx_HAL_Driver/Src/ + * stm32wbxx_hal_pka.c (PKA_ECCMul_Set, HAL_PKA_ECCMul, PKA_Process, + * HAL_PKA_ECCMul_GetResult, etc). + * + * Layout-wise this matches the V1 PKA (WB55, WL, MP13). For V2 PKA (H5, + * U5 with PKA, WBA), the additional input slots (coefB, primeOrder, + * pointCheck) live at different word offsets but the start sequence and + * the SR/CLRFR bit names are identical, so the same code path applies + * once WOLFSSL_STM32_PKA_V2 is set (auto-detected via the device-header + * macro PKA_ECC_SCALAR_MUL_IN_B_COEFF below). + * --------------------------------------------------------------------- */ +#ifdef WOLFSSL_STM32_BARE + +/* PKA RAM occupies addresses PKA_BASE+0x400 .. PKA_BASE+0x11F4 on V1 and + * a slightly larger window on V2. The CMSIS device header sizes the + * RAM[] array correctly for the part. 
*/ +#ifndef PKA_RAM_PARAM_END +/* HAL writes one zero word past the last valid byte (microcode reads + * the parameter until it hits a zero word). */ +#define PKA_RAM_PARAM_END(RAM, IDX) \ + do { (RAM)[(IDX)] = 0UL; } while (0) +#endif + +/* Mode encoding constants (from stm32wbxx_hal_pka.h and equivalent). + * Same numeric values across V1 and V2. */ +#ifndef PKA_MODE_ECC_MUL +#define PKA_MODE_ECC_MUL (0x00000020U) +#endif +#ifndef PKA_MODE_ECDSA_VERIFICATION +#define PKA_MODE_ECDSA_VERIFICATION (0x00000026U) +#endif +#ifndef PKA_MODE_ECDSA_SIGNATURE +#define PKA_MODE_ECDSA_SIGNATURE (0x00000024U) +#endif + +/* Number of word slots in the PKA RAM array (per the CMSIS device + * header; e.g. 894 on WB55 V1). */ +#define WC_STM32_PKA_RAM_WORDS \ + (sizeof(((PKA_TypeDef*)0)->RAM) / sizeof(((PKA_TypeDef*)0)->RAM[0])) + +/* Big-endian byte buffer -> PKA RAM (little-endian word order). The + * destination is the PKA RAM slot indexed by 'word_idx'; n is the byte + * count of the source. Mirrors PKA_Memcpy_u8_to_u32 in the HAL. */ +static void wc_stm32_pka_load_be(volatile uint32_t* dst, const uint8_t* src, + uint32_t n) +{ + uint32_t index = 0; + if (dst == NULL || src == NULL) return; + + for (; index < (n / 4U); index++) { + dst[index] = + ((uint32_t)src[(n - (index * 4U) - 1U)]) | + ((uint32_t)src[(n - (index * 4U) - 2U)] << 8) | + ((uint32_t)src[(n - (index * 4U) - 3U)] << 16) | + ((uint32_t)src[(n - (index * 4U) - 4U)] << 24); + } + if ((n % 4U) == 1U) { + dst[index] = (uint32_t)src[(n - (index * 4U) - 1U)]; + } + else if ((n % 4U) == 2U) { + dst[index] = + ((uint32_t)src[(n - (index * 4U) - 1U)]) | + ((uint32_t)src[(n - (index * 4U) - 2U)] << 8); + } + else if ((n % 4U) == 3U) { + dst[index] = + ((uint32_t)src[(n - (index * 4U) - 1U)]) | + ((uint32_t)src[(n - (index * 4U) - 2U)] << 8) | + ((uint32_t)src[(n - (index * 4U) - 3U)] << 16); + } +} + +/* PKA RAM (little-endian word order) -> big-endian byte buffer. 
*/ +static void wc_stm32_pka_read_be(uint8_t* dst, volatile const uint32_t* src, + uint32_t n) +{ + uint32_t i = 0; + if (dst == NULL || src == NULL) return; + + for (; i < (n / 4U); i++) { + uint32_t off = n - 4U - (i * 4U); + dst[off + 3U] = (uint8_t)((src[i] ) & 0xFFU); + dst[off + 2U] = (uint8_t)((src[i] >> 8) & 0xFFU); + dst[off + 1U] = (uint8_t)((src[i] >> 16) & 0xFFU); + dst[off + 0U] = (uint8_t)((src[i] >> 24) & 0xFFU); + } + if ((n % 4U) == 1U) { + dst[0U] = (uint8_t)(src[i] & 0xFFU); + } + else if ((n % 4U) == 2U) { + dst[1U] = (uint8_t)((src[i] ) & 0xFFU); + dst[0U] = (uint8_t)((src[i] >> 8) & 0xFFU); + } + else if ((n % 4U) == 3U) { + dst[2U] = (uint8_t)((src[i] ) & 0xFFU); + dst[1U] = (uint8_t)((src[i] >> 8) & 0xFFU); + dst[0U] = (uint8_t)((src[i] >> 16) & 0xFFU); + } +} + +/* Optimal bit-size: bytes * 8 minus the leading-zero count of the MSB + * (matches PKA_GetOptBitSize_u8 in the HAL). */ +static uint32_t wc_stm32_pka_optbits(uint32_t byteNumber, uint8_t msb) +{ + uint32_t pos = 0; + uint32_t v = msb; + while (v != 0U) { + v >>= 1; + pos++; + } + if (byteNumber == 0U) { + return 0U; + } + return ((byteNumber - 1U) * 8U) + pos; +} + +static HAL_StatusTypeDef HAL_PKA_Init(PKA_HandleTypeDef *hpkah) +{ + if (hpkah == NULL) { + return HAL_ERROR; + } + if (hpkah->Instance == NULL) { + hpkah->Instance = PKA; + } + +#ifdef WC_STM32_PKA_CLK_ENABLE + WC_STM32_PKA_CLK_ENABLE(); +#endif + + /* Reset CR, enable the PKA, clear any pending flags. */ + hpkah->Instance->CR = PKA_CR_EN; + hpkah->Instance->CLRFR = PKA_CLRFR_PROCENDFC | PKA_CLRFR_RAMERRFC | + PKA_CLRFR_ADDRERRFC; + return HAL_OK; +} + +/* Lazy one-shot init helper. Safe to call from every entry point. 
*/ +static void wc_stm32_pka_ensure_init(PKA_HandleTypeDef *hpkah) +{ + if (hpkah == NULL) return; + if (hpkah->Instance == NULL) { + (void)HAL_PKA_Init(hpkah); + } +} + +static void HAL_PKA_RAMReset(PKA_HandleTypeDef *hpkah) +{ + uint32_t i; + if (hpkah == NULL || hpkah->Instance == NULL) return; + for (i = 0; i < WC_STM32_PKA_RAM_WORDS; i++) { + hpkah->Instance->RAM[i] = 0UL; + } +} + +/* Generic start-and-poll sequence with bounded timeout. The default + * spin budget covers a P-521 scalar mul on a slow PKA (worst case on + * the parts wolfSSL targets is ~2 sec; the budget here is well above + * that). Override at compile time via WC_STM32_PKA_TIMEOUT_LOOPS. */ +#ifndef WC_STM32_PKA_TIMEOUT_LOOPS +#define WC_STM32_PKA_TIMEOUT_LOOPS 0x10000000U +#endif + +static HAL_StatusTypeDef wc_stm32_pka_process(PKA_HandleTypeDef *hpkah, + uint32_t mode) +{ + PKA_TypeDef *p; + uint32_t cr, t; + + if (hpkah == NULL || hpkah->Instance == NULL) { + return HAL_ERROR; + } + p = hpkah->Instance; + + /* PKA must be enabled before MODE/START are written. */ + if ((p->CR & PKA_CR_EN) == 0U) { + p->CR = PKA_CR_EN; + } + + /* Update the mode field in CR; clear interrupt enables. */ + cr = p->CR; + cr &= ~(PKA_CR_MODE | PKA_CR_PROCENDIE | PKA_CR_RAMERRIE | PKA_CR_ADDRERRIE); + cr |= (mode << PKA_CR_MODE_Pos) & PKA_CR_MODE; + p->CR = cr; + + /* Start the operation. */ + p->CR = cr | PKA_CR_START; + + /* Wait for end-of-operation flag, OR an error flag, OR timeout. */ + t = 0; + while ((p->SR & PKA_SR_PROCENDF) == 0U) { + if ((p->SR & (PKA_SR_RAMERRF | PKA_SR_ADDRERRF)) != 0U) { + p->CLRFR = PKA_CLRFR_PROCENDFC | PKA_CLRFR_RAMERRFC | + PKA_CLRFR_ADDRERRFC; + return HAL_ERROR; + } + if (++t >= WC_STM32_PKA_TIMEOUT_LOOPS) { + p->CLRFR = PKA_CLRFR_PROCENDFC | PKA_CLRFR_RAMERRFC | + PKA_CLRFR_ADDRERRFC; + return HAL_TIMEOUT; + } + } + + /* Clear all status flags. 
*/ + p->CLRFR = PKA_CLRFR_PROCENDFC | PKA_CLRFR_RAMERRFC | PKA_CLRFR_ADDRERRFC; + + return HAL_OK; +} + +static HAL_StatusTypeDef HAL_PKA_ECCMul(PKA_HandleTypeDef *hpkah, + PKA_ECCMulInTypeDef *in, uint32_t Timeout) +{ + volatile uint32_t *RAM; + + (void)Timeout; + if (hpkah == NULL || in == NULL) { + return HAL_ERROR; + } + wc_stm32_pka_ensure_init(hpkah); + if (hpkah->Instance == NULL) { + return HAL_ERROR; + } + RAM = hpkah->Instance->RAM; + + /* Scalar 'k' bit length, modulus bit length, and 'a' coefficient + * sign indicator -- exactly as the HAL writes them. */ + RAM[PKA_ECC_SCALAR_MUL_IN_EXP_NB_BITS] = + wc_stm32_pka_optbits(in->scalarMulSize, *(in->scalarMul)); + RAM[PKA_ECC_SCALAR_MUL_IN_OP_NB_BITS] = + wc_stm32_pka_optbits(in->modulusSize, *(in->modulus)); + RAM[PKA_ECC_SCALAR_MUL_IN_A_COEFF_SIGN] = in->coefSign; + + /* |a|, modulus p, scalar k, base point (X,Y). */ + wc_stm32_pka_load_be(&RAM[PKA_ECC_SCALAR_MUL_IN_A_COEFF], + in->coefA, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECC_SCALAR_MUL_IN_A_COEFF + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECC_SCALAR_MUL_IN_MOD_GF], + in->modulus, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECC_SCALAR_MUL_IN_MOD_GF + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECC_SCALAR_MUL_IN_K], + in->scalarMul, in->scalarMulSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECC_SCALAR_MUL_IN_K + ((in->scalarMulSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECC_SCALAR_MUL_IN_INITIAL_POINT_X], + in->pointX, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECC_SCALAR_MUL_IN_INITIAL_POINT_X + + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECC_SCALAR_MUL_IN_INITIAL_POINT_Y], + in->pointY, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECC_SCALAR_MUL_IN_INITIAL_POINT_Y + + ((in->modulusSize + 3U) / 4U)); + +#ifdef WOLFSSL_STM32_PKA_V2 + /* V2 PKA also requires the curve order n and 'b' coefficient. 
*/ + if (in->coefB != NULL) { + wc_stm32_pka_load_be(&RAM[PKA_ECC_SCALAR_MUL_IN_B_COEFF], + in->coefB, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECC_SCALAR_MUL_IN_B_COEFF + + ((in->modulusSize + 3U) / 4U)); + } + if (in->primeOrder != NULL) { + wc_stm32_pka_load_be(&RAM[PKA_ECC_SCALAR_MUL_IN_N_PRIME_ORDER], + in->primeOrder, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECC_SCALAR_MUL_IN_N_PRIME_ORDER + + ((in->modulusSize + 3U) / 4U)); + } +#endif /* WOLFSSL_STM32_PKA_V2 */ + + return wc_stm32_pka_process(hpkah, PKA_MODE_ECC_MUL); +} + +static void HAL_PKA_ECCMul_GetResult(PKA_HandleTypeDef *hpkah, + PKA_ECCMulOutTypeDef *out) +{ + uint32_t size; + volatile const uint32_t *RAM; + + if (hpkah == NULL || hpkah->Instance == NULL || out == NULL) return; + RAM = hpkah->Instance->RAM; + + /* The HAL recomputes the byte size from the saved IN_OP_NB_BITS + * slot. We do the same. */ + size = (RAM[PKA_ECC_SCALAR_MUL_IN_OP_NB_BITS] + 7U) / 8U; + + if (out->ptX != NULL) { + wc_stm32_pka_read_be(out->ptX, + &RAM[PKA_ECC_SCALAR_MUL_OUT_RESULT_X], size); + } + if (out->ptY != NULL) { + wc_stm32_pka_read_be(out->ptY, + &RAM[PKA_ECC_SCALAR_MUL_OUT_RESULT_Y], size); + } +} + +static HAL_StatusTypeDef HAL_PKA_ECDSAVerif(PKA_HandleTypeDef *hpkah, + PKA_ECDSAVerifInTypeDef *in, uint32_t Timeout) +{ + volatile uint32_t *RAM; + + (void)Timeout; + if (hpkah == NULL || in == NULL) { + return HAL_ERROR; + } + wc_stm32_pka_ensure_init(hpkah); + if (hpkah->Instance == NULL) { + return HAL_ERROR; + } + RAM = hpkah->Instance->RAM; + + RAM[PKA_ECDSA_VERIF_IN_ORDER_NB_BITS] = + wc_stm32_pka_optbits(in->primeOrderSize, *(in->primeOrder)); + RAM[PKA_ECDSA_VERIF_IN_MOD_NB_BITS] = + wc_stm32_pka_optbits(in->modulusSize, *(in->modulus)); + RAM[PKA_ECDSA_VERIF_IN_A_COEFF_SIGN] = in->coefSign; + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_A_COEFF], + in->coef, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_A_COEFF + ((in->modulusSize + 3U) / 4U)); + + 
wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_MOD_GF], + in->modulus, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_MOD_GF + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_INITIAL_POINT_X], + in->basePointX, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_INITIAL_POINT_X + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_INITIAL_POINT_Y], + in->basePointY, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_INITIAL_POINT_Y + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_PUBLIC_KEY_POINT_X], + in->pPubKeyCurvePtX, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_PUBLIC_KEY_POINT_X + + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_PUBLIC_KEY_POINT_Y], + in->pPubKeyCurvePtY, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_PUBLIC_KEY_POINT_Y + + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_SIGNATURE_R], + in->RSign, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_SIGNATURE_R + ((in->primeOrderSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_SIGNATURE_S], + in->SSign, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_SIGNATURE_S + ((in->primeOrderSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_HASH_E], + in->hash, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_HASH_E + ((in->primeOrderSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_VERIF_IN_ORDER_N], + in->primeOrder, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_VERIF_IN_ORDER_N + ((in->primeOrderSize + 3U) / 4U)); + + return wc_stm32_pka_process(hpkah, PKA_MODE_ECDSA_VERIFICATION); +} + +static uint32_t HAL_PKA_ECDSAVerif_IsValidSignature(PKA_HandleTypeDef const *const hpkah) +{ + if (hpkah == NULL || hpkah->Instance == NULL) return 0U; + /* HAL semantic: 
PKA_ECDSA_VERIF_OUT_RESULT == 0 means valid. */ + return (hpkah->Instance->RAM[PKA_ECDSA_VERIF_OUT_RESULT] == 0UL) ? 1U : 0U; +} + +static HAL_StatusTypeDef HAL_PKA_ECDSASign(PKA_HandleTypeDef *hpkah, + PKA_ECDSASignInTypeDef *in, uint32_t Timeout) +{ + volatile uint32_t *RAM; + HAL_StatusTypeDef st; + + (void)Timeout; + if (hpkah == NULL || in == NULL) { + return HAL_ERROR; + } + wc_stm32_pka_ensure_init(hpkah); + if (hpkah->Instance == NULL) { + return HAL_ERROR; + } + RAM = hpkah->Instance->RAM; + + RAM[PKA_ECDSA_SIGN_IN_ORDER_NB_BITS] = + wc_stm32_pka_optbits(in->primeOrderSize, *(in->primeOrder)); + RAM[PKA_ECDSA_SIGN_IN_MOD_NB_BITS] = + wc_stm32_pka_optbits(in->modulusSize, *(in->modulus)); + RAM[PKA_ECDSA_SIGN_IN_A_COEFF_SIGN] = in->coefSign; + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_A_COEFF], + in->coef, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_SIGN_IN_A_COEFF + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_MOD_GF], + in->modulus, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_SIGN_IN_MOD_GF + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_K], + in->integer, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_SIGN_IN_K + ((in->primeOrderSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_INITIAL_POINT_X], + in->basePointX, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_SIGN_IN_INITIAL_POINT_X + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_INITIAL_POINT_Y], + in->basePointY, in->modulusSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_SIGN_IN_INITIAL_POINT_Y + ((in->modulusSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_HASH_E], + in->hash, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_SIGN_IN_HASH_E + ((in->primeOrderSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_PRIVATE_KEY_D], + in->privateKey, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + 
PKA_ECDSA_SIGN_IN_PRIVATE_KEY_D + ((in->primeOrderSize + 3U) / 4U)); + + wc_stm32_pka_load_be(&RAM[PKA_ECDSA_SIGN_IN_ORDER_N], + in->primeOrder, in->primeOrderSize); + PKA_RAM_PARAM_END(RAM, + PKA_ECDSA_SIGN_IN_ORDER_N + ((in->primeOrderSize + 3U) / 4U)); + + st = wc_stm32_pka_process(hpkah, PKA_MODE_ECDSA_SIGNATURE); + if (st != HAL_OK) { + return st; + } + /* Sign reports failure via PKA_ECDSA_SIGN_OUT_ERROR != 0 (e.g. when + * the random k is unsuitable). The caller is expected to retry with + * a fresh k. */ + if (RAM[PKA_ECDSA_SIGN_OUT_ERROR] != 0UL) { + return HAL_ERROR; + } + return HAL_OK; +} + +static void HAL_PKA_ECDSASign_GetResult(PKA_HandleTypeDef *hpkah, + PKA_ECDSASignOutTypeDef *out, + PKA_ECDSASignOutExtParamTypeDef *outExt) +{ + uint32_t size; + volatile const uint32_t *RAM; + + if (hpkah == NULL || hpkah->Instance == NULL) return; + RAM = hpkah->Instance->RAM; + size = (RAM[PKA_ECDSA_SIGN_IN_MOD_NB_BITS] + 7U) / 8U; + + if (out != NULL) { + if (out->RSign != NULL) { + wc_stm32_pka_read_be(out->RSign, + &RAM[PKA_ECDSA_SIGN_OUT_SIGNATURE_R], size); + } + if (out->SSign != NULL) { + wc_stm32_pka_read_be(out->SSign, + &RAM[PKA_ECDSA_SIGN_OUT_SIGNATURE_S], size); + } + } + if (outExt != NULL) { + if (outExt->ptX != NULL) { + wc_stm32_pka_read_be(outExt->ptX, + &RAM[PKA_ECDSA_SIGN_OUT_FINAL_POINT_X], size); + } + if (outExt->ptY != NULL) { + wc_stm32_pka_read_be(outExt->ptY, + &RAM[PKA_ECDSA_SIGN_OUT_FINAL_POINT_Y], size); + } + } +} + +#endif /* WOLFSSL_STM32_BARE */ + #endif /* WOLFSSL_STM32_PKA */ @@ -99,11 +702,17 @@ extern PKA_HandleTypeDef hpka; /* #define DEBUG_STM32_HASH */ +#if defined(WOLFSSL_STM32_BARE) && !defined(WC_STM32_HASH_CLK_ENABLE) + #error "WOLFSSL_STM32_BARE: HASH clock-enable not mapped for this STM32 family. Add WC_STM32_HASH_CLK_ENABLE() to wolfssl/wolfcrypt/port/st/stm32.h, or define NO_STM32_HASH." 
+#endif + /* User can override STM32_HASH_CLOCK_ENABLE and STM32_HASH_CLOCK_DISABLE */ #ifndef STM32_HASH_CLOCK_ENABLE static WC_INLINE void wc_Stm32_Hash_Clock_Enable(STM32_HASH_Context* stmCtx) { - #ifdef WOLFSSL_STM32_CUBEMX + #if defined(WOLFSSL_STM32_BARE) + WC_STM32_HASH_CLK_ENABLE(); + #elif defined(WOLFSSL_STM32_CUBEMX) __HAL_RCC_HASH_CLK_ENABLE(); #else RCC_AHB2PeriphClockCmd(RCC_AHB2Periph_HASH, ENABLE); @@ -116,7 +725,9 @@ extern PKA_HandleTypeDef hpka; #ifndef STM32_HASH_CLOCK_DISABLE static WC_INLINE void wc_Stm32_Hash_Clock_Disable(STM32_HASH_Context* stmCtx) { - #ifdef WOLFSSL_STM32_CUBEMX + #if defined(WOLFSSL_STM32_BARE) + WC_STM32_HASH_CLK_DISABLE(); + #elif defined(WOLFSSL_STM32_CUBEMX) __HAL_RCC_HASH_CLK_DISABLE(); #else RCC_AHB2PeriphClockCmd(RCC_AHB2Periph_HASH, DISABLE); @@ -224,9 +835,16 @@ static void wc_Stm32_Hash_GetDigest(byte* hash, int digestSize) sz = digestSize; while (sz > 0) { - /* first 20 bytes come from instance HR */ + /* first 20 bytes come from the instance digest registers. The CMSIS + * device header for the H5 family renames this from HR[5] to HRA[5] + * (extended HASH IP that adds a separate HASH_DIGEST->HR[16] for the + * full digest); the older F4/F7/L4 layout still uses HR[5]. */ if (i < 5) { + #if defined(WOLFSSL_STM32H5) + digest[i] = HASH->HRA[i]; + #else digest[i] = HASH->HR[i]; + #endif } #ifdef HASH_DIGEST /* reset comes from HASH_DIGEST */ @@ -643,9 +1261,860 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, #ifdef STM32_CRYPTO #ifndef NO_AES -#ifdef WOLFSSL_STM32_CUBEMX +#ifdef WOLFSSL_STM32_BARE + +#if !defined(WC_STM32_AES_CLK_ENABLE) + #error "WOLFSSL_STM32_BARE: AES clock-enable not mapped for this STM32 family. Add WC_STM32_AES_CLK_ENABLE() to wolfssl/wolfcrypt/port/st/stm32.h, or define NO_STM32_CRYPTO." +#endif + +/* ===== Bare-metal direct-register AES driver ===== + * No HAL or StdPeriph. 
Two IP variants: + * - CRYP (FIFO-based): F2/F4/F7/H7/MP13 + * - AES/SAES (TinyAES): L4/L5/U5/H573/G0/G4/WB/WL/WBA + * Variant selected via family ifdefs below. */ + +#if defined(WOLFSSL_STM32F2) || defined(WOLFSSL_STM32F4) || \ + defined(WOLFSSL_STM32F7) || defined(WOLFSSL_STM32H7) || \ + defined(WOLFSSL_STM32MP13) +/* ----- CRYP IP (FIFO-based) ----- */ + +#ifndef STM32_BARE_AES_TIMEOUT + #define STM32_BARE_AES_TIMEOUT 0x10000 +#endif + +/* DATATYPE = 10b (byte) so CRYP byte-swaps DR/DOUT for us; key/IV regs are + * still big-endian. Key arrives pre-reversed via wc_AesSetKey (aes.c:4161); + * IV is byte-reversed locally before write. */ +#define STM32_CRYP_DATATYPE_BYTE CRYP_CR_DATATYPE_1 + +static int Stm32AesWaitBusy(void) +{ + int t = 0; + while ((CRYP->SR & CRYP_SR_BUSY) != 0) { + if (++t >= STM32_BARE_AES_TIMEOUT) { + return WC_TIMEOUT_E; + } + } + return 0; +} + +static int Stm32AesWaitInNotFull(void) +{ + int t = 0; + while ((CRYP->SR & CRYP_SR_IFNF) == 0) { + if (++t >= STM32_BARE_AES_TIMEOUT) { + return WC_TIMEOUT_E; + } + } + return 0; +} + +static int Stm32AesWaitOutNotEmpty(void) +{ + int t = 0; + while ((CRYP->SR & CRYP_SR_OFNE) == 0) { + if (++t >= STM32_BARE_AES_TIMEOUT) { + return WC_TIMEOUT_E; + } + } + return 0; +} + +static word32 Stm32AesKeySizeBits(word32 keyLen) +{ + if (keyLen == 24) { + return CRYP_CR_KEYSIZE_0; /* 192-bit */ + } + if (keyLen == 32) { + return CRYP_CR_KEYSIZE_1; /* 256-bit */ + } + return 0; /* 128-bit */ +} + +/* aes->key is pre-byte-reversed by wc_AesSetKey under BARE (aes.c:4161), + * so the key words go straight into the K registers in big-endian form. 
*/ +static void Stm32AesLoadKey(const word32* key, word32 keyLen) +{ + if (keyLen == 16) { + CRYP->K2LR = key[0]; CRYP->K2RR = key[1]; + CRYP->K3LR = key[2]; CRYP->K3RR = key[3]; + } + else if (keyLen == 24) { + CRYP->K1LR = key[0]; CRYP->K1RR = key[1]; + CRYP->K2LR = key[2]; CRYP->K2RR = key[3]; + CRYP->K3LR = key[4]; CRYP->K3RR = key[5]; + } + else { /* 32 */ + CRYP->K0LR = key[0]; CRYP->K0RR = key[1]; + CRYP->K1LR = key[2]; CRYP->K1RR = key[3]; + CRYP->K2LR = key[4]; CRYP->K2RR = key[5]; + CRYP->K3LR = key[6]; CRYP->K3RR = key[7]; + } +} + +/* aes->reg (IV) is NOT pre-reversed by wc_AesSetIV, so byte-reverse here so + * the IV registers see big-endian words. */ +static void Stm32AesLoadIV(const byte* iv, word32 ivLen) +{ + word32 v[4]; + word32 copyLen = (ivLen > 16) ? 16 : ivLen; + + XMEMSET(v, 0, sizeof(v)); + if (iv != NULL && copyLen > 0) { + XMEMCPY(v, iv, copyLen); + ByteReverseWords(v, v, 16); + } + CRYP->IV0LR = v[0]; CRYP->IV0RR = v[1]; + CRYP->IV1LR = v[2]; CRYP->IV1RR = v[3]; +} + +/* Push 4 input words then drain 4 output words. */ +static int Stm32AesXferBlock(const byte* in, byte* out) +{ + int ret; + word32 i; + word32 buf[WC_AES_BLOCK_SIZE/sizeof(word32)]; + + /* Local word-aligned copy so callers may pass byte-aligned ptrs. */ + XMEMCPY(buf, in, WC_AES_BLOCK_SIZE); + + for (i = 0; i < 4; i++) { + ret = Stm32AesWaitInNotFull(); + if (ret != 0) { + return ret; + } + CRYP->DIN = buf[i]; + } + for (i = 0; i < 4; i++) { + ret = Stm32AesWaitOutNotEmpty(); + if (ret != 0) { + return ret; + } + buf[i] = CRYP->DOUT; + } + XMEMCPY(out, buf, WC_AES_BLOCK_SIZE); + return 0; +} + +/* CBC/ECB decrypt requires a key-prep pass first (per F4/H7 reference manual: + * load key, run ALGOMODE=AES_KEY, wait BUSY=0, then start the actual op). 
*/ +static int Stm32AesPrepareKey(word32 keyLen) +{ + int ret; + + CRYP->CR = CRYP_CR_ALGOMODE_AES_KEY | + STM32_CRYP_DATATYPE_BYTE | + Stm32AesKeySizeBits(keyLen); + CRYP->CR |= CRYP_CR_CRYPEN; + ret = Stm32AesWaitBusy(); + CRYP->CR &= ~CRYP_CR_CRYPEN; + return ret; +} + +int wc_Stm32_Aes_Ecb(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc) +{ + int ret; + word32 keyLen, blocks, b; + word32 cr; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + if (sz == 0 || (sz % WC_AES_BLOCK_SIZE) != 0) { + return BAD_FUNC_ARG; + } + + ret = wc_AesGetKeySize(aes, &keyLen); + if (ret != 0) { + return ret; + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + WC_STM32_AES_CLK_ENABLE(); + + Stm32AesLoadKey(aes->key, keyLen); + if (!isEnc) { + ret = Stm32AesPrepareKey(keyLen); + if (ret != 0) { + goto exit; + } + } + + cr = CRYP_CR_ALGOMODE_AES_ECB | + STM32_CRYP_DATATYPE_BYTE | + Stm32AesKeySizeBits(keyLen); + if (!isEnc) { + cr |= CRYP_CR_ALGODIR; + } + CRYP->CR = cr; + CRYP->CR |= CRYP_CR_FFLUSH; + CRYP->CR |= CRYP_CR_CRYPEN; + + blocks = sz / WC_AES_BLOCK_SIZE; + for (b = 0; b < blocks; b++) { + ret = Stm32AesXferBlock(in + b * WC_AES_BLOCK_SIZE, + out + b * WC_AES_BLOCK_SIZE); + if (ret != 0) { + break; + } + } + +exit: + CRYP->CR &= ~CRYP_CR_CRYPEN; + wolfSSL_CryptHwMutexUnLock(); + return ret; +} + +int wc_Stm32_Aes_Cbc(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc) +{ + int ret; + word32 keyLen, blocks, b; + word32 cr; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + if (sz == 0 || (sz % WC_AES_BLOCK_SIZE) != 0) { + return BAD_FUNC_ARG; + } + + ret = wc_AesGetKeySize(aes, &keyLen); + if (ret != 0) { + return ret; + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + WC_STM32_AES_CLK_ENABLE(); + + Stm32AesLoadKey(aes->key, keyLen); + if (!isEnc) { + ret = Stm32AesPrepareKey(keyLen); + if (ret != 0) { + goto exit; + } + 
} + Stm32AesLoadIV((const byte*)aes->reg, WC_AES_BLOCK_SIZE); + + cr = CRYP_CR_ALGOMODE_AES_CBC | + STM32_CRYP_DATATYPE_BYTE | + Stm32AesKeySizeBits(keyLen); + if (!isEnc) { + cr |= CRYP_CR_ALGODIR; + } + CRYP->CR = cr; + CRYP->CR |= CRYP_CR_FFLUSH; + CRYP->CR |= CRYP_CR_CRYPEN; + + /* For in-place decrypt (out == in) the block loop overwrites the + * source ciphertext, so the next-IV ciphertext block has to be + * captured before processing starts. */ + if (!isEnc) { + XMEMCPY(aes->tmp, in + sz - WC_AES_BLOCK_SIZE, WC_AES_BLOCK_SIZE); + } + + blocks = sz / WC_AES_BLOCK_SIZE; + for (b = 0; b < blocks; b++) { + ret = Stm32AesXferBlock(in + b * WC_AES_BLOCK_SIZE, + out + b * WC_AES_BLOCK_SIZE); + if (ret != 0) { + break; + } + } + + if (ret == 0) { + /* Update aes->reg with new IV (last cipher block for enc; saved + * pre-loop ciphertext for dec). aes.c CBC dispatcher expects + * aes->reg updated for the next call. */ + if (isEnc) { + XMEMCPY(aes->reg, out + (blocks - 1) * WC_AES_BLOCK_SIZE, + WC_AES_BLOCK_SIZE); + } + else { + XMEMCPY(aes->reg, aes->tmp, WC_AES_BLOCK_SIZE); + } + } + +exit: + CRYP->CR &= ~CRYP_CR_CRYPEN; + wolfSSL_CryptHwMutexUnLock(); + return ret; +} + +/* CTR: handled via the ECB-as-transform path in aes.c (XTRANSFORM_AESCTRBLOCK). + * Each per-block ECB call comes through wc_Stm32_Aes_Ecb above; aes.c manages + * the counter and the XOR with plaintext. */ + +/* === HW GCM (CRYP IP phase machine) =========================================== + * Native HW GCM for the case the CRYP IP supports directly: + * - IV is 96 bits (12 bytes) -- the standard GCM IV + * - AAD and PT lengths are whole 16-byte blocks (no partial last block) + * Returns CRYPTOCB_UNAVAILABLE for unsupported parameter combos, so the + * caller (aes.c BARE GCM dispatcher) falls back to SW GHASH + HW ECB. 
*/ +static int Stm32AesXferDiscardOut(const byte* in) +{ + int ret; + word32 i; + word32 buf[WC_AES_BLOCK_SIZE/sizeof(word32)]; + + XMEMCPY(buf, in, WC_AES_BLOCK_SIZE); + for (i = 0; i < 4; i++) { + ret = Stm32AesWaitInNotFull(); + if (ret != 0) { + return ret; + } + CRYP->DIN = buf[i]; + } + return Stm32AesWaitBusy(); +} + +int wc_Stm32_Aes_Gcm(struct Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* tag, word32 tagSz, + const byte* aad, word32 aadSz, int isEnc) +{ + int ret, t; + word32 keyLen, b, blocks, i; + word32 cr_base; + word32 ivBuf[4]; + word32 hwTag[4]; + word64 aadBits, ptBits; + word32 aadBitsHi, aadBitsLo, ptBitsHi, ptBitsLo; + + /* Argument and capability check. Buffers may be NULL only when their + * length is zero; aad is dereferenced in the header phase. */ + if (aes == NULL || iv == NULL || tag == NULL || + (aadSz > 0 && aad == NULL) || + (sz > 0 && (in == NULL || out == NULL))) { + return BAD_FUNC_ARG; + } + /* HW only supports 12-byte IV (J0 = IV || 0x00000001 form) */ + if (ivSz != GCM_NONCE_MID_SZ) { + #ifdef DEBUG_STM32_BARE_GCM + printf("[STM32 BARE GCM] -> SW (ivSz=%u not 12)\n", ivSz); + #endif + return CRYPTOCB_UNAVAILABLE; + } + /* CRYP IP v1 (F4) cannot natively handle partial last blocks. Force + * SW fallback for those cases. AAD partial is handled by HW (we pad + * with zeros and the GHASH is correct because GHASH only uses bitlen). + * PT partial would produce wrong CT bytes -- bail out to SW. */ + if (sz % WC_AES_BLOCK_SIZE != 0) { + #ifdef DEBUG_STM32_BARE_GCM + printf("[STM32 BARE GCM] -> SW (sz=%u not whole-block)\n", sz); + #endif + return CRYPTOCB_UNAVAILABLE; + } +#ifdef DEBUG_STM32_BARE_GCM + printf("[STM32 BARE GCM] -> HW (sz=%u aadSz=%u)\n", sz, aadSz); +#endif + + ret = wc_AesGetKeySize(aes, &keyLen); + if (ret != 0) { + return ret; + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + WC_STM32_AES_CLK_ENABLE(); + + /* Set CR (ALGOMODE=AES-GCM, DATATYPE, KEYSIZE, phase=init) BEFORE + * loading key/IV.
H7 reference HAL sets ALGOMODE first, then writes + * the K and IV registers. Doing it in the other order on H7 produces + * a wrong tag even though CT comes out right. */ + cr_base = CRYP_CR_ALGOMODE_AES_GCM | STM32_CRYP_DATATYPE_BYTE | + Stm32AesKeySizeBits(keyLen); + CRYP->CR = cr_base | (0u << CRYP_CR_GCM_CCMPH_Pos); + + /* Load key (already pre-reversed by wc_AesSetKey under BARE). */ + Stm32AesLoadKey(aes->key, keyLen); + + /* Build the IV register value. CRYP expects the 12-byte IV concatenated + * with the initial 32-bit counter = 0x00000002 (HW pre-increments to 2 + * for the first payload block; at GCMPH=00 init phase HW sets up J0). */ + XMEMSET(ivBuf, 0, 16); + XMEMCPY(ivBuf, iv, 12); + ((byte*)ivBuf)[15] = 0x02; + ByteReverseWords(ivBuf, ivBuf, 16); + CRYP->IV0LR = ivBuf[0]; CRYP->IV0RR = ivBuf[1]; + CRYP->IV1LR = ivBuf[2]; CRYP->IV1RR = ivBuf[3]; + + /* === Phase 1: Init (GCMPH=00) === + * Enable CRYP and wait for CRYPEN to auto-clear -- this is the H7- + * documented mechanism for end-of-init-phase. F4 also auto-clears + * CRYPEN after init phase, so the same wait works on both IPs. */ + CRYP->CR |= CRYP_CR_FFLUSH; + CRYP->CR |= CRYP_CR_CRYPEN; + t = 0; + while ((CRYP->CR & CRYP_CR_CRYPEN) != 0) { + if (++t >= STM32_BARE_AES_TIMEOUT) { + ret = WC_TIMEOUT_E; + goto exit; + } + } + + /* === Phase 2: Header / AAD (GCMPH=01) === + * Whole AAD blocks: feed via DIN, no DOUT to read. Partial last AAD + * block: pad with zeros (AES HW absorbs them; GHASH math uses aadSz + * bits in the final phase to truncate). 
*/ + if (aadSz > 0) { + word32 aadBlocks = aadSz / WC_AES_BLOCK_SIZE; + word32 aadPartial = aadSz % WC_AES_BLOCK_SIZE; + + CRYP->CR = cr_base | (1u << CRYP_CR_GCM_CCMPH_Pos); + CRYP->CR |= CRYP_CR_CRYPEN; + for (b = 0; b < aadBlocks; b++) { + ret = Stm32AesXferDiscardOut(aad + b * WC_AES_BLOCK_SIZE); + if (ret != 0) { + goto exit; + } + } + if (aadPartial > 0) { + byte pad[WC_AES_BLOCK_SIZE]; + XMEMSET(pad, 0, sizeof(pad)); + XMEMCPY(pad, aad + aadBlocks * WC_AES_BLOCK_SIZE, aadPartial); + ret = Stm32AesXferDiscardOut(pad); + if (ret != 0) { + goto exit; + } + } + ret = Stm32AesWaitBusy(); + if (ret != 0) { + goto exit; + } + CRYP->CR &= ~CRYP_CR_CRYPEN; + } + + /* === Phase 3: Payload (GCMPH=10) === */ + if (sz > 0) { + blocks = sz / WC_AES_BLOCK_SIZE; + CRYP->CR = cr_base | (2u << CRYP_CR_GCM_CCMPH_Pos); + if (!isEnc) { + CRYP->CR |= CRYP_CR_ALGODIR; + } + CRYP->CR |= CRYP_CR_CRYPEN; + for (b = 0; b < blocks; b++) { + ret = Stm32AesXferBlock(in + b * WC_AES_BLOCK_SIZE, + out + b * WC_AES_BLOCK_SIZE); + if (ret != 0) { + goto exit; + } + } + ret = Stm32AesWaitBusy(); + if (ret != 0) { + goto exit; + } + CRYP->CR &= ~CRYP_CR_CRYPEN; + } + + /* === Phase 4: Final (GCMPH=11) === + * Feed 64-bit AAD-bit-len then 64-bit PT-bit-len, then read 4 DOUT + * words for the tag. + * + * H7 rev.B+ / MP13 (CRYP_VER_2_2): DIN final-phase writes are taken + * "normally" (HW does the DATATYPE swap). Write plain uint32s. + * + * F2/F4/F7 (older CRYP IP, behaves like H7 rev.A): DATATYPE swap + * does NOT apply to the final-phase length block; SW must pre-swap + * via __REV (byte-reverse the 32-bit value). The two HAL families + * disagree on this and so do their reference drivers -- match each. 
+ */ + aadBits = (word64)aadSz * 8u; + ptBits = (word64)sz * 8u; + aadBitsHi = (word32)(aadBits >> 32); + aadBitsLo = (word32)aadBits; + ptBitsHi = (word32)(ptBits >> 32); + ptBitsLo = (word32)ptBits; +#if defined(WOLFSSL_STM32F2) || defined(WOLFSSL_STM32F4) || \ + defined(WOLFSSL_STM32F7) + aadBitsHi = __REV(aadBitsHi); + aadBitsLo = __REV(aadBitsLo); + ptBitsHi = __REV(ptBitsHi); + ptBitsLo = __REV(ptBitsLo); +#endif + + CRYP->CR = cr_base | (3u << CRYP_CR_GCM_CCMPH_Pos); + CRYP->CR |= CRYP_CR_CRYPEN; + + ret = Stm32AesWaitInNotFull(); if (ret != 0) goto exit; + CRYP->DIN = aadBitsHi; + ret = Stm32AesWaitInNotFull(); if (ret != 0) goto exit; + CRYP->DIN = aadBitsLo; + ret = Stm32AesWaitInNotFull(); if (ret != 0) goto exit; + CRYP->DIN = ptBitsHi; + ret = Stm32AesWaitInNotFull(); if (ret != 0) goto exit; + CRYP->DIN = ptBitsLo; + + for (i = 0; i < 4; i++) { + ret = Stm32AesWaitOutNotEmpty(); + if (ret != 0) { + goto exit; + } + hwTag[i] = CRYP->DOUT; + } + XMEMCPY(tag, hwTag, tagSz < 16 ? tagSz : 16); + +exit: + CRYP->CR &= ~CRYP_CR_CRYPEN; + wolfSSL_CryptHwMutexUnLock(); + return ret; +} + +#else /* TinyAES IP (L4/L5/U5/H5/H573/G0/G4/WB/WL/WBA) */ + +/* ----- TinyAES IP (single-register, polled) ----- + * Different from CRYP: no FIFO; one DINR / DOUTR pair processed per + * 16-byte block. KEYRx are written in *reversed* word order + * (KEYR3 = MSB key word for 128-bit; KEYR7 = MSB for 256-bit). + * AES-192 not supported by hardware (only 128 and 256). */ + +#ifndef STM32_BARE_AES_TIMEOUT + #define STM32_BARE_AES_TIMEOUT 0x10000 +#endif + +/* CCF (computation-complete flag) clear: newer TinyAES IPs (U3/U5/L4/L5/H5 + * etc.) have a dedicated AES_ICR register with a CCF bit. Older TinyAES IPs + * (WB/WL/G0) don't -- CCF is cleared by writing 1 to AES_CR.CCFC. 
The + * trailing __DMB() ensures the clear write retires before the next config + * write or DINR push -- without it the C5 TinyAES IP at PLL clock has been + * observed to race the next CCF poll against an in-flight clear. */ +#if defined(AES_ICR_CCF) + #define STM32_AES_CLEAR_CCF() do { WC_STM32_AES_INST->ICR = AES_ICR_CCF; __DMB(); } while (0) +#elif defined(AES_CR_CCFC) + #define STM32_AES_CLEAR_CCF() do { WC_STM32_AES_INST->CR |= AES_CR_CCFC; __DMB(); } while (0) +#else + #error "STM32 AES IP variant: no CCF-clear mechanism known" +#endif + +/* CCF wait: newest TinyAES IPs (C5 family) split status into an interrupt + * status register AES_ISR (with AES_ISR_CCF). Older TinyAES variants put + * CCF in AES_SR. Pick whichever the CMSIS device header exposes. */ +#if defined(AES_ISR_CCF) + #define STM32_AES_CCF_BIT AES_ISR_CCF + #define STM32_AES_CCF_REG ISR +#elif defined(AES_SR_CCF) + #define STM32_AES_CCF_BIT AES_SR_CCF + #define STM32_AES_CCF_REG SR +#else + #error "STM32 AES IP variant: no CCF status register known" +#endif + +#define STM32_AES_DATATYPE_BYTE AES_CR_DATATYPE_1 /* 0b10 */ +#define STM32_AES_CHMOD_ECB 0u +#define STM32_AES_CHMOD_CBC AES_CR_CHMOD_0 +#define STM32_AES_CHMOD_CTR AES_CR_CHMOD_1 +#define STM32_AES_CHMOD_GCM (AES_CR_CHMOD_0 | AES_CR_CHMOD_1) +#define STM32_AES_MODE_ENC 0u +#define STM32_AES_MODE_KEYDERIVE AES_CR_MODE_0 +#define STM32_AES_MODE_DEC AES_CR_MODE_1 +#define STM32_AES_MODE_KD_DEC (AES_CR_MODE_0 | AES_CR_MODE_1) + +static int Stm32AesWaitCCF(void) +{ + int t = 0; + /* Force prior config / DINR writes to retire before we start polling. + * Required on the C5 family at PLL clock -- without the barrier the + * write buffer can defer the last DINR write past the first CCF read, + * latching us on a stale (still-zero) status. 
*/ + __DMB(); + while ((WC_STM32_AES_INST->STM32_AES_CCF_REG & STM32_AES_CCF_BIT) == 0) { + if (++t >= STM32_BARE_AES_TIMEOUT) { + #ifdef DEBUG_STM32_BARE_GCM + printf("[STM32 BARE AES] CCF timeout: SR=0x%08lx CR=0x%08lx\n", + (unsigned long)WC_STM32_AES_INST->STM32_AES_CCF_REG, + (unsigned long)WC_STM32_AES_INST->CR); + #endif + return WC_TIMEOUT_E; + } + } + return 0; +} + +static word32 Stm32AesKeySizeBits(word32 keyLen) +{ + if (keyLen == 32) { + return AES_CR_KEYSIZE; /* 256-bit */ + } + return 0; /* 128-bit (192 not supported by HW) */ +} + +/* Key registers must be written in increasing register order (KEYR0 first + * per RM). KEYR(N-1) holds the MSB word. + * aes->key arrives pre-byte-reversed (per wc_AesSetKey under BARE), so the + * highest-significance byte of the AES key is in the MSB of aes->key[0]. */ +static int Stm32AesLoadKey(const word32* key, word32 keyLen) +{ + if (keyLen == 16) { + WC_STM32_AES_INST->KEYR0 = key[3]; + WC_STM32_AES_INST->KEYR1 = key[2]; + WC_STM32_AES_INST->KEYR2 = key[1]; + WC_STM32_AES_INST->KEYR3 = key[0]; + return 0; + } + if (keyLen == 32) { + WC_STM32_AES_INST->KEYR0 = key[7]; + WC_STM32_AES_INST->KEYR1 = key[6]; + WC_STM32_AES_INST->KEYR2 = key[5]; + WC_STM32_AES_INST->KEYR3 = key[4]; + WC_STM32_AES_INST->KEYR4 = key[3]; + WC_STM32_AES_INST->KEYR5 = key[2]; + WC_STM32_AES_INST->KEYR6 = key[1]; + WC_STM32_AES_INST->KEYR7 = key[0]; + return 0; + } + /* AES-192 not supported by TinyAES hardware */ + return BAD_FUNC_ARG; +} + +static void Stm32AesLoadIV(const byte* iv, word32 ivLen) +{ + word32 v[4]; + word32 copyLen = (ivLen > 16) ? 16 : ivLen; + + XMEMSET(v, 0, sizeof(v)); + if (iv != NULL && copyLen > 0) { + XMEMCPY(v, iv, copyLen); + ByteReverseWords(v, v, 16); + } + /* IVRx ordering matches keyword: IVR3 = MSB */ + WC_STM32_AES_INST->IVR3 = v[0]; + WC_STM32_AES_INST->IVR2 = v[1]; + WC_STM32_AES_INST->IVR1 = v[2]; + WC_STM32_AES_INST->IVR0 = v[3]; +} + +/* One 16-byte block in / out. 
*/ +static int Stm32AesXferBlock(const byte* in, byte* out) +{ + int ret; + word32 i; + word32 buf[WC_AES_BLOCK_SIZE/sizeof(word32)]; + + XMEMCPY(buf, in, WC_AES_BLOCK_SIZE); + for (i = 0; i < 4; i++) { + WC_STM32_AES_INST->DINR = buf[i]; + } + ret = Stm32AesWaitCCF(); + if (ret != 0) { + return ret; + } + for (i = 0; i < 4; i++) { + buf[i] = WC_STM32_AES_INST->DOUTR; + } + XMEMCPY(out, buf, WC_AES_BLOCK_SIZE); + /* Clear CCF for next block */ + STM32_AES_CLEAR_CCF(); + return 0; +} + +/* Run the key-derivation pass before decrypt (CBC/ECB). */ +static int Stm32AesPrepareKey(word32 keyLen, word32 chmod) +{ + int ret; + word32 cr = STM32_AES_MODE_KEYDERIVE | STM32_AES_DATATYPE_BYTE | + Stm32AesKeySizeBits(keyLen) | chmod; + WC_STM32_AES_INST->CR = cr; + WC_STM32_AES_INST->CR |= AES_CR_EN; + ret = Stm32AesWaitCCF(); + STM32_AES_CLEAR_CCF(); + WC_STM32_AES_INST->CR &= ~AES_CR_EN; + return ret; +} + +int wc_Stm32_Aes_Ecb(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc) +{ + int ret; + word32 keyLen, blocks, b; + word32 cr; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + if (sz == 0 || (sz % WC_AES_BLOCK_SIZE) != 0) { + return BAD_FUNC_ARG; + } + + ret = wc_AesGetKeySize(aes, &keyLen); + if (ret != 0) { + return ret; + } + if (keyLen != 16 && keyLen != 32) { + return BAD_FUNC_ARG; /* TinyAES has no 192-bit support */ + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + WC_STM32_AES_CLK_ENABLE_INST(); + + /* Disable AES first; configure CR (with KEYSIZE) BEFORE writing the + * key registers (per RM the AES must know the key size before keys + * are loaded). Then enable. */ + WC_STM32_AES_INST->CR = 0; + + cr = STM32_AES_DATATYPE_BYTE | Stm32AesKeySizeBits(keyLen) | + STM32_AES_CHMOD_ECB | + (isEnc ? 
STM32_AES_MODE_ENC : STM32_AES_MODE_DEC); + WC_STM32_AES_INST->CR = cr; + STM32_AES_CLEAR_CCF(); /* clear any stale CCF/ERR */ + + ret = Stm32AesLoadKey(aes->key, keyLen); + if (ret != 0) { + goto exit; + } + if (!isEnc) { + /* Key derivation pass (MODE=01) via Stm32AesPrepareKey(): + * it writes CR with the key-derive mode plus the same + * DATATYPE, KEYSIZE and CHMOD bits as cr, sets EN, waits + * for CCF, clears CCF and clears EN. The key registers + * were loaded just above. */ + ret = Stm32AesPrepareKey(keyLen, STM32_AES_CHMOD_ECB); + if (ret != 0) { + goto exit; + } + /* Restore decrypt mode */ + WC_STM32_AES_INST->CR = cr; + } + + WC_STM32_AES_INST->CR |= AES_CR_EN; + + blocks = sz / WC_AES_BLOCK_SIZE; + for (b = 0; b < blocks; b++) { + ret = Stm32AesXferBlock(in + b * WC_AES_BLOCK_SIZE, + out + b * WC_AES_BLOCK_SIZE); + if (ret != 0) { + break; + } + } + +exit: + WC_STM32_AES_INST->CR &= ~AES_CR_EN; + wolfSSL_CryptHwMutexUnLock(); + return ret; +} + +int wc_Stm32_Aes_Cbc(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc) +{ + int ret; + word32 keyLen, blocks, b; + word32 cr; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + if (sz == 0 || (sz % WC_AES_BLOCK_SIZE) != 0) { + return BAD_FUNC_ARG; + } + + ret = wc_AesGetKeySize(aes, &keyLen); + if (ret != 0) { + return ret; + } + if (keyLen != 16 && keyLen != 32) { + return BAD_FUNC_ARG; + } + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + WC_STM32_AES_CLK_ENABLE_INST(); + + /* Configure CR (with KEYSIZE) BEFORE loading keys -- per RM. */ + WC_STM32_AES_INST->CR = 0; + + cr = STM32_AES_DATATYPE_BYTE | Stm32AesKeySizeBits(keyLen) | + STM32_AES_CHMOD_CBC | + (isEnc ?
STM32_AES_MODE_ENC : STM32_AES_MODE_DEC); + WC_STM32_AES_INST->CR = cr; + STM32_AES_CLEAR_CCF(); + + ret = Stm32AesLoadKey(aes->key, keyLen); + if (ret != 0) { + goto exit; + } + if (!isEnc) { + /* Key derivation pass for decrypt, via Stm32AesPrepareKey(): + * it writes CR with the key-derive mode plus the same + * DATATYPE, KEYSIZE and CHMOD bits as cr, sets EN, waits + * for CCF, clears CCF and clears EN. The key registers + * were loaded just above. */ + ret = Stm32AesPrepareKey(keyLen, STM32_AES_CHMOD_CBC); + if (ret != 0) { + goto exit; + } + /* Restore decrypt mode */ + WC_STM32_AES_INST->CR = cr; + } + Stm32AesLoadIV((const byte*)aes->reg, WC_AES_BLOCK_SIZE); + + WC_STM32_AES_INST->CR |= AES_CR_EN; + + /* For in-place decrypt (out == in) the block loop overwrites the + * source ciphertext, so the next-IV ciphertext block has to be + * captured before processing starts. */ + if (!isEnc) { + XMEMCPY(aes->tmp, in + sz - WC_AES_BLOCK_SIZE, WC_AES_BLOCK_SIZE); + } + + blocks = sz / WC_AES_BLOCK_SIZE; + for (b = 0; b < blocks; b++) { + ret = Stm32AesXferBlock(in + b * WC_AES_BLOCK_SIZE, + out + b * WC_AES_BLOCK_SIZE); + if (ret != 0) { + break; + } + } + + if (ret == 0) { + if (isEnc) { + XMEMCPY(aes->reg, out + (blocks - 1) * WC_AES_BLOCK_SIZE, + WC_AES_BLOCK_SIZE); + } + else { + XMEMCPY(aes->reg, aes->tmp, WC_AES_BLOCK_SIZE); + } + } + +exit: + WC_STM32_AES_INST->CR &= ~AES_CR_EN; + wolfSSL_CryptHwMutexUnLock(); + return ret; +} + +/* TinyAES HW GCM: deferred. Falls back to software GCM (with HW ECB + * blocks via wc_AesEncrypt -> wc_Stm32_Aes_Ecb).
*/ +int wc_Stm32_Aes_Gcm(struct Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* tag, word32 tagSz, + const byte* aad, word32 aadSz, int isEnc) +{ + (void)aes; (void)out; (void)in; (void)sz; + (void)iv; (void)ivSz; + (void)tag; (void)tagSz; + (void)aad; (void)aadSz; (void)isEnc; + return CRYPTOCB_UNAVAILABLE; +} + +#endif /* CRYP IP vs TinyAES IP */ + +#elif defined(WOLFSSL_STM32_CUBEMX) -#if defined(WOLFSSL_STM32U5_DHUK) +#if defined(WOLFSSL_DHUK) /* Set the DHUK IV to be used when unwrapping an AES key * return 0 on success */ int wc_Stm32_Aes_SetDHUK_IV(struct Aes* aes, const byte* iv, int ivSz) @@ -768,10 +2237,10 @@ int wc_Stm32_Aes_Init(Aes* aes, CRYP_HandleTypeDef* hcryp, int useSaes) break; } -#ifdef WOLFSSL_STM32U5_DHUK +#ifdef WOLFSSL_DHUK /* Use hardware key */ - if (useSaes && (aes->devId == WOLFSSL_STM32U5_DHUK_DEVID || - aes->devId == WOLFSSL_STM32U5_SAES_DEVID)) { + if (useSaes && (aes->devId == WOLFSSL_DHUK_DEVID || + aes->devId == WOLFSSL_SAES_DEVID)) { /* SAES requires use of the RNG -- HAL_RNG_DeInit() calls from random.c turn off the RNG clock -- re-enable the clock here */ @@ -781,7 +2250,7 @@ int wc_Stm32_Aes_Init(Aes* aes, CRYP_HandleTypeDef* hcryp, int useSaes) hcryp->Init.DataType = CRYP_DATATYPE_8B; /* Key select (HW, or Normal) */ - if (aes->devId == WOLFSSL_STM32U5_DHUK_DEVID) { + if (aes->devId == WOLFSSL_DHUK_DEVID) { hcryp->Init.KeySelect = CRYP_KEYSEL_HW; } else { @@ -878,7 +2347,7 @@ int wc_Stm32_Aes_Init(Aes* aes, CRYP_InitTypeDef* cryptInit, void wc_Stm32_Aes_Cleanup(void) { } -#endif /* WOLFSSL_STM32_CUBEMX */ +#endif /* WOLFSSL_STM32_BARE / WOLFSSL_STM32_CUBEMX / StdPeriph */ #endif /* !NO_AES */ #endif /* STM32_CRYPTO */ diff --git a/wolfcrypt/src/random.c b/wolfcrypt/src/random.c index d26c1e54a4a..db947401016 100644 --- a/wolfcrypt/src/random.c +++ b/wolfcrypt/src/random.c @@ -4243,6 +4243,9 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) #elif defined(STM32_RNG) /* Generate 
a RNG seed using the hardware random number generator * on the STM32F2/F4/F7/L4. */ + #include <wolfssl/wolfcrypt/port/st/stm32.h> + /* Pulls in WC_STM32_RNG_CLK_ENABLE for WOLFSSL_STM32_BARE builds */ + #ifdef WOLFSSL_STM32_CUBEMX int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) @@ -4311,6 +4314,21 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) #endif + /* Bounded poll for DRDY, plus recovery from SECS / CECS. The + * unbounded `while (DRDY == 0)` loop in the original code spins + * forever on chips where the RNG kernel clock is unstable + * (e.g. WL55 with RNGSEL = MSI under sustained ECDSA-key-gen + * load), because once the IP latches a Seed-error or Clock- + * error condition it stops asserting DRDY. Per the STM32 RM + * recovery sequence: clear SEIS/CEIS, toggle RNGEN, discard the + * stale words sitting in the RNG output, then retry. */ + #ifndef STM32_BARE_RNG_BYTE_TIMEOUT + #define STM32_BARE_RNG_BYTE_TIMEOUT 0x40000 + #endif + #ifndef STM32_BARE_RNG_MAX_RETRIES + #define STM32_BARE_RNG_MAX_RETRIES 8 + #endif + /* Generate a RNG seed using the hardware RNG on the STM32F427 * directly, following steps outlined in STM32F4 Reference * Manual (Chapter 24) for STM32F4xx family. */ @@ -4318,6 +4336,9 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) { int ret; word32 i; + word32 t; + word32 retries; + word32 sr; (void)os; ret = wolfSSL_CryptHwMutexLock(); @@ -4327,7 +4348,49 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) #ifndef STM32_NUTTX_RNG /* enable RNG peripheral clock */ - RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN; + #ifdef WC_STM32_RNG_CLK_ENABLE + WC_STM32_RNG_CLK_ENABLE(); + #else + /* Default for F4/F7/L4/L5/U5/H5/H7 -- RNG on AHB2 */ + RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN; + #endif + #endif + + /* On the new-gen STM32C5 RNG IP the CR register is locked at + * reset (CONFIGLOCK clear, but the IP refuses to produce data + * until a NIST-compliant CONFIG1/2/3 + NSCR + HTCR sequence + * has been written under CONDRST).
The HAL ships canonical + * candidate values in the device header (RNG_CAND_NIST_*). + * Detect the family by presence of that symbol -- on chips + * without it (F4/F7/L4/U5/H7/H5/WL/etc.) skip. Do this only + * on the first call (RNGEN clear) so subsequent calls don't + * disturb a running peripheral. */ + #if defined(RNG_CAND_NIST_CR_VALUE) && defined(RNG_CR_CONDRST) + if ((WC_RNG_CR & RNG_CR_RNGEN) == 0U) { + WC_RNG_CR = (uint32_t)RNG_CAND_NIST_CR_VALUE | + (uint32_t)RNG_CR_CONDRST; + #ifdef RNG_CAND_NIST_NSCR_VALUE + RNG->NSCR = (uint32_t)RNG_CAND_NIST_NSCR_VALUE; + #endif + #ifdef RNG_CAND_NIST_HTCR_VALUE + RNG->HTCR[0] = (uint32_t)RNG_CAND_NIST_HTCR_VALUE; + #endif + /* Clear CONDRST. Hardware mirrors the conditioning back + * once it lands. */ + WC_RNG_CR &= ~(uint32_t)RNG_CR_CONDRST; + #ifdef RNG_SR_BUSY + /* Wait for conditioning to finish (BUSY drops). Bounded + * so a misconfigured kernel clock returns a clean error + * instead of hanging. */ + t = 0; + while ((WC_RNG_SR & RNG_SR_BUSY) != 0U) { + if (++t >= STM32_BARE_RNG_BYTE_TIMEOUT) { + wolfSSL_CryptHwMutexUnLock(); + return RNG_FAILURE_E; + } + } + #endif + } #endif /* enable RNG interrupt, set IE bit in RNG->CR register */ @@ -4345,11 +4408,58 @@ int wc_GenerateSeed(OS_Seed* os, byte* output, word32 sz) } for (i = 0; i < sz; i++) { - /* wait until RNG number is ready */ - while ((WC_RNG_SR & RNG_SR_DRDY) == 0) { } + retries = 0; + for (;;) { + t = 0; + /* Bounded DRDY poll -- breaks on either DRDY or any + * error indication (SECS/CECS). */ + while (t < STM32_BARE_RNG_BYTE_TIMEOUT) { + sr = WC_RNG_SR; + if ((sr & (RNG_SR_DRDY | RNG_SR_SECS | + RNG_SR_CECS)) != 0U) { + break; + } + t++; + } - /* get value */ - output[i] = WC_RNG_DR; + /* Happy path: data ready and no error. */ + if ((sr & RNG_SR_DRDY) != 0U && + (sr & (RNG_SR_SECS | RNG_SR_CECS)) == 0U) { + output[i] = WC_RNG_DR; + break; + } + + /* Either timed out or an error latched. Recover. 
*/ + if (++retries > STM32_BARE_RNG_MAX_RETRIES) { + wolfSSL_CryptHwMutexUnLock(); + return RNG_FAILURE_E; + } + + /* Recovery sequence (per STM32 RM RNG chapter): + * 1. Clear SEIS / CEIS interrupt status by writing + * zeros to those bits. + * 2. Toggle RNGEN off then on to drop any stale + * LFSR state that may be tainted by the error. + * 3. Discard four DR reads to flush the pipeline + * (only meaningful when DRDY is set; otherwise + * the reads are harmless and bounded). */ + WC_RNG_SR = WC_RNG_SR & ~(RNG_SR_SEIS | RNG_SR_CEIS); + WC_RNG_CR &= ~RNG_CR_RNGEN; + WC_RNG_CR |= RNG_CR_RNGEN; + /* Bounded flush: stop after four discarded words, an + * error flag, or STM32_BARE_RNG_BYTE_TIMEOUT polls. */ + sr = 0; /* reused as poll counter; next DRDY poll reloads it */ + for (t = 0; t < 4U && sr < STM32_BARE_RNG_BYTE_TIMEOUT; sr++) { + if ((WC_RNG_SR & RNG_SR_DRDY) != 0U) { + (void)WC_RNG_DR; + t++; + } + else if ((WC_RNG_SR & + (RNG_SR_SECS | RNG_SR_CECS)) != 0U) { + break; /* error latched: outer retry handles it */ + } + } + } } wolfSSL_CryptHwMutexUnLock(); diff --git a/wolfssl/wolfcrypt/aes.h b/wolfssl/wolfcrypt/aes.h index b01aa3b473b..891bd5131c6 100644 --- a/wolfssl/wolfcrypt/aes.h +++ b/wolfssl/wolfcrypt/aes.h @@ -301,7 +301,7 @@ struct Aes { #endif #ifdef HAVE_AESGCM Gcm gcm; -#ifdef WOLFSSL_STM32U5_DHUK +#ifdef WOLFSSL_DHUK byte dhukIV[16]; /* Used when unwrapping an encrypted key */ int dhukIVLen; #endif @@ -338,7 +338,7 @@ struct Aes { byte use_sha3_hw_crypto; #endif #endif /* __aarch64__ && WOLFSSL_ARMASM && !WOLFSSL_ARMASM_NO_HW_CRYPTO */ -#if defined(WOLF_CRYPTO_CB) || defined(WOLFSSL_STM32U5_DHUK) +#if defined(WOLF_CRYPTO_CB) || defined(WOLFSSL_DHUK) int devId; void* devCtx; /* Opaque handle for CryptoCB device */ #endif diff --git a/wolfssl/wolfcrypt/port/st/stm32.h b/wolfssl/wolfcrypt/port/st/stm32.h index 9aa0d418ae1..0a40407d151 100644 --- a/wolfssl/wolfcrypt/port/st/stm32.h +++ b/wolfssl/wolfcrypt/port/st/stm32.h @@ -23,11 +23,273 @@ #define _WOLFPORT_STM32_H_ /* Generic STM32 Hashing and Crypto Functions */ -/* Supports CubeMX HAL or Standard Peripheral Library */ +/* Supports CubeMX HAL, Standard Peripheral Library,
or bare-metal direct + * register access (WOLFSSL_STM32_BARE). */ #include <wolfssl/wolfcrypt/settings.h> #include <wolfssl/wolfcrypt/types.h> /* for MATH_INT_T */ +#ifdef WOLFSSL_STM32_BARE +/* Per-family direct-register clock-enable macros. CMSIS device header is + * already included via settings.h. RCC->...ENR bit names come from CMSIS. */ +#if defined(WOLFSSL_STM32H5) + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_AESEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_AESEN; } while (0) + #define WC_STM32_HASH_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_HASHEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_HASH_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_HASHEN; } while (0) + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN; (void)RCC->AHB2ENR; } while (0) +#elif defined(WOLFSSL_STM32F4) || defined(WOLFSSL_STM32F7) || \ + defined(WOLFSSL_STM32H7) + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_CRYPEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_CRYPEN; } while (0) + #define WC_STM32_HASH_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_HASHEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_HASH_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_HASHEN; } while (0) + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN; (void)RCC->AHB2ENR; } while (0) +#elif defined(WOLFSSL_STM32L4) + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_AESEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_AESEN; } while (0) + #define WC_STM32_HASH_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_HASHEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_HASH_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_HASHEN; } while (0) + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN;
(void)RCC->AHB2ENR; } while (0) +#elif defined(WOLFSSL_STM32U5) || defined(WOLFSSL_STM32U3) + /* U5 / U3 RCC uses AHB2ENR1 (not AHB2ENR). AES bit only present on + * variants that have the peripheral (U585+, U385+). */ + #ifdef RCC_AHB2ENR1_AESEN + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHB2ENR1 |= RCC_AHB2ENR1_AESEN; (void)RCC->AHB2ENR1; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHB2ENR1 &= ~RCC_AHB2ENR1_AESEN; } while (0) + #endif + #ifdef RCC_AHB2ENR1_HASHEN + #define WC_STM32_HASH_CLK_ENABLE() \ + do { RCC->AHB2ENR1 |= RCC_AHB2ENR1_HASHEN; (void)RCC->AHB2ENR1; } while (0) + #define WC_STM32_HASH_CLK_DISABLE() \ + do { RCC->AHB2ENR1 &= ~RCC_AHB2ENR1_HASHEN; } while (0) + #endif + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHB2ENR1 |= RCC_AHB2ENR1_RNGEN; (void)RCC->AHB2ENR1; } while (0) +#elif defined(WOLFSSL_STM32G0) + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHBENR |= RCC_AHBENR_AESEN; (void)RCC->AHBENR; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHBENR &= ~RCC_AHBENR_AESEN; } while (0) + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHBENR |= RCC_AHBENR_RNGEN; (void)RCC->AHBENR; } while (0) +#elif defined(WOLFSSL_STM32WB) + /* WB55 dual-core: AES1 is the M4 (CPU1) application AES, on AHB2. + * AES2 sits on AHB4/AHB3 and is reserved for the M0+ side / shared use. + * The wolfcrypt port maps CRYP -> AES1 (see CRYP alias above), so use + * AES1's clock-enable bit. RNG is on AHB3. */ + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_AES1EN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_AES1EN; } while (0) + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHB3ENR |= RCC_AHB3ENR_RNGEN; (void)RCC->AHB3ENR; } while (0) +#elif defined(WOLFSSL_STM32WL) + /* WL55 dual-core: TinyAES + RNG + PKA on M4 (CPU1) side. AES on AHB3, + * RNG on AHB3, PKA on AHB3. No HASH peripheral. V1 PKA layout. 
*/ + #ifdef RCC_AHB3ENR_AESEN + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHB3ENR |= RCC_AHB3ENR_AESEN; (void)RCC->AHB3ENR; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHB3ENR &= ~RCC_AHB3ENR_AESEN; } while (0) + #endif + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHB3ENR |= RCC_AHB3ENR_RNGEN; (void)RCC->AHB3ENR; } while (0) +#elif defined(WOLFSSL_STM32G4) + /* G4: TinyAES + RNG + PKA on AHB2. No HASH peripheral. */ + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_AESEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_AESEN; } while (0) + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN; (void)RCC->AHB2ENR; } while (0) +#elif defined(WOLFSSL_STM32WBA) + /* WBA: TinyAES + HASH + RNG + PKA + SAES on AHB2 (PKA on AHB1). + * NOTE(review): the sentence above names two different buses for PKA; + * confirm the PKA enable register against the WBA reference manual. + * The SAES macros are only defined when WOLFSSL_STM32_USE_SAES is set + * and the CMSIS header provides RCC_AHB2ENR_SAESEN; the HASH macros + * only when it provides RCC_AHB2ENR_HASHEN. */ + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_AESEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_AESEN; } while (0) + #if defined(WOLFSSL_STM32_USE_SAES) && defined(RCC_AHB2ENR_SAESEN) + #define WC_STM32_SAES_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_SAESEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_SAES_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_SAESEN; } while (0) + #endif + #ifdef RCC_AHB2ENR_HASHEN + #define WC_STM32_HASH_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_HASHEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_HASH_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_HASHEN; } while (0) + #endif + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN; (void)RCC->AHB2ENR; } while (0) +#elif defined(WOLFSSL_STM32C5) + /* C5: TinyAES + HASH + RNG + SAES + PKA all on AHB2. New-gen HASH IP + * (4-bit ALGO field, same as H5/U3/N6).
*/ + #define WC_STM32_AES_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_AESEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_AES_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_AESEN; } while (0) + #if defined(WOLFSSL_STM32_USE_SAES) && defined(RCC_AHB2ENR_SAESEN) + #define WC_STM32_SAES_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_SAESEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_SAES_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_SAESEN; } while (0) + #endif + #define WC_STM32_HASH_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_HASHEN; (void)RCC->AHB2ENR; } while (0) + #define WC_STM32_HASH_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_HASHEN; } while (0) + #define WC_STM32_RNG_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_RNGEN; (void)RCC->AHB2ENR; } while (0) +#endif + +/* Build-time AES IP instance selector. Default is the regular AES/CRYP + * peripheral; defining WOLFSSL_STM32_USE_SAES routes the BARE TinyAES + * register-access block to SAES (Secure AES) instead. The TinyAES + * register layout is identical for AES and SAES on H5/U5/WBA/C5. + * When WOLFSSL_STM32_USE_SAES is set but the CMSIS header defines no SAES + * instance, the regular CRYP instance is used. */ +#ifndef WC_STM32_AES_INST + #if defined(WOLFSSL_STM32_USE_SAES) && defined(SAES) + #define WC_STM32_AES_INST SAES + #else + #define WC_STM32_AES_INST CRYP + #endif +#endif + +/* SAES bus-clock fallback. Not every family arm above defines + * WC_STM32_SAES_CLK_ENABLE (the H5 and U5 / U3 arms do not), yet the + * instance selector resolves to SAES on any device whose CMSIS header + * declares it. Without this fallback the companion macro below would + * enable the regular AES clock while the driver addresses SAES. When the + * selected instance is SAES and no family arm supplied the macro, derive + * it from whichever SAES enable bit the CMSIS header exposes. Only the + * AHB enable bit is handled here; any additional kernel-clock setup the + * SAES IP may need is left to the application -- TODO confirm per + * family. */ +#if defined(WOLFSSL_STM32_USE_SAES) && defined(SAES) && \ + !defined(WC_STM32_SAES_CLK_ENABLE) + #if defined(RCC_AHB2ENR1_SAESEN) + #define WC_STM32_SAES_CLK_ENABLE() \ + do { RCC->AHB2ENR1 |= RCC_AHB2ENR1_SAESEN; (void)RCC->AHB2ENR1; } while (0) + #ifndef WC_STM32_SAES_CLK_DISABLE + #define WC_STM32_SAES_CLK_DISABLE() \ + do { RCC->AHB2ENR1 &= ~RCC_AHB2ENR1_SAESEN; } while (0) + #endif + #elif defined(RCC_AHB2ENR_SAESEN) + #define WC_STM32_SAES_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_SAESEN; (void)RCC->AHB2ENR; } while (0) + #ifndef WC_STM32_SAES_CLK_DISABLE + #define WC_STM32_SAES_CLK_DISABLE() \ + do { RCC->AHB2ENR &= ~RCC_AHB2ENR_SAESEN; } while (0) + #endif + #endif +#endif + +/* Companion macro for the IP-instance clock enable. Routes to + * WC_STM32_SAES_CLK_ENABLE when WOLFSSL_STM32_USE_SAES is set and a SAES + * clock macro is available (from the family arm or the fallback above); + * otherwise falls back to the regular AES clock. The two are separate + * AHB enable bits on H5/U5/WBA/C5, so toggling the wrong one leaves the + * IP disabled. */ +#ifndef WC_STM32_AES_CLK_ENABLE_INST + #if defined(WOLFSSL_STM32_USE_SAES) && defined(WC_STM32_SAES_CLK_ENABLE) + #define WC_STM32_AES_CLK_ENABLE_INST() WC_STM32_SAES_CLK_ENABLE() + #else + #define WC_STM32_AES_CLK_ENABLE_INST() WC_STM32_AES_CLK_ENABLE() + #endif +#endif + +/* Per-family direct-register clock-enable macro for the PKA peripheral.
*/ +#if defined(WOLFSSL_STM32WB) || defined(WOLFSSL_STM32WL) + /* WB55 / WL55: PKA clock is on AHB3 (V1 layout) */ + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB3ENR |= RCC_AHB3ENR_PKAEN; (void)RCC->AHB3ENR; } while (0) +#elif defined(WOLFSSL_STM32U5) || defined(WOLFSSL_STM32U3) + /* U5 / U3: AHB2ENR1.PKAEN */ + #ifdef RCC_AHB2ENR1_PKAEN + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB2ENR1 |= RCC_AHB2ENR1_PKAEN; (void)RCC->AHB2ENR1; } while (0) + #endif +#elif defined(WOLFSSL_STM32H5) + #ifdef RCC_AHB2ENR_PKAEN + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_PKAEN; (void)RCC->AHB2ENR; } while (0) + #endif +#elif defined(WOLFSSL_STM32G4) + #ifdef RCC_AHB2ENR_PKAEN + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_PKAEN; (void)RCC->AHB2ENR; } while (0) + #endif +#elif defined(WOLFSSL_STM32WBA) + #ifdef RCC_AHB1ENR_PKAEN + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB1ENR |= RCC_AHB1ENR_PKAEN; (void)RCC->AHB1ENR; } while (0) + #endif +#elif defined(WOLFSSL_STM32C5) + #ifdef RCC_AHB2ENR_PKAEN + #define WC_STM32_PKA_CLK_ENABLE() \ + do { RCC->AHB2ENR |= RCC_AHB2ENR_PKAEN; (void)RCC->AHB2ENR; } while (0) + #endif +#endif + +/* HAL-legacy macros that the existing direct-register HASH path depends on. + * Without HAL these aren't otherwise visible. 
*/ +#if defined(WOLFSSL_STM32H5) || defined(WOLFSSL_STM32MP13) || \ + defined(WOLFSSL_STM32N6) || defined(WOLFSSL_STM32H7S) || \ + defined(WOLFSSL_STM32U3) || defined(WOLFSSL_STM32C5) + /* New-generation HASH IP: 4-bit ALGO field at bits 20:17. No MD5 + * selection value is defined in this arm. */ + #define HASH_ALGOSELECTION_SHA1 0u + #define HASH_ALGOSELECTION_SHA224 HASH_CR_ALGO_1 + #define HASH_ALGOSELECTION_SHA256 (HASH_CR_ALGO_0 | HASH_CR_ALGO_1) + #define HASH_ALGOSELECTION_SHA384 (HASH_CR_ALGO_2 | HASH_CR_ALGO_3) + #define HASH_ALGOSELECTION_SHA512 (HASH_CR_ALGO_0 | HASH_CR_ALGO_1 | \ + HASH_CR_ALGO_2 | HASH_CR_ALGO_3) + #define HASH_ALGOSELECTION_SHA512_224 (HASH_CR_ALGO_0 | HASH_CR_ALGO_2 | \ + HASH_CR_ALGO_3) + #define HASH_ALGOSELECTION_SHA512_256 (HASH_CR_ALGO_1 | HASH_CR_ALGO_2 | \ + HASH_CR_ALGO_3) +#else + /* Older HASH IP (F4/F7/L4 family) ALGO bit mapping (per HAL): + * SHA1 = 0 + * MD5 = ALGO_0 + * SHA224 = ALGO_1 + * SHA256 = ALGO_0 | ALGO_1 + * This arm is taken by every family not named in the #if above. + * SHA224 / SHA256 are only defined when the CMSIS header provides + * HASH_CR_ALGO_1; SHA1 and MD5 are defined unconditionally, so MD5 + * expands to HASH_CR_ALGO_0 even if the header does not declare it. + */ + #define HASH_ALGOSELECTION_SHA1 0u + #define HASH_ALGOSELECTION_MD5 HASH_CR_ALGO_0 + #ifdef HASH_CR_ALGO_1 + #define HASH_ALGOSELECTION_SHA224 HASH_CR_ALGO_1 + #define HASH_ALGOSELECTION_SHA256 (HASH_CR_ALGO_0 | HASH_CR_ALGO_1) + #endif +#endif + +/* Legacy CamelCase aliases: each alias is defined only when the matching + * HASH_ALGOSELECTION_* name was defined above. */ +#ifdef HASH_ALGOSELECTION_SHA1 + #define HASH_AlgoSelection_SHA1 HASH_ALGOSELECTION_SHA1 +#endif +#ifdef HASH_ALGOSELECTION_SHA224 + #define HASH_AlgoSelection_SHA224 HASH_ALGOSELECTION_SHA224 +#endif +#ifdef HASH_ALGOSELECTION_SHA256 + #define HASH_AlgoSelection_SHA256 HASH_ALGOSELECTION_SHA256 +#endif +#ifdef HASH_ALGOSELECTION_SHA384 + #define HASH_AlgoSelection_SHA384 HASH_ALGOSELECTION_SHA384 +#endif +#ifdef HASH_ALGOSELECTION_SHA512 + #define HASH_AlgoSelection_SHA512 HASH_ALGOSELECTION_SHA512 +#endif +#ifdef HASH_ALGOSELECTION_SHA512_224 + #define HASH_AlgoSelection_SHA512_224 HASH_ALGOSELECTION_SHA512_224 +#endif +#ifdef HASH_ALGOSELECTION_SHA512_256 + #define HASH_AlgoSelection_SHA512_256 HASH_ALGOSELECTION_SHA512_256 +#endif +#ifdef HASH_ALGOSELECTION_MD5 + #define
HASH_AlgoSelection_MD5 HASH_ALGOSELECTION_MD5 +#endif + +#define HASH_ALGOMODE_HASH 0u +#ifdef HASH_CR_MODE + #define HASH_ALGOMODE_HMAC HASH_CR_MODE +#endif +/* Byte-stream input (auto byte-swap). DATATYPE is a 2-bit field in + * HASH_CR whose value 0b10 selects byte data, i.e. bit 1 alone + * (HASH_CR_DATATYPE_1). Bit 0 alone (0b01) selects half-word data, so + * HASH_CR_DATATYPE_0 must not be used as a stand-in: it would make the + * peripheral swap 16-bit units and produce wrong digests. When only the + * field position is available the byte encoding is built from it. */ +#ifdef HASH_CR_DATATYPE_1 + #define HASH_DATATYPE_8B HASH_CR_DATATYPE_1 +#elif defined(HASH_CR_DATATYPE_Pos) + #define HASH_DATATYPE_8B (0x2UL << HASH_CR_DATATYPE_Pos) +#endif + +#endif /* WOLFSSL_STM32_BARE */ + + #ifdef STM32_HASH #include /* for uint32_t */ @@ -38,7 +300,8 @@ /* The HASH_DIGEST register indicates SHA224/SHA256 support */ #define STM32_HASH_SHA2 #if defined(WOLFSSL_STM32MP13) || defined(WOLFSSL_STM32H7S) || \ - defined(WOLFSSL_STM32N6) || defined(WOLFSSL_STM32H5) + defined(WOLFSSL_STM32N6) || defined(WOLFSSL_STM32H5) || \ + defined(WOLFSSL_STM32U3) #define HASH_CR_SIZE 103 #define HASH_MAX_DIGEST 64 /* Up to SHA512 */ @@ -68,7 +331,8 @@ /* These HASH HAL's have no MD5 implementation */ #if defined(WOLFSSL_STM32MP13) || defined(WOLFSSL_STM32H7S) || \ - defined(WOLFSSL_STM32N6) || defined(WOLFSSL_STM32H5) + defined(WOLFSSL_STM32N6) || defined(WOLFSSL_STM32H5) || \ + defined(WOLFSSL_STM32U3) || defined(WOLFSSL_STM32C5) #define STM32_NOMD5 #endif @@ -163,7 +427,8 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, #endif #ifndef NO_AES - #if !defined(STM32_CRYPTO_AES_GCM) && (defined(WOLFSSL_STM32F4) || \ + #if !defined(STM32_CRYPTO_AES_GCM) && !defined(WOLFSSL_STM32_BARE) && \ + (defined(WOLFSSL_STM32F4) || \ defined(WOLFSSL_STM32F7) || defined(WOLFSSL_STM32L4) || \ defined(WOLFSSL_STM32L5) || defined(WOLFSSL_STM32H7) || \ defined(WOLFSSL_STM32U5) || defined(WOLFSSL_STM32U3) || \ @@ -173,6 +438,13 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, /* Hardware supports AES GCM acceleration */ #define STM32_CRYPTO_AES_GCM #endif + /* Under WOLFSSL_STM32_BARE on the CRYP IP (F2/F4/F7/H7/MP13), the GCM + * HW phase machine (init/header/payload/final) is engaged for whole- + * block PT with a 12-byte IV; partial blocks and non-12B IVs
return + * CRYPTOCB_UNAVAILABLE so aes.c falls back to SW GHASH + HW ECB. On + * the TinyAES IP the BARE driver always returns CRYPTOCB_UNAVAILABLE + * for GCM (no HW phase machine) and the SW GHASH + HW ECB path is + * used. GCM decrypt is always SW + HW ECB on both IPs in v1. */ #if defined(WOLFSSL_STM32WB) || defined(WOLFSSL_STM32WL) || \ defined(WOLFSSL_STM32WBA) @@ -186,12 +458,14 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, #endif #if defined(WOLFSSL_STM32L4) || defined(WOLFSSL_STM32L5) || \ defined(WOLFSSL_STM32U5) || defined(WOLFSSL_STM32U3) || \ - defined(WOLFSSL_STM32H5) || defined(WOLFSSL_STM32G0) + defined(WOLFSSL_STM32H5) || defined(WOLFSSL_STM32G0) || \ + defined(WOLFSSL_STM32G4) || defined(WOLFSSL_STM32C5) #if defined(WOLFSSL_STM32L4) || defined(WOLFSSL_STM32U5) || \ - defined(WOLFSSL_STM32U3) || defined(WOLFSSL_STM32G0) + defined(WOLFSSL_STM32U3) || defined(WOLFSSL_STM32G0) || \ + defined(WOLFSSL_STM32G4) || defined(WOLFSSL_STM32C5) #define STM32_CRYPTO_AES_ONLY /* crypto engine only supports AES */ #endif - #if defined(WOLFSSL_STM32H5) + #if defined(WOLFSSL_STM32H5) || defined(WOLFSSL_STM32C5) #define __HAL_RCC_CRYP_CLK_DISABLE __HAL_RCC_AES_CLK_DISABLE #define __HAL_RCC_CRYP_CLK_ENABLE __HAL_RCC_AES_CLK_ENABLE #endif @@ -234,7 +508,23 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, #define STM32_GCM_IV_START 2 struct Aes; - #ifdef WOLFSSL_STM32_CUBEMX + #ifdef WOLFSSL_STM32_BARE + /* Bare-metal direct-register AES driver. ECB and CBC are HW-native; + * CTR is provided automatically via the ECB-as-transform path in + * aes.c (XTRANSFORM_AESCTRBLOCK); GCM is HW-native for the case + * the CRYP IP supports (12-byte IV + whole-block PT) and returns + * CRYPTOCB_UNAVAILABLE otherwise so aes.c can fall back to SW + * GHASH (which still uses HW ECB for the underlying AES blocks). + * Per the GCM note earlier in this header, GCM decrypt and every + * GCM request on the TinyAES IP always take that fallback.
*/ + /* ECB over sz bytes read from in and written to out. isEnc is 1 for + * encryption and 0 for decryption (the values aes.c passes from + * wc_AesEncrypt / wc_AesDecrypt). */ + int wc_Stm32_Aes_Ecb(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc); + /* CBC counterpart of wc_Stm32_Aes_Ecb with the same argument order; + * presumably the IV is taken from the Aes object -- TODO confirm. */ + int wc_Stm32_Aes_Cbc(struct Aes* aes, byte* out, const byte* in, + word32 sz, int isEnc); + /* GCM entry point: iv/ivSz, tag/tagSz and aad/aadSz carry the nonce, + * authentication tag and additional data. Returns + * CRYPTOCB_UNAVAILABLE for the cases the hardware path does not + * cover, signalling the caller to use its software GHASH path. */ + int wc_Stm32_Aes_Gcm(struct Aes* aes, byte* out, const byte* in, + word32 sz, + const byte* iv, word32 ivSz, + byte* tag, word32 tagSz, + const byte* aad, word32 aadSz, int isEnc); + #elif defined(WOLFSSL_STM32_CUBEMX) int wc_Stm32_Aes_Init(struct Aes* aes, CRYP_HandleTypeDef* hcryp, int useSAES); void wc_Stm32_Aes_Cleanup(void); @@ -242,15 +532,42 @@ int wc_Stm32_Hmac_Final(STM32_HASH_Context* stmCtx, word32 algo, int wc_Stm32_Aes_Init(struct Aes* aes, CRYP_InitTypeDef* cryptInit, CRYP_KeyInitTypeDef* keyInit); void wc_Stm32_Aes_Cleanup(void); - #endif /* WOLFSSL_STM32_CUBEMX */ + #endif /* WOLFSSL_STM32_BARE / WOLFSSL_STM32_CUBEMX / StdPeriph */ #endif /* !NO_AES */ #endif /* STM32_CRYPTO */ -#if defined(WOLFSSL_STM32U5_DHUK) && !defined(WOLFSSL_STM32U5_DHUK_DEVID) - #define WOLFSSL_STM32U5_DHUK_DEVID 808 - #define WOLFSSL_STM32U5_SAES_DEVID 807 - #define WOLFSSL_STM32U5_DHUK_WRAPPED_DEVID 809 +/* DHUK (Device Hardware Unique Key) -- SAES key wrap / unwrap using a + * silicon-bound key. Originally introduced for STM32U5 only; the + * underlying SAES + DHUK infrastructure is also present on U3, H5, + * WBA, and C5. Use WOLFSSL_DHUK going forward; WOLFSSL_STM32U5_DHUK + * is kept as a backwards-compatible alias for one release cycle. + * The two conditionals below make the names mutual aliases: defining + * either one causes the other to be defined as well. */ +#if defined(WOLFSSL_STM32U5_DHUK) && !defined(WOLFSSL_DHUK) + #define WOLFSSL_DHUK +#endif +#if defined(WOLFSSL_DHUK) && !defined(WOLFSSL_STM32U5_DHUK) + #define WOLFSSL_STM32U5_DHUK +#endif + +/* Family gate: only families that actually have SAES + DHUK silicon.
*/ +#if defined(WOLFSSL_DHUK) && \ + (defined(WOLFSSL_STM32U5) || defined(WOLFSSL_STM32U3) || \ + defined(WOLFSSL_STM32H5) || defined(WOLFSSL_STM32WBA) || \ + defined(WOLFSSL_STM32C5)) + #define WC_STM32_HAS_DHUK +#endif + +#if defined(WOLFSSL_DHUK) && !defined(WOLFSSL_DHUK_DEVID) + /* Generic DHUK / SAES device IDs (used with WOLF_CRYPTO_CB devId + * dispatch in aes.c hot paths). The previous guard let an application + * predefine the WOLFSSL_STM32U5_* names; any such legacy name is + * adopted as the generic value instead of being redefined (which + * would be a macro redefinition and would leave the library and the + * application using different IDs). When a legacy name is not + * predefined, the default is used and the legacy alias is provided + * for any out-of-tree caller. */ + #ifdef WOLFSSL_STM32U5_DHUK_DEVID + #define WOLFSSL_DHUK_DEVID WOLFSSL_STM32U5_DHUK_DEVID + #else + #define WOLFSSL_DHUK_DEVID 808 + #define WOLFSSL_STM32U5_DHUK_DEVID WOLFSSL_DHUK_DEVID + #endif + #ifdef WOLFSSL_STM32U5_SAES_DEVID + #define WOLFSSL_SAES_DEVID WOLFSSL_STM32U5_SAES_DEVID + #else + #define WOLFSSL_SAES_DEVID 807 + #define WOLFSSL_STM32U5_SAES_DEVID WOLFSSL_SAES_DEVID + #endif + #ifdef WOLFSSL_STM32U5_DHUK_WRAPPED_DEVID + #define WOLFSSL_DHUK_WRAPPED_DEVID WOLFSSL_STM32U5_DHUK_WRAPPED_DEVID + #else + #define WOLFSSL_DHUK_WRAPPED_DEVID 809 + #define WOLFSSL_STM32U5_DHUK_WRAPPED_DEVID WOLFSSL_DHUK_WRAPPED_DEVID + #endif + int wc_Stm32_Aes_Wrap(struct Aes* aes, const byte* in, word32 inSz, byte* out, word32* outSz, const byte* iv, int ivSz); int wc_Stm32_Aes_UnWrap(struct Aes* aes, CRYP_HandleTypeDef* hcryp, const byte* in, diff --git a/wolfssl/wolfcrypt/settings.h b/wolfssl/wolfcrypt/settings.h index ea6d86c8dda..c196e859311 100644 --- a/wolfssl/wolfcrypt/settings.h +++ b/wolfssl/wolfcrypt/settings.h @@ -2200,7 +2200,8 @@ extern void uITRON4_free(void *p) ; defined(WOLFSSL_STM32U3) || defined(WOLFSSL_STM32H5) || \ defined(WOLFSSL_STM32WL) || defined(WOLFSSL_STM32G4) || \ defined(WOLFSSL_STM32MP13) || defined(WOLFSSL_STM32H7S) || \ - defined(WOLFSSL_STM32WBA) || defined(WOLFSSL_STM32N6) + defined(WOLFSSL_STM32WBA) || defined(WOLFSSL_STM32N6) || \ + defined(WOLFSSL_STM32C5) #define SIZEOF_LONG_LONG 8 #ifndef CHAR_BIT @@ -2222,7 +2223,7 @@ extern void uITRON4_free(void *p) ; #if defined(WOLFSSL_STM32L4) || defined(WOLFSSL_STM32L5) || \ defined(WOLFSSL_STM32WB) || defined(WOLFSSL_STM32U5) || \ defined(WOLFSSL_STM32U3) || defined(WOLFSSL_STM32WL) || \ - defined(WOLFSSL_STM32WBA) + defined(WOLFSSL_STM32WBA) || defined(WOLFSSL_STM32C5) #define NO_AES_192 /* hardware does not support 192-bit */ #endif #endif @@ -2238,7 +2239,62 @@ extern void
uITRON4_free(void *p) ; #define KEIL_INTRINSICS #endif #define NO_OLD_RNGNAME - #ifdef WOLFSSL_STM32_CUBEMX + + #if defined(WOLFSSL_STM32_BARE) && defined(WOLFSSL_STM32_CUBEMX) + #error "WOLFSSL_STM32_BARE and WOLFSSL_STM32_CUBEMX are mutually exclusive" + #endif + /* WOLFSSL_STM32_PKA is now supported under WOLFSSL_STM32_BARE via the + * direct-register PKA driver in wolfcrypt/src/port/st/stm32.c. */ + + #ifdef WOLFSSL_STM32_BARE + /* Direct register access; no HAL or StdPeriph driver. Pull in only the + * CMSIS device header. Existing direct-register HASH path is reused; + * RNG goes through the existing WOLFSSL_STM32_RNG_NOLIB path, which + * is switched on here unless the build already defined it. + * The first family macro that matches in the chain below selects the + * CMSIS header (WOLFSSL_STM32H7S is tested ahead of WOLFSSL_STM32H7); + * when no family macro matches, no device header is included here. */ + #ifndef WOLFSSL_STM32_RNG_NOLIB + #define WOLFSSL_STM32_RNG_NOLIB + #endif + #if defined(WOLFSSL_STM32F1) + #include "stm32f1xx.h" + #elif defined(WOLFSSL_STM32F2) + #include "stm32f2xx.h" + #elif defined(WOLFSSL_STM32F4) + #include "stm32f4xx.h" + #elif defined(WOLFSSL_STM32F7) + #include "stm32f7xx.h" + #elif defined(WOLFSSL_STM32L4) + #include "stm32l4xx.h" + #elif defined(WOLFSSL_STM32L5) + #include "stm32l5xx.h" + #elif defined(WOLFSSL_STM32H7S) + #include "stm32h7rsxx.h" + #elif defined(WOLFSSL_STM32H7) + #include "stm32h7xx.h" + #elif defined(WOLFSSL_STM32WB) + #include "stm32wbxx.h" + #elif defined(WOLFSSL_STM32WL) + #include "stm32wlxx.h" + #elif defined(WOLFSSL_STM32G0) + #include "stm32g0xx.h" + #elif defined(WOLFSSL_STM32G4) + #include "stm32g4xx.h" + #elif defined(WOLFSSL_STM32U5) + #include "stm32u5xx.h" + #elif defined(WOLFSSL_STM32U3) + #include "stm32u3xx.h" + #elif defined(WOLFSSL_STM32H5) + #include "stm32h5xx.h" + #elif defined(WOLFSSL_STM32C5) + #include "stm32c5xx.h" + #elif defined(WOLFSSL_STM32N6) + #include "stm32n6xx.h" + #elif defined(WOLFSSL_STM32MP13) + /* MP13: the device header is skipped when __ASSEMBLER__ is defined. */ + #ifndef __ASSEMBLER__ + #include "stm32mp13xx.h" + #endif + #elif defined(WOLFSSL_STM32WBA) + #include "stm32wbaxx.h" + #endif + #elif defined(WOLFSSL_STM32_CUBEMX) #if defined(WOLFSSL_STM32F1) #include "stm32f1xx_hal.h" #elif defined(WOLFSSL_STM32F2)
@@ -2271,6 +2327,8 @@ extern void uITRON4_free(void *p) ; #include "stm32u3xx_hal.h" #elif defined(WOLFSSL_STM32H5) #include "stm32h5xx_hal.h" + #elif defined(WOLFSSL_STM32C5) + #include "stm32c5xx_hal.h" #elif defined(WOLFSSL_STM32N6) #include "stm32n6xx_hal.h" #elif defined(WOLFSSL_STM32MP13)