diff --git a/src/excrypt_aes.c b/src/excrypt_aes.c index ea073b4..28fab58 100644 --- a/src/excrypt_aes.c +++ b/src/excrypt_aes.c @@ -1,5 +1,8 @@ #include -#include //for intrinsics for AES-NI +#ifdef _M_AMD64 + #include //for intrinsics for AES-NI +#endif +#include #ifdef __WIN32 #include // cpuid @@ -20,7 +23,64 @@ typedef void(*rijndaelCrypt_fn)(const uint32_t*, int, const uint8_t*, uint8_t*); rijndaelCrypt_fn AesEnc = rijndaelEncrypt; rijndaelCrypt_fn AesDec = rijndaelDecrypt; +#include + +void rijndaelDecrypt_ARMv8(const uint32_t* rk, int nrounds, const uint8_t* ciphertext, uint8_t* plaintext) +{ + const uint8_t* round_keys = (const uint8_t*)rk; + uint8x16_t block = vld1q_u8(ciphertext); + + // Initial AddRoundKey + block = veorq_u8(block, vld1q_u8(round_keys)); + round_keys += 16; + + // Main rounds + for (int round = 1; round < nrounds; ++round) + { + block = vaesdq_u8(block, vld1q_u8(round_keys)); + block = vaesimcq_u8(block); + round_keys += 16; + } + + // Final round (no InvMixColumns) + block = vaesdq_u8(block, vld1q_u8(round_keys)); + round_keys += 16; + block = veorq_u8(block, vld1q_u8(round_keys)); + + vst1q_u8(plaintext, block); +} + +#include + +void rijndaelEncrypt_ARMv8(const uint32_t* rk, int nrounds, const uint8_t* plaintext, uint8_t* ciphertext) +{ + const uint8_t* round_keys = (const uint8_t*)rk; + uint8x16_t block = vld1q_u8(plaintext); + + // Initial AddRoundKey + block = veorq_u8(block, vld1q_u8(round_keys)); + round_keys += 16; + + // Main rounds + for (int round = 1; round < nrounds; ++round) + { + block = vaeseq_u8(block, vld1q_u8(round_keys)); + block = vaesmcq_u8(block); + round_keys += 16; + } + + // Final round (no MixColumns) + block = vaeseq_u8(block, vld1q_u8(round_keys)); + round_keys += 16; + block = veorq_u8(block, vld1q_u8(round_keys)); + + vst1q_u8(ciphertext, block); +} + + + /* AESNI code based on https://gist.github.com/acapola/d5b940da024080dfaf5f */ +#ifdef _M_AMD64 void rijndaelEncrypt_AESNI(const uint32_t* rk, int nrounds, const uint8_t* plaintext, uint8_t* ciphertext) { __m128i block = _mm_loadu_si128((const __m128i*)plaintext); @@ -60,9 +120,11 @@ void rijndaelDecrypt_AESNI(const uint32_t* rk, int nrounds, const uint8_t* ciphe _mm_storeu_si128((__m128i*)plaintext, block); } +#endif #define AES_128_key_exp(k, rcon) aes_128_key_expansion(k, _mm_aeskeygenassist_si128(k, rcon)) +#ifdef _M_AMD64 static __m128i aes_128_key_expansion(__m128i key, __m128i keygened) { keygened = _mm_shuffle_epi32(keygened, _MM_SHUFFLE(3, 3, 3, 3)); key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); @@ -70,13 +132,14 @@ static __m128i aes_128_key_expansion(__m128i key, __m128i keygened) { key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); return _mm_xor_si128(key, keygened); } +#endif static int aesni_supported = 0; int aesni_get_supported() { #ifndef _M_AMD64 return 0; // AES-NI only works properly in x64? -#endif +#else int regs[4]; __cpuid(regs, 1); aesni_supported = (regs[2] >> 25) & 1; @@ -87,10 +150,16 @@ int aesni_get_supported() AesDec = rijndaelDecrypt_AESNI; } return aesni_supported; +#endif } void ExCryptAesKey(EXCRYPT_AES_STATE* state, const uint8_t* key) { +#ifndef _M_AMD64 + rijndaelSetupEncrypt((uint32_t*)state->keytabenc, key, 128); + memcpy(state->keytabdec, state->keytabenc, sizeof(state->keytabdec)); + rijndaelSetupDecrypt((uint32_t*)state->keytabdec, key, 128); +#else if (aesni_supported || aesni_get_supported()) { __m128i* enc_table = (__m128i*)state->keytabenc; @@ -127,6 +196,7 @@ void ExCryptAesKey(EXCRYPT_AES_STATE* state, const uint8_t* key) memcpy(state->keytabdec, state->keytabenc, sizeof(state->keytabdec)); rijndaelSetupDecrypt((uint32_t*)state->keytabdec, key, 128); } +#endif } void ExCryptAesEcb(const EXCRYPT_AES_STATE* state, const uint8_t* input, uint8_t* output, uint8_t encrypt)