Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 72 additions & 2 deletions src/excrypt_aes.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
#include <string.h>
#include <wmmintrin.h> //for intrinsics for AES-NI
#ifdef _M_AMD64
#include <wmmintrin.h> //for intrinsics for AES-NI
#endif
#include <arm_neon.h>

#ifdef __WIN32
#include <intrin.h> // cpuid
Expand All @@ -20,7 +23,64 @@ typedef void(*rijndaelCrypt_fn)(const uint32_t*, int, const uint8_t*, uint8_t*);
rijndaelCrypt_fn AesEnc = rijndaelEncrypt;
rijndaelCrypt_fn AesDec = rijndaelDecrypt;

#include <arm_neon.h>

void rijndaelDecrypt_ARMv8(const uint32_t* rk, int nrounds, const uint8_t* ciphertext, uint8_t* plaintext)
{
const uint8_t* round_keys = (const uint8_t*)rk;
uint8x16_t block = vld1q_u8(ciphertext);

// Initial AddRoundKey
block = veorq_u8(block, vld1q_u8(round_keys));
round_keys += 16;

// Main rounds
for (int round = 1; round < nrounds; ++round)
{
block = vaesdq_u8(block, vld1q_u8(round_keys));
block = vaesimcq_u8(block);
round_keys += 16;
}

// Final round (no InvMixColumns)
block = vaesdq_u8(block, vld1q_u8(round_keys));
round_keys += 16;
block = veorq_u8(block, vld1q_u8(round_keys));

vst1q_u8(plaintext, block);
}

#include <arm_neon.h>

void rijndaelEncrypt_ARMv8(const uint32_t* rk, int nrounds, const uint8_t* plaintext, uint8_t* ciphertext)
{
const uint8_t* round_keys = (const uint8_t*)rk;
uint8x16_t block = vld1q_u8(plaintext);

// Initial AddRoundKey
block = veorq_u8(block, vld1q_u8(round_keys));
round_keys += 16;

// Main rounds
for (int round = 1; round < nrounds; ++round)
{
block = vaeseq_u8(block, vld1q_u8(round_keys));
block = vaesmcq_u8(block);
round_keys += 16;
}

// Final round (no MixColumns)
block = vaeseq_u8(block, vld1q_u8(round_keys));
round_keys += 16;
block = veorq_u8(block, vld1q_u8(round_keys));

vst1q_u8(ciphertext, block);
}



/* AESNI code based on https://gist.github.com/acapola/d5b940da024080dfaf5f */
#ifdef _M_AMD64
void rijndaelEncrypt_AESNI(const uint32_t* rk, int nrounds, const uint8_t* plaintext, uint8_t* ciphertext)
{
__m128i block = _mm_loadu_si128((const __m128i*)plaintext);
Expand Down Expand Up @@ -60,23 +120,26 @@ void rijndaelDecrypt_AESNI(const uint32_t* rk, int nrounds, const uint8_t* ciphe

_mm_storeu_si128((__m128i*)plaintext, block);
}
#endif

#define AES_128_key_exp(k, rcon) aes_128_key_expansion(k, _mm_aeskeygenassist_si128(k, rcon))

#ifdef _M_AMD64
static __m128i aes_128_key_expansion(__m128i key, __m128i keygened) {
keygened = _mm_shuffle_epi32(keygened, _MM_SHUFFLE(3, 3, 3, 3));
key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
return _mm_xor_si128(key, keygened);
}
#endif

static int aesni_supported = 0;
int aesni_get_supported()
{
#ifndef _M_AMD64
return 0; // AES-NI only works properly in x64?
#endif
#else
int regs[4];
__cpuid(regs, 1);
aesni_supported = (regs[2] >> 25) & 1;
Expand All @@ -87,10 +150,16 @@ int aesni_get_supported()
AesDec = rijndaelDecrypt_AESNI;
}
return aesni_supported;
#endif
}

void ExCryptAesKey(EXCRYPT_AES_STATE* state, const uint8_t* key)
{
#ifndef _M_AMD64
rijndaelSetupEncrypt((uint32_t*)state->keytabenc, key, 128);
memcpy(state->keytabdec, state->keytabenc, sizeof(state->keytabdec));
rijndaelSetupDecrypt((uint32_t*)state->keytabdec, key, 128);
#else
if (aesni_supported || aesni_get_supported())
{
__m128i* enc_table = (__m128i*)state->keytabenc;
Expand Down Expand Up @@ -127,6 +196,7 @@ void ExCryptAesKey(EXCRYPT_AES_STATE* state, const uint8_t* key)
memcpy(state->keytabdec, state->keytabenc, sizeof(state->keytabdec));
rijndaelSetupDecrypt((uint32_t*)state->keytabdec, key, 128);
}
#endif
}

void ExCryptAesEcb(const EXCRYPT_AES_STATE* state, const uint8_t* input, uint8_t* output, uint8_t encrypt)
Expand Down