/* Copyright (C) 2000-2009 Peter Selinger. This file is part of ccrypt. It is free software and it is covered by the GNU general public license. See the file COPYING for details. */ /* rijndael.c - optimized version of the Rijndeal cipher */ /* $Id: rijndael.c 258 2009-08-26 17:46:10Z selinger $ */ /* derived from original source: rijndael-alg-ref.c v2.0 August '99 * Reference ANSI C code for NIST competition * authors: Paulo Barreto * Vincent Rijmen */ #ifndef AO_AES_TEST #include #endif #include #include "ao_aes_int.h" static const int xshifts[3][2][4] = { {{0, 1, 2, 3}, {0, 3, 2, 1}}, {{0, 1, 2, 3}, {0, 5, 4, 3}}, {{0, 1, 3, 4}, {0, 7, 5, 4}}, }; /* Exor corresponding text input and round key input bytes */ /* the result is written to res, which can be the same as a */ static inline void xKeyAddition(word32 res[MAXBC], word32 a[MAXBC], word32 rk[MAXBC], int BC) { int j; for (j = 0; j < BC; j++) { res[j] = a[j] ^ rk[j]; } } #if 0 /* code included for reference */ /* shift rows a, return result in res. This avoids having to copy a tmp array back to a. res must not be a. */ static inline void xShiftRow(word32 res[MAXBC], word32 a[MAXBC], int shift[4], int BC) { word8 (*a8)[4] = (word8 (*)[4]) a; word8 (*res8)[4] = (word8 (*)[4]) res; /* Row 0 remains unchanged * The other three rows are shifted a variable amount */ int i, j; int s; for (j = 0; j < BC; j++) { res8[j][0] = a8[j][0]; } for (i = 1; i < 4; i++) { s = shift[i]; for (j = 0; j < BC; j++) { res8[j][i] = a8[(j + s) % BC][i]; } } } static inline void xSubstitution(word32 a[MAXBC], word8 box[256], int BC) { word8 (*a8)[4] = (word8 (*)[4]) a; /* Replace every byte of the input by the byte at that place * in the nonlinear S-box */ int i, j; for (i = 0; i < 4; i++) { for (j = 0; j < BC; j++) { a8[j][i] = box[a[j][i]]; } } } #endif /* code included for reference */ /* profiling shows that the ccrypt program spends about 50% of its time in the function xShiftSubst. Splitting the inner "for" statement into two parts - versus using the expensive "%" modulo operation, makes this function about 44% faster, thereby making the entire program about 28% faster. With -O3 optimization, the time savings are even more dramatic - ccrypt runs between 55% and 65% faster on most platforms. */ /* do ShiftRow and Substitution together. res must not be a. */ static inline void xShiftSubst(word32 res[MAXBC], word32 a[MAXBC], int shift[4], int BC, const word8 box[256]) { int i, j; int s; word8 (*a8)[4] = (word8 (*)[4]) a; word8 (*res8)[4] = (word8 (*)[4]) res; for (j = 0; j < BC; j++) { res8[j][0] = box[a8[j][0]]; } for (i = 1; i < 4; i++) { s = shift[i]; for (j = 0; j < BC - s; j++) { res8[j][i] = box[a8[(j + s)][i]]; } for (j = BC - s; j < BC; j++) { res8[j][i] = box[a8[(j + s) - BC][i]]; } } } #if 0 /* code included for reference */ /* Mix the four bytes of every column in a linear way */ /* the result is written to res, which may equal a */ static inline void xMixColumn(word32 res[MAXBC], word32 a[MAXBC], int BC) { int j; word32 b; word8 (*a8)[4] = (word8 (*)[4]) a; for (j = 0; j < BC; j++) { b = M0[0][a8[j][0]].w32; b ^= M0[1][a8[j][1]].w32; b ^= M0[2][a8[j][2]].w32; b ^= M0[3][a8[j][3]].w32; res[j] = b; } } #endif /* code included for reference */ /* do MixColumn and KeyAddition together */ static inline void xMixAdd(word32 res[MAXBC], word32 a[MAXBC], word32 rk[MAXBC], int BC) { int j; word32 b; word8 (*a8)[4] = (word8 (*)[4]) a; for (j = 0; j < BC; j++) { b = M0[0][a8[j][0]].w32; b ^= M0[1][a8[j][1]].w32; b ^= M0[2][a8[j][2]].w32; b ^= M0[3][a8[j][3]].w32; b ^= rk[j]; res[j] = b; } } /* Mix the four bytes of every column in a linear way * This is the opposite operation of xMixColumn */ /* the result is written to res, which may equal a */ static inline void xInvMixColumn(word32 res[MAXBC], word32 a[MAXBC], int BC) { int j; word32 b; word8 (*a8)[4] = (word8 (*)[4]) a; for (j = 0; j < BC; j++) { b = M1[0][a8[j][0]].w32; b ^= M1[1][a8[j][1]].w32; b ^= M1[2][a8[j][2]].w32; b ^= M1[3][a8[j][3]].w32; res[j] = b; } } #if 0 /* code included for reference */ /* do KeyAddition and InvMixColumn together */ static inline void xAddInvMix(word32 res[MAXBC], word32 a[MAXBC], word32 rk[MAXBC], int BC) { int j; word32 b; word8 (*a8)[4] = (word8 (*)[4]) a; for (j = 0; j < BC; j++) { a[j] = a[j] ^ rk[j]; b = M1[0][a8[j][0]].w32; b ^= M1[1][a8[j][1]].w32; b ^= M1[2][a8[j][2]].w32; b ^= M1[3][a8[j][3]].w32; res[j] = b; } } #endif /* code included for reference */ static int xrijndaelKeySched(word32 key[], int keyBits, int blockBits, roundkey *rkk) { /* Calculate the necessary round keys * The number of calculations depends on keyBits and blockBits */ int KC, BC, ROUNDS; int i, j, t, rconpointer = 0; word8 (*k8)[4] = (word8 (*)[4]) key; switch (keyBits) { case 128: KC = 4; break; case 192: KC = 6; break; case 256: KC = 8; break; default: return -1; } switch (blockBits) { case 128: BC = 4; break; case 192: BC = 6; break; case 256: BC = 8; break; default: return -2; } ROUNDS = KC > BC ? KC + 6 : BC + 6; t = 0; /* copy values into round key array */ for (j = 0; (j < KC) && (t < (ROUNDS + 1) * BC); j++, t++) rkk->rk[t] = key[j]; while (t < (ROUNDS + 1) * BC) { /* while not enough round key material */ /* calculate new values */ for (i = 0; i < 4; i++) { k8[0][i] ^= xS[k8[KC - 1][(i + 1) % 4]]; } k8[0][0] ^= xrcon[rconpointer++]; if (KC != 8) { for (j = 1; j < KC; j++) { key[j] ^= key[j - 1]; } } else { for (j = 1; j < 4; j++) { key[j] ^= key[j - 1]; } for (i = 0; i < 4; i++) { k8[4][i] ^= xS[k8[3][i]]; } for (j = 5; j < 8; j++) { key[j] ^= key[j - 1]; } } /* copy values into round key array */ for (j = 0; (j < KC) && (t < (ROUNDS + 1) * BC); j++, t++) { rkk->rk[t] = key[j]; } } /* make roundkey structure */ rkk->BC = BC; rkk->KC = KC; rkk->ROUNDS = ROUNDS; for (i = 0; i < 2; i++) { for (j = 0; j < 4; j++) { rkk->shift[i][j] = xshifts[(BC - 4) >> 1][i][j]; } } return 0; } /* Encryption of one block. */ static void xrijndaelEncrypt(word32 block[], roundkey *rkk) { word32 block2[MAXBC]; /* hold intermediate result */ int r; int *shift = rkk->shift[0]; int BC = rkk->BC; int ROUNDS = rkk->ROUNDS; word32 *rp = rkk->rk; /* begin with a key addition */ xKeyAddition(block, block, rp, BC); rp += BC; /* ROUNDS-1 ordinary rounds */ for (r = 1; r < ROUNDS; r++) { xShiftSubst(block2, block, shift, BC, xS); xMixAdd(block, block2, rp, BC); rp += BC; } /* Last round is special: there is no xMixColumn */ xShiftSubst(block2, block, shift, BC, xS); xKeyAddition(block, block2, rp, BC); } #if NOTUSED /* We don't actually need this in AltOS, so don't bother including it */ /* Decryption of one block. */ static void xrijndaelDecrypt(word32 block[], roundkey *rkk) { word32 block2[MAXBC]; /* hold intermediate result */ int r; int *shift = rkk->shift[1]; int BC = rkk->BC; int ROUNDS = rkk->ROUNDS; word32 *rp = rkk->rk + ROUNDS * BC; /* To decrypt: apply the inverse operations of the encrypt routine, * in opposite order * * (xKeyAddition is an involution: it's equal to its inverse) * (the inverse of xSubstitution with table S is xSubstitution with the * inverse table of S) * (the inverse of xShiftRow is xShiftRow over a suitable distance) */ /* First the special round: * without xInvMixColumn * with extra xKeyAddition */ xKeyAddition(block2, block, rp, BC); xShiftSubst(block, block2, shift, BC, xSi); rp -= BC; /* ROUNDS-1 ordinary rounds */ for (r = ROUNDS - 1; r > 0; r--) { xKeyAddition(block, block, rp, BC); xInvMixColumn(block2, block, BC); xShiftSubst(block, block2, shift, BC, xSi); rp -= BC; } /* End with the extra key addition */ xKeyAddition(block, block, rp, BC); } #endif uint8_t ao_aes_mutex; static word32 key[16/4]; static roundkey rkk; static word32 iv[16/4]; void ao_aes_set_mode(enum ao_aes_mode mode) { (void) mode; /* we only do CBC_MAC anyways... */ } void ao_aes_set_key(__xdata uint8_t *in) { memcpy(key, in, 16); xrijndaelKeySched((word32 *) key, 128, 128, &rkk); } void ao_aes_zero_iv(void) { memset(iv, '\0', sizeof (iv)); } void ao_aes_run(__xdata uint8_t *in, __xdata uint8_t *out) { uint8_t i; uint8_t *_iv = (uint8_t *) iv; for (i = 0; i < 16; i++) _iv[i] ^= in[i]; xrijndaelEncrypt(iv, &rkk); if (out) memcpy(out, iv, 16); } void ao_aes_init(void) { }