| 1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
 | /* Copyright (C) 2000-2009 Peter Selinger.
   This file is part of ccrypt. It is free software and it is covered
   by the GNU general public license. See the file COPYING for details. */
/* rijndael.c - optimized version of the Rijndael cipher */
/* $Id: rijndael.c 258 2009-08-26 17:46:10Z selinger $ */
/* derived from original source: rijndael-alg-ref.c   v2.0   August '99
 * Reference ANSI C code for NIST competition
 * authors: Paulo Barreto
 *          Vincent Rijmen
 */
#ifndef AO_AES_TEST
#include <ao.h>
#endif
#include <ao_aes.h>
#include "ao_aes_int.h"
/* Row shift offsets, indexed as xshifts[(BC - 4) >> 1][dir][row].
 * The first index selects the block width (BC = 4, 6 or 8 columns).
 * dir 0 is the table xrijndaelEncrypt uses, dir 1 the one
 * xrijndaelDecrypt uses. The offset for row 0 is always 0. */
static const int xshifts[3][2][4] = {
  {{0, 1, 2, 3},
   {0, 3, 2, 1}},
  {{0, 1, 2, 3},
   {0, 5, 4, 3}},
  {{0, 1, 3, 4},
   {0, 7, 5, 4}},
};
/* XOR the first BC words of the state a with the matching words of
   the round key rk and store the result in res. Each word is read
   before it is written, so res may be the same array as a. */
static inline void xKeyAddition(word32 res[MAXBC], word32 a[MAXBC],
			 word32 rk[MAXBC], int BC)
{
  int col = 0;

  while (col < BC) {
    res[col] = a[col] ^ rk[col];
    col++;
  }
}
#if 0				/* code included for reference */
/* Rotate the rows of the state a and store the result in res, which
   saves copying a temporary array back into a. res must not be a.
   Row 0 is copied unchanged; row i (1..3) takes each byte from
   shift[i] columns further along, wrapping around at BC. */
static inline void xShiftRow(word32 res[MAXBC], word32 a[MAXBC], int shift[4],
		      int BC)
{
  word8 (*src)[4] = (word8 (*)[4]) a;
  word8 (*dst)[4] = (word8 (*)[4]) res;
  int row, col;

  for (row = 0; row < 4; row++) {
    /* shift[0] is never consulted: row 0 always uses offset 0 */
    const int offset = (row == 0) ? 0 : shift[row];

    for (col = 0; col < BC; col++) {
      dst[col][row] = src[(col + offset) % BC][row];
    }
  }
}
/* Replace, in place, every byte of the first BC columns of a by the
   byte stored at that index in the substitution table box. box is
   only read. */
static inline void xSubstitution(word32 a[MAXBC], const word8 box[256], int BC)
{
  word8 (*a8)[4] = (word8 (*)[4]) a;
  int i, j;

  for (i = 0; i < 4; i++) {
    for (j = 0; j < BC; j++) {
      /* index through the byte view a8: a[j] is a whole word32 and
         cannot be subscripted a second time */
      a8[j][i] = box[a8[j][i]];
    }
  }
}
#endif				/* code included for reference */
/* profiling shows that the ccrypt program spends about 50% of its
   time in the function xShiftSubst. Splitting the inner "for"
   statement into two parts - versus using the expensive "%" modulo
   operation, makes this function about 44% faster, thereby making the
   entire program about 28% faster. With -O3 optimization, the time
   savings are even more dramatic - ccrypt runs between 55% and 65%
   faster on most platforms. */
/* ShiftRow and Substitution in one pass: each output byte is the box
   lookup of the input byte found shift[row] columns further along
   the same row, wrapping around at BC. Row 0 is looked up without
   any shift. res must not be a. */
static inline void xShiftSubst(word32 res[MAXBC], word32 a[MAXBC],
			int shift[4], int BC, const word8 box[256])
{
  word8 (*src)[4] = (word8 (*)[4]) a;
  word8 (*dst)[4] = (word8 (*)[4]) res;
  int row, col;

  for (col = 0; col < BC; col++) {
    dst[col][0] = box[src[col][0]];
  }

  for (row = 1; row < 4; row++) {
    const int offset = shift[row];
    const int wrap = BC - offset;	/* first column whose source wraps */

    /* source column is still inside the block */
    for (col = 0; col < wrap; col++) {
      dst[col][row] = box[src[col + offset][row]];
    }
    /* source column wraps round to the start of the row */
    for (col = wrap; col < BC; col++) {
      dst[col][row] = box[src[col - wrap][row]];
    }
  }
}
#if 0				/* code included for reference */
/* Linear mixing of the four bytes of each of the first BC columns:
   the new column word is the XOR of the four M0 table words selected
   by the column's bytes. All four bytes are read before the result
   is stored, so res may equal a. */
static inline void xMixColumn(word32 res[MAXBC], word32 a[MAXBC], int BC)
{
  word8 (*bytes)[4] = (word8 (*)[4]) a;
  int col;

  for (col = 0; col < BC; col++) {
    const word8 *in = bytes[col];
    const word32 mixed = M0[0][in[0]].w32 ^ M0[1][in[1]].w32
		       ^ M0[2][in[2]].w32 ^ M0[3][in[3]].w32;

    res[col] = mixed;
  }
}
#endif				/* code included for reference */
/* MixColumn and KeyAddition in one pass: for each of the first BC
   columns, XOR together the four M0 table words selected by the
   column's bytes, XOR in the round key word rk[col] and store the
   result in res[col]. */
static inline void xMixAdd(word32 res[MAXBC], word32 a[MAXBC],
		    word32 rk[MAXBC], int BC)
{
  word8 (*bytes)[4] = (word8 (*)[4]) a;
  int col;

  for (col = 0; col < BC; col++) {
    const word8 *in = bytes[col];
    const word32 mixed = M0[0][in[0]].w32 ^ M0[1][in[1]].w32
		       ^ M0[2][in[2]].w32 ^ M0[3][in[3]].w32;

    res[col] = mixed ^ rk[col];
  }
}
/* Inverse of xMixColumn: for each of the first BC columns, the new
   column word is the XOR of the four M1 table words selected by the
   column's bytes. All four bytes are read before the result is
   stored, so res may equal a. */
static inline void xInvMixColumn(word32 res[MAXBC], word32 a[MAXBC], int BC)
{
  word8 (*bytes)[4] = (word8 (*)[4]) a;
  int col;

  for (col = 0; col < BC; col++) {
    const word8 *in = bytes[col];
    const word32 mixed = M1[0][in[0]].w32 ^ M1[1][in[1]].w32
		       ^ M1[2][in[2]].w32 ^ M1[3][in[3]].w32;

    res[col] = mixed;
  }
}
#if 0				/* code included for reference */
/* KeyAddition and InvMixColumn in one pass. Note that a is modified:
   each word of a is first XORed in place with rk, and the M1 lookup
   is then driven by the bytes of that updated word. */
static inline void xAddInvMix(word32 res[MAXBC], word32 a[MAXBC],
		       word32 rk[MAXBC], int BC)
{
  word8 (*bytes)[4] = (word8 (*)[4]) a;
  int col;

  for (col = 0; col < BC; col++) {
    const word8 *in = bytes[col];

    a[col] ^= rk[col];
    res[col] = M1[0][in[0]].w32 ^ M1[1][in[1]].w32
	     ^ M1[2][in[2]].w32 ^ M1[3][in[3]].w32;
  }
}
#endif				/* code included for reference */
/* Expand the cipher key into the round keys stored in rkk.
 *
 * key       - KC words of key material; it doubles as the working
 *             buffer of the expansion and is overwritten
 * keyBits   - key size: 128, 192 or 256
 * blockBits - block size: 128, 192 or 256
 * rkk       - receives (ROUNDS + 1) * BC round key words in rk[],
 *             plus BC, KC, ROUNDS and both row shift tables
 *
 * Returns 0 on success, -1 for an unsupported keyBits and -2 for an
 * unsupported blockBits. Nothing is written on failure. */
static
int xrijndaelKeySched(word32 key[], int keyBits, int blockBits,
		      roundkey *rkk)
{
  word8 (*kb)[4] = (word8 (*)[4]) key;
  int KC, BC, ROUNDS;
  int needed;		/* round key words to produce in total */
  int have = 0;		/* round key words produced so far */
  int rcon_idx = 0;	/* next entry of xrcon to consume */
  int n, dir, row;

  if (keyBits == 128) {
    KC = 4;
  } else if (keyBits == 192) {
    KC = 6;
  } else if (keyBits == 256) {
    KC = 8;
  } else {
    return -1;
  }

  if (blockBits == 128) {
    BC = 4;
  } else if (blockBits == 192) {
    BC = 6;
  } else if (blockBits == 256) {
    BC = 8;
  } else {
    return -2;
  }

  ROUNDS = (KC > BC ? KC : BC) + 6;
  needed = (ROUNDS + 1) * BC;

  /* the key itself supplies the first words of round key material */
  for (n = 0; n < KC && have < needed; n++) {
    rkk->rk[have++] = key[n];
  }

  while (have < needed) {
    /* derive the next KC key words in place: word 0 absorbs the
       S-box image of the byte-rotated last word and a round constant */
    for (n = 0; n < 4; n++) {
      kb[0][n] ^= xS[kb[KC - 1][(n + 1) % 4]];
    }
    kb[0][0] ^= xrcon[rcon_idx++];

    if (KC == 8) {
      /* 8-word keys put an extra S-box step in front of word 4 */
      for (n = 1; n < 4; n++) {
        key[n] ^= key[n - 1];
      }
      for (n = 0; n < 4; n++) {
        kb[4][n] ^= xS[kb[3][n]];
      }
      for (n = 5; n < 8; n++) {
        key[n] ^= key[n - 1];
      }
    } else {
      for (n = 1; n < KC; n++) {
        key[n] ^= key[n - 1];
      }
    }

    /* append the new words, stopping once enough have been produced */
    for (n = 0; n < KC && have < needed; n++) {
      rkk->rk[have++] = key[n];
    }
  }

  /* fill in the rest of the roundkey structure */
  rkk->BC = BC;
  rkk->KC = KC;
  rkk->ROUNDS = ROUNDS;
  for (dir = 0; dir < 2; dir++) {
    for (row = 0; row < 4; row++) {
      rkk->shift[dir][row] = xshifts[(BC - 4) >> 1][dir][row];
    }
  }
  return 0;
}
/* Encrypt one block in place with the round keys in rkk: an initial
   key addition, ROUNDS-1 full rounds, then a final round that has no
   MixColumn step. */
static
void xrijndaelEncrypt(word32 block[], roundkey *rkk)
{
  const int BC = rkk->BC;
  int *shift = rkk->shift[0];
  word32 *round_key = rkk->rk;
  word32 scratch[MAXBC];	/* intermediate state of the current round */
  int remaining;

  xKeyAddition(block, block, round_key, BC);
  round_key += BC;

  /* each full round consumes the next BC round key words */
  for (remaining = rkk->ROUNDS - 1; remaining > 0; remaining--) {
    xShiftSubst(scratch, block, shift, BC, xS);
    xMixAdd(block, scratch, round_key, BC);
    round_key += BC;
  }

  /* final round: plain key addition instead of xMixAdd */
  xShiftSubst(scratch, block, shift, BC, xS);
  xKeyAddition(block, scratch, round_key, BC);
}
#if NOTUSED
/* We don't actually need this in AltOS, so don't bother including it */
/* Decrypt one block in place by applying the inverse of each
   encryption step in the opposite order:
   - xKeyAddition is its own inverse;
   - substitution is undone with the xSi table;
   - the row shift is undone with the offsets in rkk->shift[1].
   Round keys are consumed from the last one back to the first. */
static
void xrijndaelDecrypt(word32 block[], roundkey *rkk)
{
  const int BC = rkk->BC;
  const int ROUNDS = rkk->ROUNDS;
  int *shift = rkk->shift[1];
  word32 *round_key = rkk->rk + ROUNDS * BC;
  word32 scratch[MAXBC];	/* intermediate state of the current round */
  int round;

  /* the special round comes first: key addition, no xInvMixColumn */
  xKeyAddition(scratch, block, round_key, BC);
  xShiftSubst(block, scratch, shift, BC, xSi);
  round_key -= BC;

  /* ROUNDS-1 ordinary rounds */
  for (round = 1; round < ROUNDS; round++) {
    xKeyAddition(block, block, round_key, BC);
    xInvMixColumn(scratch, block, BC);
    xShiftSubst(block, scratch, shift, BC, xSi);
    round_key -= BC;
  }

  /* finish with the extra key addition */
  xKeyAddition(block, block, round_key, BC);
}
#endif
/* NOTE(review): not referenced anywhere in this file; presumably callers
   take it to serialize use of the state below -- confirm. */
uint8_t ao_aes_mutex;
/* Working copy of the 16-byte key loaded by ao_aes_set_key(); the key
   schedule expands it in place, so it does not keep the original key. */
static word32 key[16/4];
/* Round keys produced by xrijndaelKeySched() for the current key. */
static roundkey	rkk;
/* Running block: ao_aes_run() XORs each input block into it and encrypts
   it in place; ao_aes_zero_iv() clears it. */
static word32 iv[16/4];
/* Select the cipher mode. The argument is ignored and no state is
   changed. */
void
ao_aes_set_mode(enum ao_aes_mode mode)
{
	(void) mode;
	/* we only do CBC_MAC anyways... */
}
/* Load a 16-byte (128-bit) key from in and compute the round keys for
   128-bit blocks into rkk. */
void
ao_aes_set_key(__xdata uint8_t *in)
{
	word32	*words = key;

	memcpy(words, in, 16);
	/* 128/128 is always accepted, so the status result is not checked */
	xrijndaelKeySched(words, 128, 128, &rkk);
}
/* Reset the running block iv to all zero bytes. */
void
ao_aes_zero_iv(void)
{
	memset(iv, 0, sizeof iv);
}
/* Process one 16-byte block: XOR the bytes of in into the running block
   iv, encrypt iv in place with the current round keys and, when out is
   non-NULL, copy the 16-byte result to out. */
void
ao_aes_run(__xdata uint8_t *in,
	   __xdata uint8_t *out)
{
	uint8_t	*state = (uint8_t *) iv;
	uint8_t	n = 0;

	while (n < 16) {
		state[n] ^= in[n];
		n++;
	}
	xrijndaelEncrypt(iv, &rkk);
	if (!out)
		return;
	memcpy(out, iv, 16);
}
/* Initialization hook. The body is empty: nothing is set up here. */
void
ao_aes_init(void)
{
}
 |