aes: replace deprecated prog_uint* types
[aversive.git] / modules / crypto / aes / aes_core.c
1 /*  
2  *  Copyright Droids Corporation, Microb Technology, Eirbot (2005)
3  * 
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, write to the Free Software
16  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17  *
18  */
19
20 /*
21  * This code is mainly from rijndael-alg-fst.c
22  *
23  * @version 3.0 (December 2000)
24  *
25  * Optimised ANSI C code for the Rijndael cipher (now AES)
26  *
27  * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
28  * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
29  * @author Paulo Barreto <paulo.barreto@terra.com.br>
30  *
31  */
32
33 /* Optimized to use AVR flash to store big tables */
34
35 #ifndef HOST_VERSION
36 #define USE_PGMMEM
37 #endif
38
39 #ifndef AES_DEBUG
40 # ifndef NDEBUG
41 #  define NDEBUG
42 # endif
43 #endif
44 #include <assert.h>
45 #include <stdint.h>
46
47 #include <aversive.h>
48 #include "aes_locl.h"
49 #include "aes.h"
50 #include "aes_locl.h"
51 #include <aversive/pgmspace.h>
52
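/*
 * Reference definitions of the Rijndael lookup tables:
 *
 * Te0[x] = S [x].[02, 01, 01, 03];
 * Te1[x] = S [x].[03, 02, 01, 01];
 * Te2[x] = S [x].[01, 03, 02, 01];
 * Te3[x] = S [x].[01, 01, 03, 02];
 * Te4[x] = S [x].[01, 01, 01, 01];
 *
 * Td0[x] = Si[x].[0e, 09, 0d, 0b];
 * Td1[x] = Si[x].[0b, 0e, 09, 0d];
 * Td2[x] = Si[x].[0d, 0b, 0e, 09];
 * Td3[x] = Si[x].[09, 0d, 0b, 0e];
 * Td4[x] = Si[x].[01, 01, 01, 01];
 *
 * To save space, this file only stores Te0 and Td0 as 32-bit tables.
 * Te1..Te3 and Td1..Td3 are byte rotations of Te0 and Td0 and are
 * computed on the fly with ROR_U32_1/2/3.  Te4 and Td4 are stored as
 * single bytes (S[x] and Si[x]) and shifted into position by the caller.
 */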
66
67
/*
 * Byte-wise right rotations of a 32-bit word.
 *
 * These were function-like macros that expanded their argument four
 * times.  Every caller passes a table lookup (Te0()/Td0()), so each use
 * repeated that lookup four times.  As inline functions the argument is
 * evaluated exactly once; the returned value is unchanged.
 */

/* Rotate a right by one byte: b3 b2 b1 b0 -> b0 b3 b2 b1. */
static inline uint32_t ROR_U32_1(uint32_t a)
{
        return (a >> 8) | (a << 24);
}

/* Rotate a right by two bytes: b3 b2 b1 b0 -> b1 b0 b3 b2. */
static inline uint32_t ROR_U32_2(uint32_t a)
{
        return (a >> 16) | (a << 16);
}

/* Rotate a right by three bytes: b3 b2 b1 b0 -> b2 b1 b0 b3. */
static inline uint32_t ROR_U32_3(uint32_t a)
{
        return (a >> 24) | (a << 8);
}
71
72
/*
 * Encryption table Te0: Te0_[x] = S[x].[02, 01, 01, 03].
 * Declared with the progmem attribute when USE_PGMMEM is defined,
 * otherwise as an ordinary static const array.
 */
#ifdef USE_PGMMEM
__attribute__((progmem)) const uint32_t Te0_[256] =
#else
static const uint32_t Te0_[256] =
#endif
{
    0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
    0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
    0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
    0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
    0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
    0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
    0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
    0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
    0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
    0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
    0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
    0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
    0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
    0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
    0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
    0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
    0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
    0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
    0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
    0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
    0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
    0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
    0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
    0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
    0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
    0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
    0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
    0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
    0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
    0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
    0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
    0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
    0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
    0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
    0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
    0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
    0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
    0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
    0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
    0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
    0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
    0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
    0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
    0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
    0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
    0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
    0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
    0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
    0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
    0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
    0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
    0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
    0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
    0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
    0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
    0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
    0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
    0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
    0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
    0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
    0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
    0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
    0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
    0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
};
143
/*
 * Te0(i): fetch entry i of Te0_.
 * Without USE_PGMMEM this is a plain array access; with USE_PGMMEM the
 * entry is copied out of the table with memcpy_P().
 */
#ifndef USE_PGMMEM
#define Te0(x) Te0_[x]
#else
static inline uint32_t Te0(int i)
{
        uint32_t word;

        memcpy_P(&word, &Te0_[i], sizeof(word));
        return word;
}
#endif
154
/*
 * Forward S-box stored as single bytes: Te4_[x] = S[x].
 * Callers shift each byte into the position they need.
 *
 * Both build variants use uint8_t (from <stdint.h>); the non-flash
 * variant previously used the project typedef u8, which made the two
 * declarations of the same table inconsistent.
 */
#ifdef USE_PGMMEM
__attribute__((progmem)) const uint8_t Te4_[256] = {
#else
static const uint8_t Te4_[256] = {
#endif
    0x63U, 0x7cU, 0x77U, 0x7bU,
    0xf2U, 0x6bU, 0x6fU, 0xc5U,
    0x30U, 0x01U, 0x67U, 0x2bU,
    0xfeU, 0xd7U, 0xabU, 0x76U,
    0xcaU, 0x82U, 0xc9U, 0x7dU,
    0xfaU, 0x59U, 0x47U, 0xf0U,
    0xadU, 0xd4U, 0xa2U, 0xafU,
    0x9cU, 0xa4U, 0x72U, 0xc0U,
    0xb7U, 0xfdU, 0x93U, 0x26U,
    0x36U, 0x3fU, 0xf7U, 0xccU,
    0x34U, 0xa5U, 0xe5U, 0xf1U,
    0x71U, 0xd8U, 0x31U, 0x15U,
    0x04U, 0xc7U, 0x23U, 0xc3U,
    0x18U, 0x96U, 0x05U, 0x9aU,
    0x07U, 0x12U, 0x80U, 0xe2U,
    0xebU, 0x27U, 0xb2U, 0x75U,
    0x09U, 0x83U, 0x2cU, 0x1aU,
    0x1bU, 0x6eU, 0x5aU, 0xa0U,
    0x52U, 0x3bU, 0xd6U, 0xb3U,
    0x29U, 0xe3U, 0x2fU, 0x84U,
    0x53U, 0xd1U, 0x00U, 0xedU,
    0x20U, 0xfcU, 0xb1U, 0x5bU,
    0x6aU, 0xcbU, 0xbeU, 0x39U,
    0x4aU, 0x4cU, 0x58U, 0xcfU,
    0xd0U, 0xefU, 0xaaU, 0xfbU,
    0x43U, 0x4dU, 0x33U, 0x85U,
    0x45U, 0xf9U, 0x02U, 0x7fU,
    0x50U, 0x3cU, 0x9fU, 0xa8U,
    0x51U, 0xa3U, 0x40U, 0x8fU,
    0x92U, 0x9dU, 0x38U, 0xf5U,
    0xbcU, 0xb6U, 0xdaU, 0x21U,
    0x10U, 0xffU, 0xf3U, 0xd2U,
    0xcdU, 0x0cU, 0x13U, 0xecU,
    0x5fU, 0x97U, 0x44U, 0x17U,
    0xc4U, 0xa7U, 0x7eU, 0x3dU,
    0x64U, 0x5dU, 0x19U, 0x73U,
    0x60U, 0x81U, 0x4fU, 0xdcU,
    0x22U, 0x2aU, 0x90U, 0x88U,
    0x46U, 0xeeU, 0xb8U, 0x14U,
    0xdeU, 0x5eU, 0x0bU, 0xdbU,
    0xe0U, 0x32U, 0x3aU, 0x0aU,
    0x49U, 0x06U, 0x24U, 0x5cU,
    0xc2U, 0xd3U, 0xacU, 0x62U,
    0x91U, 0x95U, 0xe4U, 0x79U,
    0xe7U, 0xc8U, 0x37U, 0x6dU,
    0x8dU, 0xd5U, 0x4eU, 0xa9U,
    0x6cU, 0x56U, 0xf4U, 0xeaU,
    0x65U, 0x7aU, 0xaeU, 0x08U,
    0xbaU, 0x78U, 0x25U, 0x2eU,
    0x1cU, 0xa6U, 0xb4U, 0xc6U,
    0xe8U, 0xddU, 0x74U, 0x1fU,
    0x4bU, 0xbdU, 0x8bU, 0x8aU,
    0x70U, 0x3eU, 0xb5U, 0x66U,
    0x48U, 0x03U, 0xf6U, 0x0eU,
    0x61U, 0x35U, 0x57U, 0xb9U,
    0x86U, 0xc1U, 0x1dU, 0x9eU,
    0xe1U, 0xf8U, 0x98U, 0x11U,
    0x69U, 0xd9U, 0x8eU, 0x94U,
    0x9bU, 0x1eU, 0x87U, 0xe9U,
    0xceU, 0x55U, 0x28U, 0xdfU,
    0x8cU, 0xa1U, 0x89U, 0x0dU,
    0xbfU, 0xe6U, 0x42U, 0x68U,
    0x41U, 0x99U, 0x2dU, 0x0fU,
    0xb0U, 0x54U, 0xbbU, 0x16U,
};
/*
 * Te4(i): fetch byte i of Te4_.
 * Without USE_PGMMEM this is a plain array access; with USE_PGMMEM the
 * byte is copied out of the table with memcpy_P().
 */
#ifndef USE_PGMMEM
#define Te4(x) Te4_[x]
#else
static inline u8 Te4(int i)
{
        u8 byte;

        memcpy_P(&byte, &Te4_[i], sizeof(byte));
        return byte;
}
#endif
235
236
/*
 * Decryption table Td0: Td0_[x] = Si[x].[0e, 09, 0d, 0b].
 * Declared with the progmem attribute when USE_PGMMEM is defined,
 * otherwise as an ordinary static const array.
 */
#ifdef USE_PGMMEM
__attribute__((progmem)) const uint32_t Td0_[256] =
#else
static const uint32_t Td0_[256] =
#endif
{
    0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
    0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
    0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
    0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
    0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
    0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
    0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
    0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
    0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
    0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
    0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
    0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
    0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
    0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
    0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
    0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
    0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
    0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
    0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
    0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
    0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
    0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
    0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
    0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
    0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
    0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
    0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
    0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
    0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
    0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
    0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
    0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
    0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
    0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
    0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
    0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
    0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
    0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
    0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
    0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
    0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
    0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
    0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
    0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
    0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
    0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
    0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
    0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
    0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
    0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
    0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
    0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
    0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
    0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
    0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
    0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
    0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
    0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
    0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
    0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
    0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
    0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
    0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
    0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
};
/*
 * Td0(i): fetch entry i of Td0_.
 * Without USE_PGMMEM this is a plain array access; with USE_PGMMEM the
 * entry is copied out of the table with memcpy_P().
 */
#ifndef USE_PGMMEM
#define Td0(x) Td0_[x]
#else
static inline uint32_t Td0(int i)
{
        uint32_t word;

        memcpy_P(&word, &Td0_[i], sizeof(word));
        return word;
}
#endif
317
/*
 * Inverse S-box stored as single bytes: Td4_[x] = Si[x].
 * Callers shift each byte into the position they need.
 *
 * Both build variants use uint8_t (from <stdint.h>); the non-flash
 * variant previously used the project typedef u8, which made the two
 * declarations of the same table inconsistent.
 */
#ifdef USE_PGMMEM
__attribute__((progmem)) const uint8_t Td4_[256] = {
#else
static const uint8_t Td4_[256] = {
#endif
    0x52U, 0x09U, 0x6aU, 0xd5U,
    0x30U, 0x36U, 0xa5U, 0x38U,
    0xbfU, 0x40U, 0xa3U, 0x9eU,
    0x81U, 0xf3U, 0xd7U, 0xfbU,
    0x7cU, 0xe3U, 0x39U, 0x82U,
    0x9bU, 0x2fU, 0xffU, 0x87U,
    0x34U, 0x8eU, 0x43U, 0x44U,
    0xc4U, 0xdeU, 0xe9U, 0xcbU,
    0x54U, 0x7bU, 0x94U, 0x32U,
    0xa6U, 0xc2U, 0x23U, 0x3dU,
    0xeeU, 0x4cU, 0x95U, 0x0bU,
    0x42U, 0xfaU, 0xc3U, 0x4eU,
    0x08U, 0x2eU, 0xa1U, 0x66U,
    0x28U, 0xd9U, 0x24U, 0xb2U,
    0x76U, 0x5bU, 0xa2U, 0x49U,
    0x6dU, 0x8bU, 0xd1U, 0x25U,
    0x72U, 0xf8U, 0xf6U, 0x64U,
    0x86U, 0x68U, 0x98U, 0x16U,
    0xd4U, 0xa4U, 0x5cU, 0xccU,
    0x5dU, 0x65U, 0xb6U, 0x92U,
    0x6cU, 0x70U, 0x48U, 0x50U,
    0xfdU, 0xedU, 0xb9U, 0xdaU,
    0x5eU, 0x15U, 0x46U, 0x57U,
    0xa7U, 0x8dU, 0x9dU, 0x84U,
    0x90U, 0xd8U, 0xabU, 0x00U,
    0x8cU, 0xbcU, 0xd3U, 0x0aU,
    0xf7U, 0xe4U, 0x58U, 0x05U,
    0xb8U, 0xb3U, 0x45U, 0x06U,
    0xd0U, 0x2cU, 0x1eU, 0x8fU,
    0xcaU, 0x3fU, 0x0fU, 0x02U,
    0xc1U, 0xafU, 0xbdU, 0x03U,
    0x01U, 0x13U, 0x8aU, 0x6bU,
    0x3aU, 0x91U, 0x11U, 0x41U,
    0x4fU, 0x67U, 0xdcU, 0xeaU,
    0x97U, 0xf2U, 0xcfU, 0xceU,
    0xf0U, 0xb4U, 0xe6U, 0x73U,
    0x96U, 0xacU, 0x74U, 0x22U,
    0xe7U, 0xadU, 0x35U, 0x85U,
    0xe2U, 0xf9U, 0x37U, 0xe8U,
    0x1cU, 0x75U, 0xdfU, 0x6eU,
    0x47U, 0xf1U, 0x1aU, 0x71U,
    0x1dU, 0x29U, 0xc5U, 0x89U,
    0x6fU, 0xb7U, 0x62U, 0x0eU,
    0xaaU, 0x18U, 0xbeU, 0x1bU,
    0xfcU, 0x56U, 0x3eU, 0x4bU,
    0xc6U, 0xd2U, 0x79U, 0x20U,
    0x9aU, 0xdbU, 0xc0U, 0xfeU,
    0x78U, 0xcdU, 0x5aU, 0xf4U,
    0x1fU, 0xddU, 0xa8U, 0x33U,
    0x88U, 0x07U, 0xc7U, 0x31U,
    0xb1U, 0x12U, 0x10U, 0x59U,
    0x27U, 0x80U, 0xecU, 0x5fU,
    0x60U, 0x51U, 0x7fU, 0xa9U,
    0x19U, 0xb5U, 0x4aU, 0x0dU,
    0x2dU, 0xe5U, 0x7aU, 0x9fU,
    0x93U, 0xc9U, 0x9cU, 0xefU,
    0xa0U, 0xe0U, 0x3bU, 0x4dU,
    0xaeU, 0x2aU, 0xf5U, 0xb0U,
    0xc8U, 0xebU, 0xbbU, 0x3cU,
    0x83U, 0x53U, 0x99U, 0x61U,
    0x17U, 0x2bU, 0x04U, 0x7eU,
    0xbaU, 0x77U, 0xd6U, 0x26U,
    0xe1U, 0x69U, 0x14U, 0x63U,
    0x55U, 0x21U, 0x0cU, 0x7dU,
};
/*
 * Td4(i): fetch byte i of Td4_.
 * Without USE_PGMMEM this is a plain array access; with USE_PGMMEM the
 * byte is copied out of the table with memcpy_P().
 */
#ifndef USE_PGMMEM
#define Td4(x) Td4_[x]
#else
static inline u8 Td4(int i)
{
        u8 byte;

        memcpy_P(&byte, &Td4_[i], sizeof(byte));
        return byte;
}
#endif
398
/*
 * Round constants for the key schedule, placed in the top byte of each
 * word.  For 128-bit blocks, Rijndael never uses more than 10 rcon
 * values.
 */
static const uint32_t rcon[] = {
        0x01000000U, 0x02000000U,
        0x04000000U, 0x08000000U,
        0x10000000U, 0x20000000U,
        0x40000000U, 0x80000000U,
        0x1B000000U, 0x36000000U,
};
404
405 /**
406  * Expand the cipher key into the encryption key schedule.
407  */
408 int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
409                         AES_KEY *key) {
410
411         uint32_t *rk;
412         int i = 0;
413         uint32_t temp;
414
415         if (!userKey || !key)
416                 return -1;
417         if (bits != 128 && bits != 192 && bits != 256)
418                 return -2;
419
420         rk = key->rd_key;
421
422         if (bits==128)
423                 key->rounds = 10;
424         else if (bits==192)
425                 key->rounds = 12;
426         else
427                 key->rounds = 14;
428
429         rk[0] = GETU32(userKey     );
430         rk[1] = GETU32(userKey +  4);
431         rk[2] = GETU32(userKey +  8);
432         rk[3] = GETU32(userKey + 12);
433         if (bits == 128) {
434                 for (;;) {
435                         temp  = rk[3];
436                         rk[4] = rk[0] ^
437                                 ((uint32_t)Te4((temp >> 16) & 0xff) <<24) ^
438                                 ((uint32_t)Te4((temp >>  8) & 0xff) <<16) ^
439                                 ((uint32_t)Te4((temp      ) & 0xff) <<8) ^
440                                 ((uint32_t)Te4((temp >> 24)       ) ) ^
441                                 rcon[i];
442                         rk[5] = rk[1] ^ rk[4];
443                         rk[6] = rk[2] ^ rk[5];
444                         rk[7] = rk[3] ^ rk[6];
445                         if (++i == 10) {
446                                 return 0;
447                         }
448                         rk += 4;
449                 }
450         }
451         rk[4] = GETU32(userKey + 16);
452         rk[5] = GETU32(userKey + 20);
453         if (bits == 192) {
454                 for (;;) {
455                         temp = rk[ 5];
456                         rk[ 6] = rk[ 0] ^
457                                 ((uint32_t)Te4((temp >> 16) & 0xff) <<24) ^
458                                 ((uint32_t)Te4((temp >>  8) & 0xff) <<16) ^
459                                 ((uint32_t)Te4((temp      ) & 0xff) <<8) ^
460                                 ((uint32_t)Te4((temp >> 24)       ) ) ^
461                                 rcon[i];
462                         rk[ 7] = rk[ 1] ^ rk[ 6];
463                         rk[ 8] = rk[ 2] ^ rk[ 7];
464                         rk[ 9] = rk[ 3] ^ rk[ 8];
465                         if (++i == 8) {
466                                 return 0;
467                         }
468                         rk[10] = rk[ 4] ^ rk[ 9];
469                         rk[11] = rk[ 5] ^ rk[10];
470                         rk += 6;
471                 }
472         }
473         rk[6] = GETU32(userKey + 24);
474         rk[7] = GETU32(userKey + 28);
475         if (bits == 256) {
476                 for (;;) {
477                         temp = rk[ 7];
478                         rk[ 8] = rk[ 0] ^
479                                 ((uint32_t)Te4((temp >> 16) & 0xff) <<24) ^
480                                 ((uint32_t)Te4((temp >>  8) & 0xff) <<16) ^
481                                 ((uint32_t)Te4((temp      ) & 0xff) <<8) ^
482                                 ((uint32_t)Te4((temp >> 24)       ) ) ^
483                                 rcon[i];
484                         rk[ 9] = rk[ 1] ^ rk[ 8];
485                         rk[10] = rk[ 2] ^ rk[ 9];
486                         rk[11] = rk[ 3] ^ rk[10];
487                         if (++i == 7) {
488                                 return 0;
489                         }
490                         temp = rk[11];
491                         rk[12] = rk[ 4] ^
492                                 ((uint32_t)Te4((temp >> 24)       ) <<24) ^
493                                 ((uint32_t)Te4((temp >> 16) & 0xff) <<16) ^
494                                 ((uint32_t)Te4((temp >>  8) & 0xff) <<8) ^
495                                 ((uint32_t)Te4((temp      ) & 0xff) );
496                         rk[13] = rk[ 5] ^ rk[12];
497                         rk[14] = rk[ 6] ^ rk[13];
498                         rk[15] = rk[ 7] ^ rk[14];
499
500                         rk += 8;
501                 }
502         }
503         return 0;
504 }
505
506 /**
507  * Expand the cipher key into the decryption key schedule.
508  */
509 int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
510                          AES_KEY *key) {
511
512         uint32_t *rk;
513         int i, j, status;
514         uint32_t temp;
515
516         /* first, start with an encryption schedule */
517         status = AES_set_encrypt_key(userKey, bits, key);
518         if (status < 0)
519                 return status;
520
521         rk = key->rd_key;
522
523         /* invert the order of the round keys: */
524         for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
525                 temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
526                 temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
527                 temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
528                 temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
529         }
530         /* apply the inverse MixColumn transform to all round keys but the first and the last: */
531         for (i = 1; i < (key->rounds); i++) {
532                 rk += 4;
533                 rk[0] =
534                         Td0(Te4((rk[0] >> 24)       ) ) ^
535                         ROR_U32_1(Td0(Te4((rk[0] >> 16) & 0xff) )) ^
536                         ROR_U32_2(Td0(Te4((rk[0] >>  8) & 0xff) )) ^
537                         ROR_U32_3(Td0(Te4((rk[0]      ) & 0xff) ));
538                 rk[1] =
539                         Td0(Te4((rk[1] >> 24)       ) ) ^
540                         ROR_U32_1(Td0(Te4((rk[1] >> 16) & 0xff) )) ^
541                         ROR_U32_2(Td0(Te4((rk[1] >>  8) & 0xff) )) ^
542                         ROR_U32_3(Td0(Te4((rk[1]      ) & 0xff) ));
543                 rk[2] =
544                         Td0(Te4((rk[2] >> 24)       ) ) ^
545                         ROR_U32_1(Td0(Te4((rk[2] >> 16) & 0xff) )) ^
546                         ROR_U32_2(Td0(Te4((rk[2] >>  8) & 0xff) )) ^
547                         ROR_U32_3(Td0(Te4((rk[2]      ) & 0xff) ));
548                 rk[3] =
549                         Td0(Te4((rk[3] >> 24)       ) ) ^
550                         ROR_U32_1(Td0(Te4((rk[3] >> 16) & 0xff) )) ^
551                         ROR_U32_2(Td0(Te4((rk[3] >>  8) & 0xff) )) ^
552                         ROR_U32_3(Td0(Te4((rk[3]      ) & 0xff) ));
553         }
554         return 0;
555 }
556
557 /*
558  * Encrypt a single block
559  * in and out can overlap
560  */
561 void AES_encrypt(const unsigned char *in, unsigned char *out,
562                  const AES_KEY *key) {
563         const uint32_t *rk;
564         uint32_t s0, s1, s2, s3, t0, t1, t2, t3;
565 #ifndef FULL_UNROLL
566         int r;
567 #endif /* ?FULL_UNROLL */
568
569         assert(in && out && key);
570         rk = key->rd_key;
571
572         /*
573          * map byte array block to cipher state
574          * and add initial round key:
575          */
576         s0 = GETU32(in     ) ^ rk[0];
577         s1 = GETU32(in +  4) ^ rk[1];
578         s2 = GETU32(in +  8) ^ rk[2];
579         s3 = GETU32(in + 12) ^ rk[3];
580     /*
581      * Nr - 1 full rounds:
582      */
583     r = key->rounds >> 1;
584     for (;;) {
585         t0 =
586             Te0((s0 >> 24)       ) ^
587             ROR_U32_1(Te0((s1 >> 16) & 0xff)) ^
588             ROR_U32_2(Te0((s2 >>  8) & 0xff)) ^
589             ROR_U32_3(Te0((s3      ) & 0xff)) ^
590             rk[4];
591         t1 =
592             Te0((s1 >> 24)       ) ^
593             ROR_U32_1(Te0((s2 >> 16) & 0xff)) ^
594             ROR_U32_2(Te0((s3 >>  8) & 0xff)) ^
595             ROR_U32_3(Te0((s0      ) & 0xff)) ^
596             rk[5];
597         t2 =
598             Te0((s2 >> 24)       ) ^
599             ROR_U32_1(Te0((s3 >> 16) & 0xff)) ^
600             ROR_U32_2(Te0((s0 >>  8) & 0xff)) ^
601             ROR_U32_3(Te0((s1      ) & 0xff)) ^
602             rk[6];
603         t3 =
604             Te0((s3 >> 24)       ) ^
605             ROR_U32_1(Te0((s0 >> 16) & 0xff)) ^
606             ROR_U32_2(Te0((s1 >>  8) & 0xff)) ^
607             ROR_U32_3(Te0((s2      ) & 0xff)) ^
608             rk[7];
609
610         rk += 8;
611         if (--r == 0) {
612             break;
613         }
614
615         s0 =
616             Te0((t0 >> 24)       ) ^
617             ROR_U32_1(Te0((t1 >> 16) & 0xff)) ^
618             ROR_U32_2(Te0((t2 >>  8) & 0xff)) ^
619             ROR_U32_3(Te0((t3      ) & 0xff)) ^
620             rk[0];
621         s1 =
622             Te0((t1 >> 24)       ) ^
623             ROR_U32_1(Te0((t2 >> 16) & 0xff)) ^
624             ROR_U32_2(Te0((t3 >>  8) & 0xff)) ^
625             ROR_U32_3(Te0((t0      ) & 0xff)) ^
626             rk[1];
627         s2 =
628             Te0((t2 >> 24)       ) ^
629             ROR_U32_1(Te0((t3 >> 16) & 0xff)) ^
630             ROR_U32_2(Te0((t0 >>  8) & 0xff)) ^
631             ROR_U32_3(Te0((t1      ) & 0xff)) ^
632             rk[2];
633         s3 =
634             Te0((t3 >> 24)       ) ^
635             ROR_U32_1(Te0((t0 >> 16) & 0xff)) ^
636             ROR_U32_2(Te0((t1 >>  8) & 0xff)) ^
637             ROR_U32_3(Te0((t2      ) & 0xff)) ^
638             rk[3];
639     }
640     /*
641          * apply last round and
642          * map cipher state to byte array block:
643          */
644         s0 =
645                 ((uint32_t)Te4((t0 >> 24)       ) <<24) ^
646                 ((uint32_t)Te4((t1 >> 16) & 0xff) <<16) ^
647                 ((uint32_t)Te4((t2 >>  8) & 0xff) <<8) ^
648                 ((uint32_t)Te4((t3      ) & 0xff) ) ^
649                 rk[0];
650         PUTU32(out     , s0);
651         s1 =
652                 ((uint32_t)Te4((t1 >> 24)       ) <<24) ^
653                 ((uint32_t)Te4((t2 >> 16) & 0xff) <<16) ^
654                 ((uint32_t)Te4((t3 >>  8) & 0xff) <<8) ^
655                 ((uint32_t)Te4((t0      ) & 0xff) ) ^
656                 rk[1];
657         PUTU32(out +  4, s1);
658         s2 =
659                 ((uint32_t)Te4((t2 >> 24)       ) <<24) ^
660                 ((uint32_t)Te4((t3 >> 16) & 0xff) <<16) ^
661                 ((uint32_t)Te4((t0 >>  8) & 0xff) <<8) ^
662                 ((uint32_t)Te4((t1      ) & 0xff) ) ^
663                 rk[2];
664         PUTU32(out +  8, s2);
665         s3 =
666                 ((uint32_t)Te4((t3 >> 24)       ) <<24) ^
667                 ((uint32_t)Te4((t0 >> 16) & 0xff) <<16) ^
668                 ((uint32_t)Te4((t1 >>  8) & 0xff) <<8) ^
669                 ((uint32_t)Te4((t2      ) & 0xff) ) ^
670                 rk[3];
671         PUTU32(out + 12, s3);
672 }
673
674 /*
675  * Decrypt a single block
676  * in and out can overlap
677  */
678 void AES_decrypt(const unsigned char *in, unsigned char *out,
679                  const AES_KEY *key) {
680
681         const uint32_t *rk;
682         uint32_t s0, s1, s2, s3, t0, t1, t2, t3;
683 #ifndef FULL_UNROLL
684         int r;
685 #endif /* ?FULL_UNROLL */
686
687         assert(in && out && key);
688         rk = key->rd_key;
689
690         /*
691          * map byte array block to cipher state
692          * and add initial round key:
693          */
694     s0 = GETU32(in     ) ^ rk[0];
695     s1 = GETU32(in +  4) ^ rk[1];
696     s2 = GETU32(in +  8) ^ rk[2];
697     s3 = GETU32(in + 12) ^ rk[3];
698     /*
699      * Nr - 1 full rounds:
700      */
701     r = key->rounds >> 1;
702     for (;;) {
703         t0 =
704             Td0((s0 >> 24)       ) ^
705             ROR_U32_1(Td0((s3 >> 16) & 0xff)) ^
706             ROR_U32_2(Td0((s2 >>  8) & 0xff)) ^
707             ROR_U32_3(Td0((s1      ) & 0xff)) ^
708             rk[4];
709         t1 =
710             Td0((s1 >> 24)       ) ^
711             ROR_U32_1(Td0((s0 >> 16) & 0xff)) ^
712             ROR_U32_2(Td0((s3 >>  8) & 0xff)) ^
713             ROR_U32_3(Td0((s2      ) & 0xff)) ^
714             rk[5];
715         t2 =
716             Td0((s2 >> 24)       ) ^
717             ROR_U32_1(Td0((s1 >> 16) & 0xff)) ^
718             ROR_U32_2(Td0((s0 >>  8) & 0xff)) ^
719             ROR_U32_3(Td0((s3      ) & 0xff)) ^
720             rk[6];
721         t3 =
722             Td0((s3 >> 24)       ) ^
723             ROR_U32_1(Td0((s2 >> 16) & 0xff)) ^
724             ROR_U32_2(Td0((s1 >>  8) & 0xff)) ^
725             ROR_U32_3(Td0((s0      ) & 0xff)) ^
726             rk[7];
727
728         rk += 8;
729         if (--r == 0) {
730             break;
731         }
732
733         s0 =
734             Td0((t0 >> 24)       ) ^
735             ROR_U32_1(Td0((t3 >> 16) & 0xff)) ^
736             ROR_U32_2(Td0((t2 >>  8) & 0xff)) ^
737             ROR_U32_3(Td0((t1      ) & 0xff)) ^
738             rk[0];
739         s1 =
740             Td0((t1 >> 24)       ) ^
741             ROR_U32_1(Td0((t0 >> 16) & 0xff)) ^
742             ROR_U32_2(Td0((t3 >>  8) & 0xff)) ^
743             ROR_U32_3(Td0((t2      ) & 0xff)) ^
744             rk[1];
745         s2 =
746             Td0((t2 >> 24)       ) ^
747             ROR_U32_1(Td0((t1 >> 16) & 0xff)) ^
748             ROR_U32_2(Td0((t0 >>  8) & 0xff)) ^
749             ROR_U32_3(Td0((t3      ) & 0xff)) ^
750             rk[2];
751         s3 =
752             Td0((t3 >> 24)       ) ^
753             ROR_U32_1(Td0((t2 >> 16) & 0xff)) ^
754             ROR_U32_2(Td0((t1 >>  8) & 0xff)) ^
755             ROR_U32_3(Td0((t0      ) & 0xff)) ^
756             rk[3];
757     }
758     /*
759          * apply last round and
760          * map cipher state to byte array block:
761          */
762         s0 =
763                 ((uint32_t)Td4((t0 >> 24)       ) <<24) ^
764                 ((uint32_t)Td4((t3 >> 16) & 0xff) <<16) ^
765                 ((uint32_t)Td4((t2 >>  8) & 0xff) <<8) ^
766                 ((uint32_t)Td4((t1      ) & 0xff) ) ^
767                 rk[0];
768         PUTU32(out     , s0);
769         s1 =
770                 ((uint32_t)Td4((t1 >> 24)       ) <<24) ^
771                 ((uint32_t)Td4((t0 >> 16) & 0xff) <<16) ^
772                 ((uint32_t)Td4((t3 >>  8) & 0xff) <<8) ^
773                 ((uint32_t)Td4((t2      ) & 0xff) ) ^
774                 rk[1];
775         PUTU32(out +  4, s1);
776         s2 =
777                 ((uint32_t)Td4((t2 >> 24)       ) <<24) ^
778                 ((uint32_t)Td4((t1 >> 16) & 0xff) <<16) ^
779                 ((uint32_t)Td4((t0 >>  8) & 0xff) <<8) ^
780                 ((uint32_t)Td4((t3      ) & 0xff) ) ^
781                 rk[2];
782         PUTU32(out +  8, s2);
783         s3 =
784                 ((uint32_t)Td4((t3 >> 24)       ) <<24) ^
785                 ((uint32_t)Td4((t2 >> 16) & 0xff) <<16) ^
786                 ((uint32_t)Td4((t1 >>  8) & 0xff) <<8) ^
787                 ((uint32_t)Td4((t0      ) & 0xff) ) ^
788                 rk[3];
789         PUTU32(out + 12, s3);
790 }
791