d121814a99b5cee160adefec0adf07e81676f6e3
[aversive.git] / aes_core.c
1 /*  
2  *  Copyright Droids Corporation, Microb Technology, Eirbot (2005)
3  * 
4  *  This program is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  This program is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with this program; if not, write to the Free Software
16  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17  *
18  */
19
20 /*
21  * This code is mainly from rijndael-alg-fst.c
22  *
23  * @version 3.0 (December 2000)
24  *
25  * Optimised ANSI C code for the Rijndael cipher (now AES)
26  *
27  * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
28  * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
29  * @author Paulo Barreto <paulo.barreto@terra.com.br>
30  *
31  */
32
33 /* Optimized to use AVR flash to store big tables */
34
/*
 * Unless this is a host build (HOST_VERSION), select the program-memory
 * variant of the lookup tables and of their accessors defined further
 * down in this file.
 */
#ifndef HOST_VERSION
#define USE_PGMMEM
#endif

/*
 * Unless AES_DEBUG is defined, force NDEBUG so that the assert() calls in
 * this file compile to nothing. This must stay ahead of the <assert.h>
 * include that follows, because NDEBUG is sampled when that header is
 * included.
 */
#ifndef AES_DEBUG
# ifndef NDEBUG
#  define NDEBUG
# endif
#endif
44 #include <assert.h>
45
46 #include <aversive.h>
47 #include "aes_locl.h"
48 #include "aes.h"
49 #include "aes_locl.h"
50 #include <aversive/pgmspace.h>
51
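/*
 * Lookup tables of the reference implementation. Each 32-bit entry is
 * written as its four bytes, most significant first, given as GF(2^8)
 * multiples of the S-box (S) or inverse S-box (Si) output:
 *
 * Te0[x] = S [x].[02, 01, 01, 03];
 * Te1[x] = S [x].[03, 02, 01, 01];
 * Te2[x] = S [x].[01, 03, 02, 01];
 * Te3[x] = S [x].[01, 01, 03, 02];
 * Te4[x] = S [x].[01, 01, 01, 01];
 *
 * Td0[x] = Si[x].[0e, 09, 0d, 0b];
 * Td1[x] = Si[x].[0b, 0e, 09, 0d];
 * Td2[x] = Si[x].[0d, 0b, 0e, 09];
 * Td3[x] = Si[x].[09, 0d, 0b, 0e];
 * Td4[x] = Si[x].[01, 01, 01, 01];
 *
 * To save table space, this file stores only Te0 and Td0 as word tables.
 * Te1..Te3 and Td1..Td3 are obtained by rotating the Te0/Td0 entry right
 * by one, two or three bytes (ROR_U32_1, ROR_U32_2, ROR_U32_3). Te4 and
 * Td4 are stored as byte tables holding just S[x] and Si[x]; callers
 * shift the byte into the position they need.
 */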
65
66
/*
 * Rotate a 32-bit word right by one, two or three whole bytes.
 *
 * These derive the Te1..Te3 / Td1..Td3 table values from a Te0 / Td0
 * entry, e.g. ROR_U32_1(Te0[x]) == Te1[x].
 *
 * They are functions rather than macros on purpose: every call site
 * passes a table lookup as the argument, and a macro would expand (and
 * therefore perform) that lookup once per extracted byte. Taking the
 * argument as uint32_t also keeps the shifts well defined whatever the
 * width of int on the target.
 */
static inline uint32_t ROR_U32_1(uint32_t a)
{
        return (a >> 8) | (a << 24);
}

static inline uint32_t ROR_U32_2(uint32_t a)
{
        return (a >> 16) | (a << 16);
}

static inline uint32_t ROR_U32_3(uint32_t a)
{
        return (a >> 24) | (a << 8);
}
70
71
/*
 * Te0_: encryption lookup table, Te0[x] = S[x].[02, 01, 01, 03]
 * (bytes listed most significant first).
 *
 * With USE_PGMMEM the table is declared with the prog_uint32_t type and
 * must be read through Te0() below; otherwise it is a plain constant
 * array.
 */
#ifndef USE_PGMMEM
static const uint32_t Te0_[256] = {
#else
prog_uint32_t Te0_[256] = {
#endif
    0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
    0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
    0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
    0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
    0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
    0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
    0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
    0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
    0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
    0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
    0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
    0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
    0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
    0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
    0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
    0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
    0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
    0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
    0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
    0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
    0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
    0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
    0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
    0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
    0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
    0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
    0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
    0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
    0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
    0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
    0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
    0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
    0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
    0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
    0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
    0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
    0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
    0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
    0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
    0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
    0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
    0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
    0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
    0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
    0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
    0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
    0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
    0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
    0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
    0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
    0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
    0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
    0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
    0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
    0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
    0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
    0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
    0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
    0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
    0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
    0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
    0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
    0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
    0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
};

/*
 * Te0(i): fetch entry i (0..255) of Te0_.
 */
#ifndef USE_PGMMEM
/* Plain array: index it directly. */
#define Te0(idx) Te0_[idx]
#else
/* Program-memory table: copy the entry out with memcpy_P(). */
static inline uint32_t Te0(int i)
{
        uint32_t word;

        memcpy_P(&word, &Te0_[i], sizeof word);
        return word;
}
#endif
153
154 #ifdef USE_PGMMEM
155 prog_uint8_t Te4_[256] = {
156 #else
157 static const u8 Te4_[256] = {
158 #endif
159     0x63U, 0x7cU, 0x77U, 0x7bU,
160     0xf2U, 0x6bU, 0x6fU, 0xc5U,
161     0x30U, 0x01U, 0x67U, 0x2bU,
162     0xfeU, 0xd7U, 0xabU, 0x76U,
163     0xcaU, 0x82U, 0xc9U, 0x7dU,
164     0xfaU, 0x59U, 0x47U, 0xf0U,
165     0xadU, 0xd4U, 0xa2U, 0xafU,
166     0x9cU, 0xa4U, 0x72U, 0xc0U,
167     0xb7U, 0xfdU, 0x93U, 0x26U,
168     0x36U, 0x3fU, 0xf7U, 0xccU,
169     0x34U, 0xa5U, 0xe5U, 0xf1U,
170     0x71U, 0xd8U, 0x31U, 0x15U,
171     0x04U, 0xc7U, 0x23U, 0xc3U,
172     0x18U, 0x96U, 0x05U, 0x9aU,
173     0x07U, 0x12U, 0x80U, 0xe2U,
174     0xebU, 0x27U, 0xb2U, 0x75U,
175     0x09U, 0x83U, 0x2cU, 0x1aU,
176     0x1bU, 0x6eU, 0x5aU, 0xa0U,
177     0x52U, 0x3bU, 0xd6U, 0xb3U,
178     0x29U, 0xe3U, 0x2fU, 0x84U,
179     0x53U, 0xd1U, 0x00U, 0xedU,
180     0x20U, 0xfcU, 0xb1U, 0x5bU,
181     0x6aU, 0xcbU, 0xbeU, 0x39U,
182     0x4aU, 0x4cU, 0x58U, 0xcfU,
183     0xd0U, 0xefU, 0xaaU, 0xfbU,
184     0x43U, 0x4dU, 0x33U, 0x85U,
185     0x45U, 0xf9U, 0x02U, 0x7fU,
186     0x50U, 0x3cU, 0x9fU, 0xa8U,
187     0x51U, 0xa3U, 0x40U, 0x8fU,
188     0x92U, 0x9dU, 0x38U, 0xf5U,
189     0xbcU, 0xb6U, 0xdaU, 0x21U,
190     0x10U, 0xffU, 0xf3U, 0xd2U,
191     0xcdU, 0x0cU, 0x13U, 0xecU,
192     0x5fU, 0x97U, 0x44U, 0x17U,
193     0xc4U, 0xa7U, 0x7eU, 0x3dU,
194     0x64U, 0x5dU, 0x19U, 0x73U,
195     0x60U, 0x81U, 0x4fU, 0xdcU,
196     0x22U, 0x2aU, 0x90U, 0x88U,
197     0x46U, 0xeeU, 0xb8U, 0x14U,
198     0xdeU, 0x5eU, 0x0bU, 0xdbU,
199     0xe0U, 0x32U, 0x3aU, 0x0aU,
200     0x49U, 0x06U, 0x24U, 0x5cU,
201     0xc2U, 0xd3U, 0xacU, 0x62U,
202     0x91U, 0x95U, 0xe4U, 0x79U,
203     0xe7U, 0xc8U, 0x37U, 0x6dU,
204     0x8dU, 0xd5U, 0x4eU, 0xa9U,
205     0x6cU, 0x56U, 0xf4U, 0xeaU,
206     0x65U, 0x7aU, 0xaeU, 0x08U,
207     0xbaU, 0x78U, 0x25U, 0x2eU,
208     0x1cU, 0xa6U, 0xb4U, 0xc6U,
209     0xe8U, 0xddU, 0x74U, 0x1fU,
210     0x4bU, 0xbdU, 0x8bU, 0x8aU,
211     0x70U, 0x3eU, 0xb5U, 0x66U,
212     0x48U, 0x03U, 0xf6U, 0x0eU,
213     0x61U, 0x35U, 0x57U, 0xb9U,
214     0x86U, 0xc1U, 0x1dU, 0x9eU,
215     0xe1U, 0xf8U, 0x98U, 0x11U,
216     0x69U, 0xd9U, 0x8eU, 0x94U,
217     0x9bU, 0x1eU, 0x87U, 0xe9U,
218     0xceU, 0x55U, 0x28U, 0xdfU,
219     0x8cU, 0xa1U, 0x89U, 0x0dU,
220     0xbfU, 0xe6U, 0x42U, 0x68U,
221     0x41U, 0x99U, 0x2dU, 0x0fU,
222     0xb0U, 0x54U, 0xbbU, 0x16U,
223 };
224 #ifdef USE_PGMMEM
225 static inline u8 Te4(int i)
226 {
227         u8 tmp;
228         memcpy_P(&tmp, Te4_+i, sizeof(u8));
229         return tmp;
230 }
231 #else
232 #define Te4(x) Te4_[x]
233 #endif
234
235
/*
 * Td0_: decryption lookup table, Td0[x] = Si[x].[0e, 09, 0d, 0b]
 * (bytes listed most significant first).
 *
 * With USE_PGMMEM the table is declared with the prog_uint32_t type and
 * must be read through Td0() below; otherwise it is a plain constant
 * array.
 */
#ifndef USE_PGMMEM
static const uint32_t Td0_[256] = {
#else
prog_uint32_t Td0_[256] = {
#endif
    0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
    0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
    0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
    0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
    0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
    0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
    0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
    0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
    0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
    0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
    0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
    0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
    0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
    0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
    0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
    0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
    0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
    0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
    0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
    0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
    0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
    0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
    0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
    0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
    0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
    0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
    0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
    0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
    0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
    0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
    0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
    0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
    0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
    0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
    0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
    0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
    0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
    0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
    0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
    0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
    0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
    0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
    0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
    0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
    0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
    0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
    0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
    0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
    0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
    0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
    0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
    0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
    0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
    0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
    0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
    0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
    0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
    0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
    0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
    0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
    0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
    0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
    0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
    0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
};

/*
 * Td0(i): fetch entry i (0..255) of Td0_.
 */
#ifndef USE_PGMMEM
/* Plain array: index it directly. */
#define Td0(idx) Td0_[idx]
#else
/* Program-memory table: copy the entry out with memcpy_P(). */
static inline uint32_t Td0(int i)
{
        uint32_t word;

        memcpy_P(&word, &Td0_[i], sizeof word);
        return word;
}
#endif
316
317 #ifdef USE_PGMMEM
318 prog_uint8_t Td4_[256] = {
319 #else
320 static const u8 Td4_[256] = {
321 #endif
322     0x52U, 0x09U, 0x6aU, 0xd5U,
323     0x30U, 0x36U, 0xa5U, 0x38U,
324     0xbfU, 0x40U, 0xa3U, 0x9eU,
325     0x81U, 0xf3U, 0xd7U, 0xfbU,
326     0x7cU, 0xe3U, 0x39U, 0x82U,
327     0x9bU, 0x2fU, 0xffU, 0x87U,
328     0x34U, 0x8eU, 0x43U, 0x44U,
329     0xc4U, 0xdeU, 0xe9U, 0xcbU,
330     0x54U, 0x7bU, 0x94U, 0x32U,
331     0xa6U, 0xc2U, 0x23U, 0x3dU,
332     0xeeU, 0x4cU, 0x95U, 0x0bU,
333     0x42U, 0xfaU, 0xc3U, 0x4eU,
334     0x08U, 0x2eU, 0xa1U, 0x66U,
335     0x28U, 0xd9U, 0x24U, 0xb2U,
336     0x76U, 0x5bU, 0xa2U, 0x49U,
337     0x6dU, 0x8bU, 0xd1U, 0x25U,
338     0x72U, 0xf8U, 0xf6U, 0x64U,
339     0x86U, 0x68U, 0x98U, 0x16U,
340     0xd4U, 0xa4U, 0x5cU, 0xccU,
341     0x5dU, 0x65U, 0xb6U, 0x92U,
342     0x6cU, 0x70U, 0x48U, 0x50U,
343     0xfdU, 0xedU, 0xb9U, 0xdaU,
344     0x5eU, 0x15U, 0x46U, 0x57U,
345     0xa7U, 0x8dU, 0x9dU, 0x84U,
346     0x90U, 0xd8U, 0xabU, 0x00U,
347     0x8cU, 0xbcU, 0xd3U, 0x0aU,
348     0xf7U, 0xe4U, 0x58U, 0x05U,
349     0xb8U, 0xb3U, 0x45U, 0x06U,
350     0xd0U, 0x2cU, 0x1eU, 0x8fU,
351     0xcaU, 0x3fU, 0x0fU, 0x02U,
352     0xc1U, 0xafU, 0xbdU, 0x03U,
353     0x01U, 0x13U, 0x8aU, 0x6bU,
354     0x3aU, 0x91U, 0x11U, 0x41U,
355     0x4fU, 0x67U, 0xdcU, 0xeaU,
356     0x97U, 0xf2U, 0xcfU, 0xceU,
357     0xf0U, 0xb4U, 0xe6U, 0x73U,
358     0x96U, 0xacU, 0x74U, 0x22U,
359     0xe7U, 0xadU, 0x35U, 0x85U,
360     0xe2U, 0xf9U, 0x37U, 0xe8U,
361     0x1cU, 0x75U, 0xdfU, 0x6eU,
362     0x47U, 0xf1U, 0x1aU, 0x71U,
363     0x1dU, 0x29U, 0xc5U, 0x89U,
364     0x6fU, 0xb7U, 0x62U, 0x0eU,
365     0xaaU, 0x18U, 0xbeU, 0x1bU,
366     0xfcU, 0x56U, 0x3eU, 0x4bU,
367     0xc6U, 0xd2U, 0x79U, 0x20U,
368     0x9aU, 0xdbU, 0xc0U, 0xfeU,
369     0x78U, 0xcdU, 0x5aU, 0xf4U,
370     0x1fU, 0xddU, 0xa8U, 0x33U,
371     0x88U, 0x07U, 0xc7U, 0x31U,
372     0xb1U, 0x12U, 0x10U, 0x59U,
373     0x27U, 0x80U, 0xecU, 0x5fU,
374     0x60U, 0x51U, 0x7fU, 0xa9U,
375     0x19U, 0xb5U, 0x4aU, 0x0dU,
376     0x2dU, 0xe5U, 0x7aU, 0x9fU,
377     0x93U, 0xc9U, 0x9cU, 0xefU,
378     0xa0U, 0xe0U, 0x3bU, 0x4dU,
379     0xaeU, 0x2aU, 0xf5U, 0xb0U,
380     0xc8U, 0xebU, 0xbbU, 0x3cU,
381     0x83U, 0x53U, 0x99U, 0x61U,
382     0x17U, 0x2bU, 0x04U, 0x7eU,
383     0xbaU, 0x77U, 0xd6U, 0x26U,
384     0xe1U, 0x69U, 0x14U, 0x63U,
385     0x55U, 0x21U, 0x0cU, 0x7dU,
386 };
387 #ifdef USE_PGMMEM
388 static inline u8 Td4(int i)
389 {
390         u8 tmp;
391         memcpy_P(&tmp, Td4_+i, sizeof(u8));
392         return tmp;
393 }
394 #else
395 #define Td4(x) Td4_[x]
396 #endif
397
/*
 * Round constants XORed into the first word of each key-expansion step.
 * For 128-bit blocks, Rijndael never uses more than 10 rcon values.
 */
static const uint32_t rcon[] = {
        0x01000000,
        0x02000000,
        0x04000000,
        0x08000000,
        0x10000000,
        0x20000000,
        0x40000000,
        0x80000000,
        0x1B000000,
        0x36000000,
};
403
404 /**
405  * Expand the cipher key into the encryption key schedule.
406  */
407 int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
408                         AES_KEY *key) {
409
410         uint32_t *rk;
411         int i = 0;
412         uint32_t temp;
413
414         if (!userKey || !key)
415                 return -1;
416         if (bits != 128 && bits != 192 && bits != 256)
417                 return -2;
418
419         rk = key->rd_key;
420
421         if (bits==128)
422                 key->rounds = 10;
423         else if (bits==192)
424                 key->rounds = 12;
425         else
426                 key->rounds = 14;
427
428         rk[0] = GETU32(userKey     );
429         rk[1] = GETU32(userKey +  4);
430         rk[2] = GETU32(userKey +  8);
431         rk[3] = GETU32(userKey + 12);
432         if (bits == 128) {
433                 for (;;) {
434                         temp  = rk[3];
435                         rk[4] = rk[0] ^
436                                 ((uint32_t)Te4((temp >> 16) & 0xff) <<24) ^
437                                 ((uint32_t)Te4((temp >>  8) & 0xff) <<16) ^
438                                 ((uint32_t)Te4((temp      ) & 0xff) <<8) ^
439                                 ((uint32_t)Te4((temp >> 24)       ) ) ^
440                                 rcon[i];
441                         rk[5] = rk[1] ^ rk[4];
442                         rk[6] = rk[2] ^ rk[5];
443                         rk[7] = rk[3] ^ rk[6];
444                         if (++i == 10) {
445                                 return 0;
446                         }
447                         rk += 4;
448                 }
449         }
450         rk[4] = GETU32(userKey + 16);
451         rk[5] = GETU32(userKey + 20);
452         if (bits == 192) {
453                 for (;;) {
454                         temp = rk[ 5];
455                         rk[ 6] = rk[ 0] ^
456                                 ((uint32_t)Te4((temp >> 16) & 0xff) <<24) ^
457                                 ((uint32_t)Te4((temp >>  8) & 0xff) <<16) ^
458                                 ((uint32_t)Te4((temp      ) & 0xff) <<8) ^
459                                 ((uint32_t)Te4((temp >> 24)       ) ) ^
460                                 rcon[i];
461                         rk[ 7] = rk[ 1] ^ rk[ 6];
462                         rk[ 8] = rk[ 2] ^ rk[ 7];
463                         rk[ 9] = rk[ 3] ^ rk[ 8];
464                         if (++i == 8) {
465                                 return 0;
466                         }
467                         rk[10] = rk[ 4] ^ rk[ 9];
468                         rk[11] = rk[ 5] ^ rk[10];
469                         rk += 6;
470                 }
471         }
472         rk[6] = GETU32(userKey + 24);
473         rk[7] = GETU32(userKey + 28);
474         if (bits == 256) {
475                 for (;;) {
476                         temp = rk[ 7];
477                         rk[ 8] = rk[ 0] ^
478                                 ((uint32_t)Te4((temp >> 16) & 0xff) <<24) ^
479                                 ((uint32_t)Te4((temp >>  8) & 0xff) <<16) ^
480                                 ((uint32_t)Te4((temp      ) & 0xff) <<8) ^
481                                 ((uint32_t)Te4((temp >> 24)       ) ) ^
482                                 rcon[i];
483                         rk[ 9] = rk[ 1] ^ rk[ 8];
484                         rk[10] = rk[ 2] ^ rk[ 9];
485                         rk[11] = rk[ 3] ^ rk[10];
486                         if (++i == 7) {
487                                 return 0;
488                         }
489                         temp = rk[11];
490                         rk[12] = rk[ 4] ^
491                                 ((uint32_t)Te4((temp >> 24)       ) <<24) ^
492                                 ((uint32_t)Te4((temp >> 16) & 0xff) <<16) ^
493                                 ((uint32_t)Te4((temp >>  8) & 0xff) <<8) ^
494                                 ((uint32_t)Te4((temp      ) & 0xff) );
495                         rk[13] = rk[ 5] ^ rk[12];
496                         rk[14] = rk[ 6] ^ rk[13];
497                         rk[15] = rk[ 7] ^ rk[14];
498
499                         rk += 8;
500                 }
501         }
502         return 0;
503 }
504
505 /**
506  * Expand the cipher key into the decryption key schedule.
507  */
508 int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
509                          AES_KEY *key) {
510
511         uint32_t *rk;
512         int i, j, status;
513         uint32_t temp;
514
515         /* first, start with an encryption schedule */
516         status = AES_set_encrypt_key(userKey, bits, key);
517         if (status < 0)
518                 return status;
519
520         rk = key->rd_key;
521
522         /* invert the order of the round keys: */
523         for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
524                 temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
525                 temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
526                 temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
527                 temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
528         }
529         /* apply the inverse MixColumn transform to all round keys but the first and the last: */
530         for (i = 1; i < (key->rounds); i++) {
531                 rk += 4;
532                 rk[0] =
533                         Td0(Te4((rk[0] >> 24)       ) ) ^
534                         ROR_U32_1(Td0(Te4((rk[0] >> 16) & 0xff) )) ^
535                         ROR_U32_2(Td0(Te4((rk[0] >>  8) & 0xff) )) ^
536                         ROR_U32_3(Td0(Te4((rk[0]      ) & 0xff) ));
537                 rk[1] =
538                         Td0(Te4((rk[1] >> 24)       ) ) ^
539                         ROR_U32_1(Td0(Te4((rk[1] >> 16) & 0xff) )) ^
540                         ROR_U32_2(Td0(Te4((rk[1] >>  8) & 0xff) )) ^
541                         ROR_U32_3(Td0(Te4((rk[1]      ) & 0xff) ));
542                 rk[2] =
543                         Td0(Te4((rk[2] >> 24)       ) ) ^
544                         ROR_U32_1(Td0(Te4((rk[2] >> 16) & 0xff) )) ^
545                         ROR_U32_2(Td0(Te4((rk[2] >>  8) & 0xff) )) ^
546                         ROR_U32_3(Td0(Te4((rk[2]      ) & 0xff) ));
547                 rk[3] =
548                         Td0(Te4((rk[3] >> 24)       ) ) ^
549                         ROR_U32_1(Td0(Te4((rk[3] >> 16) & 0xff) )) ^
550                         ROR_U32_2(Td0(Te4((rk[3] >>  8) & 0xff) )) ^
551                         ROR_U32_3(Td0(Te4((rk[3]      ) & 0xff) ));
552         }
553         return 0;
554 }
555
556 /*
557  * Encrypt a single block
558  * in and out can overlap
559  */
560 void AES_encrypt(const unsigned char *in, unsigned char *out,
561                  const AES_KEY *key) {
562         const uint32_t *rk;
563         uint32_t s0, s1, s2, s3, t0, t1, t2, t3;
564 #ifndef FULL_UNROLL
565         int r;
566 #endif /* ?FULL_UNROLL */
567
568         assert(in && out && key);
569         rk = key->rd_key;
570
571         /*
572          * map byte array block to cipher state
573          * and add initial round key:
574          */
575         s0 = GETU32(in     ) ^ rk[0];
576         s1 = GETU32(in +  4) ^ rk[1];
577         s2 = GETU32(in +  8) ^ rk[2];
578         s3 = GETU32(in + 12) ^ rk[3];
579     /*
580      * Nr - 1 full rounds:
581      */
582     r = key->rounds >> 1;
583     for (;;) {
584         t0 =
585             Te0((s0 >> 24)       ) ^
586             ROR_U32_1(Te0((s1 >> 16) & 0xff)) ^
587             ROR_U32_2(Te0((s2 >>  8) & 0xff)) ^
588             ROR_U32_3(Te0((s3      ) & 0xff)) ^
589             rk[4];
590         t1 =
591             Te0((s1 >> 24)       ) ^
592             ROR_U32_1(Te0((s2 >> 16) & 0xff)) ^
593             ROR_U32_2(Te0((s3 >>  8) & 0xff)) ^
594             ROR_U32_3(Te0((s0      ) & 0xff)) ^
595             rk[5];
596         t2 =
597             Te0((s2 >> 24)       ) ^
598             ROR_U32_1(Te0((s3 >> 16) & 0xff)) ^
599             ROR_U32_2(Te0((s0 >>  8) & 0xff)) ^
600             ROR_U32_3(Te0((s1      ) & 0xff)) ^
601             rk[6];
602         t3 =
603             Te0((s3 >> 24)       ) ^
604             ROR_U32_1(Te0((s0 >> 16) & 0xff)) ^
605             ROR_U32_2(Te0((s1 >>  8) & 0xff)) ^
606             ROR_U32_3(Te0((s2      ) & 0xff)) ^
607             rk[7];
608
609         rk += 8;
610         if (--r == 0) {
611             break;
612         }
613
614         s0 =
615             Te0((t0 >> 24)       ) ^
616             ROR_U32_1(Te0((t1 >> 16) & 0xff)) ^
617             ROR_U32_2(Te0((t2 >>  8) & 0xff)) ^
618             ROR_U32_3(Te0((t3      ) & 0xff)) ^
619             rk[0];
620         s1 =
621             Te0((t1 >> 24)       ) ^
622             ROR_U32_1(Te0((t2 >> 16) & 0xff)) ^
623             ROR_U32_2(Te0((t3 >>  8) & 0xff)) ^
624             ROR_U32_3(Te0((t0      ) & 0xff)) ^
625             rk[1];
626         s2 =
627             Te0((t2 >> 24)       ) ^
628             ROR_U32_1(Te0((t3 >> 16) & 0xff)) ^
629             ROR_U32_2(Te0((t0 >>  8) & 0xff)) ^
630             ROR_U32_3(Te0((t1      ) & 0xff)) ^
631             rk[2];
632         s3 =
633             Te0((t3 >> 24)       ) ^
634             ROR_U32_1(Te0((t0 >> 16) & 0xff)) ^
635             ROR_U32_2(Te0((t1 >>  8) & 0xff)) ^
636             ROR_U32_3(Te0((t2      ) & 0xff)) ^
637             rk[3];
638     }
639     /*
640          * apply last round and
641          * map cipher state to byte array block:
642          */
643         s0 =
644                 ((uint32_t)Te4((t0 >> 24)       ) <<24) ^
645                 ((uint32_t)Te4((t1 >> 16) & 0xff) <<16) ^
646                 ((uint32_t)Te4((t2 >>  8) & 0xff) <<8) ^
647                 ((uint32_t)Te4((t3      ) & 0xff) ) ^
648                 rk[0];
649         PUTU32(out     , s0);
650         s1 =
651                 ((uint32_t)Te4((t1 >> 24)       ) <<24) ^
652                 ((uint32_t)Te4((t2 >> 16) & 0xff) <<16) ^
653                 ((uint32_t)Te4((t3 >>  8) & 0xff) <<8) ^
654                 ((uint32_t)Te4((t0      ) & 0xff) ) ^
655                 rk[1];
656         PUTU32(out +  4, s1);
657         s2 =
658                 ((uint32_t)Te4((t2 >> 24)       ) <<24) ^
659                 ((uint32_t)Te4((t3 >> 16) & 0xff) <<16) ^
660                 ((uint32_t)Te4((t0 >>  8) & 0xff) <<8) ^
661                 ((uint32_t)Te4((t1      ) & 0xff) ) ^
662                 rk[2];
663         PUTU32(out +  8, s2);
664         s3 =
665                 ((uint32_t)Te4((t3 >> 24)       ) <<24) ^
666                 ((uint32_t)Te4((t0 >> 16) & 0xff) <<16) ^
667                 ((uint32_t)Te4((t1 >>  8) & 0xff) <<8) ^
668                 ((uint32_t)Te4((t2      ) & 0xff) ) ^
669                 rk[3];
670         PUTU32(out + 12, s3);
671 }
672
673 /*
674  * Decrypt a single block
675  * in and out can overlap
676  */
677 void AES_decrypt(const unsigned char *in, unsigned char *out,
678                  const AES_KEY *key) {
679
680         const uint32_t *rk;
681         uint32_t s0, s1, s2, s3, t0, t1, t2, t3;
682 #ifndef FULL_UNROLL
683         int r;
684 #endif /* ?FULL_UNROLL */
685
686         assert(in && out && key);
687         rk = key->rd_key;
688
689         /*
690          * map byte array block to cipher state
691          * and add initial round key:
692          */
693     s0 = GETU32(in     ) ^ rk[0];
694     s1 = GETU32(in +  4) ^ rk[1];
695     s2 = GETU32(in +  8) ^ rk[2];
696     s3 = GETU32(in + 12) ^ rk[3];
697     /*
698      * Nr - 1 full rounds:
699      */
700     r = key->rounds >> 1;
701     for (;;) {
702         t0 =
703             Td0((s0 >> 24)       ) ^
704             ROR_U32_1(Td0((s3 >> 16) & 0xff)) ^
705             ROR_U32_2(Td0((s2 >>  8) & 0xff)) ^
706             ROR_U32_3(Td0((s1      ) & 0xff)) ^
707             rk[4];
708         t1 =
709             Td0((s1 >> 24)       ) ^
710             ROR_U32_1(Td0((s0 >> 16) & 0xff)) ^
711             ROR_U32_2(Td0((s3 >>  8) & 0xff)) ^
712             ROR_U32_3(Td0((s2      ) & 0xff)) ^
713             rk[5];
714         t2 =
715             Td0((s2 >> 24)       ) ^
716             ROR_U32_1(Td0((s1 >> 16) & 0xff)) ^
717             ROR_U32_2(Td0((s0 >>  8) & 0xff)) ^
718             ROR_U32_3(Td0((s3      ) & 0xff)) ^
719             rk[6];
720         t3 =
721             Td0((s3 >> 24)       ) ^
722             ROR_U32_1(Td0((s2 >> 16) & 0xff)) ^
723             ROR_U32_2(Td0((s1 >>  8) & 0xff)) ^
724             ROR_U32_3(Td0((s0      ) & 0xff)) ^
725             rk[7];
726
727         rk += 8;
728         if (--r == 0) {
729             break;
730         }
731
732         s0 =
733             Td0((t0 >> 24)       ) ^
734             ROR_U32_1(Td0((t3 >> 16) & 0xff)) ^
735             ROR_U32_2(Td0((t2 >>  8) & 0xff)) ^
736             ROR_U32_3(Td0((t1      ) & 0xff)) ^
737             rk[0];
738         s1 =
739             Td0((t1 >> 24)       ) ^
740             ROR_U32_1(Td0((t0 >> 16) & 0xff)) ^
741             ROR_U32_2(Td0((t3 >>  8) & 0xff)) ^
742             ROR_U32_3(Td0((t2      ) & 0xff)) ^
743             rk[1];
744         s2 =
745             Td0((t2 >> 24)       ) ^
746             ROR_U32_1(Td0((t1 >> 16) & 0xff)) ^
747             ROR_U32_2(Td0((t0 >>  8) & 0xff)) ^
748             ROR_U32_3(Td0((t3      ) & 0xff)) ^
749             rk[2];
750         s3 =
751             Td0((t3 >> 24)       ) ^
752             ROR_U32_1(Td0((t2 >> 16) & 0xff)) ^
753             ROR_U32_2(Td0((t1 >>  8) & 0xff)) ^
754             ROR_U32_3(Td0((t0      ) & 0xff)) ^
755             rk[3];
756     }
757     /*
758          * apply last round and
759          * map cipher state to byte array block:
760          */
761         s0 =
762                 ((uint32_t)Td4((t0 >> 24)       ) <<24) ^
763                 ((uint32_t)Td4((t3 >> 16) & 0xff) <<16) ^
764                 ((uint32_t)Td4((t2 >>  8) & 0xff) <<8) ^
765                 ((uint32_t)Td4((t1      ) & 0xff) ) ^
766                 rk[0];
767         PUTU32(out     , s0);
768         s1 =
769                 ((uint32_t)Td4((t1 >> 24)       ) <<24) ^
770                 ((uint32_t)Td4((t0 >> 16) & 0xff) <<16) ^
771                 ((uint32_t)Td4((t3 >>  8) & 0xff) <<8) ^
772                 ((uint32_t)Td4((t2      ) & 0xff) ) ^
773                 rk[1];
774         PUTU32(out +  4, s1);
775         s2 =
776                 ((uint32_t)Td4((t2 >> 24)       ) <<24) ^
777                 ((uint32_t)Td4((t1 >> 16) & 0xff) <<16) ^
778                 ((uint32_t)Td4((t0 >>  8) & 0xff) <<8) ^
779                 ((uint32_t)Td4((t3      ) & 0xff) ) ^
780                 rk[2];
781         PUTU32(out +  8, s2);
782         s3 =
783                 ((uint32_t)Td4((t3 >> 24)       ) <<24) ^
784                 ((uint32_t)Td4((t2 >> 16) & 0xff) <<16) ^
785                 ((uint32_t)Td4((t1 >>  8) & 0xff) <<8) ^
786                 ((uint32_t)Td4((t0      ) & 0xff) ) ^
787                 rk[3];
788         PUTU32(out + 12, s3);
789 }
790