rijndael.cpp

00001 // rijndael.cpp - modified by Chris Morgan <cmorgan@wpi.edu>
00002 // and Wei Dai from Paulo Baretto's Rijndael implementation
00003 // The original code and all modifications are in the public domain.
00004 
00005 /*
00006 Defense against timing attacks was added in July 2006 by Wei Dai.
00007 
00008 The code now uses smaller tables in the first and last rounds,
00009 and preloads them into L1 cache before usage (by loading at least 
00010 one element in each cache line). 
00011 
00012 We try to delay subsequent accesses to each table (used in the first 
00013 and last rounds) until all of the table has been preloaded. Hopefully
00014 the compiler isn't smart enough to optimize that code away.
00015 
00016 After preloading the table, we also try not to access any memory location
00017 other than the table and the stack, in order to prevent table entries from 
00018 being unloaded from L1 cache, until that round is finished.
00019 (Some popular CPUs have 2-way associative caches.)
00020 */
00021 
00022 // This is the original introductory comment:
00023 
00024 /**
00025  * version 3.0 (December 2000)
00026  *
00027  * Optimised ANSI C code for the Rijndael cipher (now AES)
00028  *
00029  * author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
00030  * author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
00031  * author Paulo Barreto <paulo.barreto@terra.com.br>
00032  *
00033  * This code is hereby placed in the public domain.
00034  *
00035  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
00036  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00037  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00038  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
00039  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00040  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00041  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
00042  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
00043  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
00044  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
00045  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00046  */
00047 
00048 #include "pch.h"
00049 
00050 #ifndef CRYPTOPP_IMPORTS
00051 
00052 #include "rijndael.h"
00053 #include "misc.h"
00054 
00055 NAMESPACE_BEGIN(CryptoPP)
00056 
00057 void Rijndael::Base::UncheckedSetKey(CipherDir dir, const byte *userKey, unsigned int keylen)
00058 {
00059         AssertValidKeyLength(keylen);
00060 
00061         m_rounds = keylen/4 + 6;
00062         m_key.New(4*(m_rounds+1));
00063 
00064         word32 temp, *rk = m_key;
00065         const word32 *rc = rcon;
00066         unsigned int i=0;
00067 
00068         GetUserKey(BIG_ENDIAN_ORDER, rk, keylen/4, userKey, keylen);
00069 
00070         while (true)
00071         {
00072                 temp  = rk[keylen/4-1];
00073                 rk[keylen/4] = rk[0] ^
00074                         (word32(Se[GETBYTE(temp, 2)]) << 24) ^
00075                         (word32(Se[GETBYTE(temp, 1)]) << 16) ^
00076                         (word32(Se[GETBYTE(temp, 0)]) << 8) ^
00077                         Se[GETBYTE(temp, 3)] ^
00078                         *(rc++);
00079                 rk[keylen/4+1] = rk[1] ^ rk[keylen/4];
00080                 rk[keylen/4+2] = rk[2] ^ rk[keylen/4+1];
00081                 rk[keylen/4+3] = rk[3] ^ rk[keylen/4+2];
00082 
00083                 if (rk + keylen/4 + 4 == m_key.end())
00084                         break;
00085 
00086                 if (keylen == 24)
00087                 {
00088                         rk[10] = rk[ 4] ^ rk[ 9];
00089                         rk[11] = rk[ 5] ^ rk[10];
00090                 }
00091                 else if (keylen == 32)
00092                 {
00093                 temp = rk[11];
00094                 rk[12] = rk[ 4] ^
00095                                 (word32(Se[GETBYTE(temp, 3)]) << 24) ^
00096                                 (word32(Se[GETBYTE(temp, 2)]) << 16) ^
00097                                 (word32(Se[GETBYTE(temp, 1)]) << 8) ^
00098                                 Se[GETBYTE(temp, 0)];
00099                 rk[13] = rk[ 5] ^ rk[12];
00100                 rk[14] = rk[ 6] ^ rk[13];
00101                 rk[15] = rk[ 7] ^ rk[14];
00102                 }
00103                 rk += keylen/4;
00104         }
00105 
00106         if (dir == DECRYPTION)
00107         {
00108                 unsigned int i, j;
00109                 rk = m_key;
00110 
00111                 /* invert the order of the round keys: */
00112                 for (i = 0, j = 4*m_rounds; i < j; i += 4, j -= 4) {
00113                         temp = rk[i    ]; rk[i    ] = rk[j    ]; rk[j    ] = temp;
00114                         temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
00115                         temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
00116                         temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
00117                 }
00118                 /* apply the inverse MixColumn transform to all round keys but the first and the last: */
00119                 for (i = 1; i < m_rounds; i++) {
00120                         rk += 4;
00121                         rk[0] =
00122                                 Td0[Se[GETBYTE(rk[0], 3)]] ^
00123                                 Td1[Se[GETBYTE(rk[0], 2)]] ^
00124                                 Td2[Se[GETBYTE(rk[0], 1)]] ^
00125                                 Td3[Se[GETBYTE(rk[0], 0)]];
00126                         rk[1] =
00127                                 Td0[Se[GETBYTE(rk[1], 3)]] ^
00128                                 Td1[Se[GETBYTE(rk[1], 2)]] ^
00129                                 Td2[Se[GETBYTE(rk[1], 1)]] ^
00130                                 Td3[Se[GETBYTE(rk[1], 0)]];
00131                         rk[2] =
00132                                 Td0[Se[GETBYTE(rk[2], 3)]] ^
00133                                 Td1[Se[GETBYTE(rk[2], 2)]] ^
00134                                 Td2[Se[GETBYTE(rk[2], 1)]] ^
00135                                 Td3[Se[GETBYTE(rk[2], 0)]];
00136                         rk[3] =
00137                                 Td0[Se[GETBYTE(rk[3], 3)]] ^
00138                                 Td1[Se[GETBYTE(rk[3], 2)]] ^
00139                                 Td2[Se[GETBYTE(rk[3], 1)]] ^
00140                                 Td3[Se[GETBYTE(rk[3], 0)]];
00141                 }
00142         }
00143 
00144         ConditionalByteReverse(BIG_ENDIAN_ORDER, m_key.begin(), m_key.begin(), 16);
00145         ConditionalByteReverse(BIG_ENDIAN_ORDER, m_key + m_rounds*4, m_key + m_rounds*4, 16);
00146 }
00147 
00148 const static unsigned int s_lineSizeDiv4 = CRYPTOPP_L1_CACHE_LINE_SIZE/4;
00149 #ifdef IS_BIG_ENDIAN
00150 const static unsigned int s_i3=3, s_i2=2, s_i1=1, s_i0=0;
00151 #else
00152 const static unsigned int s_i3=0, s_i2=1, s_i1=2, s_i0=3;
00153 #endif
00154 
00155 void Rijndael::Enc::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
00156 {
00157         word32 s0, s1, s2, s3, t0, t1, t2, t3;
00158         const word32 *rk = m_key;
00159 
00160         s0 = ((const word32 *)inBlock)[0] ^ rk[0];
00161         s1 = ((const word32 *)inBlock)[1] ^ rk[1];
00162         s2 = ((const word32 *)inBlock)[2] ^ rk[2];
00163         s3 = ((const word32 *)inBlock)[3] ^ rk[3];
00164         t0 = rk[4];
00165         t1 = rk[5];
00166         t2 = rk[6];
00167         t3 = rk[7];
00168         rk += 8;
00169 
00170         // timing attack countermeasure. see comments at top for more details
00171         unsigned int i;
00172         word32 u = 0;
00173         for (i=0; i<sizeof(Te0)/4; i+=CRYPTOPP_L1_CACHE_LINE_SIZE)
00174                 u &= (Te0[i+0*s_lineSizeDiv4] & Te0[i+2*s_lineSizeDiv4]) & (Te0[i+1*s_lineSizeDiv4] & Te0[i+3*s_lineSizeDiv4]);
00175         s0 |= u; s1 |= u; s2 |= u; s3 |= u;
00176 
00177         // first round
00178     t0 ^=
00179         Te0[GETBYTE(s0, s_i3)] ^
00180         rotrFixed(Te0[GETBYTE(s1, s_i2)], 8) ^
00181         rotrFixed(Te0[GETBYTE(s2, s_i1)], 16) ^
00182         rotrFixed(Te0[GETBYTE(s3, s_i0)], 24);
00183     t1 ^=
00184         Te0[GETBYTE(s1, s_i3)] ^
00185         rotrFixed(Te0[GETBYTE(s2, s_i2)], 8) ^
00186         rotrFixed(Te0[GETBYTE(s3, s_i1)], 16) ^
00187         rotrFixed(Te0[GETBYTE(s0, s_i0)], 24);
00188     t2 ^=
00189         Te0[GETBYTE(s2, s_i3)] ^
00190         rotrFixed(Te0[GETBYTE(s3, s_i2)], 8) ^
00191         rotrFixed(Te0[GETBYTE(s0, s_i1)], 16) ^
00192         rotrFixed(Te0[GETBYTE(s1, s_i0)], 24);
00193     t3 ^=
00194         Te0[GETBYTE(s3, s_i3)] ^
00195         rotrFixed(Te0[GETBYTE(s0, s_i2)], 8) ^
00196         rotrFixed(Te0[GETBYTE(s1, s_i1)], 16) ^
00197         rotrFixed(Te0[GETBYTE(s2, s_i0)], 24);
00198 
00199         // Nr - 2 full rounds:
00200     unsigned int r = m_rounds/2 - 1;
00201     do
00202         {
00203         s0 =
00204             Te0[GETBYTE(t0, 3)] ^
00205             Te1[GETBYTE(t1, 2)] ^
00206             Te2[GETBYTE(t2, 1)] ^
00207             Te3[GETBYTE(t3, 0)] ^
00208             rk[0];
00209         s1 =
00210             Te0[GETBYTE(t1, 3)] ^
00211             Te1[GETBYTE(t2, 2)] ^
00212             Te2[GETBYTE(t3, 1)] ^
00213             Te3[GETBYTE(t0, 0)] ^
00214             rk[1];
00215         s2 =
00216             Te0[GETBYTE(t2, 3)] ^
00217             Te1[GETBYTE(t3, 2)] ^
00218             Te2[GETBYTE(t0, 1)] ^
00219             Te3[GETBYTE(t1, 0)] ^
00220             rk[2];
00221         s3 =
00222             Te0[GETBYTE(t3, 3)] ^
00223             Te1[GETBYTE(t0, 2)] ^
00224             Te2[GETBYTE(t1, 1)] ^
00225             Te3[GETBYTE(t2, 0)] ^
00226             rk[3];
00227 
00228         t0 =
00229             Te0[GETBYTE(s0, 3)] ^
00230             Te1[GETBYTE(s1, 2)] ^
00231             Te2[GETBYTE(s2, 1)] ^
00232             Te3[GETBYTE(s3, 0)] ^
00233             rk[4];
00234         t1 =
00235             Te0[GETBYTE(s1, 3)] ^
00236             Te1[GETBYTE(s2, 2)] ^
00237             Te2[GETBYTE(s3, 1)] ^
00238             Te3[GETBYTE(s0, 0)] ^
00239             rk[5];
00240         t2 =
00241             Te0[GETBYTE(s2, 3)] ^
00242             Te1[GETBYTE(s3, 2)] ^
00243             Te2[GETBYTE(s0, 1)] ^
00244             Te3[GETBYTE(s1, 0)] ^
00245             rk[6];
00246         t3 =
00247             Te0[GETBYTE(s3, 3)] ^
00248             Te1[GETBYTE(s0, 2)] ^
00249             Te2[GETBYTE(s1, 1)] ^
00250             Te3[GETBYTE(s2, 0)] ^
00251             rk[7];
00252 
00253         rk += 8;
00254     } while (--r);
00255 
00256         // timing attack countermeasure. see comments at top for more details
00257         u = 0;
00258         for (i=0; i<sizeof(Se)/4; i+=CRYPTOPP_L1_CACHE_LINE_SIZE)
00259                 u &= (((word32*)Se)[i+0*s_lineSizeDiv4] & ((word32*)Se)[i+2*s_lineSizeDiv4]) & (((word32*)Se)[i+1*s_lineSizeDiv4] & ((word32*)Se)[i+3*s_lineSizeDiv4]);
00260         t0 |= u; t1 |= u; t2 |= u; t3 |= u;
00261 
00262         word32 tbw[4];
00263         byte *const tempBlock = (byte *)tbw;
00264         word32 *const obw = (word32 *)outBlock;
00265         const word32 *const xbw = (const word32 *)xorBlock;
00266 
00267         // last round
00268         tempBlock[0] = Se[GETBYTE(t0, 3)];
00269         tempBlock[1] = Se[GETBYTE(t1, 2)];
00270         tempBlock[2] = Se[GETBYTE(t2, 1)];
00271         tempBlock[3] = Se[GETBYTE(t3, 0)];
00272         tempBlock[4] = Se[GETBYTE(t1, 3)];
00273         tempBlock[5] = Se[GETBYTE(t2, 2)];
00274         tempBlock[6] = Se[GETBYTE(t3, 1)];
00275         tempBlock[7] = Se[GETBYTE(t0, 0)];
00276         tempBlock[8] = Se[GETBYTE(t2, 3)];
00277         tempBlock[9] = Se[GETBYTE(t3, 2)];
00278         tempBlock[10] = Se[GETBYTE(t0, 1)];
00279         tempBlock[11] = Se[GETBYTE(t1, 0)];
00280         tempBlock[12] = Se[GETBYTE(t3, 3)];
00281         tempBlock[13] = Se[GETBYTE(t0, 2)];
00282         tempBlock[14] = Se[GETBYTE(t1, 1)];
00283         tempBlock[15] = Se[GETBYTE(t2, 0)];
00284 
00285         if (xbw)
00286         {
00287                 obw[0] = tbw[0] ^ xbw[0] ^ rk[0];
00288                 obw[1] = tbw[1] ^ xbw[1] ^ rk[1];
00289                 obw[2] = tbw[2] ^ xbw[2] ^ rk[2];
00290                 obw[3] = tbw[3] ^ xbw[3] ^ rk[3];
00291         }
00292         else
00293         {
00294                 obw[0] = tbw[0] ^ rk[0];
00295                 obw[1] = tbw[1] ^ rk[1];
00296                 obw[2] = tbw[2] ^ rk[2];
00297                 obw[3] = tbw[3] ^ rk[3];
00298         }
00299 }
00300 
00301 void Rijndael::Dec::ProcessAndXorBlock(const byte *inBlock, const byte *xorBlock, byte *outBlock) const
00302 {
00303         word32 s0, s1, s2, s3, t0, t1, t2, t3;
00304     const word32 *rk = m_key;
00305 
00306         s0 = ((const word32 *)inBlock)[0] ^ rk[0];
00307         s1 = ((const word32 *)inBlock)[1] ^ rk[1];
00308         s2 = ((const word32 *)inBlock)[2] ^ rk[2];
00309         s3 = ((const word32 *)inBlock)[3] ^ rk[3];
00310         t0 = rk[4];
00311         t1 = rk[5];
00312         t2 = rk[6];
00313         t3 = rk[7];
00314         rk += 8;
00315 
00316         // timing attack countermeasure. see comments at top for more details
00317         unsigned int i;
00318         word32 u = 0;
00319         for (i=0; i<sizeof(Td0)/4; i+=CRYPTOPP_L1_CACHE_LINE_SIZE)
00320                 u &= (Td0[i+0*s_lineSizeDiv4] & Td0[i+2*s_lineSizeDiv4]) & (Td0[i+1*s_lineSizeDiv4] & Td0[i+3*s_lineSizeDiv4]);
00321         s0 |= u; s1 |= u; s2 |= u; s3 |= u;
00322 
00323         // first round
00324     t0 ^=
00325         Td0[GETBYTE(s0, s_i3)] ^
00326         rotrFixed(Td0[GETBYTE(s3, s_i2)], 8) ^
00327         rotrFixed(Td0[GETBYTE(s2, s_i1)], 16) ^
00328         rotrFixed(Td0[GETBYTE(s1, s_i0)], 24);
00329     t1 ^=
00330         Td0[GETBYTE(s1, s_i3)] ^
00331         rotrFixed(Td0[GETBYTE(s0, s_i2)], 8) ^
00332         rotrFixed(Td0[GETBYTE(s3, s_i1)], 16) ^
00333         rotrFixed(Td0[GETBYTE(s2, s_i0)], 24);
00334     t2 ^=
00335         Td0[GETBYTE(s2, s_i3)] ^
00336         rotrFixed(Td0[GETBYTE(s1, s_i2)], 8) ^
00337         rotrFixed(Td0[GETBYTE(s0, s_i1)], 16) ^
00338         rotrFixed(Td0[GETBYTE(s3, s_i0)], 24);
00339     t3 ^=
00340         Td0[GETBYTE(s3, s_i3)] ^
00341         rotrFixed(Td0[GETBYTE(s2, s_i2)], 8) ^
00342         rotrFixed(Td0[GETBYTE(s1, s_i1)], 16) ^
00343         rotrFixed(Td0[GETBYTE(s0, s_i0)], 24);
00344 
00345         // Nr - 2 full rounds:
00346     unsigned int r = m_rounds/2 - 1;
00347     do
00348         {
00349         s0 =
00350             Td0[GETBYTE(t0, 3)] ^
00351             Td1[GETBYTE(t3, 2)] ^
00352             Td2[GETBYTE(t2, 1)] ^
00353             Td3[GETBYTE(t1, 0)] ^
00354             rk[0];
00355         s1 =
00356             Td0[GETBYTE(t1, 3)] ^
00357             Td1[GETBYTE(t0, 2)] ^
00358             Td2[GETBYTE(t3, 1)] ^
00359             Td3[GETBYTE(t2, 0)] ^
00360             rk[1];
00361         s2 =
00362             Td0[GETBYTE(t2, 3)] ^
00363             Td1[GETBYTE(t1, 2)] ^
00364             Td2[GETBYTE(t0, 1)] ^
00365             Td3[GETBYTE(t3, 0)] ^
00366             rk[2];
00367         s3 =
00368             Td0[GETBYTE(t3, 3)] ^
00369             Td1[GETBYTE(t2, 2)] ^
00370             Td2[GETBYTE(t1, 1)] ^
00371             Td3[GETBYTE(t0, 0)] ^
00372             rk[3];
00373 
00374         t0 =
00375             Td0[GETBYTE(s0, 3)] ^
00376             Td1[GETBYTE(s3, 2)] ^
00377             Td2[GETBYTE(s2, 1)] ^
00378             Td3[GETBYTE(s1, 0)] ^
00379             rk[4];
00380         t1 =
00381             Td0[GETBYTE(s1, 3)] ^
00382             Td1[GETBYTE(s0, 2)] ^
00383             Td2[GETBYTE(s3, 1)] ^
00384             Td3[GETBYTE(s2, 0)] ^
00385             rk[5];
00386         t2 =
00387             Td0[GETBYTE(s2, 3)] ^
00388             Td1[GETBYTE(s1, 2)] ^
00389             Td2[GETBYTE(s0, 1)] ^
00390             Td3[GETBYTE(s3, 0)] ^
00391             rk[6];
00392         t3 =
00393             Td0[GETBYTE(s3, 3)] ^
00394             Td1[GETBYTE(s2, 2)] ^
00395             Td2[GETBYTE(s1, 1)] ^
00396             Td3[GETBYTE(s0, 0)] ^
00397             rk[7];
00398 
00399         rk += 8;
00400     } while (--r);
00401 
00402         // timing attack countermeasure. see comments at top for more details
00403         u = 0;
00404         for (i=0; i<sizeof(Sd)/4; i+=CRYPTOPP_L1_CACHE_LINE_SIZE)
00405                 u &= (((word32*)Sd)[i+0*s_lineSizeDiv4] & ((word32*)Sd)[i+2*s_lineSizeDiv4]) & (((word32*)Sd)[i+1*s_lineSizeDiv4] & ((word32*)Sd)[i+3*s_lineSizeDiv4]);
00406         t0 |= u; t1 |= u; t2 |= u; t3 |= u;
00407 
00408         word32 tbw[4];
00409         byte *const tempBlock = (byte *)tbw;
00410         word32 *const obw = (word32 *)outBlock;
00411         const word32 *const xbw = (const word32 *)xorBlock;
00412 
00413         // last round
00414         tempBlock[0] = Sd[GETBYTE(t0, 3)];
00415         tempBlock[1] = Sd[GETBYTE(t3, 2)];
00416         tempBlock[2] = Sd[GETBYTE(t2, 1)];
00417         tempBlock[3] = Sd[GETBYTE(t1, 0)];
00418         tempBlock[4] = Sd[GETBYTE(t1, 3)];
00419         tempBlock[5] = Sd[GETBYTE(t0, 2)];
00420         tempBlock[6] = Sd[GETBYTE(t3, 1)];
00421         tempBlock[7] = Sd[GETBYTE(t2, 0)];
00422         tempBlock[8] = Sd[GETBYTE(t2, 3)];
00423         tempBlock[9] = Sd[GETBYTE(t1, 2)];
00424         tempBlock[10] = Sd[GETBYTE(t0, 1)];
00425         tempBlock[11] = Sd[GETBYTE(t3, 0)];
00426         tempBlock[12] = Sd[GETBYTE(t3, 3)];
00427         tempBlock[13] = Sd[GETBYTE(t2, 2)];
00428         tempBlock[14] = Sd[GETBYTE(t1, 1)];
00429         tempBlock[15] = Sd[GETBYTE(t0, 0)];
00430 
00431         if (xbw)
00432         {
00433                 obw[0] = tbw[0] ^ xbw[0] ^ rk[0];
00434                 obw[1] = tbw[1] ^ xbw[1] ^ rk[1];
00435                 obw[2] = tbw[2] ^ xbw[2] ^ rk[2];
00436                 obw[3] = tbw[3] ^ xbw[3] ^ rk[3];
00437         }
00438         else
00439         {
00440                 obw[0] = tbw[0] ^ rk[0];
00441                 obw[1] = tbw[1] ^ rk[1];
00442                 obw[2] = tbw[2] ^ rk[2];
00443                 obw[3] = tbw[3] ^ rk[3];
00444         }
00445 }
00446 
00447 NAMESPACE_END
00448 
00449 #endif

Generated on Thu Nov 23 15:57:44 2006 for Crypto++ by  doxygen 1.5.1-p1