diff options
author | Peter Dettman <peter.dettman@bouncycastle.org> | 2015-10-30 18:27:46 +0700 |
---|---|---|
committer | Peter Dettman <peter.dettman@bouncycastle.org> | 2015-10-30 18:27:46 +0700 |
commit | d6a972a9f64c3f671c80376e2ac5e5758b757ce4 (patch) | |
tree | f9618e99b70e721f59431ece7a7901b7e9f016a2 | |
parent | Rewrite (block) update for improved performance (diff) | |
download | BouncyCastle.NET-ed25519-d6a972a9f64c3f671c80376e2ac5e5758b757ce4.tar.xz |
Improve performance of AES key schedule
-rw-r--r-- | crypto/src/crypto/engines/AesEngine.cs | 135 | ||||
-rw-r--r-- | crypto/src/crypto/engines/AesFastEngine.cs | 129 | ||||
-rw-r--r-- | crypto/src/crypto/engines/AesLightEngine.cs | 139 |
3 files changed, 310 insertions, 93 deletions
diff --git a/crypto/src/crypto/engines/AesEngine.cs b/crypto/src/crypto/engines/AesEngine.cs index 164c43ee9..c84f4a964 100644 --- a/crypto/src/crypto/engines/AesEngine.cs +++ b/crypto/src/crypto/engines/AesEngine.cs @@ -288,17 +288,14 @@ namespace Org.BouncyCastle.Crypto.Engines * AES specified a fixed block size of 128 bits and key sizes 128/192/256 bits * This code is written assuming those are the only possible values */ - private uint[][] GenerateWorkingKey( - byte[] key, - bool forEncryption) + private uint[][] GenerateWorkingKey(byte[] key, bool forEncryption) { - int KC = key.Length / 4; // key length in words - int t; - - if ((KC != 4) && (KC != 6) && (KC != 8)) + int keyLen = key.Length; + if (keyLen < 16 || keyLen > 32 || (keyLen & 7) != 0) throw new ArgumentException("Key length not 128/192/256 bits."); - ROUNDS = KC + 6; // This is not always true for the generalized Rijndael that allows larger block sizes + int KC = keyLen >> 2; + this.ROUNDS = KC + 6; // This is not always true for the generalized Rijndael that allows larger block sizes uint[][] W = new uint[ROUNDS + 1][]; // 4 words in a block for (int i = 0; i <= ROUNDS; ++i) @@ -306,35 +303,109 @@ namespace Org.BouncyCastle.Crypto.Engines W[i] = new uint[4]; } - // - // copy the key into the round key array - // - - t = 0; - for (int i = 0; i < key.Length; t++) - { - W[t >> 2][t & 3] = Pack.LE_To_UInt32(key, i); - i+=4; - } - - // - // while not enough round key material calculated - // calculate new values - // - int k = (ROUNDS + 1) << 2; - for (int i = KC; (i < k); i++) + switch (KC) { - uint temp = W[(i-1)>>2][(i-1)&3]; - if ((i % KC) == 0) + case 4: { - temp = SubWord(Shift(temp, 8)) ^ rcon[(i / KC)-1]; - } - else if ((KC > 6) && ((i % KC) == 4)) + uint t0 = Pack.LE_To_UInt32(key, 0); W[0][0] = t0; + uint t1 = Pack.LE_To_UInt32(key, 4); W[0][1] = t1; + uint t2 = Pack.LE_To_UInt32(key, 8); W[0][2] = t2; + uint t3 = Pack.LE_To_UInt32(key, 12); W[0][3] = t3; + + for (int i = 1; i <= 10; ++i) + { + uint u = SubWord(Shift(t3, 8)) ^ rcon[i - 1]; + t0 ^= u; W[i][0] = t0; + t1 ^= t0; W[i][1] = t1; + t2 ^= t1; W[i][2] = t2; + t3 ^= t2; W[i][3] = t3; + } + + break; + } + case 6: { - temp = SubWord(temp); + uint t0 = Pack.LE_To_UInt32(key, 0); W[0][0] = t0; + uint t1 = Pack.LE_To_UInt32(key, 4); W[0][1] = t1; + uint t2 = Pack.LE_To_UInt32(key, 8); W[0][2] = t2; + uint t3 = Pack.LE_To_UInt32(key, 12); W[0][3] = t3; + uint t4 = Pack.LE_To_UInt32(key, 16); W[1][0] = t4; + uint t5 = Pack.LE_To_UInt32(key, 20); W[1][1] = t5; + + uint rcon = 1; + uint u = SubWord(Shift(t5, 8)) ^ rcon; rcon <<= 1; + t0 ^= u; W[1][2] = t0; + t1 ^= t0; W[1][3] = t1; + t2 ^= t1; W[2][0] = t2; + t3 ^= t2; W[2][1] = t3; + t4 ^= t3; W[2][2] = t4; + t5 ^= t4; W[2][3] = t5; + + for (int i = 3; i < 12; i += 3) + { + u = SubWord(Shift(t5, 8)) ^ rcon; rcon <<= 1; + t0 ^= u; W[i ][0] = t0; + t1 ^= t0; W[i ][1] = t1; + t2 ^= t1; W[i ][2] = t2; + t3 ^= t2; W[i ][3] = t3; + t4 ^= t3; W[i + 1][0] = t4; + t5 ^= t4; W[i + 1][1] = t5; + u = SubWord(Shift(t5, 8)) ^ rcon; rcon <<= 1; + t0 ^= u; W[i + 1][2] = t0; + t1 ^= t0; W[i + 1][3] = t1; + t2 ^= t1; W[i + 2][0] = t2; + t3 ^= t2; W[i + 2][1] = t3; + t4 ^= t3; W[i + 2][2] = t4; + t5 ^= t4; W[i + 2][3] = t5; + } + + u = SubWord(Shift(t5, 8)) ^ rcon; + t0 ^= u; W[12][0] = t0; + t1 ^= t0; W[12][1] = t1; + t2 ^= t1; W[12][2] = t2; + t3 ^= t2; W[12][3] = t3; + + break; } + case 8: + { + uint t0 = Pack.LE_To_UInt32(key, 0); W[0][0] = t0; + uint t1 = Pack.LE_To_UInt32(key, 4); W[0][1] = t1; + uint t2 = Pack.LE_To_UInt32(key, 8); W[0][2] = t2; + uint t3 = Pack.LE_To_UInt32(key, 12); W[0][3] = t3; + uint t4 = Pack.LE_To_UInt32(key, 16); W[1][0] = t4; + uint t5 = Pack.LE_To_UInt32(key, 20); W[1][1] = t5; + uint t6 = Pack.LE_To_UInt32(key, 24); W[1][2] = t6; + uint t7 = Pack.LE_To_UInt32(key, 28); W[1][3] = t7; + + uint u, rcon = 1; + + for (int i = 2; i < 14; i += 2) + { + u = SubWord(Shift(t7, 8)) ^ rcon; rcon <<= 1; + t0 ^= u; W[i ][0] = t0; + t1 ^= t0; W[i ][1] = t1; + t2 ^= t1; W[i ][2] = t2; + t3 ^= t2; W[i ][3] = t3; + u = SubWord(t3); + t4 ^= u; W[i + 1][0] = t4; + t5 ^= t4; W[i + 1][1] = t5; + t6 ^= t5; W[i + 1][2] = t6; + t7 ^= t6; W[i + 1][3] = t7; + } + + u = SubWord(Shift(t7, 8)) ^ rcon; + t0 ^= u; W[14][0] = t0; + t1 ^= t0; W[14][1] = t1; + t2 ^= t1; W[14][2] = t2; + t3 ^= t2; W[14][3] = t3; - W[i>>2][i&3] = W[(i - KC)>>2][(i-KC)&3] ^ temp; + break; + } + default: + { + throw new InvalidOperationException("Should never get here"); + } } if (!forEncryption) diff --git a/crypto/src/crypto/engines/AesFastEngine.cs b/crypto/src/crypto/engines/AesFastEngine.cs index 38ce1a946..18367a324 100644 --- a/crypto/src/crypto/engines/AesFastEngine.cs +++ b/crypto/src/crypto/engines/AesFastEngine.cs @@ -624,16 +624,14 @@ namespace Org.BouncyCastle.Crypto.Engines * AES specified a fixed block size of 128 bits and key sizes 128/192/256 bits * This code is written assuming those are the only possible values */ - private uint[][] GenerateWorkingKey( - byte[] key, - bool forEncryption) + private uint[][] GenerateWorkingKey(byte[] key, bool forEncryption) { - int KC = key.Length / 4; // key length in words - - if (((KC != 4) && (KC != 6) && (KC != 8)) || ((KC * 4) != key.Length)) + int keyLen = key.Length; + if (keyLen < 16 || keyLen > 32 || (keyLen & 7) != 0) throw new ArgumentException("Key length not 128/192/256 bits."); - ROUNDS = KC + 6; // This is not always true for the generalized Rijndael that allows larger block sizes + int KC = keyLen >> 2; + this.ROUNDS = KC + 6; // This is not always true for the generalized Rijndael that allows larger block sizes uint[][] W = new uint[ROUNDS + 1][]; // 4 words in a block for (int i = 0; i <= ROUNDS; ++i) @@ -641,32 +639,109 @@ namespace Org.BouncyCastle.Crypto.Engines W[i] = new uint[4]; } - // - // copy the key into the round key array - // - - int t = 0; - for (int i = 0; i < key.Length; t++) + switch (KC) + { + case 4: { - W[t >> 2][t & 3] = Pack.LE_To_UInt32(key, i); - i+=4; + uint t0 = Pack.LE_To_UInt32(key, 0); W[0][0] = t0; + uint t1 = Pack.LE_To_UInt32(key, 4); W[0][1] = t1; + uint t2 = Pack.LE_To_UInt32(key, 8); W[0][2] = t2; + uint t3 = Pack.LE_To_UInt32(key, 12); W[0][3] = t3; + + for (int i = 1; i <= 10; ++i) + { + uint u = SubWord(Shift(t3, 8)) ^ rcon[i - 1]; + t0 ^= u; W[i][0] = t0; + t1 ^= t0; W[i][1] = t1; + t2 ^= t1; W[i][2] = t2; + t3 ^= t2; W[i][3] = t3; + } + + break; } + case 6: + { + uint t0 = Pack.LE_To_UInt32(key, 0); W[0][0] = t0; + uint t1 = Pack.LE_To_UInt32(key, 4); W[0][1] = t1; + uint t2 = Pack.LE_To_UInt32(key, 8); W[0][2] = t2; + uint t3 = Pack.LE_To_UInt32(key, 12); W[0][3] = t3; + uint t4 = Pack.LE_To_UInt32(key, 16); W[1][0] = t4; + uint t5 = Pack.LE_To_UInt32(key, 20); W[1][1] = t5; + + uint rcon = 1; + uint u = SubWord(Shift(t5, 8)) ^ rcon; rcon <<= 1; + t0 ^= u; W[1][2] = t0; + t1 ^= t0; W[1][3] = t1; + t2 ^= t1; W[2][0] = t2; + t3 ^= t2; W[2][1] = t3; + t4 ^= t3; W[2][2] = t4; + t5 ^= t4; W[2][3] = t5; + + for (int i = 3; i < 12; i += 3) + { + u = SubWord(Shift(t5, 8)) ^ rcon; rcon <<= 1; + t0 ^= u; W[i ][0] = t0; + t1 ^= t0; W[i ][1] = t1; + t2 ^= t1; W[i ][2] = t2; + t3 ^= t2; W[i ][3] = t3; + t4 ^= t3; W[i + 1][0] = t4; + t5 ^= t4; W[i + 1][1] = t5; + u = SubWord(Shift(t5, 8)) ^ rcon; rcon <<= 1; + t0 ^= u; W[i + 1][2] = t0; + t1 ^= t0; W[i + 1][3] = t1; + t2 ^= t1; W[i + 2][0] = t2; + t3 ^= t2; W[i + 2][1] = t3; + t4 ^= t3; W[i + 2][2] = t4; + t5 ^= t4; W[i + 2][3] = t5; + } + + u = SubWord(Shift(t5, 8)) ^ rcon; + t0 ^= u; W[12][0] = t0; + t1 ^= t0; W[12][1] = t1; + t2 ^= t1; W[12][2] = t2; + t3 ^= t2; W[12][3] = t3; - // - // while not enough round key material calculated - // calculate new values - // - int k = (ROUNDS + 1) << 2; - for (int i = KC; (i < k); i++) + break; + } + case 8: { - uint temp = W[(i-1)>>2][(i-1)&3]; - if ((i % KC) == 0) { - temp = SubWord(Shift(temp, 8)) ^ rcon[(i / KC)-1]; - } else if ((KC > 6) && ((i % KC) == 4)) { - temp = SubWord(temp); + uint t0 = Pack.LE_To_UInt32(key, 0); W[0][0] = t0; + uint t1 = Pack.LE_To_UInt32(key, 4); W[0][1] = t1; + uint t2 = Pack.LE_To_UInt32(key, 8); W[0][2] = t2; + uint t3 = Pack.LE_To_UInt32(key, 12); W[0][3] = t3; + uint t4 = Pack.LE_To_UInt32(key, 16); W[1][0] = t4; + uint t5 = Pack.LE_To_UInt32(key, 20); W[1][1] = t5; + uint t6 = Pack.LE_To_UInt32(key, 24); W[1][2] = t6; + uint t7 = Pack.LE_To_UInt32(key, 28); W[1][3] = t7; + + uint u, rcon = 1; + + for (int i = 2; i < 14; i += 2) + { + u = SubWord(Shift(t7, 8)) ^ rcon; rcon <<= 1; + t0 ^= u; W[i ][0] = t0; + t1 ^= t0; W[i ][1] = t1; + t2 ^= t1; W[i ][2] = t2; + t3 ^= t2; W[i ][3] = t3; + u = SubWord(t3); + t4 ^= u; W[i + 1][0] = t4; + t5 ^= t4; W[i + 1][1] = t5; + t6 ^= t5; W[i + 1][2] = t6; + t7 ^= t6; W[i + 1][3] = t7; } - W[i>>2][i&3] = W[(i - KC)>>2][(i-KC)&3] ^ temp; + u = SubWord(Shift(t7, 8)) ^ rcon; + t0 ^= u; W[14][0] = t0; + t1 ^= t0; W[14][1] = t1; + t2 ^= t1; W[14][2] = t2; + t3 ^= t2; W[14][3] = t3; + + break; + } + default: + { + throw new InvalidOperationException("Should never get here"); + } } if (!forEncryption) diff --git a/crypto/src/crypto/engines/AesLightEngine.cs b/crypto/src/crypto/engines/AesLightEngine.cs index a42b34971..a48fa5857 100644 --- a/crypto/src/crypto/engines/AesLightEngine.cs +++ b/crypto/src/crypto/engines/AesLightEngine.cs @@ -185,17 +185,14 @@ namespace Org.BouncyCastle.Crypto.Engines * AES specified a fixed block size of 128 bits and key sizes 128/192/256 bits * This code is written assuming those are the only possible values */ - private uint[][] GenerateWorkingKey( - byte[] key, - bool forEncryption) + private uint[][] GenerateWorkingKey(byte[] key, bool forEncryption) { - int KC = key.Length / 4; // key length in words - int t; - - if ((KC != 4) && (KC != 6) && (KC != 8)) + int keyLen = key.Length; + if (keyLen < 16 || keyLen > 32 || (keyLen & 7) != 0) throw new ArgumentException("Key length not 128/192/256 bits."); - ROUNDS = KC + 6; // This is not always true for the generalized Rijndael that allows larger block sizes + int KC = keyLen >> 2; + this.ROUNDS = KC + 6; // This is not always true for the generalized Rijndael that allows larger block sizes uint[][] W = new uint[ROUNDS + 1][]; // 4 words in a block for (int i = 0; i <= ROUNDS; ++i) @@ -203,40 +200,114 @@ namespace Org.BouncyCastle.Crypto.Engines W[i] = new uint[4]; } - // - // copy the key into the round key array - // - - t = 0; - for (int i = 0; i < key.Length; t++) - { - W[t >> 2][t & 3] = Pack.LE_To_UInt32(key, i); - i+=4; - } - - // - // while not enough round key material calculated - // calculate new values - // - int k = (ROUNDS + 1) << 2; - for (int i = KC; (i < k); i++) + switch (KC) { - uint temp = W[(i-1)>>2][(i-1)&3]; - if ((i % KC) == 0) + case 4: { - temp = SubWord(Shift(temp, 8)) ^ rcon[(i / KC)-1]; - } - else if ((KC > 6) && ((i % KC) == 4)) + uint t0 = Pack.LE_To_UInt32(key, 0); W[0][0] = t0; + uint t1 = Pack.LE_To_UInt32(key, 4); W[0][1] = t1; + uint t2 = Pack.LE_To_UInt32(key, 8); W[0][2] = t2; + uint t3 = Pack.LE_To_UInt32(key, 12); W[0][3] = t3; + + for (int i = 1; i <= 10; ++i) + { + uint u = SubWord(Shift(t3, 8)) ^ rcon[i - 1]; + t0 ^= u; W[i][0] = t0; + t1 ^= t0; W[i][1] = t1; + t2 ^= t1; W[i][2] = t2; + t3 ^= t2; W[i][3] = t3; + } + + break; + } + case 6: { - temp = SubWord(temp); + uint t0 = Pack.LE_To_UInt32(key, 0); W[0][0] = t0; + uint t1 = Pack.LE_To_UInt32(key, 4); W[0][1] = t1; + uint t2 = Pack.LE_To_UInt32(key, 8); W[0][2] = t2; + uint t3 = Pack.LE_To_UInt32(key, 12); W[0][3] = t3; + uint t4 = Pack.LE_To_UInt32(key, 16); W[1][0] = t4; + uint t5 = Pack.LE_To_UInt32(key, 20); W[1][1] = t5; + + uint rcon = 1; + uint u = SubWord(Shift(t5, 8)) ^ rcon; rcon <<= 1; + t0 ^= u; W[1][2] = t0; + t1 ^= t0; W[1][3] = t1; + t2 ^= t1; W[2][0] = t2; + t3 ^= t2; W[2][1] = t3; + t4 ^= t3; W[2][2] = t4; + t5 ^= t4; W[2][3] = t5; + + for (int i = 3; i < 12; i += 3) + { + u = SubWord(Shift(t5, 8)) ^ rcon; rcon <<= 1; + t0 ^= u; W[i ][0] = t0; + t1 ^= t0; W[i ][1] = t1; + t2 ^= t1; W[i ][2] = t2; + t3 ^= t2; W[i ][3] = t3; + t4 ^= t3; W[i + 1][0] = t4; + t5 ^= t4; W[i + 1][1] = t5; + u = SubWord(Shift(t5, 8)) ^ rcon; rcon <<= 1; + t0 ^= u; W[i + 1][2] = t0; + t1 ^= t0; W[i + 1][3] = t1; + t2 ^= t1; W[i + 2][0] = t2; + t3 ^= t2; W[i + 2][1] = t3; + t4 ^= t3; W[i + 2][2] = t4; + t5 ^= t4; W[i + 2][3] = t5; + } + + u = SubWord(Shift(t5, 8)) ^ rcon; + t0 ^= u; W[12][0] = t0; + t1 ^= t0; W[12][1] = t1; + t2 ^= t1; W[12][2] = t2; + t3 ^= t2; W[12][3] = t3; + + break; } + case 8: + { + uint t0 = Pack.LE_To_UInt32(key, 0); W[0][0] = t0; + uint t1 = Pack.LE_To_UInt32(key, 4); W[0][1] = t1; + uint t2 = Pack.LE_To_UInt32(key, 8); W[0][2] = t2; + uint t3 = Pack.LE_To_UInt32(key, 12); W[0][3] = t3; + uint t4 = Pack.LE_To_UInt32(key, 16); W[1][0] = t4; + uint t5 = Pack.LE_To_UInt32(key, 20); W[1][1] = t5; + uint t6 = Pack.LE_To_UInt32(key, 24); W[1][2] = t6; + uint t7 = Pack.LE_To_UInt32(key, 28); W[1][3] = t7; + + uint u, rcon = 1; + + for (int i = 2; i < 14; i += 2) + { + u = SubWord(Shift(t7, 8)) ^ rcon; rcon <<= 1; + t0 ^= u; W[i ][0] = t0; + t1 ^= t0; W[i ][1] = t1; + t2 ^= t1; W[i ][2] = t2; + t3 ^= t2; W[i ][3] = t3; + u = SubWord(t3); + t4 ^= u; W[i + 1][0] = t4; + t5 ^= t4; W[i + 1][1] = t5; + t6 ^= t5; W[i + 1][2] = t6; + t7 ^= t6; W[i + 1][3] = t7; + } + + u = SubWord(Shift(t7, 8)) ^ rcon; + t0 ^= u; W[14][0] = t0; + t1 ^= t0; W[14][1] = t1; + t2 ^= t1; W[14][2] = t2; + t3 ^= t2; W[14][3] = t3; - W[i>>2][i&3] = W[(i - KC)>>2][(i-KC)&3] ^ temp; + break; + } + default: + { + throw new InvalidOperationException("Should never get here"); + } } - if (!forEncryption) + if (!forEncryption) { - for (int j = 1; j < ROUNDS; j++) + for (int j = 1; j < ROUNDS; j++) { uint[] w = W[j]; for (int i = 0; i < 4; i++) |