diff options
Diffstat (limited to 'crypto/src/crypto/engines/AesLightEngine.cs')
-rw-r--r-- | crypto/src/crypto/engines/AesLightEngine.cs | 441 |
1 files changed, 441 insertions, 0 deletions
diff --git a/crypto/src/crypto/engines/AesLightEngine.cs b/crypto/src/crypto/engines/AesLightEngine.cs new file mode 100644 index 000000000..828ceaa33 --- /dev/null +++ b/crypto/src/crypto/engines/AesLightEngine.cs @@ -0,0 +1,441 @@ +using System; +using System.Diagnostics; + +using Org.BouncyCastle.Crypto.Parameters; +using Org.BouncyCastle.Crypto.Utilities; + +namespace Org.BouncyCastle.Crypto.Engines +{ + /** + * an implementation of the AES (Rijndael), from FIPS-197. + * <p> + * For further details see: <a href="http://csrc.nist.gov/encryption/aes/">http://csrc.nist.gov/encryption/aes/</a>. + * + * This implementation is based on optimizations from Dr. Brian Gladman's paper and C code at + * <a href="http://fp.gladman.plus.com/cryptography_technology/rijndael/">http://fp.gladman.plus.com/cryptography_technology/rijndael/</a> + * + * There are three levels of tradeoff of speed vs memory + * Because java has no preprocessor, they are written as three separate classes from which to choose + * + * The fastest uses 8Kbytes of static tables to precompute round calculations, 4 256 word tables for encryption + * and 4 for decryption. + * + * The middle performance version uses only one 256 word table for each, for a total of 2Kbytes, + * adding 12 rotate operations per round to compute the values contained in the other tables from + * the contents of the first + * + * The slowest version uses no static tables at all and computes the values + * in each round. + * </p> + * <p> + * This file contains the slowest performance version with no static tables + * for round precomputation, but it has the smallest foot print. + * </p> + */ + public class AesLightEngine + : IBlockCipher + { + // The S box + private static readonly byte[] S = + { + 99, 124, 119, 123, 242, 107, 111, 197, + 48, 1, 103, 43, 254, 215, 171, 118, + 202, 130, 201, 125, 250, 89, 71, 240, + 173, 212, 162, 175, 156, 164, 114, 192, + 183, 253, 147, 38, 54, 63, 247, 204, + 52, 165, 229, 241, 113, 216, 49, 21, + 4, 199, 35, 195, 24, 150, 5, 154, + 7, 18, 128, 226, 235, 39, 178, 117, + 9, 131, 44, 26, 27, 110, 90, 160, + 82, 59, 214, 179, 41, 227, 47, 132, + 83, 209, 0, 237, 32, 252, 177, 91, + 106, 203, 190, 57, 74, 76, 88, 207, + 208, 239, 170, 251, 67, 77, 51, 133, + 69, 249, 2, 127, 80, 60, 159, 168, + 81, 163, 64, 143, 146, 157, 56, 245, + 188, 182, 218, 33, 16, 255, 243, 210, + 205, 12, 19, 236, 95, 151, 68, 23, + 196, 167, 126, 61, 100, 93, 25, 115, + 96, 129, 79, 220, 34, 42, 144, 136, + 70, 238, 184, 20, 222, 94, 11, 219, + 224, 50, 58, 10, 73, 6, 36, 92, + 194, 211, 172, 98, 145, 149, 228, 121, + 231, 200, 55, 109, 141, 213, 78, 169, + 108, 86, 244, 234, 101, 122, 174, 8, + 186, 120, 37, 46, 28, 166, 180, 198, + 232, 221, 116, 31, 75, 189, 139, 138, + 112, 62, 181, 102, 72, 3, 246, 14, + 97, 53, 87, 185, 134, 193, 29, 158, + 225, 248, 152, 17, 105, 217, 142, 148, + 155, 30, 135, 233, 206, 85, 40, 223, + 140, 161, 137, 13, 191, 230, 66, 104, + 65, 153, 45, 15, 176, 84, 187, 22, + }; + + // The inverse S-box + private static readonly byte[] Si = + { + 82, 9, 106, 213, 48, 54, 165, 56, + 191, 64, 163, 158, 129, 243, 215, 251, + 124, 227, 57, 130, 155, 47, 255, 135, + 52, 142, 67, 68, 196, 222, 233, 203, + 84, 123, 148, 50, 166, 194, 35, 61, + 238, 76, 149, 11, 66, 250, 195, 78, + 8, 46, 161, 102, 40, 217, 36, 178, + 118, 91, 162, 73, 109, 139, 209, 37, + 114, 248, 246, 100, 134, 104, 152, 22, + 212, 164, 92, 204, 93, 101, 182, 146, + 108, 112, 72, 80, 253, 237, 185, 218, + 94, 21, 70, 87, 167, 141, 157, 132, + 144, 216, 171, 0, 140, 188, 211, 10, + 247, 228, 88, 5, 184, 179, 69, 6, + 208, 44, 30, 143, 202, 63, 15, 2, + 193, 175, 189, 3, 1, 19, 138, 107, + 58, 145, 17, 65, 79, 103, 220, 234, + 151, 242, 207, 206, 240, 180, 230, 115, + 150, 172, 116, 34, 231, 173, 53, 133, + 226, 249, 55, 232, 28, 117, 223, 110, + 71, 241, 26, 113, 29, 41, 197, 137, + 111, 183, 98, 14, 170, 24, 190, 27, + 252, 86, 62, 75, 198, 210, 121, 32, + 154, 219, 192, 254, 120, 205, 90, 244, + 31, 221, 168, 51, 136, 7, 199, 49, + 177, 18, 16, 89, 39, 128, 236, 95, + 96, 81, 127, 169, 25, 181, 74, 13, + 45, 229, 122, 159, 147, 201, 156, 239, + 160, 224, 59, 77, 174, 42, 245, 176, + 200, 235, 187, 60, 131, 83, 153, 97, + 23, 43, 4, 126, 186, 119, 214, 38, + 225, 105, 20, 99, 85, 33, 12, 125, + }; + + // vector used in calculating key schedule (powers of x in GF(256)) + private static readonly byte[] rcon = + { + 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a, + 0x2f, 0x5e, 0xbc, 0x63, 0xc6, 0x97, 0x35, 0x6a, 0xd4, 0xb3, 0x7d, 0xfa, 0xef, 0xc5, 0x91 + }; + + private static uint Shift(uint r, int shift) + { + return (r >> shift) | (r << (32 - shift)); + } + + /* multiply four bytes in GF(2^8) by 'x' {02} in parallel */ + + private const uint m1 = 0x80808080; + private const uint m2 = 0x7f7f7f7f; + private const uint m3 = 0x0000001b; + + private static uint FFmulX(uint x) + { + return ((x & m2) << 1) ^ (((x & m1) >> 7) * m3); + } + + /* + The following defines provide alternative definitions of FFmulX that might + give improved performance if a fast 32-bit multiply is not available. + + private int FFmulX(int x) { int u = x & m1; u |= (u >> 1); return ((x & m2) << 1) ^ ((u >>> 3) | (u >>> 6)); } + private static final int m4 = 0x1b1b1b1b; + private int FFmulX(int x) { int u = x & m1; return ((x & m2) << 1) ^ ((u - (u >>> 7)) & m4); } + + */ + + private static uint Mcol(uint x) + { + uint f2 = FFmulX(x); + return f2 ^ Shift(x ^ f2, 8) ^ Shift(x, 16) ^ Shift(x, 24); + } + + private static uint Inv_Mcol(uint x) + { + uint f2 = FFmulX(x); + uint f4 = FFmulX(f2); + uint f8 = FFmulX(f4); + uint f9 = x ^ f8; + + return f2 ^ f4 ^ f8 ^ Shift(f2 ^ f9, 8) ^ Shift(f4 ^ f9, 16) ^ Shift(f9, 24); + } + + private static uint SubWord(uint x) + { + return (uint)S[x&255] + | (((uint)S[(x>>8)&255]) << 8) + | (((uint)S[(x>>16)&255]) << 16) + | (((uint)S[(x>>24)&255]) << 24); + } + + /** + * Calculate the necessary round keys + * The number of calculations depends on key size and block size + * AES specified a fixed block size of 128 bits and key sizes 128/192/256 bits + * This code is written assuming those are the only possible values + */ + private uint[][] GenerateWorkingKey( + byte[] key, + bool forEncryption) + { + int KC = key.Length / 4; // key length in words + int t; + + if ((KC != 4) && (KC != 6) && (KC != 8)) + throw new ArgumentException("Key length not 128/192/256 bits."); + + ROUNDS = KC + 6; // This is not always true for the generalized Rijndael that allows larger block sizes + + uint[][] W = new uint[ROUNDS + 1][]; // 4 words in a block + for (int i = 0; i <= ROUNDS; ++i) + { + W[i] = new uint[4]; + } + + // + // copy the key into the round key array + // + + t = 0; + for (int i = 0; i < key.Length; t++) + { + W[t >> 2][t & 3] = Pack.LE_To_UInt32(key, i); + i+=4; + } + + // + // while not enough round key material calculated + // calculate new values + // + int k = (ROUNDS + 1) << 2; + for (int i = KC; (i < k); i++) + { + uint temp = W[(i-1)>>2][(i-1)&3]; + if ((i % KC) == 0) + { + temp = SubWord(Shift(temp, 8)) ^ rcon[(i / KC)-1]; + } + else if ((KC > 6) && ((i % KC) == 4)) + { + temp = SubWord(temp); + } + + W[i>>2][i&3] = W[(i - KC)>>2][(i-KC)&3] ^ temp; + } + + if (!forEncryption) + { + for (int j = 1; j < ROUNDS; j++) + { + uint[] w = W[j]; + for (int i = 0; i < 4; i++) + { + w[i] = Inv_Mcol(w[i]); + } + } + } + + return W; + } + + private int ROUNDS; + private uint[][] WorkingKey; + private uint C0, C1, C2, C3; + private bool forEncryption; + + private const int BLOCK_SIZE = 16; + + /** + * default constructor - 128 bit block size. + */ + public AesLightEngine() + { + } + + /** + * initialise an AES cipher. + * + * @param forEncryption whether or not we are for encryption. + * @param parameters the parameters required to set up the cipher. + * @exception ArgumentException if the parameters argument is + * inappropriate. + */ + public void Init( + bool forEncryption, + ICipherParameters parameters) + { + KeyParameter keyParameter = parameters as KeyParameter; + + if (keyParameter == null) + throw new ArgumentException("invalid parameter passed to AES init - " + parameters.GetType().Name); + + WorkingKey = GenerateWorkingKey(keyParameter.GetKey(), forEncryption); + + this.forEncryption = forEncryption; + } + + public string AlgorithmName + { + get { return "AES"; } + } + + public bool IsPartialBlockOkay + { + get { return false; } + } + + public int GetBlockSize() + { + return BLOCK_SIZE; + } + + public int ProcessBlock( + byte[] input, + int inOff, + byte[] output, + int outOff) + { + if (WorkingKey == null) + { + throw new InvalidOperationException("AES engine not initialised"); + } + + if ((inOff + (32 / 2)) > input.Length) + { + throw new DataLengthException("input buffer too short"); + } + + if ((outOff + (32 / 2)) > output.Length) + { + throw new DataLengthException("output buffer too short"); + } + + if (forEncryption) + { + UnPackBlock(input, inOff); + EncryptBlock(); + PackBlock(output, outOff); + } + else + { + UnPackBlock(input, inOff); + DecryptBlock(); + PackBlock(output, outOff); + } + + return BLOCK_SIZE; + } + + public void Reset() + { + } + + private void UnPackBlock( + byte[] bytes, + int off) + { + C0 = Pack.LE_To_UInt32(bytes, off); + C1 = Pack.LE_To_UInt32(bytes, off + 4); + C2 = Pack.LE_To_UInt32(bytes, off + 8); + C3 = Pack.LE_To_UInt32(bytes, off + 12); + } + + private void PackBlock( + byte[] bytes, + int off) + { + Pack.UInt32_To_LE(C0, bytes, off); + Pack.UInt32_To_LE(C1, bytes, off + 4); + Pack.UInt32_To_LE(C2, bytes, off + 8); + Pack.UInt32_To_LE(C3, bytes, off + 12); + } + + private void EncryptBlock() + { + uint[][] KW = WorkingKey; + + int r = 0; + uint[] kw = KW[r]; + + C0 ^= kw[0]; + C1 ^= kw[1]; + C2 ^= kw[2]; + C3 ^= kw[3]; + + uint r0, r1, r2, r3; + + while (r < (ROUNDS - 2)) + { + kw = KW[++r]; + r0 = Mcol((uint)S[C0 & 255] ^ (((uint)S[(C1 >> 8) & 255]) << 8) ^ (((uint)S[(C2 >> 16) & 255]) << 16) ^ (((uint)S[(C3 >> 24) & 255]) << 24)) ^ kw[0]; + r1 = Mcol((uint)S[C1 & 255] ^ (((uint)S[(C2 >> 8) & 255]) << 8) ^ (((uint)S[(C3 >> 16) & 255]) << 16) ^ (((uint)S[(C0 >> 24) & 255]) << 24)) ^ kw[1]; + r2 = Mcol((uint)S[C2 & 255] ^ (((uint)S[(C3 >> 8) & 255]) << 8) ^ (((uint)S[(C0 >> 16) & 255]) << 16) ^ (((uint)S[(C1 >> 24) & 255]) << 24)) ^ kw[2]; + r3 = Mcol((uint)S[C3 & 255] ^ (((uint)S[(C0 >> 8) & 255]) << 8) ^ (((uint)S[(C1 >> 16) & 255]) << 16) ^ (((uint)S[(C2 >> 24) & 255]) << 24)) ^ kw[3]; + kw = KW[++r]; + C0 = Mcol((uint)S[r0 & 255] ^ (((uint)S[(r1 >> 8) & 255]) << 8) ^ (((uint)S[(r2 >> 16) & 255]) << 16) ^ (((uint)S[(r3 >> 24) & 255]) << 24)) ^ kw[0]; + C1 = Mcol((uint)S[r1 & 255] ^ (((uint)S[(r2 >> 8) & 255]) << 8) ^ (((uint)S[(r3 >> 16) & 255]) << 16) ^ (((uint)S[(r0 >> 24) & 255]) << 24)) ^ kw[1]; + C2 = Mcol((uint)S[r2 & 255] ^ (((uint)S[(r3 >> 8) & 255]) << 8) ^ (((uint)S[(r0 >> 16) & 255]) << 16) ^ (((uint)S[(r1 >> 24) & 255]) << 24)) ^ kw[2]; + C3 = Mcol((uint)S[r3 & 255] ^ (((uint)S[(r0 >> 8) & 255]) << 8) ^ (((uint)S[(r1 >> 16) & 255]) << 16) ^ (((uint)S[(r2 >> 24) & 255]) << 24)) ^ kw[3]; + } + + kw = KW[++r]; + r0 = Mcol((uint)S[C0 & 255] ^ (((uint)S[(C1 >> 8) & 255]) << 8) ^ (((uint)S[(C2 >> 16) & 255]) << 16) ^ (((uint)S[(C3 >> 24) & 255]) << 24)) ^ kw[0]; + r1 = Mcol((uint)S[C1 & 255] ^ (((uint)S[(C2 >> 8) & 255]) << 8) ^ (((uint)S[(C3 >> 16) & 255]) << 16) ^ (((uint)S[(C0 >> 24) & 255]) << 24)) ^ kw[1]; + r2 = Mcol((uint)S[C2 & 255] ^ (((uint)S[(C3 >> 8) & 255]) << 8) ^ (((uint)S[(C0 >> 16) & 255]) << 16) ^ (((uint)S[(C1 >> 24) & 255]) << 24)) ^ kw[2]; + r3 = Mcol((uint)S[C3 & 255] ^ (((uint)S[(C0 >> 8) & 255]) << 8) ^ (((uint)S[(C1 >> 16) & 255]) << 16) ^ (((uint)S[(C2 >> 24) & 255]) << 24)) ^ kw[3]; + + // the final round is a simple function of S + + kw = KW[++r]; + C0 = (uint)S[r0 & 255] ^ (((uint)S[(r1 >> 8) & 255]) << 8) ^ (((uint)S[(r2 >> 16) & 255]) << 16) ^ (((uint)S[(r3 >> 24) & 255]) << 24) ^ kw[0]; + C1 = (uint)S[r1 & 255] ^ (((uint)S[(r2 >> 8) & 255]) << 8) ^ (((uint)S[(r3 >> 16) & 255]) << 16) ^ (((uint)S[(r0 >> 24) & 255]) << 24) ^ kw[1]; + C2 = (uint)S[r2 & 255] ^ (((uint)S[(r3 >> 8) & 255]) << 8) ^ (((uint)S[(r0 >> 16) & 255]) << 16) ^ (((uint)S[(r1 >> 24) & 255]) << 24) ^ kw[2]; + C3 = (uint)S[r3 & 255] ^ (((uint)S[(r0 >> 8) & 255]) << 8) ^ (((uint)S[(r1 >> 16) & 255]) << 16) ^ (((uint)S[(r2 >> 24) & 255]) << 24) ^ kw[3]; + + Debug.Assert(r == ROUNDS); + } + + private void DecryptBlock() + { + uint[][] KW = WorkingKey; + + int r = ROUNDS; + uint[] kw = KW[r]; + + C0 ^= kw[0]; + C1 ^= kw[1]; + C2 ^= kw[2]; + C3 ^= kw[3]; + + uint r0, r1, r2, r3; + + while (r > 2) + { + kw = KW[--r]; + r0 = Inv_Mcol((uint)Si[C0 & 255] ^ (((uint)Si[(C3 >> 8) & 255]) << 8) ^ (((uint)Si[(C2 >> 16) & 255]) << 16) ^ ((uint)Si[(C1 >> 24) & 255] << 24)) ^ kw[0]; + r1 = Inv_Mcol((uint)Si[C1 & 255] ^ (((uint)Si[(C0 >> 8) & 255]) << 8) ^ (((uint)Si[(C3 >> 16) & 255]) << 16) ^ ((uint)Si[(C2 >> 24) & 255] << 24)) ^ kw[1]; + r2 = Inv_Mcol((uint)Si[C2 & 255] ^ (((uint)Si[(C1 >> 8) & 255]) << 8) ^ (((uint)Si[(C0 >> 16) & 255]) << 16) ^ ((uint)Si[(C3 >> 24) & 255] << 24)) ^ kw[2]; + r3 = Inv_Mcol((uint)Si[C3 & 255] ^ (((uint)Si[(C2 >> 8) & 255]) << 8) ^ (((uint)Si[(C1 >> 16) & 255]) << 16) ^ ((uint)Si[(C0 >> 24) & 255] << 24)) ^ kw[3]; + kw = KW[--r]; + C0 = Inv_Mcol((uint)Si[r0 & 255] ^ (((uint)Si[(r3 >> 8) & 255]) << 8) ^ (((uint)Si[(r2 >> 16) & 255]) << 16) ^ ((uint)Si[(r1 >> 24) & 255] << 24)) ^ kw[0]; + C1 = Inv_Mcol((uint)Si[r1 & 255] ^ (((uint)Si[(r0 >> 8) & 255]) << 8) ^ (((uint)Si[(r3 >> 16) & 255]) << 16) ^ ((uint)Si[(r2 >> 24) & 255] << 24)) ^ kw[1]; + C2 = Inv_Mcol((uint)Si[r2 & 255] ^ (((uint)Si[(r1 >> 8) & 255]) << 8) ^ (((uint)Si[(r0 >> 16) & 255]) << 16) ^ ((uint)Si[(r3 >> 24) & 255] << 24)) ^ kw[2]; + C3 = Inv_Mcol((uint)Si[r3 & 255] ^ (((uint)Si[(r2 >> 8) & 255]) << 8) ^ (((uint)Si[(r1 >> 16) & 255]) << 16) ^ ((uint)Si[(r0 >> 24) & 255] << 24)) ^ kw[3]; + } + + kw = KW[--r]; + r0 = Inv_Mcol((uint)Si[C0 & 255] ^ (((uint)Si[(C3 >> 8) & 255]) << 8) ^ (((uint)Si[(C2 >> 16) & 255]) << 16) ^ ((uint)Si[(C1 >> 24) & 255] << 24)) ^ kw[0]; + r1 = Inv_Mcol((uint)Si[C1 & 255] ^ (((uint)Si[(C0 >> 8) & 255]) << 8) ^ (((uint)Si[(C3 >> 16) & 255]) << 16) ^ ((uint)Si[(C2 >> 24) & 255] << 24)) ^ kw[1]; + r2 = Inv_Mcol((uint)Si[C2 & 255] ^ (((uint)Si[(C1 >> 8) & 255]) << 8) ^ (((uint)Si[(C0 >> 16) & 255]) << 16) ^ ((uint)Si[(C3 >> 24) & 255] << 24)) ^ kw[2]; + r3 = Inv_Mcol((uint)Si[C3 & 255] ^ (((uint)Si[(C2 >> 8) & 255]) << 8) ^ (((uint)Si[(C1 >> 16) & 255]) << 16) ^ ((uint)Si[(C0 >> 24) & 255] << 24)) ^ kw[3]; + + // the final round's table is a simple function of Si + + kw = KW[--r]; + C0 = (uint)Si[r0 & 255] ^ (((uint)Si[(r3 >> 8) & 255]) << 8) ^ (((uint)Si[(r2 >> 16) & 255]) << 16) ^ (((uint)Si[(r1 >> 24) & 255]) << 24) ^ kw[0]; + C1 = (uint)Si[r1 & 255] ^ (((uint)Si[(r0 >> 8) & 255]) << 8) ^ (((uint)Si[(r3 >> 16) & 255]) << 16) ^ (((uint)Si[(r2 >> 24) & 255]) << 24) ^ kw[1]; + C2 = (uint)Si[r2 & 255] ^ (((uint)Si[(r1 >> 8) & 255]) << 8) ^ (((uint)Si[(r0 >> 16) & 255]) << 16) ^ (((uint)Si[(r3 >> 24) & 255]) << 24) ^ kw[2]; + C3 = (uint)Si[r3 & 255] ^ (((uint)Si[(r2 >> 8) & 255]) << 8) ^ (((uint)Si[(r1 >> 16) & 255]) << 16) ^ (((uint)Si[(r0 >> 24) & 255]) << 24) ^ kw[3]; + + Debug.Assert(r == 0); + } + } +} |