summary refs log tree commit diff
path: root/crypto
diff options
context:
space:
mode:
author Peter Dettman <peter.dettman@bouncycastle.org> 2015-10-30 18:27:46 +0700
committer Peter Dettman <peter.dettman@bouncycastle.org> 2015-10-30 18:27:46 +0700
commit d6a972a9f64c3f671c80376e2ac5e5758b757ce4 (patch)
tree f9618e99b70e721f59431ece7a7901b7e9f016a2 /crypto
parent Rewrite (block) update for improved performance (diff)
download BouncyCastle.NET-ed25519-d6a972a9f64c3f671c80376e2ac5e5758b757ce4.tar.xz
Improve performance of AES key schedule
Diffstat (limited to 'crypto')
-rw-r--r-- crypto/src/crypto/engines/AesEngine.cs 135
-rw-r--r-- crypto/src/crypto/engines/AesFastEngine.cs 129
-rw-r--r-- crypto/src/crypto/engines/AesLightEngine.cs 139
3 files changed, 310 insertions, 93 deletions
diff --git a/crypto/src/crypto/engines/AesEngine.cs b/crypto/src/crypto/engines/AesEngine.cs
index 164c43ee9..c84f4a964 100644
--- a/crypto/src/crypto/engines/AesEngine.cs
+++ b/crypto/src/crypto/engines/AesEngine.cs
@@ -288,17 +288,14 @@ namespace Org.BouncyCastle.Crypto.Engines
         * AES specified a fixed block size of 128 bits and key sizes 128/192/256 bits
         * This code is written assuming those are the only possible values
         */
-        private uint[][] GenerateWorkingKey(
-            byte[]	key,
-            bool	forEncryption)
+        private uint[][] GenerateWorkingKey(byte[] key, bool forEncryption)
         {
-            int KC = key.Length / 4;  // key length in words
-            int t;
-
-            if ((KC != 4) && (KC != 6) && (KC != 8)) 
+            int keyLen = key.Length;
+            if (keyLen < 16 || keyLen > 32 || (keyLen & 7) != 0)
                 throw new ArgumentException("Key length not 128/192/256 bits.");
 
-            ROUNDS = KC + 6;  // This is not always true for the generalized Rijndael that allows larger block sizes
+            int KC = keyLen >> 2;
+            this.ROUNDS = KC + 6;  // This is not always true for the generalized Rijndael that allows larger block sizes
 
             uint[][] W = new uint[ROUNDS + 1][]; // 4 words in a block
             for (int i = 0; i <= ROUNDS; ++i)
@@ -306,35 +303,109 @@ namespace Org.BouncyCastle.Crypto.Engines
                 W[i] = new uint[4];
             }
 
-            //
-            // copy the key into the round key array
-            //
-
-            t = 0;
-            for (int i = 0; i < key.Length; t++)
-            {
-                W[t >> 2][t & 3] = Pack.LE_To_UInt32(key, i);
-                i+=4;
-            }
-
-            //
-            // while not enough round key material calculated
-            // calculate new values
-            //
-            int k = (ROUNDS + 1) << 2;
-            for (int i = KC; (i < k); i++)
+            switch (KC)
             {
-                uint temp = W[(i-1)>>2][(i-1)&3];
-                if ((i % KC) == 0) 
+                case 4:
                 {
-                    temp = SubWord(Shift(temp, 8)) ^ rcon[(i / KC)-1];
-                } 
-                else if ((KC > 6) && ((i % KC) == 4)) 
+                    uint t0 = Pack.LE_To_UInt32(key,  0); W[0][0] = t0;
+                    uint t1 = Pack.LE_To_UInt32(key,  4); W[0][1] = t1;
+                    uint t2 = Pack.LE_To_UInt32(key,  8); W[0][2] = t2;
+                    uint t3 = Pack.LE_To_UInt32(key, 12); W[0][3] = t3;
+
+                    for (int i = 1; i <= 10; ++i)
+                    {
+                        uint u = SubWord(Shift(t3, 8)) ^ rcon[i - 1];
+                        t0 ^= u;  W[i][0] = t0;
+                        t1 ^= t0; W[i][1] = t1;
+                        t2 ^= t1; W[i][2] = t2;
+                        t3 ^= t2; W[i][3] = t3;
+                    }
+
+                    break;
+                }
+                case 6:
                 {
-                    temp = SubWord(temp);
+                    uint t0 = Pack.LE_To_UInt32(key,  0); W[0][0] = t0;
+                    uint t1 = Pack.LE_To_UInt32(key,  4); W[0][1] = t1;
+                    uint t2 = Pack.LE_To_UInt32(key,  8); W[0][2] = t2;
+                    uint t3 = Pack.LE_To_UInt32(key, 12); W[0][3] = t3;
+                    uint t4 = Pack.LE_To_UInt32(key, 16); W[1][0] = t4;
+                    uint t5 = Pack.LE_To_UInt32(key, 20); W[1][1] = t5;
+
+                    uint rcon = 1;
+                    uint u = SubWord(Shift(t5, 8)) ^ rcon; rcon <<= 1;
+                    t0 ^= u;  W[1][2] = t0;
+                    t1 ^= t0; W[1][3] = t1;
+                    t2 ^= t1; W[2][0] = t2;
+                    t3 ^= t2; W[2][1] = t3;
+                    t4 ^= t3; W[2][2] = t4;
+                    t5 ^= t4; W[2][3] = t5;
+
+                    for (int i = 3; i < 12; i += 3)
+                    {
+                        u = SubWord(Shift(t5, 8)) ^ rcon; rcon <<= 1;
+                        t0 ^= u;  W[i    ][0] = t0;
+                        t1 ^= t0; W[i    ][1] = t1;
+                        t2 ^= t1; W[i    ][2] = t2;
+                        t3 ^= t2; W[i    ][3] = t3;
+                        t4 ^= t3; W[i + 1][0] = t4;
+                        t5 ^= t4; W[i + 1][1] = t5;
+                        u = SubWord(Shift(t5, 8)) ^ rcon; rcon <<= 1;
+                        t0 ^= u;  W[i + 1][2] = t0;
+                        t1 ^= t0; W[i + 1][3] = t1;
+                        t2 ^= t1; W[i + 2][0] = t2;
+                        t3 ^= t2; W[i + 2][1] = t3;
+                        t4 ^= t3; W[i + 2][2] = t4;
+                        t5 ^= t4; W[i + 2][3] = t5;
+                    }
+
+                    u = SubWord(Shift(t5, 8)) ^ rcon;
+                    t0 ^= u;  W[12][0] = t0;
+                    t1 ^= t0; W[12][1] = t1;
+                    t2 ^= t1; W[12][2] = t2;
+                    t3 ^= t2; W[12][3] = t3;
+
+                    break;
                 }
+                case 8:
+                {
+                    uint t0 = Pack.LE_To_UInt32(key,  0); W[0][0] = t0;
+                    uint t1 = Pack.LE_To_UInt32(key,  4); W[0][1] = t1;
+                    uint t2 = Pack.LE_To_UInt32(key,  8); W[0][2] = t2;
+                    uint t3 = Pack.LE_To_UInt32(key, 12); W[0][3] = t3;
+                    uint t4 = Pack.LE_To_UInt32(key, 16); W[1][0] = t4;
+                    uint t5 = Pack.LE_To_UInt32(key, 20); W[1][1] = t5;
+                    uint t6 = Pack.LE_To_UInt32(key, 24); W[1][2] = t6;
+                    uint t7 = Pack.LE_To_UInt32(key, 28); W[1][3] = t7;
+
+                    uint u, rcon = 1;
+
+                    for (int i = 2; i < 14; i += 2)
+                    {
+                        u = SubWord(Shift(t7, 8)) ^ rcon; rcon <<= 1;
+                        t0 ^= u;  W[i    ][0] = t0;
+                        t1 ^= t0; W[i    ][1] = t1;
+                        t2 ^= t1; W[i    ][2] = t2;
+                        t3 ^= t2; W[i    ][3] = t3;
+                        u = SubWord(t3);
+                        t4 ^= u;  W[i + 1][0] = t4;
+                        t5 ^= t4; W[i + 1][1] = t5;
+                        t6 ^= t5; W[i + 1][2] = t6;
+                        t7 ^= t6; W[i + 1][3] = t7;
+                    }
+
+                    u = SubWord(Shift(t7, 8)) ^ rcon;
+                    t0 ^= u;  W[14][0] = t0;
+                    t1 ^= t0; W[14][1] = t1;
+                    t2 ^= t1; W[14][2] = t2;
+                    t3 ^= t2; W[14][3] = t3;
 
-                W[i>>2][i&3] = W[(i - KC)>>2][(i-KC)&3] ^ temp;
+                    break;
+                }
+                default:
+                {
+                    throw new InvalidOperationException("Should never get here");
+                }
             }
 
             if (!forEncryption)
diff --git a/crypto/src/crypto/engines/AesFastEngine.cs b/crypto/src/crypto/engines/AesFastEngine.cs
index 38ce1a946..18367a324 100644
--- a/crypto/src/crypto/engines/AesFastEngine.cs
+++ b/crypto/src/crypto/engines/AesFastEngine.cs
@@ -624,16 +624,14 @@ namespace Org.BouncyCastle.Crypto.Engines
         * AES specified a fixed block size of 128 bits and key sizes 128/192/256 bits
         * This code is written assuming those are the only possible values
         */
-        private uint[][] GenerateWorkingKey(
-            byte[]	key,
-            bool	forEncryption)
+        private uint[][] GenerateWorkingKey(byte[] key, bool forEncryption)
         {
-            int KC = key.Length / 4;  // key length in words
-
-            if (((KC != 4) && (KC != 6) && (KC != 8)) || ((KC * 4) != key.Length))
+            int keyLen = key.Length;
+            if (keyLen < 16 || keyLen > 32 || (keyLen & 7) != 0)
                 throw new ArgumentException("Key length not 128/192/256 bits.");
 
-            ROUNDS = KC + 6;  // This is not always true for the generalized Rijndael that allows larger block sizes
+            int KC = keyLen >> 2;
+            this.ROUNDS = KC + 6;  // This is not always true for the generalized Rijndael that allows larger block sizes
 
             uint[][] W = new uint[ROUNDS + 1][]; // 4 words in a block
             for (int i = 0; i <= ROUNDS; ++i)
@@ -641,32 +639,109 @@ namespace Org.BouncyCastle.Crypto.Engines
                 W[i] = new uint[4];
             }
 
-            //
-            // copy the key into the round key array
-            //
-
-            int t = 0;
-            for (int i = 0; i < key.Length; t++)
+            switch (KC)
+            {
+            case 4:
             {
-                W[t >> 2][t & 3] = Pack.LE_To_UInt32(key, i);
-                i+=4;
+                uint t0 = Pack.LE_To_UInt32(key,  0); W[0][0] = t0;
+                uint t1 = Pack.LE_To_UInt32(key,  4); W[0][1] = t1;
+                uint t2 = Pack.LE_To_UInt32(key,  8); W[0][2] = t2;
+                uint t3 = Pack.LE_To_UInt32(key, 12); W[0][3] = t3;
+
+                for (int i = 1; i <= 10; ++i)
+                {
+                    uint u = SubWord(Shift(t3, 8)) ^ rcon[i - 1];
+                    t0 ^= u;  W[i][0] = t0;
+                    t1 ^= t0; W[i][1] = t1;
+                    t2 ^= t1; W[i][2] = t2;
+                    t3 ^= t2; W[i][3] = t3;
+                }
+
+                break;
             }
+            case 6:
+            {
+                uint t0 = Pack.LE_To_UInt32(key,  0); W[0][0] = t0;
+                uint t1 = Pack.LE_To_UInt32(key,  4); W[0][1] = t1;
+                uint t2 = Pack.LE_To_UInt32(key,  8); W[0][2] = t2;
+                uint t3 = Pack.LE_To_UInt32(key, 12); W[0][3] = t3;
+                uint t4 = Pack.LE_To_UInt32(key, 16); W[1][0] = t4;
+                uint t5 = Pack.LE_To_UInt32(key, 20); W[1][1] = t5;
+
+                uint rcon = 1;
+                uint u = SubWord(Shift(t5, 8)) ^ rcon; rcon <<= 1;
+                t0 ^= u;  W[1][2] = t0;
+                t1 ^= t0; W[1][3] = t1;
+                t2 ^= t1; W[2][0] = t2;
+                t3 ^= t2; W[2][1] = t3;
+                t4 ^= t3; W[2][2] = t4;
+                t5 ^= t4; W[2][3] = t5;
+
+                for (int i = 3; i < 12; i += 3)
+                {
+                    u = SubWord(Shift(t5, 8)) ^ rcon; rcon <<= 1;
+                    t0 ^= u;  W[i    ][0] = t0;
+                    t1 ^= t0; W[i    ][1] = t1;
+                    t2 ^= t1; W[i    ][2] = t2;
+                    t3 ^= t2; W[i    ][3] = t3;
+                    t4 ^= t3; W[i + 1][0] = t4;
+                    t5 ^= t4; W[i + 1][1] = t5;
+                    u = SubWord(Shift(t5, 8)) ^ rcon; rcon <<= 1;
+                    t0 ^= u;  W[i + 1][2] = t0;
+                    t1 ^= t0; W[i + 1][3] = t1;
+                    t2 ^= t1; W[i + 2][0] = t2;
+                    t3 ^= t2; W[i + 2][1] = t3;
+                    t4 ^= t3; W[i + 2][2] = t4;
+                    t5 ^= t4; W[i + 2][3] = t5;
+                }
+
+                u = SubWord(Shift(t5, 8)) ^ rcon;
+                t0 ^= u;  W[12][0] = t0;
+                t1 ^= t0; W[12][1] = t1;
+                t2 ^= t1; W[12][2] = t2;
+                t3 ^= t2; W[12][3] = t3;
 
-            //
-            // while not enough round key material calculated
-            // calculate new values
-            //
-            int k = (ROUNDS + 1) << 2;
-            for (int i = KC; (i < k); i++)
+                break;
+            }
+            case 8:
             {
-                uint temp = W[(i-1)>>2][(i-1)&3];
-                if ((i % KC) == 0) {
-                    temp = SubWord(Shift(temp, 8)) ^ rcon[(i / KC)-1];
-                } else if ((KC > 6) && ((i % KC) == 4)) {
-                    temp = SubWord(temp);
+                uint t0 = Pack.LE_To_UInt32(key,  0); W[0][0] = t0;
+                uint t1 = Pack.LE_To_UInt32(key,  4); W[0][1] = t1;
+                uint t2 = Pack.LE_To_UInt32(key,  8); W[0][2] = t2;
+                uint t3 = Pack.LE_To_UInt32(key, 12); W[0][3] = t3;
+                uint t4 = Pack.LE_To_UInt32(key, 16); W[1][0] = t4;
+                uint t5 = Pack.LE_To_UInt32(key, 20); W[1][1] = t5;
+                uint t6 = Pack.LE_To_UInt32(key, 24); W[1][2] = t6;
+                uint t7 = Pack.LE_To_UInt32(key, 28); W[1][3] = t7;
+
+                uint u, rcon = 1;
+
+                for (int i = 2; i < 14; i += 2)
+                {
+                    u = SubWord(Shift(t7, 8)) ^ rcon; rcon <<= 1;
+                    t0 ^= u;  W[i    ][0] = t0;
+                    t1 ^= t0; W[i    ][1] = t1;
+                    t2 ^= t1; W[i    ][2] = t2;
+                    t3 ^= t2; W[i    ][3] = t3;
+                    u = SubWord(t3);
+                    t4 ^= u;  W[i + 1][0] = t4;
+                    t5 ^= t4; W[i + 1][1] = t5;
+                    t6 ^= t5; W[i + 1][2] = t6;
+                    t7 ^= t6; W[i + 1][3] = t7;
                 }
 
-                W[i>>2][i&3] = W[(i - KC)>>2][(i-KC)&3] ^ temp;
+                u = SubWord(Shift(t7, 8)) ^ rcon;
+                t0 ^= u;  W[14][0] = t0;
+                t1 ^= t0; W[14][1] = t1;
+                t2 ^= t1; W[14][2] = t2;
+                t3 ^= t2; W[14][3] = t3;
+
+                break;
+            }
+            default:
+            {
+                throw new InvalidOperationException("Should never get here");
+            }
             }
 
             if (!forEncryption)
diff --git a/crypto/src/crypto/engines/AesLightEngine.cs b/crypto/src/crypto/engines/AesLightEngine.cs
index a42b34971..a48fa5857 100644
--- a/crypto/src/crypto/engines/AesLightEngine.cs
+++ b/crypto/src/crypto/engines/AesLightEngine.cs
@@ -185,17 +185,14 @@ namespace Org.BouncyCastle.Crypto.Engines
         * AES specified a fixed block size of 128 bits and key sizes 128/192/256 bits
         * This code is written assuming those are the only possible values
         */
-        private uint[][] GenerateWorkingKey(
-            byte[]	key,
-            bool	forEncryption)
+        private uint[][] GenerateWorkingKey(byte[] key, bool forEncryption)
         {
-            int KC = key.Length / 4;  // key length in words
-            int t;
-
-            if ((KC != 4) && (KC != 6) && (KC != 8))
+            int keyLen = key.Length;
+            if (keyLen < 16 || keyLen > 32 || (keyLen & 7) != 0)
                 throw new ArgumentException("Key length not 128/192/256 bits.");
 
-            ROUNDS = KC + 6;  // This is not always true for the generalized Rijndael that allows larger block sizes
+            int KC = keyLen >> 2;
+            this.ROUNDS = KC + 6;  // This is not always true for the generalized Rijndael that allows larger block sizes
 
             uint[][] W = new uint[ROUNDS + 1][]; // 4 words in a block
             for (int i = 0; i <= ROUNDS; ++i)
@@ -203,40 +200,114 @@ namespace Org.BouncyCastle.Crypto.Engines
                 W[i] = new uint[4];
             }
 
-            //
-            // copy the key into the round key array
-            //
-
-            t = 0;
-            for (int i = 0; i < key.Length; t++)
-            {
-                W[t >> 2][t & 3] = Pack.LE_To_UInt32(key, i);
-                i+=4;
-            }
-
-            //
-            // while not enough round key material calculated
-            // calculate new values
-            //
-            int k = (ROUNDS + 1) << 2;
-            for (int i = KC; (i < k); i++)
+            switch (KC)
             {
-                uint temp = W[(i-1)>>2][(i-1)&3];
-                if ((i % KC) == 0) 
+                case 4:
                 {
-                    temp = SubWord(Shift(temp, 8)) ^ rcon[(i / KC)-1];
-                } 
-                else if ((KC > 6) && ((i % KC) == 4)) 
+                    uint t0 = Pack.LE_To_UInt32(key,  0); W[0][0] = t0;
+                    uint t1 = Pack.LE_To_UInt32(key,  4); W[0][1] = t1;
+                    uint t2 = Pack.LE_To_UInt32(key,  8); W[0][2] = t2;
+                    uint t3 = Pack.LE_To_UInt32(key, 12); W[0][3] = t3;
+
+                    for (int i = 1; i <= 10; ++i)
+                    {
+                        uint u = SubWord(Shift(t3, 8)) ^ rcon[i - 1];
+                        t0 ^= u;  W[i][0] = t0;
+                        t1 ^= t0; W[i][1] = t1;
+                        t2 ^= t1; W[i][2] = t2;
+                        t3 ^= t2; W[i][3] = t3;
+                    }
+
+                    break;
+                }
+                case 6:
                 {
-                    temp = SubWord(temp);
+                    uint t0 = Pack.LE_To_UInt32(key,  0); W[0][0] = t0;
+                    uint t1 = Pack.LE_To_UInt32(key,  4); W[0][1] = t1;
+                    uint t2 = Pack.LE_To_UInt32(key,  8); W[0][2] = t2;
+                    uint t3 = Pack.LE_To_UInt32(key, 12); W[0][3] = t3;
+                    uint t4 = Pack.LE_To_UInt32(key, 16); W[1][0] = t4;
+                    uint t5 = Pack.LE_To_UInt32(key, 20); W[1][1] = t5;
+
+                    uint rcon = 1;
+                    uint u = SubWord(Shift(t5, 8)) ^ rcon; rcon <<= 1;
+                    t0 ^= u;  W[1][2] = t0;
+                    t1 ^= t0; W[1][3] = t1;
+                    t2 ^= t1; W[2][0] = t2;
+                    t3 ^= t2; W[2][1] = t3;
+                    t4 ^= t3; W[2][2] = t4;
+                    t5 ^= t4; W[2][3] = t5;
+
+                    for (int i = 3; i < 12; i += 3)
+                    {
+                        u = SubWord(Shift(t5, 8)) ^ rcon; rcon <<= 1;
+                        t0 ^= u;  W[i    ][0] = t0;
+                        t1 ^= t0; W[i    ][1] = t1;
+                        t2 ^= t1; W[i    ][2] = t2;
+                        t3 ^= t2; W[i    ][3] = t3;
+                        t4 ^= t3; W[i + 1][0] = t4;
+                        t5 ^= t4; W[i + 1][1] = t5;
+                        u = SubWord(Shift(t5, 8)) ^ rcon; rcon <<= 1;
+                        t0 ^= u;  W[i + 1][2] = t0;
+                        t1 ^= t0; W[i + 1][3] = t1;
+                        t2 ^= t1; W[i + 2][0] = t2;
+                        t3 ^= t2; W[i + 2][1] = t3;
+                        t4 ^= t3; W[i + 2][2] = t4;
+                        t5 ^= t4; W[i + 2][3] = t5;
+                    }
+
+                    u = SubWord(Shift(t5, 8)) ^ rcon;
+                    t0 ^= u;  W[12][0] = t0;
+                    t1 ^= t0; W[12][1] = t1;
+                    t2 ^= t1; W[12][2] = t2;
+                    t3 ^= t2; W[12][3] = t3;
+
+                    break;
                 }
+                case 8:
+                {
+                    uint t0 = Pack.LE_To_UInt32(key,  0); W[0][0] = t0;
+                    uint t1 = Pack.LE_To_UInt32(key,  4); W[0][1] = t1;
+                    uint t2 = Pack.LE_To_UInt32(key,  8); W[0][2] = t2;
+                    uint t3 = Pack.LE_To_UInt32(key, 12); W[0][3] = t3;
+                    uint t4 = Pack.LE_To_UInt32(key, 16); W[1][0] = t4;
+                    uint t5 = Pack.LE_To_UInt32(key, 20); W[1][1] = t5;
+                    uint t6 = Pack.LE_To_UInt32(key, 24); W[1][2] = t6;
+                    uint t7 = Pack.LE_To_UInt32(key, 28); W[1][3] = t7;
+
+                    uint u, rcon = 1;
+
+                    for (int i = 2; i < 14; i += 2)
+                    {
+                        u = SubWord(Shift(t7, 8)) ^ rcon; rcon <<= 1;
+                        t0 ^= u;  W[i    ][0] = t0;
+                        t1 ^= t0; W[i    ][1] = t1;
+                        t2 ^= t1; W[i    ][2] = t2;
+                        t3 ^= t2; W[i    ][3] = t3;
+                        u = SubWord(t3);
+                        t4 ^= u;  W[i + 1][0] = t4;
+                        t5 ^= t4; W[i + 1][1] = t5;
+                        t6 ^= t5; W[i + 1][2] = t6;
+                        t7 ^= t6; W[i + 1][3] = t7;
+                    }
+
+                    u = SubWord(Shift(t7, 8)) ^ rcon;
+                    t0 ^= u;  W[14][0] = t0;
+                    t1 ^= t0; W[14][1] = t1;
+                    t2 ^= t1; W[14][2] = t2;
+                    t3 ^= t2; W[14][3] = t3;
 
-                W[i>>2][i&3] = W[(i - KC)>>2][(i-KC)&3] ^ temp;
+                    break;
+                }
+                default:
+                {
+                    throw new InvalidOperationException("Should never get here");
+                }
             }
 
-            if (!forEncryption) 
+            if (!forEncryption)
             {
-                for (int j = 1; j < ROUNDS; j++) 
+                for (int j = 1; j < ROUNDS; j++)
                 {
                     uint[] w = W[j];
                     for (int i = 0; i < 4; i++)