summary refs log tree commit diff
diff options
context:
space:
mode:
authorPeter Dettman <peter.dettman@bouncycastle.org>2021-07-21 18:35:57 +0700
committerPeter Dettman <peter.dettman@bouncycastle.org>2021-07-21 18:35:57 +0700
commita2a91fceff2e3c01b9e5ae341908e2052cade376 (patch)
tree7dcc4aba08c0aa933d336e18e6682463aa99e3b2
parentSimplify HandleClose (diff)
downloadBouncyCastle.NET-ed25519-a2a91fceff2e3c01b9e5ae341908e2052cade376.tar.xz
AES perf. opt.
-rw-r--r--crypto/src/crypto/engines/AesEngine.cs89
-rw-r--r--crypto/src/crypto/engines/AesFastEngine.cs89
-rw-r--r--crypto/src/crypto/engines/AesLightEngine.cs89
3 files changed, 120 insertions, 147 deletions
diff --git a/crypto/src/crypto/engines/AesEngine.cs b/crypto/src/crypto/engines/AesEngine.cs
index 91bdf69ef..10c720968 100644
--- a/crypto/src/crypto/engines/AesEngine.cs
+++ b/crypto/src/crypto/engines/AesEngine.cs
@@ -426,7 +426,6 @@ namespace Org.BouncyCastle.Crypto.Engines
 
         private int ROUNDS;
         private uint[][] WorkingKey;
-        private uint C0, C1, C2, C3;
         private bool forEncryption;
 
         private byte[] s;
@@ -479,11 +478,7 @@ namespace Org.BouncyCastle.Crypto.Engines
             return BLOCK_SIZE;
         }
 
-        public virtual int ProcessBlock(
-            byte[]	input,
-            int		inOff,
-            byte[]	output,
-            int		outOff)
+        public virtual int ProcessBlock(byte[] input, int inOff, byte[] output, int outOff)
         {
             if (WorkingKey == null)
                 throw new InvalidOperationException("AES engine not initialised");
@@ -491,19 +486,15 @@ namespace Org.BouncyCastle.Crypto.Engines
             Check.DataLength(input, inOff, 16, "input buffer too short");
             Check.OutputLength(output, outOff, 16, "output buffer too short");
 
-            UnPackBlock(input, inOff);
-
             if (forEncryption)
             {
-                EncryptBlock(WorkingKey);
+                EncryptBlock(input, inOff, output, outOff, WorkingKey);
             }
             else
             {
-                DecryptBlock(WorkingKey);
+                DecryptBlock(input, inOff, output, outOff, WorkingKey);
             }
 
-            PackBlock(output, outOff);
-
             return BLOCK_SIZE;
         }
 
@@ -511,34 +502,19 @@ namespace Org.BouncyCastle.Crypto.Engines
         {
         }
 
-        private void UnPackBlock(
-            byte[]	bytes,
-            int		off)
-        {
-            C0 = Pack.LE_To_UInt32(bytes, off);
-            C1 = Pack.LE_To_UInt32(bytes, off + 4);
-            C2 = Pack.LE_To_UInt32(bytes, off + 8);
-            C3 = Pack.LE_To_UInt32(bytes, off + 12);
-        }
-
-        private void PackBlock(
-            byte[]	bytes,
-            int		off)
+        private void EncryptBlock(byte[] input, int inOff, byte[] output, int outOff, uint[][] KW)
         {
-            Pack.UInt32_To_LE(C0, bytes, off);
-            Pack.UInt32_To_LE(C1, bytes, off + 4);
-            Pack.UInt32_To_LE(C2, bytes, off + 8);
-            Pack.UInt32_To_LE(C3, bytes, off + 12);
-        }
+            uint C0 = Pack.LE_To_UInt32(input, inOff +  0);
+            uint C1 = Pack.LE_To_UInt32(input, inOff +  4);
+            uint C2 = Pack.LE_To_UInt32(input, inOff +  8);
+            uint C3 = Pack.LE_To_UInt32(input, inOff + 12);
 
-        private void EncryptBlock(uint[][] KW)
-        {
             uint[] kw = KW[0];
-            uint t0 = this.C0 ^ kw[0];
-            uint t1 = this.C1 ^ kw[1];
-            uint t2 = this.C2 ^ kw[2];
+            uint t0 = C0 ^ kw[0];
+            uint t1 = C1 ^ kw[1];
+            uint t2 = C2 ^ kw[2];
 
-            uint r0, r1, r2, r3 = this.C3 ^ kw[3];
+            uint r0, r1, r2, r3 = C3 ^ kw[3];
             int r = 1;
             while (r < ROUNDS - 1)
             {
@@ -563,20 +539,30 @@ namespace Org.BouncyCastle.Crypto.Engines
             // the final round's table is a simple function of S so we don't use a whole other four tables for it
 
             kw = KW[r];
-            this.C0 = (uint)S[r0 & 255] ^ (((uint)S[(r1 >> 8) & 255]) << 8) ^ (((uint)s[(r2 >> 16) & 255]) << 16) ^ (((uint)s[(r3 >> 24) & 255]) << 24) ^ kw[0];
-            this.C1 = (uint)s[r1 & 255] ^ (((uint)S[(r2 >> 8) & 255]) << 8) ^ (((uint)S[(r3 >> 16) & 255]) << 16) ^ (((uint)s[(r0 >> 24) & 255]) << 24) ^ kw[1];
-            this.C2 = (uint)s[r2 & 255] ^ (((uint)S[(r3 >> 8) & 255]) << 8) ^ (((uint)S[(r0 >> 16) & 255]) << 16) ^ (((uint)S[(r1 >> 24) & 255]) << 24) ^ kw[2];
-            this.C3 = (uint)s[r3 & 255] ^ (((uint)s[(r0 >> 8) & 255]) << 8) ^ (((uint)s[(r1 >> 16) & 255]) << 16) ^ (((uint)S[(r2 >> 24) & 255]) << 24) ^ kw[3];
+            C0 = (uint)S[r0 & 255] ^ (((uint)S[(r1 >> 8) & 255]) << 8) ^ (((uint)s[(r2 >> 16) & 255]) << 16) ^ (((uint)s[(r3 >> 24) & 255]) << 24) ^ kw[0];
+            C1 = (uint)s[r1 & 255] ^ (((uint)S[(r2 >> 8) & 255]) << 8) ^ (((uint)S[(r3 >> 16) & 255]) << 16) ^ (((uint)s[(r0 >> 24) & 255]) << 24) ^ kw[1];
+            C2 = (uint)s[r2 & 255] ^ (((uint)S[(r3 >> 8) & 255]) << 8) ^ (((uint)S[(r0 >> 16) & 255]) << 16) ^ (((uint)S[(r1 >> 24) & 255]) << 24) ^ kw[2];
+            C3 = (uint)s[r3 & 255] ^ (((uint)s[(r0 >> 8) & 255]) << 8) ^ (((uint)s[(r1 >> 16) & 255]) << 16) ^ (((uint)S[(r2 >> 24) & 255]) << 24) ^ kw[3];
+
+            Pack.UInt32_To_LE(C0, output, outOff +  0);
+            Pack.UInt32_To_LE(C1, output, outOff +  4);
+            Pack.UInt32_To_LE(C2, output, outOff +  8);
+            Pack.UInt32_To_LE(C3, output, outOff + 12);
         }
 
-        private void DecryptBlock(uint[][] KW)
+        private void DecryptBlock(byte[] input, int inOff, byte[] output, int outOff, uint[][] KW)
         {
+            uint C0 = Pack.LE_To_UInt32(input, inOff + 0);
+            uint C1 = Pack.LE_To_UInt32(input, inOff + 4);
+            uint C2 = Pack.LE_To_UInt32(input, inOff + 8);
+            uint C3 = Pack.LE_To_UInt32(input, inOff + 12);
+
             uint[] kw = KW[ROUNDS];
-            uint t0 = this.C0 ^ kw[0];
-            uint t1 = this.C1 ^ kw[1];
-            uint t2 = this.C2 ^ kw[2];
+            uint t0 = C0 ^ kw[0];
+            uint t1 = C1 ^ kw[1];
+            uint t2 = C2 ^ kw[2];
 
-            uint r0, r1, r2, r3 = this.C3 ^ kw[3];
+            uint r0, r1, r2, r3 = C3 ^ kw[3];
             int r = ROUNDS - 1;
             while (r > 1)
             {
@@ -601,10 +587,15 @@ namespace Org.BouncyCastle.Crypto.Engines
             // the final round's table is a simple function of Si so we don't use a whole other four tables for it
 
             kw = KW[0];
-            this.C0 = (uint)Si[r0 & 255] ^ (((uint)s[(r3 >> 8) & 255]) << 8) ^ (((uint)s[(r2 >> 16) & 255]) << 16) ^ (((uint)Si[(r1 >> 24) & 255]) << 24) ^ kw[0];
-            this.C1 = (uint)s[r1 & 255] ^ (((uint)s[(r0 >> 8) & 255]) << 8) ^ (((uint)Si[(r3 >> 16) & 255]) << 16) ^ (((uint)s[(r2 >> 24) & 255]) << 24) ^ kw[1];
-            this.C2 = (uint)s[r2 & 255] ^ (((uint)Si[(r1 >> 8) & 255]) << 8) ^ (((uint)Si[(r0 >> 16) & 255]) << 16) ^ (((uint)s[(r3 >> 24) & 255]) << 24) ^ kw[2];
-            this.C3 = (uint)Si[r3 & 255] ^ (((uint)s[(r2 >> 8) & 255]) << 8) ^ (((uint)s[(r1 >> 16) & 255]) << 16) ^ (((uint)s[(r0 >> 24) & 255]) << 24) ^ kw[3];
+            C0 = (uint)Si[r0 & 255] ^ (((uint)s[(r3 >> 8) & 255]) << 8) ^ (((uint)s[(r2 >> 16) & 255]) << 16) ^ (((uint)Si[(r1 >> 24) & 255]) << 24) ^ kw[0];
+            C1 = (uint)s[r1 & 255] ^ (((uint)s[(r0 >> 8) & 255]) << 8) ^ (((uint)Si[(r3 >> 16) & 255]) << 16) ^ (((uint)s[(r2 >> 24) & 255]) << 24) ^ kw[1];
+            C2 = (uint)s[r2 & 255] ^ (((uint)Si[(r1 >> 8) & 255]) << 8) ^ (((uint)Si[(r0 >> 16) & 255]) << 16) ^ (((uint)s[(r3 >> 24) & 255]) << 24) ^ kw[2];
+            C3 = (uint)Si[r3 & 255] ^ (((uint)s[(r2 >> 8) & 255]) << 8) ^ (((uint)s[(r1 >> 16) & 255]) << 16) ^ (((uint)s[(r0 >> 24) & 255]) << 24) ^ kw[3];
+
+            Pack.UInt32_To_LE(C0, output, outOff + 0);
+            Pack.UInt32_To_LE(C1, output, outOff + 4);
+            Pack.UInt32_To_LE(C2, output, outOff + 8);
+            Pack.UInt32_To_LE(C3, output, outOff + 12);
         }
     }
 }
diff --git a/crypto/src/crypto/engines/AesFastEngine.cs b/crypto/src/crypto/engines/AesFastEngine.cs
index 9d3a86fd2..32e1795c3 100644
--- a/crypto/src/crypto/engines/AesFastEngine.cs
+++ b/crypto/src/crypto/engines/AesFastEngine.cs
@@ -767,7 +767,6 @@ namespace Org.BouncyCastle.Crypto.Engines
 
         private int ROUNDS;
         private uint[][] WorkingKey;
-        private uint C0, C1, C2, C3;
         private bool forEncryption;
 
         private const int BLOCK_SIZE = 16;
@@ -817,11 +816,7 @@ namespace Org.BouncyCastle.Crypto.Engines
             return BLOCK_SIZE;
         }
 
-        public virtual int ProcessBlock(
-            byte[] input,
-            int inOff,
-            byte[] output,
-            int outOff)
+        public virtual int ProcessBlock(byte[] input, int inOff, byte[] output, int outOff)
         {
             if (WorkingKey == null)
                 throw new InvalidOperationException("AES engine not initialised");
@@ -829,19 +824,15 @@ namespace Org.BouncyCastle.Crypto.Engines
             Check.DataLength(input, inOff, 16, "input buffer too short");
             Check.OutputLength(output, outOff, 16, "output buffer too short");
 
-            UnPackBlock(input, inOff);
-
             if (forEncryption)
             {
-                EncryptBlock(WorkingKey);
+                EncryptBlock(input, inOff, output, outOff, WorkingKey);
             }
             else
             {
-                DecryptBlock(WorkingKey);
+                DecryptBlock(input, inOff, output, outOff, WorkingKey);
             }
 
-            PackBlock(output, outOff);
-
             return BLOCK_SIZE;
         }
 
@@ -849,34 +840,19 @@ namespace Org.BouncyCastle.Crypto.Engines
         {
         }
 
-        private void UnPackBlock(
-            byte[]	bytes,
-            int		off)
-        {
-            C0 = Pack.LE_To_UInt32(bytes, off);
-            C1 = Pack.LE_To_UInt32(bytes, off + 4);
-            C2 = Pack.LE_To_UInt32(bytes, off + 8);
-            C3 = Pack.LE_To_UInt32(bytes, off + 12);
-        }
-
-        private void PackBlock(
-            byte[]	bytes,
-            int		off)
+        private void EncryptBlock(byte[] input, int inOff, byte[] output, int outOff, uint[][] KW)
         {
-            Pack.UInt32_To_LE(C0, bytes, off);
-            Pack.UInt32_To_LE(C1, bytes, off + 4);
-            Pack.UInt32_To_LE(C2, bytes, off + 8);
-            Pack.UInt32_To_LE(C3, bytes, off + 12);
-        }
+            uint C0 = Pack.LE_To_UInt32(input, inOff + 0);
+            uint C1 = Pack.LE_To_UInt32(input, inOff + 4);
+            uint C2 = Pack.LE_To_UInt32(input, inOff + 8);
+            uint C3 = Pack.LE_To_UInt32(input, inOff + 12);
 
-        private void EncryptBlock(uint[][] KW)
-        {
             uint[] kw = KW[0];
-            uint t0 = this.C0 ^ kw[0];
-            uint t1 = this.C1 ^ kw[1];
-            uint t2 = this.C2 ^ kw[2];
+            uint t0 = C0 ^ kw[0];
+            uint t1 = C1 ^ kw[1];
+            uint t2 = C2 ^ kw[2];
 
-            uint r0, r1, r2, r3 = this.C3 ^ kw[3];
+            uint r0, r1, r2, r3 = C3 ^ kw[3];
             int r = 1;
             while (r < ROUNDS - 1)
             {
@@ -901,20 +877,30 @@ namespace Org.BouncyCastle.Crypto.Engines
             // the final round's table is a simple function of S so we don't use a whole other four tables for it
 
             kw = KW[r];
-            this.C0 = (uint)S[r0 & 255] ^ (((uint)S[(r1 >> 8) & 255]) << 8) ^ (((uint)S[(r2 >> 16) & 255]) << 16) ^ (((uint)S[r3 >> 24]) << 24) ^ kw[0];
-            this.C1 = (uint)S[r1 & 255] ^ (((uint)S[(r2 >> 8) & 255]) << 8) ^ (((uint)S[(r3 >> 16) & 255]) << 16) ^ (((uint)S[r0 >> 24]) << 24) ^ kw[1];
-            this.C2 = (uint)S[r2 & 255] ^ (((uint)S[(r3 >> 8) & 255]) << 8) ^ (((uint)S[(r0 >> 16) & 255]) << 16) ^ (((uint)S[r1 >> 24]) << 24) ^ kw[2];
-            this.C3 = (uint)S[r3 & 255] ^ (((uint)S[(r0 >> 8) & 255]) << 8) ^ (((uint)S[(r1 >> 16) & 255]) << 16) ^ (((uint)S[r2 >> 24]) << 24) ^ kw[3];
+            C0 = (uint)S[r0 & 255] ^ (((uint)S[(r1 >> 8) & 255]) << 8) ^ (((uint)S[(r2 >> 16) & 255]) << 16) ^ (((uint)S[r3 >> 24]) << 24) ^ kw[0];
+            C1 = (uint)S[r1 & 255] ^ (((uint)S[(r2 >> 8) & 255]) << 8) ^ (((uint)S[(r3 >> 16) & 255]) << 16) ^ (((uint)S[r0 >> 24]) << 24) ^ kw[1];
+            C2 = (uint)S[r2 & 255] ^ (((uint)S[(r3 >> 8) & 255]) << 8) ^ (((uint)S[(r0 >> 16) & 255]) << 16) ^ (((uint)S[r1 >> 24]) << 24) ^ kw[2];
+            C3 = (uint)S[r3 & 255] ^ (((uint)S[(r0 >> 8) & 255]) << 8) ^ (((uint)S[(r1 >> 16) & 255]) << 16) ^ (((uint)S[r2 >> 24]) << 24) ^ kw[3];
+
+            Pack.UInt32_To_LE(C0, output, outOff + 0);
+            Pack.UInt32_To_LE(C1, output, outOff + 4);
+            Pack.UInt32_To_LE(C2, output, outOff + 8);
+            Pack.UInt32_To_LE(C3, output, outOff + 12);
         }
 
-        private void DecryptBlock(uint[][] KW)
+        private void DecryptBlock(byte[] input, int inOff, byte[] output, int outOff, uint[][] KW)
         {
+            uint C0 = Pack.LE_To_UInt32(input, inOff + 0);
+            uint C1 = Pack.LE_To_UInt32(input, inOff + 4);
+            uint C2 = Pack.LE_To_UInt32(input, inOff + 8);
+            uint C3 = Pack.LE_To_UInt32(input, inOff + 12);
+
             uint[] kw = KW[ROUNDS];
-            uint t0 = this.C0 ^ kw[0];
-            uint t1 = this.C1 ^ kw[1];
-            uint t2 = this.C2 ^ kw[2];
+            uint t0 = C0 ^ kw[0];
+            uint t1 = C1 ^ kw[1];
+            uint t2 = C2 ^ kw[2];
 
-            uint r0, r1, r2, r3 = this.C3 ^ kw[3];
+            uint r0, r1, r2, r3 = C3 ^ kw[3];
             int r = ROUNDS - 1;
             while (r > 1)
             {
@@ -939,10 +925,15 @@ namespace Org.BouncyCastle.Crypto.Engines
             // the final round's table is a simple function of Si so we don't use a whole other four tables for it
 
             kw = KW[0];
-            this.C0 = (uint)Si[r0 & 255] ^ (((uint)Si[(r3 >> 8) & 255]) << 8) ^ (((uint)Si[(r2 >> 16) & 255]) << 16) ^ (((uint)Si[r1 >> 24]) << 24) ^ kw[0];
-            this.C1 = (uint)Si[r1 & 255] ^ (((uint)Si[(r0 >> 8) & 255]) << 8) ^ (((uint)Si[(r3 >> 16) & 255]) << 16) ^ (((uint)Si[r2 >> 24]) << 24) ^ kw[1];
-            this.C2 = (uint)Si[r2 & 255] ^ (((uint)Si[(r1 >> 8) & 255]) << 8) ^ (((uint)Si[(r0 >> 16) & 255]) << 16) ^ (((uint)Si[r3 >> 24]) << 24) ^ kw[2];
-            this.C3 = (uint)Si[r3 & 255] ^ (((uint)Si[(r2 >> 8) & 255]) << 8) ^ (((uint)Si[(r1 >> 16) & 255]) << 16) ^ (((uint)Si[r0 >> 24]) << 24) ^ kw[3];
+            C0 = (uint)Si[r0 & 255] ^ (((uint)Si[(r3 >> 8) & 255]) << 8) ^ (((uint)Si[(r2 >> 16) & 255]) << 16) ^ (((uint)Si[r1 >> 24]) << 24) ^ kw[0];
+            C1 = (uint)Si[r1 & 255] ^ (((uint)Si[(r0 >> 8) & 255]) << 8) ^ (((uint)Si[(r3 >> 16) & 255]) << 16) ^ (((uint)Si[r2 >> 24]) << 24) ^ kw[1];
+            C2 = (uint)Si[r2 & 255] ^ (((uint)Si[(r1 >> 8) & 255]) << 8) ^ (((uint)Si[(r0 >> 16) & 255]) << 16) ^ (((uint)Si[r3 >> 24]) << 24) ^ kw[2];
+            C3 = (uint)Si[r3 & 255] ^ (((uint)Si[(r2 >> 8) & 255]) << 8) ^ (((uint)Si[(r1 >> 16) & 255]) << 16) ^ (((uint)Si[r0 >> 24]) << 24) ^ kw[3];
+
+            Pack.UInt32_To_LE(C0, output, outOff + 0);
+            Pack.UInt32_To_LE(C1, output, outOff + 4);
+            Pack.UInt32_To_LE(C2, output, outOff + 8);
+            Pack.UInt32_To_LE(C3, output, outOff + 12);
         }
     }
 }
diff --git a/crypto/src/crypto/engines/AesLightEngine.cs b/crypto/src/crypto/engines/AesLightEngine.cs
index 9cc9c34a0..8d5a98a9f 100644
--- a/crypto/src/crypto/engines/AesLightEngine.cs
+++ b/crypto/src/crypto/engines/AesLightEngine.cs
@@ -323,7 +323,6 @@ namespace Org.BouncyCastle.Crypto.Engines
 
         private int ROUNDS;
         private uint[][] WorkingKey;
-        private uint C0, C1, C2, C3;
         private bool forEncryption;
 
         private const int BLOCK_SIZE = 16;
@@ -373,11 +372,7 @@ namespace Org.BouncyCastle.Crypto.Engines
             return BLOCK_SIZE;
         }
 
-        public virtual int ProcessBlock(
-            byte[]	input,
-            int		inOff,
-            byte[]	output,
-            int		outOff)
+        public virtual int ProcessBlock(byte[] input, int inOff, byte[] output, int outOff)
         {
             if (WorkingKey == null)
                 throw new InvalidOperationException("AES engine not initialised");
@@ -385,19 +380,15 @@ namespace Org.BouncyCastle.Crypto.Engines
             Check.DataLength(input, inOff, 16, "input buffer too short");
             Check.OutputLength(output, outOff, 16, "output buffer too short");
 
-            UnPackBlock(input, inOff);
-
             if (forEncryption)
             {
-                EncryptBlock(WorkingKey);
+                EncryptBlock(input, inOff, output, outOff, WorkingKey);
             }
             else
             {
-                DecryptBlock(WorkingKey);
+                DecryptBlock(input, inOff, output, outOff, WorkingKey);
             }
 
-            PackBlock(output, outOff);
-
             return BLOCK_SIZE;
         }
 
@@ -405,34 +396,19 @@ namespace Org.BouncyCastle.Crypto.Engines
         {
         }
 
-        private void UnPackBlock(
-            byte[]	bytes,
-            int		off)
-        {
-            C0 = Pack.LE_To_UInt32(bytes, off);
-            C1 = Pack.LE_To_UInt32(bytes, off + 4);
-            C2 = Pack.LE_To_UInt32(bytes, off + 8);
-            C3 = Pack.LE_To_UInt32(bytes, off + 12);
-        }
-
-        private void PackBlock(
-            byte[]	bytes,
-            int		off)
+        private void EncryptBlock(byte[] input, int inOff, byte[] output, int outOff, uint[][] KW)
         {
-            Pack.UInt32_To_LE(C0, bytes, off);
-            Pack.UInt32_To_LE(C1, bytes, off + 4);
-            Pack.UInt32_To_LE(C2, bytes, off + 8);
-            Pack.UInt32_To_LE(C3, bytes, off + 12);
-        }
+            uint C0 = Pack.LE_To_UInt32(input, inOff + 0);
+            uint C1 = Pack.LE_To_UInt32(input, inOff + 4);
+            uint C2 = Pack.LE_To_UInt32(input, inOff + 8);
+            uint C3 = Pack.LE_To_UInt32(input, inOff + 12);
 
-        private void EncryptBlock(uint[][] KW)
-        {
             uint[] kw = KW[0];
-            uint t0 = this.C0 ^ kw[0];
-            uint t1 = this.C1 ^ kw[1];
-            uint t2 = this.C2 ^ kw[2];
+            uint t0 = C0 ^ kw[0];
+            uint t1 = C1 ^ kw[1];
+            uint t2 = C2 ^ kw[2];
 
-            uint r0, r1, r2, r3 = this.C3 ^ kw[3];
+            uint r0, r1, r2, r3 = C3 ^ kw[3];
             int r = 1;
             while (r < ROUNDS - 1)
             {
@@ -457,20 +433,30 @@ namespace Org.BouncyCastle.Crypto.Engines
             // the final round is a simple function of S
 
             kw = KW[r];
-            this.C0 = (uint)S[r0 & 255] ^ (((uint)S[(r1 >> 8) & 255]) << 8) ^ (((uint)S[(r2 >> 16) & 255]) << 16) ^ (((uint)S[(r3 >> 24) & 255]) << 24) ^ kw[0];
-            this.C1 = (uint)S[r1 & 255] ^ (((uint)S[(r2 >> 8) & 255]) << 8) ^ (((uint)S[(r3 >> 16) & 255]) << 16) ^ (((uint)S[(r0 >> 24) & 255]) << 24) ^ kw[1];
-            this.C2 = (uint)S[r2 & 255] ^ (((uint)S[(r3 >> 8) & 255]) << 8) ^ (((uint)S[(r0 >> 16) & 255]) << 16) ^ (((uint)S[(r1 >> 24) & 255]) << 24) ^ kw[2];
-            this.C3 = (uint)S[r3 & 255] ^ (((uint)S[(r0 >> 8) & 255]) << 8) ^ (((uint)S[(r1 >> 16) & 255]) << 16) ^ (((uint)S[(r2 >> 24) & 255]) << 24) ^ kw[3];
+            C0 = (uint)S[r0 & 255] ^ (((uint)S[(r1 >> 8) & 255]) << 8) ^ (((uint)S[(r2 >> 16) & 255]) << 16) ^ (((uint)S[(r3 >> 24) & 255]) << 24) ^ kw[0];
+            C1 = (uint)S[r1 & 255] ^ (((uint)S[(r2 >> 8) & 255]) << 8) ^ (((uint)S[(r3 >> 16) & 255]) << 16) ^ (((uint)S[(r0 >> 24) & 255]) << 24) ^ kw[1];
+            C2 = (uint)S[r2 & 255] ^ (((uint)S[(r3 >> 8) & 255]) << 8) ^ (((uint)S[(r0 >> 16) & 255]) << 16) ^ (((uint)S[(r1 >> 24) & 255]) << 24) ^ kw[2];
+            C3 = (uint)S[r3 & 255] ^ (((uint)S[(r0 >> 8) & 255]) << 8) ^ (((uint)S[(r1 >> 16) & 255]) << 16) ^ (((uint)S[(r2 >> 24) & 255]) << 24) ^ kw[3];
+
+            Pack.UInt32_To_LE(C0, output, outOff + 0);
+            Pack.UInt32_To_LE(C1, output, outOff + 4);
+            Pack.UInt32_To_LE(C2, output, outOff + 8);
+            Pack.UInt32_To_LE(C3, output, outOff + 12);
         }
 
-        private void DecryptBlock(uint[][] KW)
+        private void DecryptBlock(byte[] input, int inOff, byte[] output, int outOff, uint[][] KW)
         {
+            uint C0 = Pack.LE_To_UInt32(input, inOff + 0);
+            uint C1 = Pack.LE_To_UInt32(input, inOff + 4);
+            uint C2 = Pack.LE_To_UInt32(input, inOff + 8);
+            uint C3 = Pack.LE_To_UInt32(input, inOff + 12);
+
             uint[] kw = KW[ROUNDS];
-            uint t0 = this.C0 ^ kw[0];
-            uint t1 = this.C1 ^ kw[1];
-            uint t2 = this.C2 ^ kw[2];
+            uint t0 = C0 ^ kw[0];
+            uint t1 = C1 ^ kw[1];
+            uint t2 = C2 ^ kw[2];
 
-            uint r0, r1, r2, r3 = this.C3 ^ kw[3];
+            uint r0, r1, r2, r3 = C3 ^ kw[3];
             int r = ROUNDS - 1;
             while (r > 1)
             {
@@ -495,10 +481,15 @@ namespace Org.BouncyCastle.Crypto.Engines
             // the final round's table is a simple function of Si
 
             kw = KW[0];
-            this.C0 = (uint)Si[r0 & 255] ^ (((uint)Si[(r3 >> 8) & 255]) << 8) ^ (((uint)Si[(r2 >> 16) & 255]) << 16) ^ (((uint)Si[(r1 >> 24) & 255]) << 24) ^ kw[0];
-            this.C1 = (uint)Si[r1 & 255] ^ (((uint)Si[(r0 >> 8) & 255]) << 8) ^ (((uint)Si[(r3 >> 16) & 255]) << 16) ^ (((uint)Si[(r2 >> 24) & 255]) << 24) ^ kw[1];
-            this.C2 = (uint)Si[r2 & 255] ^ (((uint)Si[(r1 >> 8) & 255]) << 8) ^ (((uint)Si[(r0 >> 16) & 255]) << 16) ^ (((uint)Si[(r3 >> 24) & 255]) << 24) ^ kw[2];
-            this.C3 = (uint)Si[r3 & 255] ^ (((uint)Si[(r2 >> 8) & 255]) << 8) ^ (((uint)Si[(r1 >> 16) & 255]) << 16) ^ (((uint)Si[(r0 >> 24) & 255]) << 24) ^ kw[3];
+            C0 = (uint)Si[r0 & 255] ^ (((uint)Si[(r3 >> 8) & 255]) << 8) ^ (((uint)Si[(r2 >> 16) & 255]) << 16) ^ (((uint)Si[(r1 >> 24) & 255]) << 24) ^ kw[0];
+            C1 = (uint)Si[r1 & 255] ^ (((uint)Si[(r0 >> 8) & 255]) << 8) ^ (((uint)Si[(r3 >> 16) & 255]) << 16) ^ (((uint)Si[(r2 >> 24) & 255]) << 24) ^ kw[1];
+            C2 = (uint)Si[r2 & 255] ^ (((uint)Si[(r1 >> 8) & 255]) << 8) ^ (((uint)Si[(r0 >> 16) & 255]) << 16) ^ (((uint)Si[(r3 >> 24) & 255]) << 24) ^ kw[2];
+            C3 = (uint)Si[r3 & 255] ^ (((uint)Si[(r2 >> 8) & 255]) << 8) ^ (((uint)Si[(r1 >> 16) & 255]) << 16) ^ (((uint)Si[(r0 >> 24) & 255]) << 24) ^ kw[3];
+
+            Pack.UInt32_To_LE(C0, output, outOff + 0);
+            Pack.UInt32_To_LE(C1, output, outOff + 4);
+            Pack.UInt32_To_LE(C2, output, outOff + 8);
+            Pack.UInt32_To_LE(C3, output, outOff + 12);
         }
     }
 }