summary refs log tree commit diff
diff options
context:
space:
mode:
authorPeter Dettman <peter.dettman@bouncycastle.org>2020-10-18 23:51:27 +0700
committerPeter Dettman <peter.dettman@bouncycastle.org>2020-10-18 23:51:27 +0700
commit3661f953ec2adc89301a9846540cc995045d6637 (patch)
treebef58eed83f48b6dc9cee40bd51c42ce3e8545e1
parentAdd Bits and Longs classes from bc-java (diff)
downloadBouncyCastle.NET-ed25519-3661f953ec2adc89301a9846540cc995045d6637.tar.xz
GCM updates from bc-java
-rw-r--r--crypto/BouncyCastle.Android.csproj1
-rw-r--r--crypto/BouncyCastle.csproj1
-rw-r--r--crypto/BouncyCastle.iOS.csproj1
-rw-r--r--crypto/crypto.csproj5
-rw-r--r--crypto/src/crypto/modes/GCMBlockCipher.cs5
-rw-r--r--crypto/src/crypto/modes/gcm/BasicGcmExponentiator.cs10
-rw-r--r--crypto/src/crypto/modes/gcm/BasicGcmMultiplier.cs6
-rw-r--r--crypto/src/crypto/modes/gcm/GcmUtilities.cs425
-rw-r--r--crypto/src/crypto/modes/gcm/Tables1kGcmExponentiator.cs12
-rw-r--r--crypto/src/crypto/modes/gcm/Tables4kGcmMultiplier.cs70
-rw-r--r--crypto/src/crypto/modes/gcm/Tables64kGcmMultiplier.cs78
-rw-r--r--crypto/src/crypto/modes/gcm/Tables8kGcmMultiplier.cs98
-rw-r--r--crypto/src/util/Arrays.cs51
-rw-r--r--crypto/test/src/crypto/test/GCMTest.cs4
-rw-r--r--crypto/test/src/crypto/test/GcmReorderTest.cs3
15 files changed, 515 insertions, 255 deletions
diff --git a/crypto/BouncyCastle.Android.csproj b/crypto/BouncyCastle.Android.csproj
index fe25eec6b..7b40965f7 100644
--- a/crypto/BouncyCastle.Android.csproj
+++ b/crypto/BouncyCastle.Android.csproj
@@ -912,6 +912,7 @@
     <Compile Include="src\crypto\modes\gcm\IGcmExponentiator.cs" />
     <Compile Include="src\crypto\modes\gcm\IGcmMultiplier.cs" />
     <Compile Include="src\crypto\modes\gcm\Tables1kGcmExponentiator.cs" />
+    <Compile Include="src\crypto\modes\gcm\Tables4kGcmMultiplier.cs" />
     <Compile Include="src\crypto\modes\gcm\Tables64kGcmMultiplier.cs" />
     <Compile Include="src\crypto\modes\gcm\Tables8kGcmMultiplier.cs" />
     <Compile Include="src\crypto\operators\Asn1CipherBuilder.cs" />
diff --git a/crypto/BouncyCastle.csproj b/crypto/BouncyCastle.csproj
index 315f6cd46..099c57d87 100644
--- a/crypto/BouncyCastle.csproj
+++ b/crypto/BouncyCastle.csproj
@@ -906,6 +906,7 @@
     <Compile Include="src\crypto\modes\gcm\IGcmExponentiator.cs" />
     <Compile Include="src\crypto\modes\gcm\IGcmMultiplier.cs" />
     <Compile Include="src\crypto\modes\gcm\Tables1kGcmExponentiator.cs" />
+    <Compile Include="src\crypto\modes\gcm\Tables4kGcmMultiplier.cs" />
     <Compile Include="src\crypto\modes\gcm\Tables64kGcmMultiplier.cs" />
     <Compile Include="src\crypto\modes\gcm\Tables8kGcmMultiplier.cs" />
     <Compile Include="src\crypto\operators\Asn1CipherBuilder.cs" />
diff --git a/crypto/BouncyCastle.iOS.csproj b/crypto/BouncyCastle.iOS.csproj
index b8d43cb27..95d9a0a14 100644
--- a/crypto/BouncyCastle.iOS.csproj
+++ b/crypto/BouncyCastle.iOS.csproj
@@ -907,6 +907,7 @@
     <Compile Include="src\crypto\modes\gcm\IGcmExponentiator.cs" />
     <Compile Include="src\crypto\modes\gcm\IGcmMultiplier.cs" />
     <Compile Include="src\crypto\modes\gcm\Tables1kGcmExponentiator.cs" />
+    <Compile Include="src\crypto\modes\gcm\Tables4kGcmMultiplier.cs" />
     <Compile Include="src\crypto\modes\gcm\Tables64kGcmMultiplier.cs" />
     <Compile Include="src\crypto\modes\gcm\Tables8kGcmMultiplier.cs" />
     <Compile Include="src\crypto\operators\Asn1CipherBuilder.cs" />
diff --git a/crypto/crypto.csproj b/crypto/crypto.csproj
index 797c6ae9e..f4f656036 100644
--- a/crypto/crypto.csproj
+++ b/crypto/crypto.csproj
@@ -4414,6 +4414,11 @@
                     BuildAction = "Compile"
                 />
                 <File
+                    RelPath = "src\crypto\modes\gcm\Tables4kGcmMultiplier.cs"
+                    SubType = "Code"
+                    BuildAction = "Compile"
+                />
+                <File
                     RelPath = "src\crypto\modes\gcm\Tables64kGcmMultiplier.cs"
                     SubType = "Code"
                     BuildAction = "Compile"
diff --git a/crypto/src/crypto/modes/GCMBlockCipher.cs b/crypto/src/crypto/modes/GCMBlockCipher.cs
index b5919aead..88b413fa2 100644
--- a/crypto/src/crypto/modes/GCMBlockCipher.cs
+++ b/crypto/src/crypto/modes/GCMBlockCipher.cs
@@ -59,8 +59,7 @@ namespace Org.BouncyCastle.Crypto.Modes
 
             if (m == null)
             {
-                // TODO Consider a static property specifying default multiplier
-                m = new Tables8kGcmMultiplier();
+                m = new Tables4kGcmMultiplier();
             }
 
             this.cipher = c;
@@ -444,7 +443,7 @@ namespace Org.BouncyCastle.Crypto.Modes
                 byte[] H_c = new byte[16];
                 if (exp == null)
                 {
-                    exp = new Tables1kGcmExponentiator();
+                    exp = new BasicGcmExponentiator();
                     exp.Init(H);
                 }
                 exp.ExponentiateX(c, H_c);
diff --git a/crypto/src/crypto/modes/gcm/BasicGcmExponentiator.cs b/crypto/src/crypto/modes/gcm/BasicGcmExponentiator.cs
index 5660a1f84..e7386b881 100644
--- a/crypto/src/crypto/modes/gcm/BasicGcmExponentiator.cs
+++ b/crypto/src/crypto/modes/gcm/BasicGcmExponentiator.cs
@@ -7,28 +7,28 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm
     public class BasicGcmExponentiator
         : IGcmExponentiator
     {
-        private uint[] x;
+        private ulong[] x;
 
         public void Init(byte[] x)
         {
-            this.x = GcmUtilities.AsUints(x);
+            this.x = GcmUtilities.AsUlongs(x);
         }
 
         public void ExponentiateX(long pow, byte[] output)
         {
             // Initial value is little-endian 1
-            uint[] y = GcmUtilities.OneAsUints();
+            ulong[] y = GcmUtilities.OneAsUlongs();
 
             if (pow > 0)
             {
-                uint[] powX = Arrays.Clone(x);
+                ulong[] powX = Arrays.Clone(x);
                 do
                 {
                     if ((pow & 1L) != 0)
                     {
                         GcmUtilities.Multiply(y, powX);
                     }
-                    GcmUtilities.Multiply(powX, powX);
+                    GcmUtilities.Square(powX, powX);
                     pow >>= 1;
                 }
                 while (pow > 0);
diff --git a/crypto/src/crypto/modes/gcm/BasicGcmMultiplier.cs b/crypto/src/crypto/modes/gcm/BasicGcmMultiplier.cs
index eb89383fb..644ddb1e2 100644
--- a/crypto/src/crypto/modes/gcm/BasicGcmMultiplier.cs
+++ b/crypto/src/crypto/modes/gcm/BasicGcmMultiplier.cs
@@ -5,16 +5,16 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm
     public class BasicGcmMultiplier
         : IGcmMultiplier
     {
-        private uint[] H;
+        private ulong[] H;
 
         public void Init(byte[] H)
         {
-            this.H = GcmUtilities.AsUints(H);
+            this.H = GcmUtilities.AsUlongs(H);
         }
 
         public void MultiplyH(byte[] x)
         {
-            uint[] t = GcmUtilities.AsUints(x);
+            ulong[] t = GcmUtilities.AsUlongs(x);
             GcmUtilities.Multiply(t, H);
             GcmUtilities.AsBytes(t, x);
         }
diff --git a/crypto/src/crypto/modes/gcm/GcmUtilities.cs b/crypto/src/crypto/modes/gcm/GcmUtilities.cs
index 22e0067a5..8cc3fd887 100644
--- a/crypto/src/crypto/modes/gcm/GcmUtilities.cs
+++ b/crypto/src/crypto/modes/gcm/GcmUtilities.cs
@@ -1,6 +1,7 @@
 using System;
 
 using Org.BouncyCastle.Crypto.Utilities;
+using Org.BouncyCastle.Math.Raw;
 using Org.BouncyCastle.Utilities;
 
 namespace Org.BouncyCastle.Crypto.Modes.Gcm
@@ -8,29 +9,7 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm
     internal abstract class GcmUtilities
     {
         private const uint E1 = 0xe1000000;
-        private const ulong E1L = (ulong)E1 << 32;
-
-        private static uint[] GenerateLookup()
-        {
-            uint[] lookup = new uint[256];
-
-            for (int c = 0; c < 256; ++c)
-            {
-                uint v = 0;
-                for (int i = 7; i >= 0; --i)
-                {
-                    if ((c & (1 << i)) != 0)
-                    {
-                        v ^= (E1 >> (7 - i));
-                    }
-                }
-                lookup[c] = v;
-            }
-
-            return lookup;
-        }
-
-        private static readonly uint[] LOOKUP = GenerateLookup();
+        private const ulong E1UL = (ulong)E1 << 32;
 
         internal static byte[] OneAsBytes()
         {
@@ -94,164 +73,299 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm
             return z;
         }
 
-        public static void AsUlongs(byte[] x, ulong[] z)
+        internal static void AsUlongs(byte[] x, ulong[] z)
         {
             Pack.BE_To_UInt64(x, 0, z);
         }
 
+        internal static void AsUlongs(byte[] x, ulong[] z, int zOff)
+        {
+            Pack.BE_To_UInt64(x, 0, z, zOff, 2);
+        }
+
+        internal static void Copy(uint[] x, uint[] z)
+        {
+            z[0] = x[0];
+            z[1] = x[1];
+            z[2] = x[2];
+            z[3] = x[3];
+        }
+
+        internal static void Copy(ulong[] x, ulong[] z)
+        {
+            z[0] = x[0];
+            z[1] = x[1];
+        }
+
+        internal static void Copy(ulong[] x, int xOff, ulong[] z, int zOff)
+        {
+            z[zOff + 0] = x[xOff + 0];
+            z[zOff + 1] = x[xOff + 1];
+        }
+
+        internal static void DivideP(ulong[] x, ulong[] z)
+        {
+            ulong x0 = x[0], x1 = x[1];
+            ulong m = (ulong)((long)x0 >> 63);
+            x0 ^= (m & E1UL);
+            z[0] = (x0 << 1) | (x1 >> 63);
+            z[1] = (x1 << 1) | (ulong)(-(long)m);
+        }
+
+        internal static void DivideP(ulong[] x, int xOff, ulong[] z, int zOff)
+        {
+            ulong x0 = x[xOff + 0], x1 = x[xOff + 1];
+            ulong m = (ulong)((long)x0 >> 63);
+            x0 ^= (m & E1UL);
+            z[zOff + 0] = (x0 << 1) | (x1 >> 63);
+            z[zOff + 1] = (x1 << 1) | (ulong)(-(long)m);
+        }
+
         internal static void Multiply(byte[] x, byte[] y)
         {
-            uint[] t1 = GcmUtilities.AsUints(x);
-            uint[] t2 = GcmUtilities.AsUints(y);
+            ulong[] t1 = GcmUtilities.AsUlongs(x);
+            ulong[] t2 = GcmUtilities.AsUlongs(y);
             GcmUtilities.Multiply(t1, t2);
             GcmUtilities.AsBytes(t1, x);
         }
 
         internal static void Multiply(uint[] x, uint[] y)
         {
-            uint r00 = x[0], r01 = x[1], r02 = x[2], r03 = x[3];
-            uint r10 = 0, r11 = 0, r12 = 0, r13 = 0;
+            uint y0 = y[0], y1 = y[1], y2 = y[2], y3 = y[3];
+            uint z0 = 0, z1 = 0, z2 = 0, z3 = 0;
 
             for (int i = 0; i < 4; ++i)
             {
-                int bits = (int)y[i];
+                int bits = (int)x[i];
                 for (int j = 0; j < 32; ++j)
                 {
                     uint m1 = (uint)(bits >> 31); bits <<= 1;
-                    r10 ^= (r00 & m1);
-                    r11 ^= (r01 & m1);
-                    r12 ^= (r02 & m1);
-                    r13 ^= (r03 & m1);
-
-                    uint m2 = (uint)((int)(r03 << 31) >> 8);
-                    r03 = (r03 >> 1) | (r02 << 31);
-                    r02 = (r02 >> 1) | (r01 << 31);
-                    r01 = (r01 >> 1) | (r00 << 31);
-                    r00 = (r00 >> 1) ^ (m2 & E1);
+                    z0 ^= (y0 & m1);
+                    z1 ^= (y1 & m1);
+                    z2 ^= (y2 & m1);
+                    z3 ^= (y3 & m1);
+
+                    uint m2 = (uint)((int)(y3 << 31) >> 8);
+                    y3 = (y3 >> 1) | (y2 << 31);
+                    y2 = (y2 >> 1) | (y1 << 31);
+                    y1 = (y1 >> 1) | (y0 << 31);
+                    y0 = (y0 >> 1) ^ (m2 & E1);
                 }
             }
 
-            x[0] = r10;
-            x[1] = r11;
-            x[2] = r12;
-            x[3] = r13;
+            x[0] = z0;
+            x[1] = z1;
+            x[2] = z2;
+            x[3] = z3;
         }
 
         internal static void Multiply(ulong[] x, ulong[] y)
         {
-            ulong r00 = x[0], r01 = x[1], r10 = 0, r11 = 0;
+            //ulong x0 = x[0], x1 = x[1];
+            //ulong y0 = y[0], y1 = y[1];
+            //ulong z0 = 0, z1 = 0, z2 = 0;
 
-            for (int i = 0; i < 2; ++i)
-            {
-                long bits = (long)y[i];
-                for (int j = 0; j < 64; ++j)
-                {
-                    ulong m1 = (ulong)(bits >> 63); bits <<= 1;
-                    r10 ^= (r00 & m1);
-                    r11 ^= (r01 & m1);
+            //for (int j = 0; j < 64; ++j)
+            //{
+            //    ulong m0 = (ulong)((long)x0 >> 63); x0 <<= 1;
+            //    z0 ^= (y0 & m0);
+            //    z1 ^= (y1 & m0);
 
-                    ulong m2 = (ulong)((long)(r01 << 63) >> 8);
-                    r01 = (r01 >> 1) | (r00 << 63);
-                    r00 = (r00 >> 1) ^ (m2 & E1L);
-                }
-            }
+            //    ulong m1 = (ulong)((long)x1 >> 63); x1 <<= 1;
+            //    z1 ^= (y0 & m1);
+            //    z2 ^= (y1 & m1);
+
+            //    ulong c = (ulong)((long)(y1 << 63) >> 8);
+            //    y1 = (y1 >> 1) | (y0 << 63);
+            //    y0 = (y0 >> 1) ^ (c & E1UL);
+            //}
+
+            //z0 ^= z2 ^ (z2 >>  1) ^ (z2 >>  2) ^ (z2 >>  7);
+            //z1 ^=      (z2 << 63) ^ (z2 << 62) ^ (z2 << 57);
+
+            //x[0] = z0;
+            //x[1] = z1;
+
+            /*
+             * "Three-way recursion" as described in "Batch binary Edwards", Daniel J. Bernstein.
+             *
+             * Without access to the high part of a 64x64 product x * y, we use a bit reversal to calculate it:
+             *     rev(x) * rev(y) == rev((x * y) << 1) 
+             */
 
-            x[0] = r10;
-            x[1] = r11;
+            ulong x0 = x[0], x1 = x[1];
+            ulong y0 = y[0], y1 = y[1];
+            ulong x0r = Longs.Reverse(x0), x1r = Longs.Reverse(x1);
+            ulong y0r = Longs.Reverse(y0), y1r = Longs.Reverse(y1);
+
+            ulong h0 = Longs.Reverse(ImplMul64(x0r, y0r));
+            ulong h1 = ImplMul64(x0, y0) << 1;
+            ulong h2 = Longs.Reverse(ImplMul64(x1r, y1r));
+            ulong h3 = ImplMul64(x1, y1) << 1;
+            ulong h4 = Longs.Reverse(ImplMul64(x0r ^ x1r, y0r ^ y1r));
+            ulong h5 = ImplMul64(x0 ^ x1, y0 ^ y1) << 1;
+
+            ulong z0 = h0;
+            ulong z1 = h1 ^ h0 ^ h2 ^ h4;
+            ulong z2 = h2 ^ h1 ^ h3 ^ h5;
+            ulong z3 = h3;
+
+            z1 ^= z3 ^ (z3 >>  1) ^ (z3 >>  2) ^ (z3 >>  7);
+//          z2 ^=      (z3 << 63) ^ (z3 << 62) ^ (z3 << 57);
+            z2 ^=                   (z3 << 62) ^ (z3 << 57);
+
+            z0 ^= z2 ^ (z2 >>  1) ^ (z2 >>  2) ^ (z2 >>  7);
+            z1 ^=      (z2 << 63) ^ (z2 << 62) ^ (z2 << 57);
+
+            x[0] = z0;
+            x[1] = z1;
         }
 
-        // P is the value with only bit i=1 set
         internal static void MultiplyP(uint[] x)
         {
-            uint m = (uint)((int)ShiftRight(x) >> 8);
-            x[0] ^= (m & E1);
+            uint x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3];
+            uint m = (uint)((int)(x3 << 31) >> 31);
+            x[0] = (x0 >> 1) ^ (m & E1);
+            x[1] = (x1 >> 1) | (x0 << 31);
+            x[2] = (x2 >> 1) | (x1 << 31);
+            x[3] = (x3 >> 1) | (x2 << 31);
         }
 
         internal static void MultiplyP(uint[] x, uint[] z)
         {
-            uint m = (uint)((int)ShiftRight(x, z) >> 8);
-            z[0] ^= (m & E1);
+            uint x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3];
+            uint m = (uint)((int)(x3 << 31) >> 31);
+            z[0] = (x0 >> 1) ^ (m & E1);
+            z[1] = (x1 >> 1) | (x0 << 31);
+            z[2] = (x2 >> 1) | (x1 << 31);
+            z[3] = (x3 >> 1) | (x2 << 31);
         }
 
-        internal static void MultiplyP8(uint[] x)
+        internal static void MultiplyP(ulong[] x)
         {
-//			for (int i = 8; i != 0; --i)
-//			{
-//				MultiplyP(x);
-//			}
+            ulong x0 = x[0], x1 = x[1];
+            ulong m = (ulong)((long)(x1 << 63) >> 63);
+            x[0] = (x0 >> 1) ^ (m & E1UL);
+            x[1] = (x1 >> 1) | (x0 << 63);
+        }
+
+        internal static void MultiplyP(ulong[] x, ulong[] z)
+        {
+            ulong x0 = x[0], x1 = x[1];
+            ulong m = (ulong)((long)(x1 << 63) >> 63);
+            z[0] = (x0 >> 1) ^ (m & E1UL);
+            z[1] = (x1 >> 1) | (x0 << 63);
+        }
 
-            uint c = ShiftRightN(x, 8);
-            x[0] ^= LOOKUP[c >> 24];
+        internal static void MultiplyP3(ulong[] x, ulong[] z)
+        {
+            ulong x0 = x[0], x1 = x[1];
+            ulong c = x1 << 61;
+            z[0] = (x0 >> 3) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7);
+            z[1] = (x1 >> 3) | (x0 << 61);
+        }
+
+        internal static void MultiplyP3(ulong[] x, int xOff, ulong[] z, int zOff)
+        {
+            ulong x0 = x[xOff + 0], x1 = x[xOff + 1];
+            ulong c = x1 << 61;
+            z[zOff + 0] = (x0 >> 3) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7);
+            z[zOff + 1] = (x1 >> 3) | (x0 << 61);
+        }
+
+        internal static void MultiplyP4(ulong[] x, ulong[] z)
+        {
+            ulong x0 = x[0], x1 = x[1];
+            ulong c = x1 << 60;
+            z[0] = (x0 >> 4) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7);
+            z[1] = (x1 >> 4) | (x0 << 60);
+        }
+
+        internal static void MultiplyP4(ulong[] x, int xOff, ulong[] z, int zOff)
+        {
+            ulong x0 = x[xOff + 0], x1 = x[xOff + 1];
+            ulong c = x1 << 60;
+            z[zOff + 0] = (x0 >> 4) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7);
+            z[zOff + 1] = (x1 >> 4) | (x0 << 60);
+        }
+
+        internal static void MultiplyP7(ulong[] x, ulong[] z)
+        {
+            ulong x0 = x[0], x1 = x[1];
+            ulong c = x1 << 57;
+            z[0] = (x0 >> 7) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7);
+            z[1] = (x1 >> 7) | (x0 << 57);
+        }
+
+        internal static void MultiplyP7(ulong[] x, int xOff, ulong[] z, int zOff)
+        {
+            ulong x0 = x[xOff + 0], x1 = x[xOff + 1];
+            ulong c = x1 << 57;
+            z[zOff + 0] = (x0 >> 7) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7);
+            z[zOff + 1] = (x1 >> 7) | (x0 << 57);
+        }
+
+        internal static void MultiplyP8(uint[] x)
+        {
+            uint x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3];
+            uint c = x3 << 24;
+            x[0] = (x0 >> 8) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7);
+            x[1] = (x1 >> 8) | (x0 << 24);
+            x[2] = (x2 >> 8) | (x1 << 24);
+            x[3] = (x3 >> 8) | (x2 << 24);
         }
 
         internal static void MultiplyP8(uint[] x, uint[] y)
         {
-            uint c = ShiftRightN(x, 8, y);
-            y[0] ^= LOOKUP[c >> 24];
-        }
-
-        internal static uint ShiftRight(uint[] x)
-        {
-            uint b = x[0];
-            x[0] = b >> 1;
-            uint c = b << 31;
-            b = x[1];
-            x[1] = (b >> 1) | c;
-            c = b << 31;
-            b = x[2];
-            x[2] = (b >> 1) | c;
-            c = b << 31;
-            b = x[3];
-            x[3] = (b >> 1) | c;
-            return b << 31;
-        }
-
-        internal static uint ShiftRight(uint[] x, uint[] z)
-        {
-            uint b = x[0];
-            z[0] = b >> 1;
-            uint c = b << 31;
-            b = x[1];
-            z[1] = (b >> 1) | c;
-            c = b << 31;
-            b = x[2];
-            z[2] = (b >> 1) | c;
-            c = b << 31;
-            b = x[3];
-            z[3] = (b >> 1) | c;
-            return b << 31;
-        }
-
-        internal static uint ShiftRightN(uint[] x, int n)
-        {
-            uint b = x[0]; int nInv = 32 - n;
-            x[0] = b >> n;
-            uint c = b << nInv;
-            b = x[1];
-            x[1] = (b >> n) | c;
-            c = b << nInv;
-            b = x[2];
-            x[2] = (b >> n) | c;
-            c = b << nInv;
-            b = x[3];
-            x[3] = (b >> n) | c;
-            return b << nInv;
-        }
-
-        internal static uint ShiftRightN(uint[] x, int n, uint[] z)
-        {
-            uint b = x[0]; int nInv = 32 - n;
-            z[0] = b >> n;
-            uint c = b << nInv;
-            b = x[1];
-            z[1] = (b >> n) | c;
-            c = b << nInv;
-            b = x[2];
-            z[2] = (b >> n) | c;
-            c = b << nInv;
-            b = x[3];
-            z[3] = (b >> n) | c;
-            return b << nInv;
+            uint x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3];
+            uint c = x3 << 24;
+            y[0] = (x0 >> 8) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7);
+            y[1] = (x1 >> 8) | (x0 << 24);
+            y[2] = (x2 >> 8) | (x1 << 24);
+            y[3] = (x3 >> 8) | (x2 << 24);
+        }
+
+        internal static void MultiplyP8(ulong[] x)
+        {
+            ulong x0 = x[0], x1 = x[1];
+            ulong c = x1 << 56;
+            x[0] = (x0 >> 8) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7);
+            x[1] = (x1 >> 8) | (x0 << 56);
+        }
+
+        internal static void MultiplyP8(ulong[] x, ulong[] y)
+        {
+            ulong x0 = x[0], x1 = x[1];
+            ulong c = x1 << 56;
+            y[0] = (x0 >> 8) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7);
+            y[1] = (x1 >> 8) | (x0 << 56);
+        }
+
+        internal static void MultiplyP8(ulong[] x, int xOff, ulong[] y, int yOff)
+        {
+            ulong x0 = x[xOff + 0], x1 = x[xOff + 1];
+            ulong c = x1 << 56;
+            y[yOff + 0] = (x0 >> 8) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7);
+            y[yOff + 1] = (x1 >> 8) | (x0 << 56);
+        }
+
+        internal static void Square(ulong[] x, ulong[] z)
+        {
+            ulong[] t = new ulong[4];
+            Interleave.Expand64To128Rev(x[0], t, 0);
+            Interleave.Expand64To128Rev(x[1], t, 2);
+
+            ulong z0 = t[0], z1 = t[1], z2 = t[2], z3 = t[3];
+
+            z1 ^= z3 ^ (z3 >>  1) ^ (z3 >>  2) ^ (z3 >>  7);
+//          z2 ^=      (z3 << 63) ^ (z3 << 62) ^ (z3 << 57);
+            z2 ^=                   (z3 << 62) ^ (z3 << 57);
+
+            z0 ^= z2 ^ (z2 >>  1) ^ (z2 >>  2) ^ (z2 >>  7);
+            z1 ^=      (z2 << 63) ^ (z2 << 62) ^ (z2 << 57);
+
+            z[0] = z0;
+            z[1] = z1;
         }
 
         internal static void Xor(byte[] x, byte[] y)
@@ -344,10 +458,47 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm
             x[1] ^= y[1];
         }
 
+        internal static void Xor(ulong[] x, int xOff, ulong[] y, int yOff)
+        {
+            x[xOff + 0] ^= y[yOff + 0];
+            x[xOff + 1] ^= y[yOff + 1];
+        }
+
         internal static void Xor(ulong[] x, ulong[] y, ulong[] z)
         {
             z[0] = x[0] ^ y[0];
             z[1] = x[1] ^ y[1];
         }
+
+        internal static void Xor(ulong[] x, int xOff, ulong[] y, int yOff, ulong[] z, int zOff)
+        {
+            z[zOff + 0] = x[xOff + 0] ^ y[yOff + 0];
+            z[zOff + 1] = x[xOff + 1] ^ y[yOff + 1];
+        }
+
+        private static ulong ImplMul64(ulong x, ulong y)
+        {
+            ulong x0 = x & 0x1111111111111111UL;
+            ulong x1 = x & 0x2222222222222222UL;
+            ulong x2 = x & 0x4444444444444444UL;
+            ulong x3 = x & 0x8888888888888888UL;
+
+            ulong y0 = y & 0x1111111111111111UL;
+            ulong y1 = y & 0x2222222222222222UL;
+            ulong y2 = y & 0x4444444444444444UL;
+            ulong y3 = y & 0x8888888888888888UL;
+
+            ulong z0 = (x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1);
+            ulong z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2);
+            ulong z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3);
+            ulong z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0);
+
+            z0 &= 0x1111111111111111UL;
+            z1 &= 0x2222222222222222UL;
+            z2 &= 0x4444444444444444UL;
+            z3 &= 0x8888888888888888UL;
+
+            return z0 | z1 | z2 | z3;
+        }
     }
 }
diff --git a/crypto/src/crypto/modes/gcm/Tables1kGcmExponentiator.cs b/crypto/src/crypto/modes/gcm/Tables1kGcmExponentiator.cs
index e649d6770..3667241e3 100644
--- a/crypto/src/crypto/modes/gcm/Tables1kGcmExponentiator.cs
+++ b/crypto/src/crypto/modes/gcm/Tables1kGcmExponentiator.cs
@@ -14,8 +14,8 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm
 
         public void Init(byte[] x)
         {
-            uint[] y = GcmUtilities.AsUints(x);
-            if (lookupPowX2 != null && Arrays.AreEqual(y, (uint[])lookupPowX2[0]))
+            ulong[] y = GcmUtilities.AsUlongs(x);
+            if (lookupPowX2 != null && Arrays.AreEqual(y, (ulong[])lookupPowX2[0]))
                 return;
 
             lookupPowX2 = Platform.CreateArrayList(8);
@@ -24,14 +24,14 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm
 
         public void ExponentiateX(long pow, byte[] output)
         {
-            uint[] y = GcmUtilities.OneAsUints();
+            ulong[] y = GcmUtilities.OneAsUlongs();
             int bit = 0;
             while (pow > 0)
             {
                 if ((pow & 1L) != 0)
                 {
                     EnsureAvailable(bit);
-                    GcmUtilities.Multiply(y, (uint[])lookupPowX2[bit]);
+                    GcmUtilities.Multiply(y, (ulong[])lookupPowX2[bit]);
                 }
                 ++bit;
                 pow >>= 1;
@@ -45,11 +45,11 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm
             int count = lookupPowX2.Count;
             if (count <= bit)
             {
-                uint[] tmp = (uint[])lookupPowX2[count - 1];
+                ulong[] tmp = (ulong[])lookupPowX2[count - 1];
                 do
                 {
                     tmp = Arrays.Clone(tmp);
-                    GcmUtilities.Multiply(tmp, tmp);
+                    GcmUtilities.Square(tmp, tmp);
                     lookupPowX2.Add(tmp);
                 }
                 while (++count <= bit);
diff --git a/crypto/src/crypto/modes/gcm/Tables4kGcmMultiplier.cs b/crypto/src/crypto/modes/gcm/Tables4kGcmMultiplier.cs
new file mode 100644
index 000000000..3ed596e94
--- /dev/null
+++ b/crypto/src/crypto/modes/gcm/Tables4kGcmMultiplier.cs
@@ -0,0 +1,70 @@
+using System;
+
+using Org.BouncyCastle.Crypto.Utilities;
+using Org.BouncyCastle.Utilities;
+
+namespace Org.BouncyCastle.Crypto.Modes.Gcm
+{
+    public class Tables4kGcmMultiplier
+        : IGcmMultiplier
+    {
+        private byte[] H;
+        private ulong[] T;
+
+        public void Init(byte[] H)
+        {
+            if (T == null)
+            {
+                T = new ulong[512];
+            }
+            else if (Arrays.AreEqual(this.H, H))
+            {
+                return;
+            }
+
+            this.H = Arrays.Clone(H);
+
+            // T[0] = 0
+
+            // T[1] = H.p^7
+            GcmUtilities.AsUlongs(this.H, T, 2);
+            GcmUtilities.MultiplyP7(T, 2, T, 2);
+
+            for (int n = 2; n < 256; n += 2)
+            {
+                // T[2.n] = T[n].p^-1
+                GcmUtilities.DivideP(T, n, T, n << 1);
+
+                // T[2.n + 1] = T[2.n] + T[1]
+                GcmUtilities.Xor(T, n << 1, T, 2, T, (n + 1) << 1);
+            }
+        }
+
+        public void MultiplyH(byte[] x)
+        {
+            //ulong[] z = new ulong[2];
+            //GcmUtilities.Copy(T, x[15] << 1, z, 0);
+            //for (int i = 14; i >= 0; --i)
+            //{
+            //    GcmUtilities.MultiplyP8(z);
+            //    GcmUtilities.Xor(z, 0, T, x[i] << 1);
+            //}
+            //Pack.UInt64_To_BE(z, x, 0);
+
+            int pos = x[15] << 1;
+            ulong z0 = T[pos + 0], z1 = T[pos + 1];
+
+            for (int i = 14; i >= 0; --i)
+            {
+                pos = x[i] << 1;
+
+                ulong c = z1 << 56;
+                z1 = T[pos + 1] ^ ((z1 >> 8) | (z0 << 56));
+                z0 = T[pos + 0] ^ (z0 >> 8) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7);
+            }
+
+            Pack.UInt64_To_BE(z0, x, 0);
+            Pack.UInt64_To_BE(z1, x, 8);
+        }
+    }
+}
diff --git a/crypto/src/crypto/modes/gcm/Tables64kGcmMultiplier.cs b/crypto/src/crypto/modes/gcm/Tables64kGcmMultiplier.cs
index 707b0be2e..0c15ff6b6 100644
--- a/crypto/src/crypto/modes/gcm/Tables64kGcmMultiplier.cs
+++ b/crypto/src/crypto/modes/gcm/Tables64kGcmMultiplier.cs
@@ -9,13 +9,13 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm
         : IGcmMultiplier
     {
         private byte[] H;
-        private uint[][][] M;
+        private ulong[][] T;
 
         public void Init(byte[] H)
         {
-            if (M == null)
+            if (T == null)
             {
-                M = new uint[16][][];
+                T = new ulong[16][];
             }
             else if (Arrays.AreEqual(this.H, H))
             {
@@ -24,54 +24,58 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm
 
             this.H = Arrays.Clone(H);
 
-            M[0] = new uint[256][];
-            M[0][0] = new uint[4];
-            M[0][128] = GcmUtilities.AsUints(H);
-            for (int j = 64; j >= 1; j >>= 1)
+            for (int i = 0; i < 16; ++i)
             {
-                uint[] tmp = (uint[])M[0][j + j].Clone();
-                GcmUtilities.MultiplyP(tmp);
-                M[0][j] = tmp;
-            }
-            for (int i = 0; ; )
-            {
-                for (int j = 2; j < 256; j += j)
+                ulong[] t = T[i] = new ulong[512];
+
+                // t[0] = 0
+
+                if (i == 0)
                 {
-                    for (int k = 1; k < j; ++k)
-                    {
-                        uint[] tmp = (uint[])M[i][j].Clone();
-                        GcmUtilities.Xor(tmp, M[i][k]);
-                        M[i][j + k] = tmp;
-                    }
+                    // t[1] = H.p^7
+                    GcmUtilities.AsUlongs(this.H, t, 2);
+                    GcmUtilities.MultiplyP7(t, 2, t, 2);
+                }
+                else
+                {
+                    // t[1] = T[i-1][1].p^8
+                    GcmUtilities.MultiplyP8(T[i - 1], 2, t, 2);
                 }
 
-                if (++i == 16) return;
-
-                M[i] = new uint[256][];
-                M[i][0] = new uint[4];
-                for (int j = 128; j > 0; j >>= 1)
+                for (int n = 2; n < 256; n += 2)
                 {
-                    uint[] tmp = (uint[])M[i - 1][j].Clone();
-                    GcmUtilities.MultiplyP8(tmp);
-                    M[i][j] = tmp;
+                    // t[2.n] = t[n].p^-1
+                    GcmUtilities.DivideP(t, n, t, n << 1);
+
+                    // t[2.n + 1] = t[2.n] + t[1]
+                    GcmUtilities.Xor(t, n << 1, t, 2, t, (n + 1) << 1);
                 }
             }
         }
 
         public void MultiplyH(byte[] x)
         {
-            uint[] z = new uint[4];
-            for (int i = 0; i != 16; ++i)
+            //ulong[] z = new ulong[2];
+            //for (int i = 15; i >= 0; --i)
+            //{
+            //    GcmUtilities.Xor(z, 0, T[i], x[i] << 1);
+            //}
+            //Pack.UInt64_To_BE(z, x, 0);
+
+            ulong[] t = T[15];
+            int tPos = x[15] << 1;
+            ulong z0 = t[tPos + 0], z1 = t[tPos + 1];
+
+            for (int i = 14; i >= 0; --i)
             {
-                //GcmUtilities.Xor(z, M[i][x[i]]);
-                uint[] m = M[i][x[i]];
-                z[0] ^= m[0];
-                z[1] ^= m[1];
-                z[2] ^= m[2];
-                z[3] ^= m[3];
+                t = T[i];
+                tPos = x[i] << 1;
+                z0 ^= t[tPos + 0];
+                z1 ^= t[tPos + 1];
             }
 
-            Pack.UInt32_To_BE(z, x, 0);
+            Pack.UInt64_To_BE(z0, x, 0);
+            Pack.UInt64_To_BE(z1, x, 8);
         }
     }
 }
diff --git a/crypto/src/crypto/modes/gcm/Tables8kGcmMultiplier.cs b/crypto/src/crypto/modes/gcm/Tables8kGcmMultiplier.cs
index 5f3d6c86c..81f60bb2f 100644
--- a/crypto/src/crypto/modes/gcm/Tables8kGcmMultiplier.cs
+++ b/crypto/src/crypto/modes/gcm/Tables8kGcmMultiplier.cs
@@ -9,13 +9,13 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm
         : IGcmMultiplier
     {
         private byte[] H;
-        private uint[][][] M;
+        private ulong[][] T;
 
         public void Init(byte[] H)
         {
-            if (M == null)
+            if (T == null)
             {
-                M = new uint[32][][];
+                T = new ulong[32][];
             }
             else if (Arrays.AreEqual(this.H, H))
             {
@@ -24,80 +24,58 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm
 
             this.H = Arrays.Clone(H);
 
-            M[0] = new uint[16][];
-            M[1] = new uint[16][];
-            M[0][0] = new uint[4];
-            M[1][0] = new uint[4];
-            M[1][8] = GcmUtilities.AsUints(H);
-
-            for (int j = 4; j >= 1; j >>= 1)
-            {
-                uint[] tmp = (uint[])M[1][j + j].Clone();
-                GcmUtilities.MultiplyP(tmp);
-                M[1][j] = tmp;
-            }
-
+            for (int i = 0; i < 32; ++i)
             {
-                uint[] tmp = (uint[])M[1][1].Clone();
-                GcmUtilities.MultiplyP(tmp);
-                M[0][8] = tmp;
-            }
+                ulong[] t = T[i] = new ulong[32];
 
-            for (int j = 4; j >= 1; j >>= 1)
-            {
-                uint[] tmp = (uint[])M[0][j + j].Clone();
-                GcmUtilities.MultiplyP(tmp);
-                M[0][j] = tmp;
-            }
+                // t[0] = 0
 
-            for (int i = 0; ; )
-            {
-                for (int j = 2; j < 16; j += j)
+                if (i == 0)
                 {
-                    for (int k = 1; k < j; ++k)
-                    {
-                        uint[] tmp = (uint[])M[i][j].Clone();
-                        GcmUtilities.Xor(tmp, M[i][k]);
-                        M[i][j + k] = tmp;
-                    }
+                    // t[1] = H.p^3
+                    GcmUtilities.AsUlongs(this.H, t, 2);
+                    GcmUtilities.MultiplyP3(t, 2, t, 2);
+                }
+                else
+                {
+                    // t[1] = T[i-1][1].p^4
+                    GcmUtilities.MultiplyP4(T[i - 1], 2, t, 2);
                 }
 
-                if (++i == 32) return;
-
-                if (i > 1)
+                for (int n = 2; n < 16; n += 2)
                 {
-                    M[i] = new uint[16][];
-                    M[i][0] = new uint[4];
-                    for (int j = 8; j > 0; j >>= 1)
-                    {
-                        uint[] tmp = (uint[])M[i - 2][j].Clone();
-                        GcmUtilities.MultiplyP8(tmp);
-                        M[i][j] = tmp;
-                    }
+                    // t[2.n] = t[n].p^-1
+                    GcmUtilities.DivideP(t, n, t, n << 1);
+
+                    // t[2.n + 1] = t[2.n] + t[1]
+                    GcmUtilities.Xor(t, n << 1, t, 2, t, (n + 1) << 1);
                 }
             }
         }
 
         public void MultiplyH(byte[] x)
         {
-            uint[] z = new uint[4];
+            //ulong[] z = new ulong[2];
+            //for (int i = 15; i >= 0; --i)
+            //{
+            //    GcmUtilities.Xor(z, 0, T[i + i + 1], (x[i] & 0x0F) << 1);
+            //    GcmUtilities.Xor(z, 0, T[i + i], (x[i] & 0xF0) >> 3);
+            //}
+            //Pack.UInt64_To_BE(z, x, 0);
+
+            ulong z0 = 0, z1 = 0;
+
             for (int i = 15; i >= 0; --i)
             {
-                //GcmUtilities.Xor(z, M[i + i][x[i] & 0x0f]);
-                uint[] m = M[i + i][x[i] & 0x0f];
-                z[0] ^= m[0];
-                z[1] ^= m[1];
-                z[2] ^= m[2];
-                z[3] ^= m[3];
-                //GcmUtilities.Xor(z, M[i + i + 1][(x[i] & 0xf0) >> 4]);
-                m = M[i + i + 1][(x[i] & 0xf0) >> 4];
-                z[0] ^= m[0];
-                z[1] ^= m[1];
-                z[2] ^= m[2];
-                z[3] ^= m[3];
+                ulong[] tu = T[i + i + 1], tv = T[i + i];
+                int uPos = (x[i] & 0x0F) << 1, vPos = (x[i] & 0xF0) >> 3;
+
+                z0 ^= tu[uPos + 0] ^ tv[vPos + 0];
+                z1 ^= tu[uPos + 1] ^ tv[vPos + 1];
             }
 
-            Pack.UInt32_To_BE(z, x, 0);
+            Pack.UInt64_To_BE(z0, x, 0);
+            Pack.UInt64_To_BE(z1, x, 8);
         }
     }
 }
diff --git a/crypto/src/util/Arrays.cs b/crypto/src/util/Arrays.cs
index 03b451ef9..57d79c0bb 100644
--- a/crypto/src/util/Arrays.cs
+++ b/crypto/src/util/Arrays.cs
@@ -162,6 +162,29 @@ namespace Org.BouncyCastle.Utilities
             return HaveSameContents(a, b);
         }
 
+        public static bool AreEqual(long[] a, long[] b)
+        {
+            if (a == b)
+                return true;
+
+            if (a == null || b == null)
+                return false;
+
+            return HaveSameContents(a, b);
+        }
+
+        [CLSCompliantAttribute(false)]
+        public static bool AreEqual(ulong[] a, ulong[] b)
+        {
+            if (a == b)
+                return true;
+
+            if (a == null || b == null)
+                return false;
+
+            return HaveSameContents(a, b);
+        }
+
         private static bool HaveSameContents(
             bool[] a,
             bool[] b)
@@ -240,6 +263,34 @@ namespace Org.BouncyCastle.Utilities
             return true;
         }
 
+        private static bool HaveSameContents(long[] a, long[] b)
+        {
+            int i = a.Length;
+            if (i != b.Length)
+                return false;
+            while (i != 0)
+            {
+                --i;
+                if (a[i] != b[i])
+                    return false;
+            }
+            return true;
+        }
+
+        private static bool HaveSameContents(ulong[] a, ulong[] b)
+        {
+            int i = a.Length;
+            if (i != b.Length)
+                return false;
+            while (i != 0)
+            {
+                --i;
+                if (a[i] != b[i])
+                    return false;
+            }
+            return true;
+        }
+
         public static string ToString(
             object[] a)
         {
diff --git a/crypto/test/src/crypto/test/GCMTest.cs b/crypto/test/src/crypto/test/GCMTest.cs
index 2ab5c6216..a225708ef 100644
--- a/crypto/test/src/crypto/test/GCMTest.cs
+++ b/crypto/test/src/crypto/test/GCMTest.cs
@@ -1,9 +1,7 @@
 using System;
-using System.Text;
 
 using NUnit.Framework;
 
-using Org.BouncyCastle.Crypto;
 using Org.BouncyCastle.Crypto.Engines;
 using Org.BouncyCastle.Crypto.Modes;
 using Org.BouncyCastle.Crypto.Modes.Gcm;
@@ -418,6 +416,7 @@ namespace Org.BouncyCastle.Crypto.Tests
             RunTestCase(null, null, testName, K, IV, A, P, C, T);
 
             RunTestCase(new BasicGcmMultiplier(), new BasicGcmMultiplier(), testName, K, IV, A, P, C, T);
+            RunTestCase(new Tables4kGcmMultiplier(), new Tables4kGcmMultiplier(), testName, K, IV, A, P, C, T);
             RunTestCase(new Tables8kGcmMultiplier(), new Tables8kGcmMultiplier(), testName, K, IV, A, P, C, T);
             RunTestCase(new Tables64kGcmMultiplier(), new Tables64kGcmMultiplier(), testName, K, IV, A, P, C, T);
         }
@@ -555,6 +554,7 @@ namespace Org.BouncyCastle.Crypto.Tests
             srng.SetSeed(DateTimeUtilities.CurrentUnixMs());
             RandomTests(srng, null);
             RandomTests(srng, new BasicGcmMultiplier());
+            RandomTests(srng, new Tables4kGcmMultiplier());
             RandomTests(srng, new Tables8kGcmMultiplier());
             RandomTests(srng, new Tables64kGcmMultiplier());
         }
diff --git a/crypto/test/src/crypto/test/GcmReorderTest.cs b/crypto/test/src/crypto/test/GcmReorderTest.cs
index 9694216f2..5d4bafb51 100644
--- a/crypto/test/src/crypto/test/GcmReorderTest.cs
+++ b/crypto/test/src/crypto/test/GcmReorderTest.cs
@@ -2,7 +2,6 @@ using System;
 
 using NUnit.Framework;
 
-using Org.BouncyCastle.Crypto.Modes;
 using Org.BouncyCastle.Crypto.Modes.Gcm;
 using Org.BouncyCastle.Security;
 using Org.BouncyCastle.Utilities;
@@ -15,7 +14,7 @@ namespace Org.BouncyCastle.Crypto.Tests
 	{
 		private static readonly byte[] H;
 		private static readonly SecureRandom random = new SecureRandom(); 
-		private static readonly IGcmMultiplier mul = new Tables64kGcmMultiplier();
+		private static readonly IGcmMultiplier mul = new Tables4kGcmMultiplier();
 		private static readonly IGcmExponentiator exp = new Tables1kGcmExponentiator();
 		private static readonly byte[] Empty = new byte[0];