From 38ec124b3124bfb29a82fef7ff2c54d77d05d85b Mon Sep 17 00:00:00 2001 From: Peter Dettman Date: Tue, 8 Feb 2022 23:10:27 +0700 Subject: GCM perf. opts. --- crypto/src/crypto/modes/gcm/GcmUtilities.cs | 70 ++++++++++++++++++++++ .../src/crypto/modes/gcm/Tables4kGcmMultiplier.cs | 33 +++++----- .../src/crypto/modes/gcm/Tables64kGcmMultiplier.cs | 38 ++++++------ .../src/crypto/modes/gcm/Tables8kGcmMultiplier.cs | 49 +++++++-------- 4 files changed, 130 insertions(+), 60 deletions(-) diff --git a/crypto/src/crypto/modes/gcm/GcmUtilities.cs b/crypto/src/crypto/modes/gcm/GcmUtilities.cs index 1a2e0bc1e..b5c3d0908 100644 --- a/crypto/src/crypto/modes/gcm/GcmUtilities.cs +++ b/crypto/src/crypto/modes/gcm/GcmUtilities.cs @@ -8,6 +8,11 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm { internal abstract class GcmUtilities { + internal struct FieldElement + { + internal ulong n0, n1; + } + private const uint E1 = 0xe1000000; private const ulong E1UL = (ulong)E1 << 32; @@ -54,6 +59,18 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm Pack.UInt64_To_BE(x, z, 0); } + internal static void AsBytes(ref FieldElement x, byte[] z) + { + Pack.UInt64_To_BE(x.n0, z, 0); + Pack.UInt64_To_BE(x.n1, z, 8); + } + + internal static void AsFieldElement(byte[] x, out FieldElement z) + { + z.n0 = Pack.BE_To_UInt64(x, 0); + z.n1 = Pack.BE_To_UInt64(x, 8); + } + internal static uint[] AsUints(byte[] bs) { uint[] output = new uint[4]; @@ -121,6 +138,15 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm z[zOff + 1] = (x1 << 1) | (ulong)(-(long)m); } + internal static void DivideP(ref FieldElement x, out FieldElement z) + { + ulong x0 = x.n0, x1 = x.n1; + ulong m = (ulong)((long)x0 >> 63); + x0 ^= (m & E1UL); + z.n0 = (x0 << 1) | (x1 >> 63); + z.n1 = (x1 << 1) | (ulong)(-(long)m); + } + internal static void Multiply(byte[] x, byte[] y) { ulong[] t1 = AsUlongs(x); @@ -343,6 +369,14 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm z[zOff + 1] = (x1 >> 7) | (x0 << 57); } + internal static void MultiplyP7(ref FieldElement x) + { + ulong x0 = x.n0, x1 = x.n1; + ulong c = x1 << 57; + x.n0 = (x0 >> 7) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7); + x.n1 = (x1 >> 7) | (x0 << 57); + } + internal static void MultiplyP8(uint[] x) { uint x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3]; @@ -387,6 +421,22 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm y[yOff + 1] = (x1 >> 8) | (x0 << 56); } + internal static void MultiplyP8(ref FieldElement x) + { + ulong x0 = x.n0, x1 = x.n1; + ulong c = x1 << 56; + x.n0 = (x0 >> 8) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7); + x.n1 = (x1 >> 8) | (x0 << 56); + } + + internal static void MultiplyP8(ref FieldElement x, out FieldElement y) + { + ulong x0 = x.n0, x1 = x.n1; + ulong c = x1 << 56; + y.n0 = (x0 >> 8) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7); + y.n1 = (x1 >> 8) | (x0 << 56); + } + internal static void MultiplyP16(ulong[] x) { ulong x0 = x[0], x1 = x[1]; @@ -395,6 +445,14 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm x[1] = (x1 >> 16) | (x0 << 48); } + internal static void MultiplyP16(ref FieldElement x) + { + ulong x0 = x.n0, x1 = x.n1; + ulong c = x1 << 48; + x.n0 = (x0 >> 16) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7); + x.n1 = (x1 >> 16) | (x0 << 48); + } + internal static void Square(ulong[] x, ulong[] z) { ulong[] t = new ulong[4]; @@ -522,6 +580,18 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm z[zOff + 1] = x[xOff + 1] ^ y[yOff + 1]; } + internal static void Xor(ref FieldElement x, ref FieldElement y, out FieldElement z) + { + z.n0 = x.n0 ^ y.n0; + z.n1 = x.n1 ^ y.n1; + } + + internal static void Xor(ref FieldElement x, ref FieldElement y) + { + x.n0 ^= y.n0; + x.n1 ^= y.n1; + } + private static ulong ImplMul64(ulong x, ulong y) { ulong x0 = x & 0x1111111111111111UL; diff --git a/crypto/src/crypto/modes/gcm/Tables4kGcmMultiplier.cs b/crypto/src/crypto/modes/gcm/Tables4kGcmMultiplier.cs index 3ed596e94..7867a0b99 100644 --- a/crypto/src/crypto/modes/gcm/Tables4kGcmMultiplier.cs +++ b/crypto/src/crypto/modes/gcm/Tables4kGcmMultiplier.cs @@ -9,13 +9,13 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm : IGcmMultiplier { private byte[] H; - private ulong[] T; + private GcmUtilities.FieldElement[] T; public void Init(byte[] H) { if (T == null) { - T = new ulong[512]; + T = new GcmUtilities.FieldElement[256]; } else if (Arrays.AreEqual(this.H, H)) { @@ -27,40 +27,39 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm // T[0] = 0 // T[1] = H.p^7 - GcmUtilities.AsUlongs(this.H, T, 2); - GcmUtilities.MultiplyP7(T, 2, T, 2); + GcmUtilities.AsFieldElement(this.H, out T[1]); + GcmUtilities.MultiplyP7(ref T[1]); - for (int n = 2; n < 256; n += 2) + for (int n = 1; n < 128; ++n) { // T[2.n] = T[n].p^-1 - GcmUtilities.DivideP(T, n, T, n << 1); + GcmUtilities.DivideP(ref T[n], out T[n << 1]); // T[2.n + 1] = T[2.n] + T[1] - GcmUtilities.Xor(T, n << 1, T, 2, T, (n + 1) << 1); + GcmUtilities.Xor(ref T[n << 1], ref T[1], out T[(n << 1) + 1]); } } public void MultiplyH(byte[] x) { - //ulong[] z = new ulong[2]; - //GcmUtilities.Copy(T, x[15] << 1, z, 0); + //GcmUtilities.FieldElement z = T[x[15]]; //for (int i = 14; i >= 0; --i) //{ - // GcmUtilities.MultiplyP8(z); - // GcmUtilities.Xor(z, 0, T, x[i] << 1); + // GcmUtilities.MultiplyP8(ref z); + // GcmUtilities.Xor(ref z, ref T[x[i]]); //} - //Pack.UInt64_To_BE(z, x, 0); + //GcmUtilities.AsBytes(ref z, x); - int pos = x[15] << 1; - ulong z0 = T[pos + 0], z1 = T[pos + 1]; + int pos = x[15]; + ulong z0 = T[pos].n0, z1 = T[pos].n1; for (int i = 14; i >= 0; --i) { - pos = x[i] << 1; + pos = x[i]; ulong c = z1 << 56; - z1 = T[pos + 1] ^ ((z1 >> 8) | (z0 << 56)); - z0 = T[pos + 0] ^ (z0 >> 8) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7); + z1 = T[pos].n1 ^ ((z1 >> 8) | (z0 << 56)); + z0 = T[pos].n0 ^ (z0 >> 8) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7); } Pack.UInt64_To_BE(z0, x, 0); diff --git a/crypto/src/crypto/modes/gcm/Tables64kGcmMultiplier.cs b/crypto/src/crypto/modes/gcm/Tables64kGcmMultiplier.cs index 0c15ff6b6..364c070e7 100644 --- a/crypto/src/crypto/modes/gcm/Tables64kGcmMultiplier.cs +++ b/crypto/src/crypto/modes/gcm/Tables64kGcmMultiplier.cs @@ -9,13 +9,13 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm : IGcmMultiplier { private byte[] H; - private ulong[][] T; + private GcmUtilities.FieldElement[][] T; public void Init(byte[] H) { if (T == null) { - T = new ulong[16][]; + T = new GcmUtilities.FieldElement[16][]; } else if (Arrays.AreEqual(this.H, H)) { @@ -26,52 +26,52 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm for (int i = 0; i < 16; ++i) { - ulong[] t = T[i] = new ulong[512]; + GcmUtilities.FieldElement[] t = T[i] = new GcmUtilities.FieldElement[256]; // t[0] = 0 if (i == 0) { // t[1] = H.p^7 - GcmUtilities.AsUlongs(this.H, t, 2); - GcmUtilities.MultiplyP7(t, 2, t, 2); + GcmUtilities.AsFieldElement(this.H, out t[1]); + GcmUtilities.MultiplyP7(ref t[1]); } else { // t[1] = T[i-1][1].p^8 - GcmUtilities.MultiplyP8(T[i - 1], 2, t, 2); + GcmUtilities.MultiplyP8(ref T[i - 1][1], out t[1]); } - for (int n = 2; n < 256; n += 2) + for (int n = 1; n < 128; ++n) { // t[2.n] = t[n].p^-1 - GcmUtilities.DivideP(t, n, t, n << 1); + GcmUtilities.DivideP(ref t[n], out t[n << 1]); // t[2.n + 1] = t[2.n] + t[1] - GcmUtilities.Xor(t, n << 1, t, 2, t, (n + 1) << 1); + GcmUtilities.Xor(ref t[n << 1], ref t[1], out t[(n << 1) + 1]); } } } public void MultiplyH(byte[] x) { - //ulong[] z = new ulong[2]; - //for (int i = 15; i >= 0; --i) + //GcmUtilities.FieldElement z = T[15][x[15]]; + //for (int i = 14; i >= 0; --i) //{ - // GcmUtilities.Xor(z, 0, T[i], x[i] << 1); + // GcmUtilities.Xor(ref z, ref T[i][x[i]]); //} - //Pack.UInt64_To_BE(z, x, 0); + //GcmUtilities.AsBytes(ref z, x); - ulong[] t = T[15]; - int tPos = x[15] << 1; - ulong z0 = t[tPos + 0], z1 = t[tPos + 1]; + GcmUtilities.FieldElement[] t = T[15]; + int tPos = x[15]; + ulong z0 = t[tPos].n0, z1 = t[tPos].n1; for (int i = 14; i >= 0; --i) { t = T[i]; - tPos = x[i] << 1; - z0 ^= t[tPos + 0]; - z1 ^= t[tPos + 1]; + tPos = x[i]; + z0 ^= t[tPos].n0; + z1 ^= t[tPos].n1; } Pack.UInt64_To_BE(z0, x, 0); diff --git a/crypto/src/crypto/modes/gcm/Tables8kGcmMultiplier.cs b/crypto/src/crypto/modes/gcm/Tables8kGcmMultiplier.cs index 8921c8621..67a709a75 100644 --- a/crypto/src/crypto/modes/gcm/Tables8kGcmMultiplier.cs +++ b/crypto/src/crypto/modes/gcm/Tables8kGcmMultiplier.cs @@ -9,13 +9,13 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm : IGcmMultiplier { private byte[] H; - private ulong[][] T; + private GcmUtilities.FieldElement[][] T; public void Init(byte[] H) { if (T == null) { - T = new ulong[2][]; + T = new GcmUtilities.FieldElement[2][]; } else if (Arrays.AreEqual(this.H, H)) { @@ -26,59 +26,60 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm for (int i = 0; i < 2; ++i) { - ulong[] t = T[i] = new ulong[512]; + GcmUtilities.FieldElement[] t = T[i] = new GcmUtilities.FieldElement[256]; // t[0] = 0 if (i == 0) { // t[1] = H.p^7 - GcmUtilities.AsUlongs(this.H, t, 2); - GcmUtilities.MultiplyP7(t, 2, t, 2); + GcmUtilities.AsFieldElement(this.H, out t[1]); + GcmUtilities.MultiplyP7(ref t[1]); } else { // t[1] = T[i-1][1].p^8 - GcmUtilities.MultiplyP8(T[i - 1], 2, t, 2); + GcmUtilities.MultiplyP8(ref T[i - 1][1], out t[1]); } - for (int n = 2; n < 256; n += 2) + for (int n = 1; n < 128; ++n) { // t[2.n] = t[n].p^-1 - GcmUtilities.DivideP(t, n, t, n << 1); + GcmUtilities.DivideP(ref t[n], out t[n << 1]); // t[2.n + 1] = t[2.n] + t[1] - GcmUtilities.Xor(t, n << 1, t, 2, t, (n + 1) << 1); + GcmUtilities.Xor(ref t[n << 1], ref t[1], out t[(n << 1) + 1]); } } } public void MultiplyH(byte[] x) { - ulong[] T0 = T[0], T1 = T[1]; + GcmUtilities.FieldElement[] T0 = T[0], T1 = T[1]; - //ulong[] z = new ulong[2]; - //for (int i = 14; i >= 0; i -= 2) + //GcmUtilities.FieldElement z; + //GcmUtilities.Xor(ref T0[x[14]], ref T1[x[15]], out z); + //for (int i = 12; i >= 0; i -= 2) //{ - // GcmUtilities.MultiplyP16(z); - // GcmUtilities.Xor(z, 0, T0, x[i] << 1); - // GcmUtilities.Xor(z, 0, T1, x[i + 1] << 1); + // GcmUtilities.MultiplyP16(ref z); + // GcmUtilities.Xor(ref z, ref T0[x[i]]); + // GcmUtilities.Xor(ref z, ref T1[x[i + 1]]); //} - //Pack.UInt64_To_BE(z, x, 0); + //GcmUtilities.AsBytes(ref z, x); - int vPos = x[15] << 1; - int uPos = x[14] << 1; - ulong z1 = T0[uPos + 1] ^ T1[vPos + 1]; - ulong z0 = T0[uPos] ^ T1[vPos]; + int vPos = x[15]; + int uPos = x[14]; + ulong z1 = T0[uPos].n1 ^ T1[vPos].n1; + ulong z0 = T0[uPos].n0 ^ T1[vPos].n0; for (int i = 12; i >= 0; i -= 2) { - vPos = x[i + 1] << 1; - uPos = x[i] << 1; + vPos = x[i + 1]; + uPos = x[i]; ulong c = z1 << 48; - z1 = T0[uPos + 1] ^ T1[vPos + 1] ^ ((z1 >> 16) | (z0 << 48)); - z0 = T0[uPos] ^ T1[vPos] ^ (z0 >> 16) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7); + z1 = T0[uPos].n1 ^ T1[vPos].n1 ^ ((z1 >> 16) | (z0 << 48)); + z0 = T0[uPos].n0 ^ T1[vPos].n0 ^ (z0 >> 16) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7); } Pack.UInt64_To_BE(z0, x, 0); -- cgit 1.4.1