summary refs log tree commit diff
diff options
context:
space:
mode:
author: Peter Dettman <peter.dettman@bouncycastle.org> 2022-11-13 12:06:50 +0700
committer: Peter Dettman <peter.dettman@bouncycastle.org> 2022-11-13 12:06:50 +0700
commit: ed22f90340a32b32f20833837c9ef8a2c4203554 (patch)
tree: 5a84db52dc21ed2c32adbedc160f881da08f6335
parent: Refactoring in Pqc.Crypto (diff)
download: BouncyCastle.NET-ed25519-ed22f90340a32b32f20833837c9ef8a2c4203554.tar.xz
X25519 perf. opts.
-rw-r--r-- crypto/src/math/ec/rfc7748/X25519Field.cs 198
1 file changed, 190 insertions, 8 deletions
diff --git a/crypto/src/math/ec/rfc7748/X25519Field.cs b/crypto/src/math/ec/rfc7748/X25519Field.cs
index 5c9eadc6b..cddf03faa 100644
--- a/crypto/src/math/ec/rfc7748/X25519Field.cs
+++ b/crypto/src/math/ec/rfc7748/X25519Field.cs
@@ -1,5 +1,13 @@
 using System;
 using System.Diagnostics;
+#if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER
+using System.Runtime.CompilerServices;
+#endif
+#if NETCOREAPP3_0_OR_GREATER
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
 
 using Org.BouncyCastle.Math.Raw;
 
@@ -18,11 +26,63 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
         private static readonly int[] RootNegOne = { 0x020EA0B0, 0x0386C9D2, 0x00478C4E, 0x0035697F, 0x005E8630,
             0x01FBD7A7, 0x0340264F, 0x01F0B2B4, 0x00027E0E, 0x00570649 };
 
+#if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public static void Add(int[] x, int[] y, int[] z)
         {
-            for (int i = 0; i < Size; ++i)
+#if NETCOREAPP3_0_OR_GREATER
+            if (Avx2.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector256<int>>() == 32)
             {
-                z[i] = x[i] + y[i];
+                var X = MemoryMarshal.AsBytes(x.AsSpan(0, 8));
+                var Y = MemoryMarshal.AsBytes(y.AsSpan(0, 8));
+                var Z = MemoryMarshal.AsBytes(z.AsSpan(0, 8));
+
+                var X0 = MemoryMarshal.Read<Vector256<int>>(X);
+                var Y0 = MemoryMarshal.Read<Vector256<int>>(Y);
+
+                var R0 = Avx2.Add(X0, Y0);
+
+                MemoryMarshal.Write(Z, ref R0);
+
+                z[8] = x[8] + y[8];
+                z[9] = x[9] + y[9];
+
+                return;
+            }
+
+            if (Sse2.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<int>>() == 16)
+            {
+                var X = MemoryMarshal.AsBytes(x.AsSpan(0, 8));
+                var Y = MemoryMarshal.AsBytes(y.AsSpan(0, 8));
+                var Z = MemoryMarshal.AsBytes(z.AsSpan(0, 8));
+
+                var X0 = MemoryMarshal.Read<Vector128<int>>(X);
+                var Y0 = MemoryMarshal.Read<Vector128<int>>(Y);
+
+                var R0 = Sse2.Add(X0, Y0);
+
+                MemoryMarshal.Write(Z, ref R0);
+
+                var X1 = MemoryMarshal.Read<Vector128<int>>(X[0x10..]);
+                var Y1 = MemoryMarshal.Read<Vector128<int>>(Y[0x10..]);
+
+                var R1 = Sse2.Add(X1, Y1);
+
+                MemoryMarshal.Write(Z[0x10..], ref R1);
+
+                z[8] = x[8] + y[8];
+                z[9] = x[9] + y[9];
+
+                return;
+            }
+#endif
+
+            {
+                for (int i = 0; i < Size; ++i)
+                {
+                    z[i] = x[i] + y[i];
+                }
             }
         }
 
@@ -36,13 +96,83 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
             z[zOff] += 1;
         }
 
+#if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public static void Apm(int[] x, int[] y, int[] zp, int[] zm)
         {
-            for (int i = 0; i < Size; ++i)
+#if NETCOREAPP3_0_OR_GREATER
+            if (Avx2.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector256<int>>() == 32)
+            {
+                var X = MemoryMarshal.AsBytes(x.AsSpan(0, 8));
+                var Y = MemoryMarshal.AsBytes(y.AsSpan(0, 8));
+                var ZP = MemoryMarshal.AsBytes(zp.AsSpan(0, 8));
+                var ZM = MemoryMarshal.AsBytes(zm.AsSpan(0, 8));
+
+                var X0 = MemoryMarshal.Read<Vector256<int>>(X);
+                var Y0 = MemoryMarshal.Read<Vector256<int>>(Y);
+
+                var RP0 = Avx2.Add(X0, Y0);
+                var RM0 = Avx2.Subtract(X0, Y0);
+
+                MemoryMarshal.Write(ZP, ref RP0);
+                MemoryMarshal.Write(ZM, ref RM0);
+
+                int x8 = x[8], y8 = y[8];
+                zp[8] = x8 + y8;
+                zm[8] = x8 - y8;
+
+                int x9 = x[9], y9 = y[9];
+                zp[9] = x9 + y9;
+                zm[9] = x9 - y9;
+
+                return;
+            }
+
+            if (Sse2.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<int>>() == 16)
             {
-                int xi = x[i], yi = y[i];
-                zp[i] = xi + yi;
-                zm[i] = xi - yi;
+                var X = MemoryMarshal.AsBytes(x.AsSpan(0, 8));
+                var Y = MemoryMarshal.AsBytes(y.AsSpan(0, 8));
+                var ZP = MemoryMarshal.AsBytes(zp.AsSpan(0, 8));
+                var ZM = MemoryMarshal.AsBytes(zm.AsSpan(0, 8));
+
+                var X0 = MemoryMarshal.Read<Vector128<int>>(X);
+                var Y0 = MemoryMarshal.Read<Vector128<int>>(Y);
+
+                var RP0 = Sse2.Add(X0, Y0);
+                var RM0 = Sse2.Subtract(X0, Y0);
+
+                MemoryMarshal.Write(ZP, ref RP0);
+                MemoryMarshal.Write(ZM, ref RM0);
+
+                var X1 = MemoryMarshal.Read<Vector128<int>>(X[0x10..]);
+                var Y1 = MemoryMarshal.Read<Vector128<int>>(Y[0x10..]);
+
+                var RP1 = Sse2.Add(X1, Y1);
+                var RM1 = Sse2.Subtract(X1, Y1);
+
+                MemoryMarshal.Write(ZP[0x10..], ref RP1);
+                MemoryMarshal.Write(ZM[0x10..], ref RM1);
+
+                int x8 = x[8], y8 = y[8];
+                zp[8] = x8 + y8;
+                zm[8] = x8 - y8;
+
+                int x9 = x[9], y9 = y[9];
+                zp[9] = x9 + y9;
+                zm[9] = x9 - y9;
+
+                return;
+            }
+#endif
+
+            {
+                for (int i = 0; i < Size; ++i)
+                {
+                    int xi = x[i], yi = y[i];
+                    zp[i] = xi + yi;
+                    zm[i] = xi - yi;
+                }
             }
         }
 
@@ -935,11 +1065,63 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
             return false;
         }
 
+#if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public static void Sub(int[] x, int[] y, int[] z)
         {
-            for (int i = 0; i < Size; ++i)
+#if NETCOREAPP3_0_OR_GREATER
+            if (Avx2.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector256<int>>() == 32)
+            {
+                var X = MemoryMarshal.AsBytes(x.AsSpan(0, 8));
+                var Y = MemoryMarshal.AsBytes(y.AsSpan(0, 8));
+                var Z = MemoryMarshal.AsBytes(z.AsSpan(0, 8));
+
+                var X0 = MemoryMarshal.Read<Vector256<int>>(X);
+                var Y0 = MemoryMarshal.Read<Vector256<int>>(Y);
+
+                var R0 = Avx2.Subtract(X0, Y0);
+
+                MemoryMarshal.Write(Z, ref R0);
+
+                z[8] = x[8] - y[8];
+                z[9] = x[9] - y[9];
+
+                return;
+            }
+
+            if (Sse2.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<int>>() == 16)
+            {
+                var X = MemoryMarshal.AsBytes(x.AsSpan(0, 8));
+                var Y = MemoryMarshal.AsBytes(y.AsSpan(0, 8));
+                var Z = MemoryMarshal.AsBytes(z.AsSpan(0, 8));
+
+                var X0 = MemoryMarshal.Read<Vector128<int>>(X);
+                var Y0 = MemoryMarshal.Read<Vector128<int>>(Y);
+
+                var R0 = Sse2.Subtract(X0, Y0);
+
+                MemoryMarshal.Write(Z, ref R0);
+
+                var X1 = MemoryMarshal.Read<Vector128<int>>(X[0x10..]);
+                var Y1 = MemoryMarshal.Read<Vector128<int>>(Y[0x10..]);
+
+                var R1 = Sse2.Subtract(X1, Y1);
+
+                MemoryMarshal.Write(Z[0x10..], ref R1);
+
+                z[8] = x[8] - y[8];
+                z[9] = x[9] - y[9];
+
+                return;
+            }
+#endif
+
             {
-                z[i] = x[i] - y[i];
+                for (int i = 0; i < Size; ++i)
+                {
+                    z[i] = x[i] - y[i];
+                }
             }
         }