summary refs log tree commit diff
path: root/crypto/src/math/ec/rfc7748/X448Field.cs
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/src/math/ec/rfc7748/X448Field.cs')
-rw-r--r--crypto/src/math/ec/rfc7748/X448Field.cs181
1 files changed, 181 insertions, 0 deletions
diff --git a/crypto/src/math/ec/rfc7748/X448Field.cs b/crypto/src/math/ec/rfc7748/X448Field.cs
index 1e1d379fe..67e71afa8 100644
--- a/crypto/src/math/ec/rfc7748/X448Field.cs
+++ b/crypto/src/math/ec/rfc7748/X448Field.cs
@@ -1,5 +1,16 @@
 using System;
 using System.Diagnostics;
+#if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER
+using System.Runtime.CompilerServices;
+#endif
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+using System.Numerics;
+#endif
+#if NETCOREAPP3_0_OR_GREATER
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
 
 using Org.BouncyCastle.Math.Raw;
 
@@ -16,14 +27,47 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
             0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFEU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU,
             0xFFFFFFFFU, 0xFFFFFFFFU };
 
+#if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
         public static void Add(uint[] x, uint[] y, uint[] z)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Add(x.AsSpan(), y.AsSpan(), z.AsSpan());
+#else
             for (int i = 0; i < Size; ++i)
             {
                 z[i] = x[i] + y[i];
             }
+#endif
         }
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void Add(ReadOnlySpan<uint> x, ReadOnlySpan<uint> y, Span<uint> z)
+        {
+            int i = 0;
+            if (Vector.IsHardwareAccelerated)
+            {
+                int limit = Size - Vector<uint>.Count;
+                while (i <= limit)
+                {
+                    var vx = new Vector<uint>(x[i..]);
+                    var vy = new Vector<uint>(y[i..]);
+                    (vx + vy).CopyTo(z[i..]);
+                    i += Vector<uint>.Count;
+                }
+            }
+            {
+                while (i < Size)
+                {
+                    z[i] = x[i] + y[i];
+                    ++i;
+                }
+            }
+        }
+#endif
+
         public static void AddOne(uint[] z)
         {
             z[0] += 1;
@@ -140,10 +184,14 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
 
         public static void Copy(uint[] x, int xOff, uint[] z, int zOff)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Copy(x.AsSpan(xOff), z.AsSpan(zOff));
+#else
             for (int i = 0; i < Size; ++i)
             {
                 z[zOff + i] = x[xOff + i];
             }
+#endif
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
@@ -1289,6 +1337,9 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
 
         public static void Sub(uint[] x, uint[] y, uint[] z)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Sub(x.AsSpan(), y.AsSpan(), z.AsSpan());
+#else
             uint x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7];
             uint x8 = x[8], x9 = x[9], x10 = x[10], x11 = x[11], x12 = x[12], x13 = x[13], x14 = x[14], x15 = x[15];
             uint y0 = y[0], y1 = y[1], y2 = y[2], y3 = y[3], y4 = y[4], y5 = y[5], y6 = y[6], y7 = y[7];
@@ -1350,8 +1401,138 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
             z[13] = z13;
             z[14] = z14;
             z[15] = z15;
+#endif
         }
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void Sub(ReadOnlySpan<uint> x, ReadOnlySpan<uint> y, Span<uint> z)
+        {
+#if NETCOREAPP3_0_OR_GREATER
+            if (Avx2.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector256<uint>>() == 32)
+            {
+                var ControlCarry = Vector256.Create(7U, 0U, 1U, 2U, 3U, 4U, 5U, 6U);
+                var Mask28 = Vector256.Create(M28);
+                var MaskScalar = Vector256.CreateScalar(uint.MaxValue);
+
+                var P0 = Vector256.Create(0x1FFFFFFEU);
+                var P1 = P0.WithElement(0, 0x1FFFFFFCU);
+
+                var X = MemoryMarshal.AsBytes(x[..16]);
+                var Y = MemoryMarshal.AsBytes(y[..16]);
+                var Z = MemoryMarshal.AsBytes(z[..16]);
+
+                var X0 = MemoryMarshal.Read<Vector256<uint>>(X[0x00..0x20]);
+                var X1 = MemoryMarshal.Read<Vector256<uint>>(X[0x20..0x40]);
+
+                var Y0 = MemoryMarshal.Read<Vector256<uint>>(Y[0x00..0x20]);
+                var Y1 = MemoryMarshal.Read<Vector256<uint>>(Y[0x20..0x40]);
+
+                var Z0 = Avx2.Subtract(Avx2.Add(X0, P0), Y0);
+                var Z1 = Avx2.Subtract(Avx2.Add(X1, P1), Y1);
+
+                // First carry
+                {
+                    var C0 = Avx2.ShiftRightLogical(Z0, 28);
+                    var C1 = Avx2.ShiftRightLogical(Z1, 28);
+
+                    Z0 = Avx2.And(Z0, Mask28);
+                    Z1 = Avx2.And(Z1, Mask28);
+
+                    C0 = Avx2.PermuteVar8x32(C0, ControlCarry);
+                    C1 = Avx2.PermuteVar8x32(C1, ControlCarry);
+
+                    Z0 = Avx2.Add(Z0, Avx2.Blend(C0, C1, 0x01));
+                    Z1 = Avx2.Add(Z1, C1);
+                    Z1 = Avx2.Add(Z1, Avx2.And(C0, MaskScalar));
+                }
+
+                // Second carry
+                {
+                    var C0 = Avx2.ShiftRightLogical(Z0, 28);
+                    var C1 = Avx2.ShiftRightLogical(Z1, 28);
+
+                    Z0 = Avx2.And(Z0, Mask28);
+                    Z1 = Avx2.And(Z1, Mask28);
+
+                    C0 = Avx2.PermuteVar8x32(C0, ControlCarry);
+                    C1 = Avx2.PermuteVar8x32(C1, ControlCarry);
+
+                    Z0 = Avx2.Add(Z0, Avx2.Blend(C0, C1, 0x01));
+                    Z1 = Avx2.Add(Z1, C1);
+                    Z1 = Avx2.Add(Z1, Avx2.And(C0, MaskScalar));
+                }
+
+                MemoryMarshal.Write(Z[0x00..0x20], ref Z0);
+                MemoryMarshal.Write(Z[0x20..0x40], ref Z1);
+
+                return;
+            }
+#endif
+
+            uint x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7];
+            uint x8 = x[8], x9 = x[9], x10 = x[10], x11 = x[11], x12 = x[12], x13 = x[13], x14 = x[14], x15 = x[15];
+            uint y0 = y[0], y1 = y[1], y2 = y[2], y3 = y[3], y4 = y[4], y5 = y[5], y6 = y[6], y7 = y[7];
+            uint y8 = y[8], y9 = y[9], y10 = y[10], y11 = y[11], y12 = y[12], y13 = y[13], y14 = y[14], y15 = y[15];
+
+            uint z0 = x0 + 0x1FFFFFFEU - y0;
+            uint z1 = x1 + 0x1FFFFFFEU - y1;
+            uint z2 = x2 + 0x1FFFFFFEU - y2;
+            uint z3 = x3 + 0x1FFFFFFEU - y3;
+            uint z4 = x4 + 0x1FFFFFFEU - y4;
+            uint z5 = x5 + 0x1FFFFFFEU - y5;
+            uint z6 = x6 + 0x1FFFFFFEU - y6;
+            uint z7 = x7 + 0x1FFFFFFEU - y7;
+            uint z8 = x8 + 0x1FFFFFFCU - y8;
+            uint z9 = x9 + 0x1FFFFFFEU - y9;
+            uint z10 = x10 + 0x1FFFFFFEU - y10;
+            uint z11 = x11 + 0x1FFFFFFEU - y11;
+            uint z12 = x12 + 0x1FFFFFFEU - y12;
+            uint z13 = x13 + 0x1FFFFFFEU - y13;
+            uint z14 = x14 + 0x1FFFFFFEU - y14;
+            uint z15 = x15 + 0x1FFFFFFEU - y15;
+
+            z2 += z1 >> 28; z1 &= M28;
+            z6 += z5 >> 28; z5 &= M28;
+            z10 += z9 >> 28; z9 &= M28;
+            z14 += z13 >> 28; z13 &= M28;
+
+            z3 += z2 >> 28; z2 &= M28;
+            z7 += z6 >> 28; z6 &= M28;
+            z11 += z10 >> 28; z10 &= M28;
+            z15 += z14 >> 28; z14 &= M28;
+
+            uint t = z15 >> 28; z15 &= M28;
+            z0 += t;
+            z8 += t;
+
+            z4 += z3 >> 28; z3 &= M28;
+            z8 += z7 >> 28; z7 &= M28;
+            z12 += z11 >> 28; z11 &= M28;
+
+            z1 += z0 >> 28; z0 &= M28;
+            z5 += z4 >> 28; z4 &= M28;
+            z9 += z8 >> 28; z8 &= M28;
+            z13 += z12 >> 28; z12 &= M28;
+
+            z[0] = z0;
+            z[1] = z1;
+            z[2] = z2;
+            z[3] = z3;
+            z[4] = z4;
+            z[5] = z5;
+            z[6] = z6;
+            z[7] = z7;
+            z[8] = z8;
+            z[9] = z9;
+            z[10] = z10;
+            z[11] = z11;
+            z[12] = z12;
+            z[13] = z13;
+            z[14] = z14;
+            z[15] = z15;
+        }
+#endif
+
         public static void SubOne(uint[] z)
         {
             uint[] one = Create();