diff --git a/crypto/src/math/ec/rfc7748/X448Field.cs b/crypto/src/math/ec/rfc7748/X448Field.cs
index 1e1d379fe..67e71afa8 100644
--- a/crypto/src/math/ec/rfc7748/X448Field.cs
+++ b/crypto/src/math/ec/rfc7748/X448Field.cs
@@ -1,5 +1,16 @@
using System;
using System.Diagnostics;
+#if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER
+using System.Runtime.CompilerServices;
+#endif
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+using System.Numerics;
+#endif
+#if NETCOREAPP3_0_OR_GREATER
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
using Org.BouncyCastle.Math.Raw;
@@ -16,14 +27,47 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFEU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU,
0xFFFFFFFFU, 0xFFFFFFFFU };
+#if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
public static void Add(uint[] x, uint[] y, uint[] z)
{
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+ Add(x.AsSpan(), y.AsSpan(), z.AsSpan());
+#else
for (int i = 0; i < Size; ++i)
{
z[i] = x[i] + y[i];
}
+#endif
}
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ public static void Add(ReadOnlySpan<uint> x, ReadOnlySpan<uint> y, Span<uint> z)
+ {
+ int i = 0;
+ if (Vector.IsHardwareAccelerated)
+ {
+ int limit = Size - Vector<uint>.Count;
+ while (i <= limit)
+ {
+ var vx = new Vector<uint>(x[i..]);
+ var vy = new Vector<uint>(y[i..]);
+ (vx + vy).CopyTo(z[i..]);
+ i += Vector<uint>.Count;
+ }
+ }
+ {
+ while (i < Size)
+ {
+ z[i] = x[i] + y[i];
+ ++i;
+ }
+ }
+ }
+#endif
+
public static void AddOne(uint[] z)
{
z[0] += 1;
@@ -140,10 +184,14 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
public static void Copy(uint[] x, int xOff, uint[] z, int zOff)
{
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+ Copy(x.AsSpan(xOff), z.AsSpan(zOff));
+#else
for (int i = 0; i < Size; ++i)
{
z[zOff + i] = x[xOff + i];
}
+#endif
}
#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
@@ -1289,6 +1337,9 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
public static void Sub(uint[] x, uint[] y, uint[] z)
{
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+ Sub(x.AsSpan(), y.AsSpan(), z.AsSpan());
+#else
uint x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7];
uint x8 = x[8], x9 = x[9], x10 = x[10], x11 = x[11], x12 = x[12], x13 = x[13], x14 = x[14], x15 = x[15];
uint y0 = y[0], y1 = y[1], y2 = y[2], y3 = y[3], y4 = y[4], y5 = y[5], y6 = y[6], y7 = y[7];
@@ -1350,8 +1401,138 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
z[13] = z13;
z[14] = z14;
z[15] = z15;
+#endif
}
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+ public static void Sub(ReadOnlySpan<uint> x, ReadOnlySpan<uint> y, Span<uint> z)
+ {
+#if NETCOREAPP3_0_OR_GREATER
+ if (Avx2.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector256<uint>>() == 32)
+ {
+ var ControlCarry = Vector256.Create(7U, 0U, 1U, 2U, 3U, 4U, 5U, 6U);
+ var Mask28 = Vector256.Create(M28);
+ var MaskScalar = Vector256.CreateScalar(uint.MaxValue);
+
+ var P0 = Vector256.Create(0x1FFFFFFEU);
+ var P1 = P0.WithElement(0, 0x1FFFFFFCU);
+
+ var X = MemoryMarshal.AsBytes(x[..16]);
+ var Y = MemoryMarshal.AsBytes(y[..16]);
+ var Z = MemoryMarshal.AsBytes(z[..16]);
+
+ var X0 = MemoryMarshal.Read<Vector256<uint>>(X[0x00..0x20]);
+ var X1 = MemoryMarshal.Read<Vector256<uint>>(X[0x20..0x40]);
+
+ var Y0 = MemoryMarshal.Read<Vector256<uint>>(Y[0x00..0x20]);
+ var Y1 = MemoryMarshal.Read<Vector256<uint>>(Y[0x20..0x40]);
+
+ var Z0 = Avx2.Subtract(Avx2.Add(X0, P0), Y0);
+ var Z1 = Avx2.Subtract(Avx2.Add(X1, P1), Y1);
+
+ // First carry
+ {
+ var C0 = Avx2.ShiftRightLogical(Z0, 28);
+ var C1 = Avx2.ShiftRightLogical(Z1, 28);
+
+ Z0 = Avx2.And(Z0, Mask28);
+ Z1 = Avx2.And(Z1, Mask28);
+
+ C0 = Avx2.PermuteVar8x32(C0, ControlCarry);
+ C1 = Avx2.PermuteVar8x32(C1, ControlCarry);
+
+ Z0 = Avx2.Add(Z0, Avx2.Blend(C0, C1, 0x01));
+ Z1 = Avx2.Add(Z1, C1);
+ Z1 = Avx2.Add(Z1, Avx2.And(C0, MaskScalar));
+ }
+
+ // Second carry
+ {
+ var C0 = Avx2.ShiftRightLogical(Z0, 28);
+ var C1 = Avx2.ShiftRightLogical(Z1, 28);
+
+ Z0 = Avx2.And(Z0, Mask28);
+ Z1 = Avx2.And(Z1, Mask28);
+
+ C0 = Avx2.PermuteVar8x32(C0, ControlCarry);
+ C1 = Avx2.PermuteVar8x32(C1, ControlCarry);
+
+ Z0 = Avx2.Add(Z0, Avx2.Blend(C0, C1, 0x01));
+ Z1 = Avx2.Add(Z1, C1);
+ Z1 = Avx2.Add(Z1, Avx2.And(C0, MaskScalar));
+ }
+
+ MemoryMarshal.Write(Z[0x00..0x20], ref Z0);
+ MemoryMarshal.Write(Z[0x20..0x40], ref Z1);
+
+ return;
+ }
+#endif
+
+ uint x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7];
+ uint x8 = x[8], x9 = x[9], x10 = x[10], x11 = x[11], x12 = x[12], x13 = x[13], x14 = x[14], x15 = x[15];
+ uint y0 = y[0], y1 = y[1], y2 = y[2], y3 = y[3], y4 = y[4], y5 = y[5], y6 = y[6], y7 = y[7];
+ uint y8 = y[8], y9 = y[9], y10 = y[10], y11 = y[11], y12 = y[12], y13 = y[13], y14 = y[14], y15 = y[15];
+
+ uint z0 = x0 + 0x1FFFFFFEU - y0;
+ uint z1 = x1 + 0x1FFFFFFEU - y1;
+ uint z2 = x2 + 0x1FFFFFFEU - y2;
+ uint z3 = x3 + 0x1FFFFFFEU - y3;
+ uint z4 = x4 + 0x1FFFFFFEU - y4;
+ uint z5 = x5 + 0x1FFFFFFEU - y5;
+ uint z6 = x6 + 0x1FFFFFFEU - y6;
+ uint z7 = x7 + 0x1FFFFFFEU - y7;
+ uint z8 = x8 + 0x1FFFFFFCU - y8;
+ uint z9 = x9 + 0x1FFFFFFEU - y9;
+ uint z10 = x10 + 0x1FFFFFFEU - y10;
+ uint z11 = x11 + 0x1FFFFFFEU - y11;
+ uint z12 = x12 + 0x1FFFFFFEU - y12;
+ uint z13 = x13 + 0x1FFFFFFEU - y13;
+ uint z14 = x14 + 0x1FFFFFFEU - y14;
+ uint z15 = x15 + 0x1FFFFFFEU - y15;
+
+ z2 += z1 >> 28; z1 &= M28;
+ z6 += z5 >> 28; z5 &= M28;
+ z10 += z9 >> 28; z9 &= M28;
+ z14 += z13 >> 28; z13 &= M28;
+
+ z3 += z2 >> 28; z2 &= M28;
+ z7 += z6 >> 28; z6 &= M28;
+ z11 += z10 >> 28; z10 &= M28;
+ z15 += z14 >> 28; z14 &= M28;
+
+ uint t = z15 >> 28; z15 &= M28;
+ z0 += t;
+ z8 += t;
+
+ z4 += z3 >> 28; z3 &= M28;
+ z8 += z7 >> 28; z7 &= M28;
+ z12 += z11 >> 28; z11 &= M28;
+
+ z1 += z0 >> 28; z0 &= M28;
+ z5 += z4 >> 28; z4 &= M28;
+ z9 += z8 >> 28; z8 &= M28;
+ z13 += z12 >> 28; z12 &= M28;
+
+ z[0] = z0;
+ z[1] = z1;
+ z[2] = z2;
+ z[3] = z3;
+ z[4] = z4;
+ z[5] = z5;
+ z[6] = z6;
+ z[7] = z7;
+ z[8] = z8;
+ z[9] = z9;
+ z[10] = z10;
+ z[11] = z11;
+ z[12] = z12;
+ z[13] = z13;
+ z[14] = z14;
+ z[15] = z15;
+ }
+#endif
+
public static void SubOne(uint[] z)
{
uint[] one = Create();
|