diff --git a/crypto/src/math/raw/Nat512.cs b/crypto/src/math/raw/Nat512.cs
index d94ae40da..2312e1cf2 100644
--- a/crypto/src/math/raw/Nat512.cs
+++ b/crypto/src/math/raw/Nat512.cs
@@ -125,5 +125,235 @@ namespace Org.BouncyCastle.Math.Raw
}
}
#endif
+
+        /// <summary>XORs 16 words of <c>x</c> (from <c>xOff</c>) into 16 words of <c>z</c> (from <c>zOff</c>).</summary>
+        public static void XorTo(uint[] x, int xOff, uint[] z, int zOff)
+        {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            XorTo(x.AsSpan(xOff), z.AsSpan(zOff));
+#else
+            for (int k = 0; k < 16; k += 4)
+            {
+                int xPos = xOff + k, zPos = zOff + k;
+                z[zPos + 0] ^= x[xPos + 0]; z[zPos + 1] ^= x[xPos + 1];
+                z[zPos + 2] ^= x[xPos + 2]; z[zPos + 3] ^= x[xPos + 3];
+            }
+#endif
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        /// <summary>XORs the first 16 words of <paramref name="x"/> into the first 16 words of <paramref name="z"/>.</summary>
+        public static void XorTo(ReadOnlySpan<uint> x, Span<uint> z)
+        {
+#if NETCOREAPP3_0_OR_GREATER
+            if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32)
+            {
+                // Slice throws up front if either span holds fewer than 16 words.
+                ReadOnlySpan<byte> src = MemoryMarshal.AsBytes(x.Slice(0, 16));
+                Span<byte> dst = MemoryMarshal.AsBytes(z.Slice(0, 16));
+
+                // Two 256-bit halves; every load happens before the first store.
+                var lo = Avx2.Xor(
+                    MemoryMarshal.Read<Vector256<byte>>(src.Slice(0x00, 0x20)),
+                    MemoryMarshal.Read<Vector256<byte>>(dst.Slice(0x00, 0x20)));
+                var hi = Avx2.Xor(
+                    MemoryMarshal.Read<Vector256<byte>>(src.Slice(0x20, 0x20)),
+                    MemoryMarshal.Read<Vector256<byte>>(dst.Slice(0x20, 0x20)));
+
+                MemoryMarshal.Write(dst.Slice(0x00, 0x20), ref lo);
+                MemoryMarshal.Write(dst.Slice(0x20, 0x20), ref hi);
+                return;
+            }
+
+            if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16)
+            {
+                ReadOnlySpan<byte> src = MemoryMarshal.AsBytes(x.Slice(0, 16));
+                Span<byte> dst = MemoryMarshal.AsBytes(z.Slice(0, 16));
+
+                var q0 = Sse2.Xor(
+                    MemoryMarshal.Read<Vector128<byte>>(src.Slice(0x00, 0x10)),
+                    MemoryMarshal.Read<Vector128<byte>>(dst.Slice(0x00, 0x10)));
+                var q1 = Sse2.Xor(
+                    MemoryMarshal.Read<Vector128<byte>>(src.Slice(0x10, 0x10)),
+                    MemoryMarshal.Read<Vector128<byte>>(dst.Slice(0x10, 0x10)));
+                var q2 = Sse2.Xor(
+                    MemoryMarshal.Read<Vector128<byte>>(src.Slice(0x20, 0x10)),
+                    MemoryMarshal.Read<Vector128<byte>>(dst.Slice(0x20, 0x10)));
+                var q3 = Sse2.Xor(
+                    MemoryMarshal.Read<Vector128<byte>>(src.Slice(0x30, 0x10)),
+                    MemoryMarshal.Read<Vector128<byte>>(dst.Slice(0x30, 0x10)));
+
+                MemoryMarshal.Write(dst.Slice(0x00, 0x10), ref q0);
+                MemoryMarshal.Write(dst.Slice(0x10, 0x10), ref q1);
+                MemoryMarshal.Write(dst.Slice(0x20, 0x10), ref q2);
+                MemoryMarshal.Write(dst.Slice(0x30, 0x10), ref q3);
+                return;
+            }
+#endif
+
+            // Portable path: plain word-by-word XOR, four words per iteration.
+            for (int k = 0; k < 16; k += 4)
+            {
+                z[k] ^= x[k];
+                z[k + 1] ^= x[k + 1];
+                z[k + 2] ^= x[k + 2];
+                z[k + 3] ^= x[k + 3];
+            }
+        }
+#endif
+
+        /// <summary>Stores into 8 words of <c>z</c> (from <c>zOff</c>) the XOR of 8 words of <c>x</c> and of <c>y</c>.</summary>
+        public static void Xor64(ulong[] x, int xOff, ulong[] y, int yOff, ulong[] z, int zOff)
+        {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Xor64(x.AsSpan(xOff), y.AsSpan(yOff), z.AsSpan(zOff));
+#else
+            for (int k = 0; k < 8; k += 4)
+            {
+                int xPos = xOff + k, yPos = yOff + k, zPos = zOff + k;
+                z[zPos + 0] = x[xPos + 0] ^ y[yPos + 0]; z[zPos + 1] = x[xPos + 1] ^ y[yPos + 1];
+                z[zPos + 2] = x[xPos + 2] ^ y[yPos + 2]; z[zPos + 3] = x[xPos + 3] ^ y[yPos + 3];
+            }
+#endif
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        /// <summary>Writes to the first 8 words of <paramref name="z"/> the XOR of the first 8 words of <paramref name="x"/> and <paramref name="y"/>.</summary>
+        public static void Xor64(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z)
+        {
+#if NETCOREAPP3_0_OR_GREATER
+            if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32)
+            {
+                // Slice throws up front if any of the three spans holds fewer than 8 words.
+                ReadOnlySpan<byte> lhs = MemoryMarshal.AsBytes(x.Slice(0, 8));
+                ReadOnlySpan<byte> rhs = MemoryMarshal.AsBytes(y.Slice(0, 8));
+                Span<byte> dst = MemoryMarshal.AsBytes(z.Slice(0, 8));
+
+                // Two 256-bit halves; every load happens before the first store.
+                var lo = Avx2.Xor(
+                    MemoryMarshal.Read<Vector256<byte>>(lhs.Slice(0x00, 0x20)),
+                    MemoryMarshal.Read<Vector256<byte>>(rhs.Slice(0x00, 0x20)));
+                var hi = Avx2.Xor(
+                    MemoryMarshal.Read<Vector256<byte>>(lhs.Slice(0x20, 0x20)),
+                    MemoryMarshal.Read<Vector256<byte>>(rhs.Slice(0x20, 0x20)));
+
+                MemoryMarshal.Write(dst.Slice(0x00, 0x20), ref lo);
+                MemoryMarshal.Write(dst.Slice(0x20, 0x20), ref hi);
+                return;
+            }
+
+            if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16)
+            {
+                ReadOnlySpan<byte> lhs = MemoryMarshal.AsBytes(x.Slice(0, 8));
+                ReadOnlySpan<byte> rhs = MemoryMarshal.AsBytes(y.Slice(0, 8));
+                Span<byte> dst = MemoryMarshal.AsBytes(z.Slice(0, 8));
+
+                var q0 = Sse2.Xor(
+                    MemoryMarshal.Read<Vector128<byte>>(lhs.Slice(0x00, 0x10)),
+                    MemoryMarshal.Read<Vector128<byte>>(rhs.Slice(0x00, 0x10)));
+                var q1 = Sse2.Xor(
+                    MemoryMarshal.Read<Vector128<byte>>(lhs.Slice(0x10, 0x10)),
+                    MemoryMarshal.Read<Vector128<byte>>(rhs.Slice(0x10, 0x10)));
+                var q2 = Sse2.Xor(
+                    MemoryMarshal.Read<Vector128<byte>>(lhs.Slice(0x20, 0x10)),
+                    MemoryMarshal.Read<Vector128<byte>>(rhs.Slice(0x20, 0x10)));
+                var q3 = Sse2.Xor(
+                    MemoryMarshal.Read<Vector128<byte>>(lhs.Slice(0x30, 0x10)),
+                    MemoryMarshal.Read<Vector128<byte>>(rhs.Slice(0x30, 0x10)));
+
+                MemoryMarshal.Write(dst.Slice(0x00, 0x10), ref q0);
+                MemoryMarshal.Write(dst.Slice(0x10, 0x10), ref q1);
+                MemoryMarshal.Write(dst.Slice(0x20, 0x10), ref q2);
+                MemoryMarshal.Write(dst.Slice(0x30, 0x10), ref q3);
+                return;
+            }
+#endif
+
+            // Portable path: plain word-by-word XOR, four words per iteration.
+            for (int k = 0; k < 8; k += 4)
+            {
+                z[k] = x[k] ^ y[k];
+                z[k + 1] = x[k + 1] ^ y[k + 1];
+                z[k + 2] = x[k + 2] ^ y[k + 2];
+                z[k + 3] = x[k + 3] ^ y[k + 3];
+            }
+        }
+#endif
+
+        /// <summary>XORs 8 words of <c>x</c> (from <c>xOff</c>) into 8 words of <c>z</c> (from <c>zOff</c>).</summary>
+        public static void XorTo64(ulong[] x, int xOff, ulong[] z, int zOff)
+        {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            XorTo64(x.AsSpan(xOff), z.AsSpan(zOff));
+#else
+            for (int k = 0; k < 8; k += 4)
+            {
+                int xPos = xOff + k, zPos = zOff + k;
+                z[zPos + 0] ^= x[xPos + 0]; z[zPos + 1] ^= x[xPos + 1];
+                z[zPos + 2] ^= x[xPos + 2]; z[zPos + 3] ^= x[xPos + 3];
+            }
+#endif
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        /// <summary>XORs the first 8 words of <paramref name="x"/> into the first 8 words of <paramref name="z"/>.</summary>
+        public static void XorTo64(ReadOnlySpan<ulong> x, Span<ulong> z)
+        {
+#if NETCOREAPP3_0_OR_GREATER
+            if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32)
+            {
+                // Slice throws up front if either span holds fewer than 8 words.
+                ReadOnlySpan<byte> src = MemoryMarshal.AsBytes(x.Slice(0, 8));
+                Span<byte> dst = MemoryMarshal.AsBytes(z.Slice(0, 8));
+
+                // Two 256-bit halves; every load happens before the first store.
+                var lo = Avx2.Xor(
+                    MemoryMarshal.Read<Vector256<byte>>(src.Slice(0x00, 0x20)),
+                    MemoryMarshal.Read<Vector256<byte>>(dst.Slice(0x00, 0x20)));
+                var hi = Avx2.Xor(
+                    MemoryMarshal.Read<Vector256<byte>>(src.Slice(0x20, 0x20)),
+                    MemoryMarshal.Read<Vector256<byte>>(dst.Slice(0x20, 0x20)));
+
+                MemoryMarshal.Write(dst.Slice(0x00, 0x20), ref lo);
+                MemoryMarshal.Write(dst.Slice(0x20, 0x20), ref hi);
+                return;
+            }
+
+            if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16)
+            {
+                ReadOnlySpan<byte> src = MemoryMarshal.AsBytes(x.Slice(0, 8));
+                Span<byte> dst = MemoryMarshal.AsBytes(z.Slice(0, 8));
+
+                var q0 = Sse2.Xor(
+                    MemoryMarshal.Read<Vector128<byte>>(src.Slice(0x00, 0x10)),
+                    MemoryMarshal.Read<Vector128<byte>>(dst.Slice(0x00, 0x10)));
+                var q1 = Sse2.Xor(
+                    MemoryMarshal.Read<Vector128<byte>>(src.Slice(0x10, 0x10)),
+                    MemoryMarshal.Read<Vector128<byte>>(dst.Slice(0x10, 0x10)));
+                var q2 = Sse2.Xor(
+                    MemoryMarshal.Read<Vector128<byte>>(src.Slice(0x20, 0x10)),
+                    MemoryMarshal.Read<Vector128<byte>>(dst.Slice(0x20, 0x10)));
+                var q3 = Sse2.Xor(
+                    MemoryMarshal.Read<Vector128<byte>>(src.Slice(0x30, 0x10)),
+                    MemoryMarshal.Read<Vector128<byte>>(dst.Slice(0x30, 0x10)));
+
+                MemoryMarshal.Write(dst.Slice(0x00, 0x10), ref q0);
+                MemoryMarshal.Write(dst.Slice(0x10, 0x10), ref q1);
+                MemoryMarshal.Write(dst.Slice(0x20, 0x10), ref q2);
+                MemoryMarshal.Write(dst.Slice(0x30, 0x10), ref q3);
+                return;
+            }
+#endif
+
+            // Portable path: plain word-by-word XOR, four words per iteration.
+            for (int k = 0; k < 8; k += 4)
+            {
+                z[k] ^= x[k];
+                z[k + 1] ^= x[k + 1];
+                z[k + 2] ^= x[k + 2];
+                z[k + 3] ^= x[k + 3];
+            }
+        }
+#endif
}
}
|