diff options
Diffstat (limited to 'crypto/src/math/raw')
-rw-r--r-- | crypto/src/math/raw/Mod.cs | 29 | ||||
-rw-r--r-- | crypto/src/math/raw/Nat.cs | 593 | ||||
-rw-r--r-- | crypto/src/math/raw/Nat256.cs | 71 | ||||
-rw-r--r-- | crypto/src/math/raw/Nat512.cs | 315 |
4 files changed, 971 insertions, 37 deletions
diff --git a/crypto/src/math/raw/Mod.cs b/crypto/src/math/raw/Mod.cs index ea61bdd83..721134b0c 100644 --- a/crypto/src/math/raw/Mod.cs +++ b/crypto/src/math/raw/Mod.cs @@ -12,10 +12,8 @@ namespace Org.BouncyCastle.Math.Raw * computation and modular inversion" by Daniel J. Bernstein and Bo-Yin Yang. */ - internal abstract class Mod + internal static class Mod { - private static readonly SecureRandom RandomSource = new SecureRandom(); - private const int M30 = 0x3FFFFFFF; private const ulong M32UL = 0xFFFFFFFFUL; @@ -41,7 +39,7 @@ namespace Org.BouncyCastle.Math.Raw public static uint Inverse32(uint d) { - Debug.Assert((d & 1) == 1); + Debug.Assert((d & 1U) == 1U); //int x = d + (((d + 1) & 4) << 1); // d.x == 1 mod 2**4 uint x = d; // d.x == 1 mod 2**3 @@ -53,6 +51,21 @@ namespace Org.BouncyCastle.Math.Raw return x; } + public static ulong Inverse64(ulong d) + { + Debug.Assert((d & 1UL) == 1UL); + + //ulong x = d + (((d + 1) & 4) << 1); // d.x == 1 mod 2**4 + ulong x = d; // d.x == 1 mod 2**3 + x *= 2 - d * x; // d.x == 1 mod 2**6 + x *= 2 - d * x; // d.x == 1 mod 2**12 + x *= 2 - d * x; // d.x == 1 mod 2**24 + x *= 2 - d * x; // d.x == 1 mod 2**48 + x *= 2 - d * x; // d.x == 1 mod 2**96 + Debug.Assert(d * x == 1UL); + return x; + } + public static uint ModOddInverse(uint[] m, uint[] x, uint[] z) { #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER @@ -349,7 +362,7 @@ namespace Org.BouncyCastle.Math.Raw } #endif - public static uint[] Random(uint[] p) + public static uint[] Random(SecureRandom random, uint[] p) { int len = p.Length; uint[] s = Nat.Create(len); @@ -364,7 +377,7 @@ namespace Org.BouncyCastle.Math.Raw byte[] bytes = new byte[len << 2]; do { - RandomSource.NextBytes(bytes); + random.NextBytes(bytes); Pack.BE_To_UInt32(bytes, 0, s); s[len - 1] &= m; } @@ -374,7 +387,7 @@ namespace Org.BouncyCastle.Math.Raw } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - public static void Random(ReadOnlySpan<uint> p, Span<uint> z) + public static 
void Random(SecureRandom random, ReadOnlySpan<uint> p, Span<uint> z) { int len = p.Length; if (z.Length < len) @@ -395,7 +408,7 @@ namespace Org.BouncyCastle.Math.Raw do { - RandomSource.NextBytes(bytes); + random.NextBytes(bytes); Pack.BE_To_UInt32(bytes, s); s[len - 1] &= m; } diff --git a/crypto/src/math/raw/Nat.cs b/crypto/src/math/raw/Nat.cs index 09c263f4d..3bc983430 100644 --- a/crypto/src/math/raw/Nat.cs +++ b/crypto/src/math/raw/Nat.cs @@ -1580,34 +1580,87 @@ namespace Org.BouncyCastle.Math.Raw public static uint ShiftUpBit(int len, uint[] z, uint c) { - for (int i = 0; i < len; ++i) +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBit(len, z.AsSpan(0, len), c); +#else + int i = 0, limit4 = len - 4; + while (i <= limit4) + { + uint next0 = z[i + 0]; + uint next1 = z[i + 1]; + uint next2 = z[i + 2]; + uint next3 = z[i + 3]; + z[i + 0] = (next0 << 1) | (c >> 31); + z[i + 1] = (next1 << 1) | (next0 >> 31); + z[i + 2] = (next2 << 1) | (next1 >> 31); + z[i + 3] = (next3 << 1) | (next2 >> 31); + c = next3; + i += 4; + } + while (i < len) { uint next = z[i]; z[i] = (next << 1) | (c >> 31); c = next; + ++i; } return c >> 31; +#endif } public static uint ShiftUpBit(int len, uint[] z, int zOff, uint c) { - for (int i = 0; i < len; ++i) +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBit(len, z.AsSpan(zOff, len), c); +#else + int i = 0, limit4 = len - 4; + while (i <= limit4) + { + uint next0 = z[zOff + i + 0]; + uint next1 = z[zOff + i + 1]; + uint next2 = z[zOff + i + 2]; + uint next3 = z[zOff + i + 3]; + z[zOff + i + 0] = (next0 << 1) | (c >> 31); + z[zOff + i + 1] = (next1 << 1) | (next0 >> 31); + z[zOff + i + 2] = (next2 << 1) | (next1 >> 31); + z[zOff + i + 3] = (next3 << 1) | (next2 >> 31); + c = next3; + i += 4; + } + while (i < len) { uint next = z[zOff + i]; z[zOff + i] = (next << 1) | (c >> 31); c = next; + ++i; } return c >> 31; +#endif } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER public 
static uint ShiftUpBit(int len, Span<uint> z, uint c) { - for (int i = 0; i < len; ++i) + int i = 0, limit4 = len - 4; + while (i <= limit4) + { + uint next0 = z[i + 0]; + uint next1 = z[i + 1]; + uint next2 = z[i + 2]; + uint next3 = z[i + 3]; + z[i + 0] = (next0 << 1) | (c >> 31); + z[i + 1] = (next1 << 1) | (next0 >> 31); + z[i + 2] = (next2 << 1) | (next1 >> 31); + z[i + 3] = (next3 << 1) | (next2 >> 31); + c = next3; + i += 4; + } + while (i < len) { uint next = z[i]; z[i] = (next << 1) | (c >> 31); c = next; + ++i; } return c >> 31; } @@ -1615,34 +1668,87 @@ namespace Org.BouncyCastle.Math.Raw public static uint ShiftUpBit(int len, uint[] x, uint c, uint[] z) { - for (int i = 0; i < len; ++i) +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBit(len, x.AsSpan(0, len), c, z.AsSpan(0, len)); +#else + int i = 0, limit4 = len - 4; + while (i <= limit4) + { + uint next0 = x[i + 0]; + uint next1 = x[i + 1]; + uint next2 = x[i + 2]; + uint next3 = x[i + 3]; + z[i + 0] = (next0 << 1) | (c >> 31); + z[i + 1] = (next1 << 1) | (next0 >> 31); + z[i + 2] = (next2 << 1) | (next1 >> 31); + z[i + 3] = (next3 << 1) | (next2 >> 31); + c = next3; + i += 4; + } + while (i < len) { uint next = x[i]; z[i] = (next << 1) | (c >> 31); c = next; + ++i; } return c >> 31; +#endif } public static uint ShiftUpBit(int len, uint[] x, int xOff, uint c, uint[] z, int zOff) { - for (int i = 0; i < len; ++i) +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBit(len, x.AsSpan(xOff, len), c, z.AsSpan(zOff, len)); +#else + int i = 0, limit4 = len - 4; + while (i <= limit4) + { + uint next0 = x[xOff + i + 0]; + uint next1 = x[xOff + i + 1]; + uint next2 = x[xOff + i + 2]; + uint next3 = x[xOff + i + 3]; + z[zOff + i + 0] = (next0 << 1) | (c >> 31); + z[zOff + i + 1] = (next1 << 1) | (next0 >> 31); + z[zOff + i + 2] = (next2 << 1) | (next1 >> 31); + z[zOff + i + 3] = (next3 << 1) | (next2 >> 31); + c = next3; + i += 4; + } + while (i < len) { uint 
next = x[xOff + i]; z[zOff + i] = (next << 1) | (c >> 31); c = next; + ++i; } return c >> 31; +#endif } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER public static uint ShiftUpBit(int len, ReadOnlySpan<uint> x, uint c, Span<uint> z) { - for (int i = 0; i < len; ++i) + int i = 0, limit4 = len - 4; + while (i <= limit4) + { + uint next0 = x[i + 0]; + uint next1 = x[i + 1]; + uint next2 = x[i + 2]; + uint next3 = x[i + 3]; + z[i + 0] = (next0 << 1) | (c >> 31); + z[i + 1] = (next1 << 1) | (next0 >> 31); + z[i + 2] = (next2 << 1) | (next1 >> 31); + z[i + 3] = (next3 << 1) | (next2 >> 31); + c = next3; + i += 4; + } + while (i < len) { uint next = x[i]; z[i] = (next << 1) | (c >> 31); c = next; + ++i; } return c >> 31; } @@ -1650,34 +1756,87 @@ namespace Org.BouncyCastle.Math.Raw public static ulong ShiftUpBit64(int len, ulong[] x, ulong c, ulong[] z) { - for (int i = 0; i < len; ++i) +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBit64(len, x.AsSpan(0, len), c, z.AsSpan(0, len)); +#else + int i = 0, limit4 = len - 4; + while (i <= limit4) + { + ulong next0 = x[i + 0]; + ulong next1 = x[i + 1]; + ulong next2 = x[i + 2]; + ulong next3 = x[i + 3]; + z[i + 0] = (next0 << 1) | (c >> 63); + z[i + 1] = (next1 << 1) | (next0 >> 63); + z[i + 2] = (next2 << 1) | (next1 >> 63); + z[i + 3] = (next3 << 1) | (next2 >> 63); + c = next3; + i += 4; + } + while (i < len) { ulong next = x[i]; z[i] = (next << 1) | (c >> 63); c = next; + ++i; } return c >> 63; +#endif } public static ulong ShiftUpBit64(int len, ulong[] x, int xOff, ulong c, ulong[] z, int zOff) { - for (int i = 0; i < len; ++i) +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBit64(len, x.AsSpan(xOff, len), c, z.AsSpan(zOff, len)); +#else + int i = 0, limit4 = len - 4; + while (i <= limit4) + { + ulong next0 = x[xOff + i + 0]; + ulong next1 = x[xOff + i + 1]; + ulong next2 = x[xOff + i + 2]; + ulong next3 = x[xOff + i + 3]; + z[zOff + i + 0] = (next0 << 1) | 
(c >> 63); + z[zOff + i + 1] = (next1 << 1) | (next0 >> 63); + z[zOff + i + 2] = (next2 << 1) | (next1 >> 63); + z[zOff + i + 3] = (next3 << 1) | (next2 >> 63); + c = next3; + i += 4; + } + while (i < len) { ulong next = x[xOff + i]; z[zOff + i] = (next << 1) | (c >> 63); c = next; + ++i; } return c >> 63; +#endif } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER public static ulong ShiftUpBit64(int len, ReadOnlySpan<ulong> x, ulong c, Span<ulong> z) { - for (int i = 0; i < len; ++i) + int i = 0, limit4 = len - 4; + while (i <= limit4) + { + ulong next0 = x[i + 0]; + ulong next1 = x[i + 1]; + ulong next2 = x[i + 2]; + ulong next3 = x[i + 3]; + z[i + 0] = (next0 << 1) | (c >> 63); + z[i + 1] = (next1 << 1) | (next0 >> 63); + z[i + 2] = (next2 << 1) | (next1 >> 63); + z[i + 3] = (next3 << 1) | (next2 >> 63); + c = next3; + i += 4; + } + while (i < len) { ulong next = x[i]; z[i] = (next << 1) | (c >> 63); c = next; + ++i; } return c >> 63; } @@ -1685,37 +1844,90 @@ namespace Org.BouncyCastle.Math.Raw public static uint ShiftUpBits(int len, uint[] z, int bits, uint c) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBits(len, z.AsSpan(0, len), bits, c); +#else Debug.Assert(bits > 0 && bits < 32); - for (int i = 0; i < len; ++i) + int i = 0, limit4 = len - 4; + while (i <= limit4) + { + uint next0 = z[i + 0]; + uint next1 = z[i + 1]; + uint next2 = z[i + 2]; + uint next3 = z[i + 3]; + z[i + 0] = (next0 << bits) | (c >> -bits); + z[i + 1] = (next1 << bits) | (next0 >> -bits); + z[i + 2] = (next2 << bits) | (next1 >> -bits); + z[i + 3] = (next3 << bits) | (next2 >> -bits); + c = next3; + i += 4; + } + while (i < len) { uint next = z[i]; z[i] = (next << bits) | (c >> -bits); c = next; + ++i; } return c >> -bits; +#endif } public static uint ShiftUpBits(int len, uint[] z, int zOff, int bits, uint c) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBits(len, z.AsSpan(zOff, len), bits, c); +#else 
Debug.Assert(bits > 0 && bits < 32); - for (int i = 0; i < len; ++i) + int i = 0, limit4 = len - 4; + while (i <= limit4) + { + uint next0 = z[zOff + i + 0]; + uint next1 = z[zOff + i + 1]; + uint next2 = z[zOff + i + 2]; + uint next3 = z[zOff + i + 3]; + z[zOff + i + 0] = (next0 << bits) | (c >> -bits); + z[zOff + i + 1] = (next1 << bits) | (next0 >> -bits); + z[zOff + i + 2] = (next2 << bits) | (next1 >> -bits); + z[zOff + i + 3] = (next3 << bits) | (next2 >> -bits); + c = next3; + i += 4; + } + while (i < len) { uint next = z[zOff + i]; z[zOff + i] = (next << bits) | (c >> -bits); c = next; + ++i; } return c >> -bits; +#endif } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER public static uint ShiftUpBits(int len, Span<uint> z, int bits, uint c) { Debug.Assert(bits > 0 && bits < 32); - for (int i = 0; i < len; ++i) + int i = 0, limit4 = len - 4; + while (i <= limit4) + { + uint next0 = z[i + 0]; + uint next1 = z[i + 1]; + uint next2 = z[i + 2]; + uint next3 = z[i + 3]; + z[i + 0] = (next0 << bits) | (c >> -bits); + z[i + 1] = (next1 << bits) | (next0 >> -bits); + z[i + 2] = (next2 << bits) | (next1 >> -bits); + z[i + 3] = (next3 << bits) | (next2 >> -bits); + c = next3; + i += 4; + } + while (i < len) { uint next = z[i]; z[i] = (next << bits) | (c >> -bits); c = next; + ++i; } return c >> -bits; } @@ -1723,37 +1935,90 @@ namespace Org.BouncyCastle.Math.Raw public static uint ShiftUpBits(int len, uint[] x, int bits, uint c, uint[] z) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBits(len, x.AsSpan(0, len), bits, c, z.AsSpan(0, len)); +#else Debug.Assert(bits > 0 && bits < 32); - for (int i = 0; i < len; ++i) + int i = 0, limit4 = len - 4; + while (i <= limit4) + { + uint next0 = x[i + 0]; + uint next1 = x[i + 1]; + uint next2 = x[i + 2]; + uint next3 = x[i + 3]; + z[i + 0] = (next0 << bits) | (c >> -bits); + z[i + 1] = (next1 << bits) | (next0 >> -bits); + z[i + 2] = (next2 << bits) | (next1 >> -bits); + z[i + 3] = (next3 
<< bits) | (next2 >> -bits); + c = next3; + i += 4; + } + while (i < len) { uint next = x[i]; z[i] = (next << bits) | (c >> -bits); c = next; + ++i; } return c >> -bits; +#endif } public static uint ShiftUpBits(int len, uint[] x, int xOff, int bits, uint c, uint[] z, int zOff) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBits(len, x.AsSpan(xOff, len), bits, c, z.AsSpan(zOff, len)); +#else Debug.Assert(bits > 0 && bits < 32); - for (int i = 0; i < len; ++i) + int i = 0, limit4 = len - 4; + while (i <= limit4) + { + uint next0 = x[xOff + i + 0]; + uint next1 = x[xOff + i + 1]; + uint next2 = x[xOff + i + 2]; + uint next3 = x[xOff + i + 3]; + z[zOff + i + 0] = (next0 << bits) | (c >> -bits); + z[zOff + i + 1] = (next1 << bits) | (next0 >> -bits); + z[zOff + i + 2] = (next2 << bits) | (next1 >> -bits); + z[zOff + i + 3] = (next3 << bits) | (next2 >> -bits); + c = next3; + i += 4; + } + while (i < len) { uint next = x[xOff + i]; z[zOff + i] = (next << bits) | (c >> -bits); c = next; + ++i; } return c >> -bits; +#endif } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER public static uint ShiftUpBits(int len, ReadOnlySpan<uint> x, int bits, uint c, Span<uint> z) { Debug.Assert(bits > 0 && bits < 32); - for (int i = 0; i < len; ++i) + int i = 0, limit4 = len - 4; + while (i <= limit4) + { + uint next0 = x[i + 0]; + uint next1 = x[i + 1]; + uint next2 = x[i + 2]; + uint next3 = x[i + 3]; + z[i + 0] = (next0 << bits) | (c >> -bits); + z[i + 1] = (next1 << bits) | (next0 >> -bits); + z[i + 2] = (next2 << bits) | (next1 >> -bits); + z[i + 3] = (next3 << bits) | (next2 >> -bits); + c = next3; + i += 4; + } + while (i < len) { uint next = x[i]; z[i] = (next << bits) | (c >> -bits); c = next; + ++i; } return c >> -bits; } @@ -1761,37 +2026,90 @@ namespace Org.BouncyCastle.Math.Raw public static ulong ShiftUpBits64(int len, ulong[] z, int bits, ulong c) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBits64(len, 
z.AsSpan(0, len), bits, c); +#else Debug.Assert(bits > 0 && bits < 64); - for (int i = 0; i < len; ++i) + int i = 0, limit4 = len - 4; + while (i <= limit4) + { + ulong next0 = z[i + 0]; + ulong next1 = z[i + 1]; + ulong next2 = z[i + 2]; + ulong next3 = z[i + 3]; + z[i + 0] = (next0 << bits) | (c >> -bits); + z[i + 1] = (next1 << bits) | (next0 >> -bits); + z[i + 2] = (next2 << bits) | (next1 >> -bits); + z[i + 3] = (next3 << bits) | (next2 >> -bits); + c = next3; + i += 4; + } + while (i < len) { ulong next = z[i]; z[i] = (next << bits) | (c >> -bits); c = next; + ++i; } return c >> -bits; +#endif } public static ulong ShiftUpBits64(int len, ulong[] z, int zOff, int bits, ulong c) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBits64(len, z.AsSpan(zOff, len), bits, c); +#else Debug.Assert(bits > 0 && bits < 64); - for (int i = 0; i < len; ++i) + int i = 0, limit4 = len - 4; + while (i <= limit4) + { + ulong next0 = z[zOff + i + 0]; + ulong next1 = z[zOff + i + 1]; + ulong next2 = z[zOff + i + 2]; + ulong next3 = z[zOff + i + 3]; + z[zOff + i + 0] = (next0 << bits) | (c >> -bits); + z[zOff + i + 1] = (next1 << bits) | (next0 >> -bits); + z[zOff + i + 2] = (next2 << bits) | (next1 >> -bits); + z[zOff + i + 3] = (next3 << bits) | (next2 >> -bits); + c = next3; + i += 4; + } + while (i < len) { ulong next = z[zOff + i]; z[zOff + i] = (next << bits) | (c >> -bits); c = next; + ++i; } return c >> -bits; +#endif } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER public static ulong ShiftUpBits64(int len, Span<ulong> z, int bits, ulong c) { Debug.Assert(bits > 0 && bits < 64); - for (int i = 0; i < len; ++i) + int i = 0, limit4 = len - 4; + while (i <= limit4) + { + ulong next0 = z[i + 0]; + ulong next1 = z[i + 1]; + ulong next2 = z[i + 2]; + ulong next3 = z[i + 3]; + z[i + 0] = (next0 << bits) | (c >> -bits); + z[i + 1] = (next1 << bits) | (next0 >> -bits); + z[i + 2] = (next2 << bits) | (next1 >> -bits); + z[i + 3] = (next3 << 
bits) | (next2 >> -bits); + c = next3; + i += 4; + } + while (i < len) { ulong next = z[i]; z[i] = (next << bits) | (c >> -bits); c = next; + ++i; } return c >> -bits; } @@ -1799,37 +2117,90 @@ namespace Org.BouncyCastle.Math.Raw public static ulong ShiftUpBits64(int len, ulong[] x, int bits, ulong c, ulong[] z) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBits64(len, x.AsSpan(0, len), bits, c, z.AsSpan(0, len)); +#else Debug.Assert(bits > 0 && bits < 64); - for (int i = 0; i < len; ++i) + int i = 0, limit4 = len - 4; + while (i <= limit4) + { + ulong next0 = x[i + 0]; + ulong next1 = x[i + 1]; + ulong next2 = x[i + 2]; + ulong next3 = x[i + 3]; + z[i + 0] = (next0 << bits) | (c >> -bits); + z[i + 1] = (next1 << bits) | (next0 >> -bits); + z[i + 2] = (next2 << bits) | (next1 >> -bits); + z[i + 3] = (next3 << bits) | (next2 >> -bits); + c = next3; + i += 4; + } + while (i < len) { ulong next = x[i]; z[i] = (next << bits) | (c >> -bits); c = next; + ++i; } return c >> -bits; +#endif } public static ulong ShiftUpBits64(int len, ulong[] x, int xOff, int bits, ulong c, ulong[] z, int zOff) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBits64(len, x.AsSpan(xOff, len), bits, c, z.AsSpan(zOff, len)); +#else Debug.Assert(bits > 0 && bits < 64); - for (int i = 0; i < len; ++i) + int i = 0, limit4 = len - 4; + while (i <= limit4) + { + ulong next0 = x[xOff + i + 0]; + ulong next1 = x[xOff + i + 1]; + ulong next2 = x[xOff + i + 2]; + ulong next3 = x[xOff + i + 3]; + z[zOff + i + 0] = (next0 << bits) | (c >> -bits); + z[zOff + i + 1] = (next1 << bits) | (next0 >> -bits); + z[zOff + i + 2] = (next2 << bits) | (next1 >> -bits); + z[zOff + i + 3] = (next3 << bits) | (next2 >> -bits); + c = next3; + i += 4; + } + while (i < len) { ulong next = x[xOff + i]; z[zOff + i] = (next << bits) | (c >> -bits); c = next; + ++i; } return c >> -bits; +#endif } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER public static 
ulong ShiftUpBits64(int len, ReadOnlySpan<ulong> x, int bits, ulong c, Span<ulong> z) { Debug.Assert(bits > 0 && bits < 64); - for (int i = 0; i < len; ++i) + int i = 0, limit4 = len - 4; + while (i <= limit4) + { + ulong next0 = x[i + 0]; + ulong next1 = x[i + 1]; + ulong next2 = x[i + 2]; + ulong next3 = x[i + 3]; + z[i + 0] = (next0 << bits) | (c >> -bits); + z[i + 1] = (next1 << bits) | (next0 >> -bits); + z[i + 2] = (next2 << bits) | (next1 >> -bits); + z[i + 3] = (next3 << bits) | (next2 >> -bits); + c = next3; + i += 4; + } + while (i < len) { ulong next = x[i]; z[i] = (next << bits) | (c >> -bits); c = next; + ++i; } return c >> -bits; } @@ -2325,39 +2696,205 @@ namespace Org.BouncyCastle.Math.Raw } #endif - public static void Zero(int len, uint[] z) + public static void Xor(int len, uint[] x, uint[] y, uint[] z) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + Xor(len, x.AsSpan(0, len), y.AsSpan(0, len), z.AsSpan(0, len)); +#else for (int i = 0; i < len; ++i) { - z[i] = 0U; + z[i] = x[i] ^ y[i]; } +#endif } + public static void Xor(int len, uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff) + { #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - public static void Zero(int len, Span<uint> z) + Xor(len, x.AsSpan(xOff, len), y.AsSpan(yOff, len), z.AsSpan(zOff, len)); +#else + for (int i = 0; i < len; ++i) + { + z[zOff + i] = x[xOff + i] ^ y[yOff + i]; + } +#endif + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + public static void Xor(int len, ReadOnlySpan<uint> x, ReadOnlySpan<uint> y, Span<uint> z) + { + int i = 0, limit16 = len - 16; + while (i <= limit16) + { + Nat512.Xor(x[i..], y[i..], z[i..]); + i += 16; + } + while (i < len) + { + z[i] = x[i] ^ y[i]; + ++i; + } + } +#endif + + public static void Xor64(int len, ulong[] x, ulong[] y, ulong[] z) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + Xor64(len, x.AsSpan(0, len), y.AsSpan(0, len), z.AsSpan(0, len)); +#else for (int i = 0; i < len; 
++i) { - z[i] = 0U; + z[i] = x[i] ^ y[i]; + } +#endif + } + + public static void Xor64(int len, ulong[] x, int xOff, ulong[] y, int yOff, ulong[] z, int zOff) + { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + Xor64(len, x.AsSpan(xOff, len), y.AsSpan(yOff, len), z.AsSpan(zOff, len)); +#else + for (int i = 0; i < len; ++i) + { + z[zOff + i] = x[xOff + i] ^ y[yOff + i]; + } +#endif + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + public static void Xor64(int len, ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z) + { + int i = 0, limit8 = len - 8; + while (i <= limit8) + { + Nat512.Xor64(x[i..], y[i..], z[i..]); + i += 8; + } + while (i < len) + { + z[i] = x[i] ^ y[i]; + ++i; } } #endif - public static void Zero64(int len, ulong[] z) + public static void XorTo(int len, uint[] x, uint[] z) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + XorTo(len, x.AsSpan(0, len), z.AsSpan(0, len)); +#else for (int i = 0; i < len; ++i) { - z[i] = 0UL; + z[i] ^= x[i]; } +#endif } + public static void XorTo(int len, uint[] x, int xOff, uint[] z, int zOff) + { #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - public static void Zero64(int len, Span<ulong> z) + XorTo(len, x.AsSpan(xOff, len), z.AsSpan(zOff, len)); +#else + for (int i = 0; i < len; ++i) + { + z[zOff + i] ^= x[xOff + i]; + } +#endif + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + public static void XorTo(int len, ReadOnlySpan<uint> x, Span<uint> z) + { + int i = 0, limit16 = len - 16; + while (i <= limit16) + { + Nat512.XorTo(x[i..], z[i..]); + i += 16; + } + while (i < len) + { + z[i] ^= x[i]; + ++i; + } + } +#endif + + public static void XorTo64(int len, ulong[] x, ulong[] z) + { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + XorTo64(len, x.AsSpan(0, len), z.AsSpan(0, len)); +#else + for (int i = 0; i < len; ++i) + { + z[i] ^= x[i]; + } +#endif + } + + public static void XorTo64(int len, ulong[] x, int xOff, ulong[] z, int 
zOff) + { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + XorTo64(len, x.AsSpan(xOff, len), z.AsSpan(zOff, len)); +#else + for (int i = 0; i < len; ++i) + { + z[zOff + i] ^= x[xOff + i]; + } +#endif + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + public static void XorTo64(int len, ReadOnlySpan<ulong> x, Span<ulong> z) + { + int i = 0, limit8 = len - 8; + while (i <= limit8) + { + Nat512.XorTo64(x[i..], z[i..]); + i += 8; + } + while (i < len) + { + z[i] ^= x[i]; + ++i; + } + } +#endif + + public static void Zero(int len, uint[] z) + { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + z.AsSpan(0, len).Fill(0U); +#else + for (int i = 0; i < len; ++i) + { + z[i] = 0U; + } +#endif + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + public static void Zero(int len, Span<uint> z) + { + z[..len].Fill(0U); + } +#endif + + public static void Zero64(int len, ulong[] z) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + z.AsSpan(0, len).Fill(0UL); +#else for (int i = 0; i < len; ++i) { z[i] = 0UL; } +#endif + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + public static void Zero64(int len, Span<ulong> z) + { + z[..len].Fill(0UL); } #endif } diff --git a/crypto/src/math/raw/Nat256.cs b/crypto/src/math/raw/Nat256.cs index 710060bee..47e0644f6 100644 --- a/crypto/src/math/raw/Nat256.cs +++ b/crypto/src/math/raw/Nat256.cs @@ -1,5 +1,11 @@ using System; using System.Diagnostics; +#if NETCOREAPP3_0_OR_GREATER +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif using Org.BouncyCastle.Crypto.Utilities; @@ -1364,6 +1370,71 @@ namespace Org.BouncyCastle.Math.Raw return new BigInteger(1, bs); } + public static void Xor(uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff) + { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + Xor(x.AsSpan(xOff), y.AsSpan(yOff), z.AsSpan(zOff)); 
+#else + for (int i = 0; i < 8; i += 4) + { + z[zOff + i + 0] = x[xOff + i + 0] ^ y[yOff + i + 0]; + z[zOff + i + 1] = x[xOff + i + 1] ^ y[yOff + i + 1]; + z[zOff + i + 2] = x[xOff + i + 2] ^ y[yOff + i + 2]; + z[zOff + i + 3] = x[xOff + i + 3] ^ y[yOff + i + 3]; + } +#endif + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + public static void Xor(ReadOnlySpan<uint> x, ReadOnlySpan<uint> y, Span<uint> z) + { +#if NETCOREAPP3_0_OR_GREATER + if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32) + { + var X = MemoryMarshal.AsBytes(x[..8]); + var Y = MemoryMarshal.AsBytes(y[..8]); + var Z = MemoryMarshal.AsBytes(z[..8]); + + var X0 = MemoryMarshal.Read<Vector256<byte>>(X[0x00..0x20]); + var Y0 = MemoryMarshal.Read<Vector256<byte>>(Y[0x00..0x20]); + + var Z0 = Avx2.Xor(X0, Y0); + + MemoryMarshal.Write(Z[0x00..0x20], ref Z0); + return; + } + + if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16) + { + var X = MemoryMarshal.AsBytes(x[..8]); + var Y = MemoryMarshal.AsBytes(y[..8]); + var Z = MemoryMarshal.AsBytes(z[..8]); + + var X0 = MemoryMarshal.Read<Vector128<byte>>(X[0x00..0x10]); + var X1 = MemoryMarshal.Read<Vector128<byte>>(X[0x10..0x20]); + + var Y0 = MemoryMarshal.Read<Vector128<byte>>(Y[0x00..0x10]); + var Y1 = MemoryMarshal.Read<Vector128<byte>>(Y[0x10..0x20]); + + var Z0 = Sse2.Xor(X0, Y0); + var Z1 = Sse2.Xor(X1, Y1); + + MemoryMarshal.Write(Z[0x00..0x10], ref Z0); + MemoryMarshal.Write(Z[0x10..0x20], ref Z1); + return; + } +#endif + + for (int i = 0; i < 8; i += 4) + { + z[i + 0] = x[i + 0] ^ y[i + 0]; + z[i + 1] = x[i + 1] ^ y[i + 1]; + z[i + 2] = x[i + 2] ^ y[i + 2]; + z[i + 3] = x[i + 3] ^ y[i + 3]; + } + } +#endif + public static void Zero(uint[] z) { z[0] = 0; diff --git a/crypto/src/math/raw/Nat512.cs b/crypto/src/math/raw/Nat512.cs index a9ef2b3b6..2312e1cf2 100644 --- a/crypto/src/math/raw/Nat512.cs +++ b/crypto/src/math/raw/Nat512.cs @@ -1,5 +1,10 @@ using System; -using System.Diagnostics; +#if 
NETCOREAPP3_0_OR_GREATER +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif namespace Org.BouncyCastle.Math.Raw { @@ -42,5 +47,313 @@ namespace Org.BouncyCastle.Math.Raw c24 += (uint)Nat.SubFrom(16, m, 0, zz, 8); Nat.AddWordAt(32, c24, zz, 24); } + + public static void Xor(uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff) + { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + Xor(x.AsSpan(xOff), y.AsSpan(yOff), z.AsSpan(zOff)); +#else + for (int i = 0; i < 16; i += 4) + { + z[zOff + i + 0] = x[xOff + i + 0] ^ y[yOff + i + 0]; + z[zOff + i + 1] = x[xOff + i + 1] ^ y[yOff + i + 1]; + z[zOff + i + 2] = x[xOff + i + 2] ^ y[yOff + i + 2]; + z[zOff + i + 3] = x[xOff + i + 3] ^ y[yOff + i + 3]; + } +#endif + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + public static void Xor(ReadOnlySpan<uint> x, ReadOnlySpan<uint> y, Span<uint> z) + { +#if NETCOREAPP3_0_OR_GREATER + if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32) + { + var X = MemoryMarshal.AsBytes(x[..16]); + var Y = MemoryMarshal.AsBytes(y[..16]); + var Z = MemoryMarshal.AsBytes(z[..16]); + + var X0 = MemoryMarshal.Read<Vector256<byte>>(X[0x00..0x20]); + var X1 = MemoryMarshal.Read<Vector256<byte>>(X[0x20..0x40]); + + var Y0 = MemoryMarshal.Read<Vector256<byte>>(Y[0x00..0x20]); + var Y1 = MemoryMarshal.Read<Vector256<byte>>(Y[0x20..0x40]); + + var Z0 = Avx2.Xor(X0, Y0); + var Z1 = Avx2.Xor(X1, Y1); + + MemoryMarshal.Write(Z[0x00..0x20], ref Z0); + MemoryMarshal.Write(Z[0x20..0x40], ref Z1); + return; + } + + if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16) + { + var X = MemoryMarshal.AsBytes(x[..16]); + var Y = MemoryMarshal.AsBytes(y[..16]); + var Z = MemoryMarshal.AsBytes(z[..16]); + + var X0 = MemoryMarshal.Read<Vector128<byte>>(X[0x00..0x10]); + var X1 = MemoryMarshal.Read<Vector128<byte>>(X[0x10..0x20]); + var X2 = 
MemoryMarshal.Read<Vector128<byte>>(X[0x20..0x30]); + var X3 = MemoryMarshal.Read<Vector128<byte>>(X[0x30..0x40]); + + var Y0 = MemoryMarshal.Read<Vector128<byte>>(Y[0x00..0x10]); + var Y1 = MemoryMarshal.Read<Vector128<byte>>(Y[0x10..0x20]); + var Y2 = MemoryMarshal.Read<Vector128<byte>>(Y[0x20..0x30]); + var Y3 = MemoryMarshal.Read<Vector128<byte>>(Y[0x30..0x40]); + + var Z0 = Sse2.Xor(X0, Y0); + var Z1 = Sse2.Xor(X1, Y1); + var Z2 = Sse2.Xor(X2, Y2); + var Z3 = Sse2.Xor(X3, Y3); + + MemoryMarshal.Write(Z[0x00..0x10], ref Z0); + MemoryMarshal.Write(Z[0x10..0x20], ref Z1); + MemoryMarshal.Write(Z[0x20..0x30], ref Z2); + MemoryMarshal.Write(Z[0x30..0x40], ref Z3); + return; + } +#endif + + for (int i = 0; i < 16; i += 4) + { + z[i + 0] = x[i + 0] ^ y[i + 0]; + z[i + 1] = x[i + 1] ^ y[i + 1]; + z[i + 2] = x[i + 2] ^ y[i + 2]; + z[i + 3] = x[i + 3] ^ y[i + 3]; + } + } +#endif + + /* XorTo (array overload): XORs the 16 uints starting at x[xOff] into the 16 uints starting at z[zOff], in place. Where spans are available it forwards to the span overload; otherwise it runs a scalar loop unrolled four elements per iteration. */ public static void XorTo(uint[] x, int xOff, uint[] z, int zOff) + { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + XorTo(x.AsSpan(xOff), z.AsSpan(zOff)); +#else + for (int i = 0; i < 16; i += 4) + { + z[zOff + i + 0] ^= x[xOff + i + 0]; + z[zOff + i + 1] ^= x[xOff + i + 1]; + z[zOff + i + 2] ^= x[xOff + i + 2]; + z[zOff + i + 3] ^= x[xOff + i + 3]; + } +#endif + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + /* XorTo (span overload): z[i] ^= x[i] for i = 0..15. The vector paths slice the first 16 uints of each span and view them as 64 bytes, so both spans need at least 16 elements. */ public static void XorTo(ReadOnlySpan<uint> x, Span<uint> z) + { +#if NETCOREAPP3_0_OR_GREATER + /* AVX2 path: taken when Avx2 is supported and Vector256<byte> is 32 bytes; two 32-byte vectors cover the 64 bytes. */ if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32) + { + var X = MemoryMarshal.AsBytes(x[..16]); + var Z = MemoryMarshal.AsBytes(z[..16]); + + var X0 = MemoryMarshal.Read<Vector256<byte>>(X[0x00..0x20]); + var X1 = MemoryMarshal.Read<Vector256<byte>>(X[0x20..0x40]); + + /* Y0/Y1 hold the destination's current contents (read from Z), since this is an in-place XOR. */ var Y0 = MemoryMarshal.Read<Vector256<byte>>(Z[0x00..0x20]); + var Y1 = MemoryMarshal.Read<Vector256<byte>>(Z[0x20..0x40]); + + var Z0 = Avx2.Xor(X0, Y0); + var Z1 = Avx2.Xor(X1, Y1); + + MemoryMarshal.Write(Z[0x00..0x20], ref Z0); + MemoryMarshal.Write(Z[0x20..0x40], ref Z1); + 
return; + } + + /* SSE2 path: taken when Sse2 is supported and Vector128<byte> is 16 bytes; four 16-byte vectors cover the same 64 bytes. */ if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16) + { + var X = MemoryMarshal.AsBytes(x[..16]); + var Z = MemoryMarshal.AsBytes(z[..16]); + + var X0 = MemoryMarshal.Read<Vector128<byte>>(X[0x00..0x10]); + var X1 = MemoryMarshal.Read<Vector128<byte>>(X[0x10..0x20]); + var X2 = MemoryMarshal.Read<Vector128<byte>>(X[0x20..0x30]); + var X3 = MemoryMarshal.Read<Vector128<byte>>(X[0x30..0x40]); + + /* Y0..Y3 are loaded from Z, the destination. */ var Y0 = MemoryMarshal.Read<Vector128<byte>>(Z[0x00..0x10]); + var Y1 = MemoryMarshal.Read<Vector128<byte>>(Z[0x10..0x20]); + var Y2 = MemoryMarshal.Read<Vector128<byte>>(Z[0x20..0x30]); + var Y3 = MemoryMarshal.Read<Vector128<byte>>(Z[0x30..0x40]); + + var Z0 = Sse2.Xor(X0, Y0); + var Z1 = Sse2.Xor(X1, Y1); + var Z2 = Sse2.Xor(X2, Y2); + var Z3 = Sse2.Xor(X3, Y3); + + MemoryMarshal.Write(Z[0x00..0x10], ref Z0); + MemoryMarshal.Write(Z[0x10..0x20], ref Z1); + MemoryMarshal.Write(Z[0x20..0x30], ref Z2); + MemoryMarshal.Write(Z[0x30..0x40], ref Z3); + return; + } +#endif + + /* Scalar fallback: four elements per iteration over indices 0..15. */ for (int i = 0; i < 16; i += 4) + { + z[i + 0] ^= x[i + 0]; + z[i + 1] ^= x[i + 1]; + z[i + 2] ^= x[i + 2]; + z[i + 3] ^= x[i + 3]; + } + } +#endif + + /* Xor64 (array overload): for the 8 ulongs i = 0..7, stores x[xOff + i] ^ y[yOff + i] into z[zOff + i]. Where spans are available it forwards to the span overload; otherwise it runs a scalar loop unrolled four elements per iteration. */ public static void Xor64(ulong[] x, int xOff, ulong[] y, int yOff, ulong[] z, int zOff) + { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + Xor64(x.AsSpan(xOff), y.AsSpan(yOff), z.AsSpan(zOff)); +#else + for (int i = 0; i < 8; i += 4) + { + z[zOff + i + 0] = x[xOff + i + 0] ^ y[yOff + i + 0]; + z[zOff + i + 1] = x[xOff + i + 1] ^ y[yOff + i + 1]; + z[zOff + i + 2] = x[xOff + i + 2] ^ y[yOff + i + 2]; + z[zOff + i + 3] = x[xOff + i + 3] ^ y[yOff + i + 3]; + } +#endif + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + /* Xor64 (span overload): z[i] = x[i] ^ y[i] for i = 0..7. The vector paths slice the first 8 ulongs of each span and view them as 64 bytes, so all three spans need at least 8 elements. */ public static void Xor64(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z) + { +#if NETCOREAPP3_0_OR_GREATER + /* AVX2 path: taken when Avx2 is supported and Vector256<byte> is 32 bytes; two 32-byte vectors cover the 64 bytes. */ if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32) + { + var X = MemoryMarshal.AsBytes(x[..8]); + var Y = MemoryMarshal.AsBytes(y[..8]); + var Z = 
MemoryMarshal.AsBytes(z[..8]); + + var X0 = MemoryMarshal.Read<Vector256<byte>>(X[0x00..0x20]); + var X1 = MemoryMarshal.Read<Vector256<byte>>(X[0x20..0x40]); + + var Y0 = MemoryMarshal.Read<Vector256<byte>>(Y[0x00..0x20]); + var Y1 = MemoryMarshal.Read<Vector256<byte>>(Y[0x20..0x40]); + + var Z0 = Avx2.Xor(X0, Y0); + var Z1 = Avx2.Xor(X1, Y1); + + MemoryMarshal.Write(Z[0x00..0x20], ref Z0); + MemoryMarshal.Write(Z[0x20..0x40], ref Z1); + return; + } + + /* SSE2 path: taken when Sse2 is supported and Vector128<byte> is 16 bytes; four 16-byte vectors cover the same 64 bytes. */ if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16) + { + var X = MemoryMarshal.AsBytes(x[..8]); + var Y = MemoryMarshal.AsBytes(y[..8]); + var Z = MemoryMarshal.AsBytes(z[..8]); + + var X0 = MemoryMarshal.Read<Vector128<byte>>(X[0x00..0x10]); + var X1 = MemoryMarshal.Read<Vector128<byte>>(X[0x10..0x20]); + var X2 = MemoryMarshal.Read<Vector128<byte>>(X[0x20..0x30]); + var X3 = MemoryMarshal.Read<Vector128<byte>>(X[0x30..0x40]); + + var Y0 = MemoryMarshal.Read<Vector128<byte>>(Y[0x00..0x10]); + var Y1 = MemoryMarshal.Read<Vector128<byte>>(Y[0x10..0x20]); + var Y2 = MemoryMarshal.Read<Vector128<byte>>(Y[0x20..0x30]); + var Y3 = MemoryMarshal.Read<Vector128<byte>>(Y[0x30..0x40]); + + var Z0 = Sse2.Xor(X0, Y0); + var Z1 = Sse2.Xor(X1, Y1); + var Z2 = Sse2.Xor(X2, Y2); + var Z3 = Sse2.Xor(X3, Y3); + + MemoryMarshal.Write(Z[0x00..0x10], ref Z0); + MemoryMarshal.Write(Z[0x10..0x20], ref Z1); + MemoryMarshal.Write(Z[0x20..0x30], ref Z2); + MemoryMarshal.Write(Z[0x30..0x40], ref Z3); + return; + } +#endif + + /* Scalar fallback: four elements per iteration over indices 0..7. */ for (int i = 0; i < 8; i += 4) + { + z[i + 0] = x[i + 0] ^ y[i + 0]; + z[i + 1] = x[i + 1] ^ y[i + 1]; + z[i + 2] = x[i + 2] ^ y[i + 2]; + z[i + 3] = x[i + 3] ^ y[i + 3]; + } + } +#endif + + /* XorTo64 (array overload): XORs the 8 ulongs starting at x[xOff] into the 8 ulongs starting at z[zOff], in place. Where spans are available it forwards to the span overload; otherwise it runs a scalar loop unrolled four elements per iteration. */ public static void XorTo64(ulong[] x, int xOff, ulong[] z, int zOff) + { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + XorTo64(x.AsSpan(xOff), z.AsSpan(zOff)); +#else + for (int i = 0; i < 8; i += 4) + { + z[zOff + i + 0] ^= x[xOff + i + 0]; + z[zOff + i + 1] ^= x[xOff + i + 1]; + z[zOff + i + 2] ^= 
x[xOff + i + 2]; + z[zOff + i + 3] ^= x[xOff + i + 3]; + } +#endif + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + /* XorTo64 (span overload): z[i] ^= x[i] for i = 0..7. The vector paths slice the first 8 ulongs of each span and view them as 64 bytes, so both spans need at least 8 elements. */ public static void XorTo64(ReadOnlySpan<ulong> x, Span<ulong> z) + { +#if NETCOREAPP3_0_OR_GREATER + /* AVX2 path: taken when Avx2 is supported and Vector256<byte> is 32 bytes; two 32-byte vectors cover the 64 bytes. */ if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32) + { + var X = MemoryMarshal.AsBytes(x[..8]); + var Z = MemoryMarshal.AsBytes(z[..8]); + + var X0 = MemoryMarshal.Read<Vector256<byte>>(X[0x00..0x20]); + var X1 = MemoryMarshal.Read<Vector256<byte>>(X[0x20..0x40]); + + /* Y0/Y1 hold the destination's current contents (read from Z), since this is an in-place XOR. */ var Y0 = MemoryMarshal.Read<Vector256<byte>>(Z[0x00..0x20]); + var Y1 = MemoryMarshal.Read<Vector256<byte>>(Z[0x20..0x40]); + + var Z0 = Avx2.Xor(X0, Y0); + var Z1 = Avx2.Xor(X1, Y1); + + MemoryMarshal.Write(Z[0x00..0x20], ref Z0); + MemoryMarshal.Write(Z[0x20..0x40], ref Z1); + return; + } + + /* SSE2 path: taken when Sse2 is supported and Vector128<byte> is 16 bytes; four 16-byte vectors cover the same 64 bytes. */ if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16) + { + var X = MemoryMarshal.AsBytes(x[..8]); + var Z = MemoryMarshal.AsBytes(z[..8]); + + var X0 = MemoryMarshal.Read<Vector128<byte>>(X[0x00..0x10]); + var X1 = MemoryMarshal.Read<Vector128<byte>>(X[0x10..0x20]); + var X2 = MemoryMarshal.Read<Vector128<byte>>(X[0x20..0x30]); + var X3 = MemoryMarshal.Read<Vector128<byte>>(X[0x30..0x40]); + + /* Y0..Y3 are loaded from Z, the destination. */ var Y0 = MemoryMarshal.Read<Vector128<byte>>(Z[0x00..0x10]); + var Y1 = MemoryMarshal.Read<Vector128<byte>>(Z[0x10..0x20]); + var Y2 = MemoryMarshal.Read<Vector128<byte>>(Z[0x20..0x30]); + var Y3 = MemoryMarshal.Read<Vector128<byte>>(Z[0x30..0x40]); + + var Z0 = Sse2.Xor(X0, Y0); + var Z1 = Sse2.Xor(X1, Y1); + var Z2 = Sse2.Xor(X2, Y2); + var Z3 = Sse2.Xor(X3, Y3); + + MemoryMarshal.Write(Z[0x00..0x10], ref Z0); + MemoryMarshal.Write(Z[0x10..0x20], ref Z1); + MemoryMarshal.Write(Z[0x20..0x30], ref Z2); + MemoryMarshal.Write(Z[0x30..0x40], ref Z3); + return; + } +#endif + + /* Scalar fallback: four elements per iteration over indices 0..7. */ for (int i = 0; i < 8; i += 4) + { + z[i + 0] ^= x[i + 0]; + z[i + 1] ^= x[i + 1]; + z[i + 2] ^= x[i + 2]; + z[i + 3] ^= x[i + 3]; + } + } +#endif } } |