From d21c219c4985a75204048ce03cf80ffd65765bf5 Mon Sep 17 00:00:00 2001 From: Peter Dettman Date: Tue, 25 Oct 2022 17:16:51 +0700 Subject: Xor methods in Nat classes --- crypto/src/math/ec/custom/sec/SecT571Field.cs | 20 +-- crypto/src/math/raw/Nat.cs | 124 +++++++++++++- crypto/src/math/raw/Nat512.cs | 230 ++++++++++++++++++++++++++ crypto/src/pqc/crypto/bike/BikeRing.cs | 17 +- 4 files changed, 361 insertions(+), 30 deletions(-) diff --git a/crypto/src/math/ec/custom/sec/SecT571Field.cs b/crypto/src/math/ec/custom/sec/SecT571Field.cs index 5a393409a..49eaae2d4 100644 --- a/crypto/src/math/ec/custom/sec/SecT571Field.cs +++ b/crypto/src/math/ec/custom/sec/SecT571Field.cs @@ -19,18 +19,12 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec public static void Add(ulong[] x, ulong[] y, ulong[] z) { - for (int i = 0; i < 9; ++i) - { - z[i] = x[i] ^ y[i]; - } + Nat.Xor64(9, x, y, z); } private static void Add(ulong[] x, int xOff, ulong[] y, int yOff, ulong[] z, int zOff) { - for (int i = 0; i < 9; ++i) - { - z[zOff + i] = x[xOff + i] ^ y[yOff + i]; - } + Nat.Xor64(9, x, xOff, y, yOff, z, zOff); } public static void AddBothTo(ulong[] x, ulong[] y, ulong[] z) @@ -51,10 +45,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec public static void AddExt(ulong[] xx, ulong[] yy, ulong[] zz) { - for (int i = 0; i < 18; ++i) - { - zz[i] = xx[i] ^ yy[i]; - } + Nat.Xor64(18, xx, yy, zz); } public static void AddOne(ulong[] x, ulong[] z) @@ -68,10 +59,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec private static void AddTo(ulong[] x, ulong[] z) { - for (int i = 0; i < 9; ++i) - { - z[i] ^= x[i]; - } + Nat.XorTo64(9, x, z); } public static ulong[] FromBigInteger(BigInteger x) diff --git a/crypto/src/math/raw/Nat.cs b/crypto/src/math/raw/Nat.cs index 89b6881d3..3bc983430 100644 --- a/crypto/src/math/raw/Nat.cs +++ b/crypto/src/math/raw/Nat.cs @@ -1580,6 +1580,9 @@ namespace Org.BouncyCastle.Math.Raw public static uint ShiftUpBit(int len, uint[] z, uint c) { +#if 
NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBit(len, z.AsSpan(0, len), c); +#else int i = 0, limit4 = len - 4; while (i <= limit4) { @@ -1602,10 +1605,14 @@ namespace Org.BouncyCastle.Math.Raw ++i; } return c >> 31; +#endif } public static uint ShiftUpBit(int len, uint[] z, int zOff, uint c) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBit(len, z.AsSpan(zOff, len), c); +#else int i = 0, limit4 = len - 4; while (i <= limit4) { @@ -1628,6 +1635,7 @@ namespace Org.BouncyCastle.Math.Raw ++i; } return c >> 31; +#endif } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER @@ -1660,6 +1668,9 @@ namespace Org.BouncyCastle.Math.Raw public static uint ShiftUpBit(int len, uint[] x, uint c, uint[] z) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBit(len, x.AsSpan(0, len), c, z.AsSpan(0, len)); +#else int i = 0, limit4 = len - 4; while (i <= limit4) { @@ -1682,10 +1693,14 @@ namespace Org.BouncyCastle.Math.Raw ++i; } return c >> 31; +#endif } public static uint ShiftUpBit(int len, uint[] x, int xOff, uint c, uint[] z, int zOff) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBit(len, x.AsSpan(xOff, len), c, z.AsSpan(zOff, len)); +#else int i = 0, limit4 = len - 4; while (i <= limit4) { @@ -1708,6 +1723,7 @@ namespace Org.BouncyCastle.Math.Raw ++i; } return c >> 31; +#endif } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER @@ -1740,6 +1756,9 @@ namespace Org.BouncyCastle.Math.Raw public static ulong ShiftUpBit64(int len, ulong[] x, ulong c, ulong[] z) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBit64(len, x.AsSpan(0, len), c, z.AsSpan(0, len)); +#else int i = 0, limit4 = len - 4; while (i <= limit4) { @@ -1762,10 +1781,14 @@ namespace Org.BouncyCastle.Math.Raw ++i; } return c >> 63; +#endif } public static ulong ShiftUpBit64(int len, ulong[] x, int xOff, ulong c, ulong[] z, int zOff) { +#if NETCOREAPP2_1_OR_GREATER 
|| NETSTANDARD2_1_OR_GREATER + return ShiftUpBit64(len, x.AsSpan(xOff, len), c, z.AsSpan(zOff, len)); +#else int i = 0, limit4 = len - 4; while (i <= limit4) { @@ -1788,6 +1811,7 @@ namespace Org.BouncyCastle.Math.Raw ++i; } return c >> 63; +#endif } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER @@ -1820,6 +1844,9 @@ namespace Org.BouncyCastle.Math.Raw public static uint ShiftUpBits(int len, uint[] z, int bits, uint c) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBits(len, z.AsSpan(0, len), bits, c); +#else Debug.Assert(bits > 0 && bits < 32); int i = 0, limit4 = len - 4; while (i <= limit4) @@ -1843,10 +1870,14 @@ namespace Org.BouncyCastle.Math.Raw ++i; } return c >> -bits; +#endif } public static uint ShiftUpBits(int len, uint[] z, int zOff, int bits, uint c) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBits(len, z.AsSpan(zOff, len), bits, c); +#else Debug.Assert(bits > 0 && bits < 32); int i = 0, limit4 = len - 4; while (i <= limit4) @@ -1870,6 +1901,7 @@ namespace Org.BouncyCastle.Math.Raw ++i; } return c >> -bits; +#endif } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER @@ -1903,6 +1935,9 @@ namespace Org.BouncyCastle.Math.Raw public static uint ShiftUpBits(int len, uint[] x, int bits, uint c, uint[] z) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBits(len, x.AsSpan(0, len), bits, c, z.AsSpan(0, len)); +#else Debug.Assert(bits > 0 && bits < 32); int i = 0, limit4 = len - 4; while (i <= limit4) @@ -1926,10 +1961,14 @@ namespace Org.BouncyCastle.Math.Raw ++i; } return c >> -bits; +#endif } public static uint ShiftUpBits(int len, uint[] x, int xOff, int bits, uint c, uint[] z, int zOff) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBits(len, x.AsSpan(xOff, len), bits, c, z.AsSpan(zOff, len)); +#else Debug.Assert(bits > 0 && bits < 32); int i = 0, limit4 = len - 4; while (i <= limit4) @@ -1953,6 +1992,7 @@ namespace 
Org.BouncyCastle.Math.Raw ++i; } return c >> -bits; +#endif } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER @@ -1986,6 +2026,9 @@ namespace Org.BouncyCastle.Math.Raw public static ulong ShiftUpBits64(int len, ulong[] z, int bits, ulong c) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBits64(len, z.AsSpan(0, len), bits, c); +#else Debug.Assert(bits > 0 && bits < 64); int i = 0, limit4 = len - 4; while (i <= limit4) @@ -2009,10 +2052,14 @@ namespace Org.BouncyCastle.Math.Raw ++i; } return c >> -bits; +#endif } public static ulong ShiftUpBits64(int len, ulong[] z, int zOff, int bits, ulong c) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBits64(len, z.AsSpan(zOff, len), bits, c); +#else Debug.Assert(bits > 0 && bits < 64); int i = 0, limit4 = len - 4; while (i <= limit4) @@ -2036,6 +2083,7 @@ namespace Org.BouncyCastle.Math.Raw ++i; } return c >> -bits; +#endif } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER @@ -2069,6 +2117,9 @@ namespace Org.BouncyCastle.Math.Raw public static ulong ShiftUpBits64(int len, ulong[] x, int bits, ulong c, ulong[] z) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBits64(len, x.AsSpan(0, len), bits, c, z.AsSpan(0, len)); +#else Debug.Assert(bits > 0 && bits < 64); int i = 0, limit4 = len - 4; while (i <= limit4) @@ -2092,10 +2143,14 @@ namespace Org.BouncyCastle.Math.Raw ++i; } return c >> -bits; +#endif } public static ulong ShiftUpBits64(int len, ulong[] x, int xOff, int bits, ulong c, ulong[] z, int zOff) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ShiftUpBits64(len, x.AsSpan(xOff, len), bits, c, z.AsSpan(zOff, len)); +#else Debug.Assert(bits > 0 && bits < 64); int i = 0, limit4 = len - 4; while (i <= limit4) @@ -2119,6 +2174,7 @@ namespace Org.BouncyCastle.Math.Raw ++i; } return c >> -bits; +#endif } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER @@ -2642,104 +2698,164 @@ namespace 
Org.BouncyCastle.Math.Raw public static void Xor(int len, uint[] x, uint[] y, uint[] z) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + Xor(len, x.AsSpan(0, len), y.AsSpan(0, len), z.AsSpan(0, len)); +#else for (int i = 0; i < len; ++i) { z[i] = x[i] ^ y[i]; } +#endif } public static void Xor(int len, uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + Xor(len, x.AsSpan(xOff, len), y.AsSpan(yOff, len), z.AsSpan(zOff, len)); +#else for (int i = 0; i < len; ++i) { z[zOff + i] = x[xOff + i] ^ y[yOff + i]; } +#endif } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER public static void Xor(int len, ReadOnlySpan<uint> x, ReadOnlySpan<uint> y, Span<uint> z) { - for (int i = 0; i < len; ++i) + int i = 0, limit16 = len - 16; + while (i <= limit16) + { + Nat512.Xor(x[i..], y[i..], z[i..]); + i += 16; + } + while (i < len) { z[i] = x[i] ^ y[i]; + ++i; } } #endif public static void Xor64(int len, ulong[] x, ulong[] y, ulong[] z) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + Xor64(len, x.AsSpan(0, len), y.AsSpan(0, len), z.AsSpan(0, len)); +#else for (int i = 0; i < len; ++i) { z[i] = x[i] ^ y[i]; } +#endif } public static void Xor64(int len, ulong[] x, int xOff, ulong[] y, int yOff, ulong[] z, int zOff) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + Xor64(len, x.AsSpan(xOff, len), y.AsSpan(yOff, len), z.AsSpan(zOff, len)); +#else for (int i = 0; i < len; ++i) { z[zOff + i] = x[xOff + i] ^ y[yOff + i]; } +#endif } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER public static void Xor64(int len, ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z) { - for (int i = 0; i < len; ++i) + int i = 0, limit8 = len - 8; + while (i <= limit8) + { + Nat512.Xor64(x[i..], y[i..], z[i..]); + i += 8; + } + while (i < len) { z[i] = x[i] ^ y[i]; + ++i; } } #endif public static void XorTo(int len, uint[] x, uint[] z) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + XorTo(len, 
x.AsSpan(0, len), z.AsSpan(0, len)); +#else for (int i = 0; i < len; ++i) { z[i] ^= x[i]; } +#endif } public static void XorTo(int len, uint[] x, int xOff, uint[] z, int zOff) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + XorTo(len, x.AsSpan(xOff, len), z.AsSpan(zOff, len)); +#else for (int i = 0; i < len; ++i) { z[zOff + i] ^= x[xOff + i]; } +#endif } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER public static void XorTo(int len, ReadOnlySpan<uint> x, Span<uint> z) { - for (int i = 0; i < len; ++i) + int i = 0, limit16 = len - 16; + while (i <= limit16) + { + Nat512.XorTo(x[i..], z[i..]); + i += 16; + } + while (i < len) { z[i] ^= x[i]; + ++i; } } #endif public static void XorTo64(int len, ulong[] x, ulong[] z) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + XorTo64(len, x.AsSpan(0, len), z.AsSpan(0, len)); +#else for (int i = 0; i < len; ++i) { z[i] ^= x[i]; } +#endif } public static void XorTo64(int len, ulong[] x, int xOff, ulong[] z, int zOff) { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + XorTo64(len, x.AsSpan(xOff, len), z.AsSpan(zOff, len)); +#else for (int i = 0; i < len; ++i) { z[zOff + i] ^= x[xOff + i]; } +#endif } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER public static void XorTo64(int len, ReadOnlySpan<ulong> x, Span<ulong> z) { - for (int i = 0; i < len; ++i) + int i = 0, limit8 = len - 8; + while (i <= limit8) + { + Nat512.XorTo64(x[i..], z[i..]); + i += 8; + } + while (i < len) { z[i] ^= x[i]; + ++i; } } #endif diff --git a/crypto/src/math/raw/Nat512.cs b/crypto/src/math/raw/Nat512.cs index d94ae40da..2312e1cf2 100644 --- a/crypto/src/math/raw/Nat512.cs +++ b/crypto/src/math/raw/Nat512.cs @@ -125,5 +125,235 @@ namespace Org.BouncyCastle.Math.Raw } } #endif + + public static void XorTo(uint[] x, int xOff, uint[] z, int zOff) + { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + XorTo(x.AsSpan(xOff), z.AsSpan(zOff)); +#else + for (int i = 0; i < 16; i += 4) + { + z[zOff + i + 0] ^= x[xOff + 
i + 0]; + z[zOff + i + 1] ^= x[xOff + i + 1]; + z[zOff + i + 2] ^= x[xOff + i + 2]; + z[zOff + i + 3] ^= x[xOff + i + 3]; + } +#endif + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + public static void XorTo(ReadOnlySpan x, Span z) + { +#if NETCOREAPP3_0_OR_GREATER + if (Avx2.IsSupported && Unsafe.SizeOf>() == 32) + { + var X = MemoryMarshal.AsBytes(x[..16]); + var Z = MemoryMarshal.AsBytes(z[..16]); + + var X0 = MemoryMarshal.Read>(X[0x00..0x20]); + var X1 = MemoryMarshal.Read>(X[0x20..0x40]); + + var Y0 = MemoryMarshal.Read>(Z[0x00..0x20]); + var Y1 = MemoryMarshal.Read>(Z[0x20..0x40]); + + var Z0 = Avx2.Xor(X0, Y0); + var Z1 = Avx2.Xor(X1, Y1); + + MemoryMarshal.Write(Z[0x00..0x20], ref Z0); + MemoryMarshal.Write(Z[0x20..0x40], ref Z1); + return; + } + + if (Sse2.IsSupported && Unsafe.SizeOf>() == 16) + { + var X = MemoryMarshal.AsBytes(x[..16]); + var Z = MemoryMarshal.AsBytes(z[..16]); + + var X0 = MemoryMarshal.Read>(X[0x00..0x10]); + var X1 = MemoryMarshal.Read>(X[0x10..0x20]); + var X2 = MemoryMarshal.Read>(X[0x20..0x30]); + var X3 = MemoryMarshal.Read>(X[0x30..0x40]); + + var Y0 = MemoryMarshal.Read>(Z[0x00..0x10]); + var Y1 = MemoryMarshal.Read>(Z[0x10..0x20]); + var Y2 = MemoryMarshal.Read>(Z[0x20..0x30]); + var Y3 = MemoryMarshal.Read>(Z[0x30..0x40]); + + var Z0 = Sse2.Xor(X0, Y0); + var Z1 = Sse2.Xor(X1, Y1); + var Z2 = Sse2.Xor(X2, Y2); + var Z3 = Sse2.Xor(X3, Y3); + + MemoryMarshal.Write(Z[0x00..0x10], ref Z0); + MemoryMarshal.Write(Z[0x10..0x20], ref Z1); + MemoryMarshal.Write(Z[0x20..0x30], ref Z2); + MemoryMarshal.Write(Z[0x30..0x40], ref Z3); + return; + } +#endif + + for (int i = 0; i < 16; i += 4) + { + z[i + 0] ^= x[i + 0]; + z[i + 1] ^= x[i + 1]; + z[i + 2] ^= x[i + 2]; + z[i + 3] ^= x[i + 3]; + } + } +#endif + + public static void Xor64(ulong[] x, int xOff, ulong[] y, int yOff, ulong[] z, int zOff) + { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + Xor64(x.AsSpan(xOff), y.AsSpan(yOff), z.AsSpan(zOff)); +#else 
+ for (int i = 0; i < 8; i += 4) + { + z[zOff + i + 0] = x[xOff + i + 0] ^ y[yOff + i + 0]; + z[zOff + i + 1] = x[xOff + i + 1] ^ y[yOff + i + 1]; + z[zOff + i + 2] = x[xOff + i + 2] ^ y[yOff + i + 2]; + z[zOff + i + 3] = x[xOff + i + 3] ^ y[yOff + i + 3]; + } +#endif + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + public static void Xor64(ReadOnlySpan x, ReadOnlySpan y, Span z) + { +#if NETCOREAPP3_0_OR_GREATER + if (Avx2.IsSupported && Unsafe.SizeOf>() == 32) + { + var X = MemoryMarshal.AsBytes(x[..8]); + var Y = MemoryMarshal.AsBytes(y[..8]); + var Z = MemoryMarshal.AsBytes(z[..8]); + + var X0 = MemoryMarshal.Read>(X[0x00..0x20]); + var X1 = MemoryMarshal.Read>(X[0x20..0x40]); + + var Y0 = MemoryMarshal.Read>(Y[0x00..0x20]); + var Y1 = MemoryMarshal.Read>(Y[0x20..0x40]); + + var Z0 = Avx2.Xor(X0, Y0); + var Z1 = Avx2.Xor(X1, Y1); + + MemoryMarshal.Write(Z[0x00..0x20], ref Z0); + MemoryMarshal.Write(Z[0x20..0x40], ref Z1); + return; + } + + if (Sse2.IsSupported && Unsafe.SizeOf>() == 16) + { + var X = MemoryMarshal.AsBytes(x[..8]); + var Y = MemoryMarshal.AsBytes(y[..8]); + var Z = MemoryMarshal.AsBytes(z[..8]); + + var X0 = MemoryMarshal.Read>(X[0x00..0x10]); + var X1 = MemoryMarshal.Read>(X[0x10..0x20]); + var X2 = MemoryMarshal.Read>(X[0x20..0x30]); + var X3 = MemoryMarshal.Read>(X[0x30..0x40]); + + var Y0 = MemoryMarshal.Read>(Y[0x00..0x10]); + var Y1 = MemoryMarshal.Read>(Y[0x10..0x20]); + var Y2 = MemoryMarshal.Read>(Y[0x20..0x30]); + var Y3 = MemoryMarshal.Read>(Y[0x30..0x40]); + + var Z0 = Sse2.Xor(X0, Y0); + var Z1 = Sse2.Xor(X1, Y1); + var Z2 = Sse2.Xor(X2, Y2); + var Z3 = Sse2.Xor(X3, Y3); + + MemoryMarshal.Write(Z[0x00..0x10], ref Z0); + MemoryMarshal.Write(Z[0x10..0x20], ref Z1); + MemoryMarshal.Write(Z[0x20..0x30], ref Z2); + MemoryMarshal.Write(Z[0x30..0x40], ref Z3); + return; + } +#endif + + for (int i = 0; i < 8; i += 4) + { + z[i + 0] = x[i + 0] ^ y[i + 0]; + z[i + 1] = x[i + 1] ^ y[i + 1]; + z[i + 2] = x[i + 2] ^ y[i + 2]; + 
z[i + 3] = x[i + 3] ^ y[i + 3]; + } + } +#endif + + public static void XorTo64(ulong[] x, int xOff, ulong[] z, int zOff) + { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + XorTo64(x.AsSpan(xOff), z.AsSpan(zOff)); +#else + for (int i = 0; i < 8; i += 4) + { + z[zOff + i + 0] ^= x[xOff + i + 0]; + z[zOff + i + 1] ^= x[xOff + i + 1]; + z[zOff + i + 2] ^= x[xOff + i + 2]; + z[zOff + i + 3] ^= x[xOff + i + 3]; + } +#endif + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + public static void XorTo64(ReadOnlySpan x, Span z) + { +#if NETCOREAPP3_0_OR_GREATER + if (Avx2.IsSupported && Unsafe.SizeOf>() == 32) + { + var X = MemoryMarshal.AsBytes(x[..8]); + var Z = MemoryMarshal.AsBytes(z[..8]); + + var X0 = MemoryMarshal.Read>(X[0x00..0x20]); + var X1 = MemoryMarshal.Read>(X[0x20..0x40]); + + var Y0 = MemoryMarshal.Read>(Z[0x00..0x20]); + var Y1 = MemoryMarshal.Read>(Z[0x20..0x40]); + + var Z0 = Avx2.Xor(X0, Y0); + var Z1 = Avx2.Xor(X1, Y1); + + MemoryMarshal.Write(Z[0x00..0x20], ref Z0); + MemoryMarshal.Write(Z[0x20..0x40], ref Z1); + return; + } + + if (Sse2.IsSupported && Unsafe.SizeOf>() == 16) + { + var X = MemoryMarshal.AsBytes(x[..8]); + var Z = MemoryMarshal.AsBytes(z[..8]); + + var X0 = MemoryMarshal.Read>(X[0x00..0x10]); + var X1 = MemoryMarshal.Read>(X[0x10..0x20]); + var X2 = MemoryMarshal.Read>(X[0x20..0x30]); + var X3 = MemoryMarshal.Read>(X[0x30..0x40]); + + var Y0 = MemoryMarshal.Read>(Z[0x00..0x10]); + var Y1 = MemoryMarshal.Read>(Z[0x10..0x20]); + var Y2 = MemoryMarshal.Read>(Z[0x20..0x30]); + var Y3 = MemoryMarshal.Read>(Z[0x30..0x40]); + + var Z0 = Sse2.Xor(X0, Y0); + var Z1 = Sse2.Xor(X1, Y1); + var Z2 = Sse2.Xor(X2, Y2); + var Z3 = Sse2.Xor(X3, Y3); + + MemoryMarshal.Write(Z[0x00..0x10], ref Z0); + MemoryMarshal.Write(Z[0x10..0x20], ref Z1); + MemoryMarshal.Write(Z[0x20..0x30], ref Z2); + MemoryMarshal.Write(Z[0x30..0x40], ref Z3); + return; + } +#endif + + for (int i = 0; i < 8; i += 4) + { + z[i + 0] ^= x[i + 0]; + z[i + 
1] ^= x[i + 1]; + z[i + 2] ^= x[i + 2]; + z[i + 3] ^= x[i + 3]; + } + } +#endif } } diff --git a/crypto/src/pqc/crypto/bike/BikeRing.cs b/crypto/src/pqc/crypto/bike/BikeRing.cs index c2b2102b8..9d317fa4b 100644 --- a/crypto/src/pqc/crypto/bike/BikeRing.cs +++ b/crypto/src/pqc/crypto/bike/BikeRing.cs @@ -30,10 +30,12 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike internal void Add(ulong[] x, ulong[] y, ulong[] z) { - for (int i = 0; i < Size; ++i) - { - z[i] = x[i] ^ y[i]; - } + Nat.Xor64(Size, x, y, z); + } + + internal void AddTo(ulong[] x, ulong[] z) + { + Nat.XorTo64(Size, x, z); } internal void Copy(ulong[] x, ulong[] z) @@ -170,12 +172,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike ulong c = Nat.ShiftUpBits64(Size, tt, Size, excessBits, tt[Size - 1], z, 0); Debug.Assert(c == 0UL); - - for (int i = 0; i < Size; ++i) - { - z[i] ^= tt[i]; - } - + AddTo(tt, z); z[Size - 1] &= partialMask; } -- cgit 1.4.1