summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT571Field.cs 20
-rw-r--r-- crypto/src/math/raw/Nat.cs 124
-rw-r--r-- crypto/src/math/raw/Nat512.cs 230
-rw-r--r-- crypto/src/pqc/crypto/bike/BikeRing.cs 17
4 files changed, 361 insertions, 30 deletions
diff --git a/crypto/src/math/ec/custom/sec/SecT571Field.cs b/crypto/src/math/ec/custom/sec/SecT571Field.cs
index 5a393409a..49eaae2d4 100644
--- a/crypto/src/math/ec/custom/sec/SecT571Field.cs
+++ b/crypto/src/math/ec/custom/sec/SecT571Field.cs
@@ -19,18 +19,12 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 
         public static void Add(ulong[] x, ulong[] y, ulong[] z)
         {
-            for (int i = 0; i < 9; ++i)
-            {
-                z[i] = x[i] ^ y[i]; 
-            }
+            Nat.Xor64(9, x, y, z);
         }
 
         private static void Add(ulong[] x, int xOff, ulong[] y, int yOff, ulong[] z, int zOff)
         {
-            for (int i = 0; i < 9; ++i)
-            {
-                z[zOff + i] = x[xOff + i] ^ y[yOff + i];
-            }
+            Nat.Xor64(9, x, xOff, y, yOff, z, zOff);
         }
 
         public static void AddBothTo(ulong[] x, ulong[] y, ulong[] z)
@@ -51,10 +45,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 
         public static void AddExt(ulong[] xx, ulong[] yy, ulong[] zz)
         {
-            for (int i = 0; i < 18; ++i)
-            {
-                zz[i] = xx[i] ^ yy[i]; 
-            }
+            Nat.Xor64(18, xx, yy, zz);
         }
 
         public static void AddOne(ulong[] x, ulong[] z)
@@ -68,10 +59,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 
         private static void AddTo(ulong[] x, ulong[] z)
         {
-            for (int i = 0; i < 9; ++i)
-            {
-                z[i] ^= x[i];
-            }
+            Nat.XorTo64(9, x, z);
         }
 
         public static ulong[] FromBigInteger(BigInteger x)
diff --git a/crypto/src/math/raw/Nat.cs b/crypto/src/math/raw/Nat.cs
index 89b6881d3..3bc983430 100644
--- a/crypto/src/math/raw/Nat.cs
+++ b/crypto/src/math/raw/Nat.cs
@@ -1580,6 +1580,9 @@ namespace Org.BouncyCastle.Math.Raw
 
         public static uint ShiftUpBit(int len, uint[] z, uint c)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBit(len, z.AsSpan(0, len), c);
+#else
             int i = 0, limit4 = len - 4;
             while (i <= limit4)
             {
@@ -1602,10 +1605,14 @@ namespace Org.BouncyCastle.Math.Raw
                 ++i;
             }
             return c >> 31;
+#endif
         }
 
         public static uint ShiftUpBit(int len, uint[] z, int zOff, uint c)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBit(len, z.AsSpan(zOff, len), c);
+#else
             int i = 0, limit4 = len - 4;
             while (i <= limit4)
             {
@@ -1628,6 +1635,7 @@ namespace Org.BouncyCastle.Math.Raw
                 ++i;
             }
             return c >> 31;
+#endif
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
@@ -1660,6 +1668,9 @@ namespace Org.BouncyCastle.Math.Raw
 
         public static uint ShiftUpBit(int len, uint[] x, uint c, uint[] z)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBit(len, x.AsSpan(0, len), c, z.AsSpan(0, len));
+#else
             int i = 0, limit4 = len - 4;
             while (i <= limit4)
             {
@@ -1682,10 +1693,14 @@ namespace Org.BouncyCastle.Math.Raw
                 ++i;
             }
             return c >> 31;
+#endif
         }
 
         public static uint ShiftUpBit(int len, uint[] x, int xOff, uint c, uint[] z, int zOff)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBit(len, x.AsSpan(xOff, len), c, z.AsSpan(zOff, len));
+#else
             int i = 0, limit4 = len - 4;
             while (i <= limit4)
             {
@@ -1708,6 +1723,7 @@ namespace Org.BouncyCastle.Math.Raw
                 ++i;
             }
             return c >> 31;
+#endif
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
@@ -1740,6 +1756,9 @@ namespace Org.BouncyCastle.Math.Raw
 
         public static ulong ShiftUpBit64(int len, ulong[] x, ulong c, ulong[] z)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBit64(len, x.AsSpan(0, len), c, z.AsSpan(0, len));
+#else
             int i = 0, limit4 = len - 4;
             while (i <= limit4)
             {
@@ -1762,10 +1781,14 @@ namespace Org.BouncyCastle.Math.Raw
                 ++i;
             }
             return c >> 63;
+#endif
         }
 
         public static ulong ShiftUpBit64(int len, ulong[] x, int xOff, ulong c, ulong[] z, int zOff)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBit64(len, x.AsSpan(xOff, len), c, z.AsSpan(zOff, len));
+#else
             int i = 0, limit4 = len - 4;
             while (i <= limit4)
             {
@@ -1788,6 +1811,7 @@ namespace Org.BouncyCastle.Math.Raw
                 ++i;
             }
             return c >> 63;
+#endif
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
@@ -1820,6 +1844,9 @@ namespace Org.BouncyCastle.Math.Raw
 
         public static uint ShiftUpBits(int len, uint[] z, int bits, uint c)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBits(len, z.AsSpan(0, len), bits, c);
+#else
             Debug.Assert(bits > 0 && bits < 32);
             int i = 0, limit4 = len - 4;
             while (i <= limit4)
@@ -1843,10 +1870,14 @@ namespace Org.BouncyCastle.Math.Raw
                 ++i;
             }
             return c >> -bits;
+#endif
         }
 
         public static uint ShiftUpBits(int len, uint[] z, int zOff, int bits, uint c)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBits(len, z.AsSpan(zOff, len), bits, c);
+#else
             Debug.Assert(bits > 0 && bits < 32);
             int i = 0, limit4 = len - 4;
             while (i <= limit4)
@@ -1870,6 +1901,7 @@ namespace Org.BouncyCastle.Math.Raw
                 ++i;
             }
             return c >> -bits;
+#endif
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
@@ -1903,6 +1935,9 @@ namespace Org.BouncyCastle.Math.Raw
 
         public static uint ShiftUpBits(int len, uint[] x, int bits, uint c, uint[] z)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBits(len, x.AsSpan(0, len), bits, c, z.AsSpan(0, len));
+#else
             Debug.Assert(bits > 0 && bits < 32);
             int i = 0, limit4 = len - 4;
             while (i <= limit4)
@@ -1926,10 +1961,14 @@ namespace Org.BouncyCastle.Math.Raw
                 ++i;
             }
             return c >> -bits;
+#endif
         }
 
         public static uint ShiftUpBits(int len, uint[] x, int xOff, int bits, uint c, uint[] z, int zOff)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBits(len, x.AsSpan(xOff, len), bits, c, z.AsSpan(zOff, len));
+#else
             Debug.Assert(bits > 0 && bits < 32);
             int i = 0, limit4 = len - 4;
             while (i <= limit4)
@@ -1953,6 +1992,7 @@ namespace Org.BouncyCastle.Math.Raw
                 ++i;
             }
             return c >> -bits;
+#endif
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
@@ -1986,6 +2026,9 @@ namespace Org.BouncyCastle.Math.Raw
 
         public static ulong ShiftUpBits64(int len, ulong[] z, int bits, ulong c)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBits64(len, z.AsSpan(0, len), bits, c);
+#else
             Debug.Assert(bits > 0 && bits < 64);
             int i = 0, limit4 = len - 4;
             while (i <= limit4)
@@ -2009,10 +2052,14 @@ namespace Org.BouncyCastle.Math.Raw
                 ++i;
             }
             return c >> -bits;
+#endif
         }
 
         public static ulong ShiftUpBits64(int len, ulong[] z, int zOff, int bits, ulong c)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBits64(len, z.AsSpan(zOff, len), bits, c);
+#else
             Debug.Assert(bits > 0 && bits < 64);
             int i = 0, limit4 = len - 4;
             while (i <= limit4)
@@ -2036,6 +2083,7 @@ namespace Org.BouncyCastle.Math.Raw
                 ++i;
             }
             return c >> -bits;
+#endif
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
@@ -2069,6 +2117,9 @@ namespace Org.BouncyCastle.Math.Raw
 
         public static ulong ShiftUpBits64(int len, ulong[] x, int bits, ulong c, ulong[] z)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBits64(len, x.AsSpan(0, len), bits, c, z.AsSpan(0, len));
+#else
             Debug.Assert(bits > 0 && bits < 64);
             int i = 0, limit4 = len - 4;
             while (i <= limit4)
@@ -2092,10 +2143,14 @@ namespace Org.BouncyCastle.Math.Raw
                 ++i;
             }
             return c >> -bits;
+#endif
         }
 
         public static ulong ShiftUpBits64(int len, ulong[] x, int xOff, int bits, ulong c, ulong[] z, int zOff)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBits64(len, x.AsSpan(xOff, len), bits, c, z.AsSpan(zOff, len));
+#else
             Debug.Assert(bits > 0 && bits < 64);
             int i = 0, limit4 = len - 4;
             while (i <= limit4)
@@ -2119,6 +2174,7 @@ namespace Org.BouncyCastle.Math.Raw
                 ++i;
             }
             return c >> -bits;
+#endif
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
@@ -2642,104 +2698,164 @@ namespace Org.BouncyCastle.Math.Raw
 
         public static void Xor(int len, uint[] x, uint[] y, uint[] z)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Xor(len, x.AsSpan(0, len), y.AsSpan(0, len), z.AsSpan(0, len));
+#else
             for (int i = 0; i < len; ++i)
             {
                 z[i] = x[i] ^ y[i];
             }
+#endif
         }
 
         public static void Xor(int len, uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Xor(len, x.AsSpan(xOff, len), y.AsSpan(yOff, len), z.AsSpan(zOff, len));
+#else
             for (int i = 0; i < len; ++i)
             {
                 z[zOff + i] = x[xOff + i] ^ y[yOff + i];
             }
+#endif
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         public static void Xor(int len, ReadOnlySpan<uint> x, ReadOnlySpan<uint> y, Span<uint> z)
         {
-            for (int i = 0; i < len; ++i)
+            int i = 0, limit16 = len - 16;
+            while (i <= limit16)
+            {
+                Nat512.Xor(x[i..], y[i..], z[i..]);
+                i += 16;
+            }
+            while (i < len)
             {
                 z[i] = x[i] ^ y[i];
+                ++i;
             }
         }
 #endif
 
         public static void Xor64(int len, ulong[] x, ulong[] y, ulong[] z)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Xor64(len, x.AsSpan(0, len), y.AsSpan(0, len), z.AsSpan(0, len));
+#else
             for (int i = 0; i < len; ++i)
             {
                 z[i] = x[i] ^ y[i];
             }
+#endif
         }
 
         public static void Xor64(int len, ulong[] x, int xOff, ulong[] y, int yOff, ulong[] z, int zOff)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Xor64(len, x.AsSpan(xOff, len), y.AsSpan(yOff, len), z.AsSpan(zOff, len));
+#else
             for (int i = 0; i < len; ++i)
             {
                 z[zOff + i] = x[xOff + i] ^ y[yOff + i];
             }
+#endif
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         public static void Xor64(int len, ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z)
         {
-            for (int i = 0; i < len; ++i)
+            int i = 0, limit8 = len - 8;
+            while (i <= limit8)
+            {
+                Nat512.Xor64(x[i..], y[i..], z[i..]);
+                i += 8;
+            }
+            while (i < len)
             {
                 z[i] = x[i] ^ y[i];
+                ++i;
             }
         }
 #endif
 
         public static void XorTo(int len, uint[] x, uint[] z)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            XorTo(len, x.AsSpan(0, len), z.AsSpan(0, len));
+#else
             for (int i = 0; i < len; ++i)
             {
                 z[i] ^= x[i];
             }
+#endif
         }
 
         public static void XorTo(int len, uint[] x, int xOff, uint[] z, int zOff)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            XorTo(len, x.AsSpan(xOff, len), z.AsSpan(zOff, len));
+#else
             for (int i = 0; i < len; ++i)
             {
                 z[zOff + i] ^= x[xOff + i];
             }
+#endif
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         public static void XorTo(int len, ReadOnlySpan<uint> x, Span<uint> z)
         {
-            for (int i = 0; i < len; ++i)
+            int i = 0, limit16 = len - 16;
+            while (i <= limit16)
+            {
+                Nat512.XorTo(x[i..], z[i..]);
+                i += 16;
+            }
+            while (i < len)
             {
                 z[i] ^= x[i];
+                ++i;
             }
         }
 #endif
 
         public static void XorTo64(int len, ulong[] x, ulong[] z)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            XorTo64(len, x.AsSpan(0, len), z.AsSpan(0, len));
+#else
             for (int i = 0; i < len; ++i)
             {
                 z[i] ^= x[i];
             }
+#endif
         }
 
         public static void XorTo64(int len, ulong[] x, int xOff, ulong[] z, int zOff)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            XorTo64(len, x.AsSpan(xOff, len), z.AsSpan(zOff, len));
+#else
             for (int i = 0; i < len; ++i)
             {
                 z[zOff + i] ^= x[xOff + i];
             }
+#endif
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         public static void XorTo64(int len, ReadOnlySpan<ulong> x, Span<ulong> z)
         {
-            for (int i = 0; i < len; ++i)
+            int i = 0, limit8 = len - 8;
+            while (i <= limit8)
+            {
+                Nat512.XorTo64(x[i..], z[i..]);
+                i += 8;
+            }
+            while (i < len)
             {
                 z[i] ^= x[i];
+                ++i;
             }
         }
 #endif
diff --git a/crypto/src/math/raw/Nat512.cs b/crypto/src/math/raw/Nat512.cs
index d94ae40da..2312e1cf2 100644
--- a/crypto/src/math/raw/Nat512.cs
+++ b/crypto/src/math/raw/Nat512.cs
@@ -125,5 +125,235 @@ namespace Org.BouncyCastle.Math.Raw
             }
         }
 #endif
+
+        public static void XorTo(uint[] x, int xOff, uint[] z, int zOff)
+        {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            XorTo(x.AsSpan(xOff), z.AsSpan(zOff));
+#else
+            for (int i = 0; i < 16; i += 4)
+            {
+                z[zOff + i + 0] ^= x[xOff + i + 0];
+                z[zOff + i + 1] ^= x[xOff + i + 1];
+                z[zOff + i + 2] ^= x[xOff + i + 2];
+                z[zOff + i + 3] ^= x[xOff + i + 3];
+            }
+#endif
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void XorTo(ReadOnlySpan<uint> x, Span<uint> z)
+        {
+#if NETCOREAPP3_0_OR_GREATER
+            if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32)
+            {
+                var X = MemoryMarshal.AsBytes(x[..16]);
+                var Z = MemoryMarshal.AsBytes(z[..16]);
+
+                var X0 = MemoryMarshal.Read<Vector256<byte>>(X[0x00..0x20]);
+                var X1 = MemoryMarshal.Read<Vector256<byte>>(X[0x20..0x40]);
+
+                var Y0 = MemoryMarshal.Read<Vector256<byte>>(Z[0x00..0x20]);
+                var Y1 = MemoryMarshal.Read<Vector256<byte>>(Z[0x20..0x40]);
+
+                var Z0 = Avx2.Xor(X0, Y0);
+                var Z1 = Avx2.Xor(X1, Y1);
+
+                MemoryMarshal.Write(Z[0x00..0x20], ref Z0);
+                MemoryMarshal.Write(Z[0x20..0x40], ref Z1);
+                return;
+            }
+
+            if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16)
+            {
+                var X = MemoryMarshal.AsBytes(x[..16]);
+                var Z = MemoryMarshal.AsBytes(z[..16]);
+
+                var X0 = MemoryMarshal.Read<Vector128<byte>>(X[0x00..0x10]);
+                var X1 = MemoryMarshal.Read<Vector128<byte>>(X[0x10..0x20]);
+                var X2 = MemoryMarshal.Read<Vector128<byte>>(X[0x20..0x30]);
+                var X3 = MemoryMarshal.Read<Vector128<byte>>(X[0x30..0x40]);
+
+                var Y0 = MemoryMarshal.Read<Vector128<byte>>(Z[0x00..0x10]);
+                var Y1 = MemoryMarshal.Read<Vector128<byte>>(Z[0x10..0x20]);
+                var Y2 = MemoryMarshal.Read<Vector128<byte>>(Z[0x20..0x30]);
+                var Y3 = MemoryMarshal.Read<Vector128<byte>>(Z[0x30..0x40]);
+
+                var Z0 = Sse2.Xor(X0, Y0);
+                var Z1 = Sse2.Xor(X1, Y1);
+                var Z2 = Sse2.Xor(X2, Y2);
+                var Z3 = Sse2.Xor(X3, Y3);
+
+                MemoryMarshal.Write(Z[0x00..0x10], ref Z0);
+                MemoryMarshal.Write(Z[0x10..0x20], ref Z1);
+                MemoryMarshal.Write(Z[0x20..0x30], ref Z2);
+                MemoryMarshal.Write(Z[0x30..0x40], ref Z3);
+                return;
+            }
+#endif
+
+            for (int i = 0; i < 16; i += 4)
+            {
+                z[i + 0] ^= x[i + 0];
+                z[i + 1] ^= x[i + 1];
+                z[i + 2] ^= x[i + 2];
+                z[i + 3] ^= x[i + 3];
+            }
+        }
+#endif
+
+        public static void Xor64(ulong[] x, int xOff, ulong[] y, int yOff, ulong[] z, int zOff)
+        {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Xor64(x.AsSpan(xOff), y.AsSpan(yOff), z.AsSpan(zOff));
+#else
+            for (int i = 0; i < 8; i += 4)
+            {
+                z[zOff + i + 0] = x[xOff + i + 0] ^ y[yOff + i + 0];
+                z[zOff + i + 1] = x[xOff + i + 1] ^ y[yOff + i + 1];
+                z[zOff + i + 2] = x[xOff + i + 2] ^ y[yOff + i + 2];
+                z[zOff + i + 3] = x[xOff + i + 3] ^ y[yOff + i + 3];
+            }
+#endif
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void Xor64(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z)
+        {
+#if NETCOREAPP3_0_OR_GREATER
+            if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32)
+            {
+                var X = MemoryMarshal.AsBytes(x[..8]);
+                var Y = MemoryMarshal.AsBytes(y[..8]);
+                var Z = MemoryMarshal.AsBytes(z[..8]);
+
+                var X0 = MemoryMarshal.Read<Vector256<byte>>(X[0x00..0x20]);
+                var X1 = MemoryMarshal.Read<Vector256<byte>>(X[0x20..0x40]);
+
+                var Y0 = MemoryMarshal.Read<Vector256<byte>>(Y[0x00..0x20]);
+                var Y1 = MemoryMarshal.Read<Vector256<byte>>(Y[0x20..0x40]);
+
+                var Z0 = Avx2.Xor(X0, Y0);
+                var Z1 = Avx2.Xor(X1, Y1);
+
+                MemoryMarshal.Write(Z[0x00..0x20], ref Z0);
+                MemoryMarshal.Write(Z[0x20..0x40], ref Z1);
+                return;
+            }
+
+            if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16)
+            {
+                var X = MemoryMarshal.AsBytes(x[..8]);
+                var Y = MemoryMarshal.AsBytes(y[..8]);
+                var Z = MemoryMarshal.AsBytes(z[..8]);
+
+                var X0 = MemoryMarshal.Read<Vector128<byte>>(X[0x00..0x10]);
+                var X1 = MemoryMarshal.Read<Vector128<byte>>(X[0x10..0x20]);
+                var X2 = MemoryMarshal.Read<Vector128<byte>>(X[0x20..0x30]);
+                var X3 = MemoryMarshal.Read<Vector128<byte>>(X[0x30..0x40]);
+
+                var Y0 = MemoryMarshal.Read<Vector128<byte>>(Y[0x00..0x10]);
+                var Y1 = MemoryMarshal.Read<Vector128<byte>>(Y[0x10..0x20]);
+                var Y2 = MemoryMarshal.Read<Vector128<byte>>(Y[0x20..0x30]);
+                var Y3 = MemoryMarshal.Read<Vector128<byte>>(Y[0x30..0x40]);
+
+                var Z0 = Sse2.Xor(X0, Y0);
+                var Z1 = Sse2.Xor(X1, Y1);
+                var Z2 = Sse2.Xor(X2, Y2);
+                var Z3 = Sse2.Xor(X3, Y3);
+
+                MemoryMarshal.Write(Z[0x00..0x10], ref Z0);
+                MemoryMarshal.Write(Z[0x10..0x20], ref Z1);
+                MemoryMarshal.Write(Z[0x20..0x30], ref Z2);
+                MemoryMarshal.Write(Z[0x30..0x40], ref Z3);
+                return;
+            }
+#endif
+
+            for (int i = 0; i < 8; i += 4)
+            {
+                z[i + 0] = x[i + 0] ^ y[i + 0];
+                z[i + 1] = x[i + 1] ^ y[i + 1];
+                z[i + 2] = x[i + 2] ^ y[i + 2];
+                z[i + 3] = x[i + 3] ^ y[i + 3];
+            }
+        }
+#endif
+
+        public static void XorTo64(ulong[] x, int xOff, ulong[] z, int zOff)
+        {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            XorTo64(x.AsSpan(xOff), z.AsSpan(zOff));
+#else
+            for (int i = 0; i < 8; i += 4)
+            {
+                z[zOff + i + 0] ^= x[xOff + i + 0];
+                z[zOff + i + 1] ^= x[xOff + i + 1];
+                z[zOff + i + 2] ^= x[xOff + i + 2];
+                z[zOff + i + 3] ^= x[xOff + i + 3];
+            }
+#endif
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void XorTo64(ReadOnlySpan<ulong> x, Span<ulong> z)
+        {
+#if NETCOREAPP3_0_OR_GREATER
+            if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32)
+            {
+                var X = MemoryMarshal.AsBytes(x[..8]);
+                var Z = MemoryMarshal.AsBytes(z[..8]);
+
+                var X0 = MemoryMarshal.Read<Vector256<byte>>(X[0x00..0x20]);
+                var X1 = MemoryMarshal.Read<Vector256<byte>>(X[0x20..0x40]);
+
+                var Y0 = MemoryMarshal.Read<Vector256<byte>>(Z[0x00..0x20]);
+                var Y1 = MemoryMarshal.Read<Vector256<byte>>(Z[0x20..0x40]);
+
+                var Z0 = Avx2.Xor(X0, Y0);
+                var Z1 = Avx2.Xor(X1, Y1);
+
+                MemoryMarshal.Write(Z[0x00..0x20], ref Z0);
+                MemoryMarshal.Write(Z[0x20..0x40], ref Z1);
+                return;
+            }
+
+            if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16)
+            {
+                var X = MemoryMarshal.AsBytes(x[..8]);
+                var Z = MemoryMarshal.AsBytes(z[..8]);
+
+                var X0 = MemoryMarshal.Read<Vector128<byte>>(X[0x00..0x10]);
+                var X1 = MemoryMarshal.Read<Vector128<byte>>(X[0x10..0x20]);
+                var X2 = MemoryMarshal.Read<Vector128<byte>>(X[0x20..0x30]);
+                var X3 = MemoryMarshal.Read<Vector128<byte>>(X[0x30..0x40]);
+
+                var Y0 = MemoryMarshal.Read<Vector128<byte>>(Z[0x00..0x10]);
+                var Y1 = MemoryMarshal.Read<Vector128<byte>>(Z[0x10..0x20]);
+                var Y2 = MemoryMarshal.Read<Vector128<byte>>(Z[0x20..0x30]);
+                var Y3 = MemoryMarshal.Read<Vector128<byte>>(Z[0x30..0x40]);
+
+                var Z0 = Sse2.Xor(X0, Y0);
+                var Z1 = Sse2.Xor(X1, Y1);
+                var Z2 = Sse2.Xor(X2, Y2);
+                var Z3 = Sse2.Xor(X3, Y3);
+
+                MemoryMarshal.Write(Z[0x00..0x10], ref Z0);
+                MemoryMarshal.Write(Z[0x10..0x20], ref Z1);
+                MemoryMarshal.Write(Z[0x20..0x30], ref Z2);
+                MemoryMarshal.Write(Z[0x30..0x40], ref Z3);
+                return;
+            }
+#endif
+
+            for (int i = 0; i < 8; i += 4)
+            {
+                z[i + 0] ^= x[i + 0];
+                z[i + 1] ^= x[i + 1];
+                z[i + 2] ^= x[i + 2];
+                z[i + 3] ^= x[i + 3];
+            }
+        }
+#endif
     }
 }
diff --git a/crypto/src/pqc/crypto/bike/BikeRing.cs b/crypto/src/pqc/crypto/bike/BikeRing.cs
index c2b2102b8..9d317fa4b 100644
--- a/crypto/src/pqc/crypto/bike/BikeRing.cs
+++ b/crypto/src/pqc/crypto/bike/BikeRing.cs
@@ -30,10 +30,12 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
 
         internal void Add(ulong[] x, ulong[] y, ulong[] z)
         {
-            for (int i = 0; i < Size; ++i)
-            {
-                z[i] = x[i] ^ y[i];
-            }
+            Nat.Xor64(Size, x, y, z);
+        }
+
+        internal void AddTo(ulong[] x, ulong[] z)
+        {
+            Nat.XorTo64(Size, x, z);
         }
 
         internal void Copy(ulong[] x, ulong[] z)
@@ -170,12 +172,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
 
             ulong c = Nat.ShiftUpBits64(Size, tt, Size, excessBits, tt[Size - 1], z, 0);
             Debug.Assert(c == 0UL);
-
-            for (int i = 0; i < Size; ++i)
-            {
-                z[i] ^= tt[i];
-            }
-
+            AddTo(tt, z);
             z[Size - 1] &= partialMask;
         }