summary refs log tree commit diff
path: root/crypto/src/math/raw
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/src/math/raw')
-rw-r--r-- crypto/src/math/raw/Mod.cs 29
-rw-r--r-- crypto/src/math/raw/Nat.cs 593
-rw-r--r-- crypto/src/math/raw/Nat256.cs 71
-rw-r--r-- crypto/src/math/raw/Nat512.cs 315
4 files changed, 971 insertions, 37 deletions
diff --git a/crypto/src/math/raw/Mod.cs b/crypto/src/math/raw/Mod.cs
index ea61bdd83..721134b0c 100644
--- a/crypto/src/math/raw/Mod.cs
+++ b/crypto/src/math/raw/Mod.cs
@@ -12,10 +12,8 @@ namespace Org.BouncyCastle.Math.Raw
      * computation and modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
      */
 
-    internal abstract class Mod
+    internal static class Mod
     {
-        private static readonly SecureRandom RandomSource = new SecureRandom();
-
         private const int M30 = 0x3FFFFFFF;
         private const ulong M32UL = 0xFFFFFFFFUL;
 
@@ -41,7 +39,7 @@ namespace Org.BouncyCastle.Math.Raw
 
         public static uint Inverse32(uint d)
         {
-            Debug.Assert((d & 1) == 1);
+            Debug.Assert((d & 1U) == 1U);
 
             //int x = d + (((d + 1) & 4) << 1);   // d.x == 1 mod 2**4
             uint x = d;                         // d.x == 1 mod 2**3
@@ -53,6 +51,21 @@ namespace Org.BouncyCastle.Math.Raw
             return x;
         }
 
+        public static ulong Inverse64(ulong d)
+        {
+            Debug.Assert((d & 1UL) == 1UL);
+
+            //ulong x = d + (((d + 1) & 4) << 1);   // d.x == 1 mod 2**4
+            ulong x = d;                            // d.x == 1 mod 2**3
+            x *= 2 - d * x;                         // d.x == 1 mod 2**6
+            x *= 2 - d * x;                         // d.x == 1 mod 2**12
+            x *= 2 - d * x;                         // d.x == 1 mod 2**24
+            x *= 2 - d * x;                         // d.x == 1 mod 2**48
+            x *= 2 - d * x;                         // d.x == 1 mod 2**96
+            Debug.Assert(d * x == 1UL);
+            return x;
+        }
+
         public static uint ModOddInverse(uint[] m, uint[] x, uint[] z)
         {
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
@@ -349,7 +362,7 @@ namespace Org.BouncyCastle.Math.Raw
         }
 #endif
 
-        public static uint[] Random(uint[] p)
+        public static uint[] Random(SecureRandom random, uint[] p)
         {
             int len = p.Length;
             uint[] s = Nat.Create(len);
@@ -364,7 +377,7 @@ namespace Org.BouncyCastle.Math.Raw
             byte[] bytes = new byte[len << 2];
             do
             {
-                RandomSource.NextBytes(bytes);
+                random.NextBytes(bytes);
                 Pack.BE_To_UInt32(bytes, 0, s);
                 s[len - 1] &= m;
             }
@@ -374,7 +387,7 @@ namespace Org.BouncyCastle.Math.Raw
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        public static void Random(ReadOnlySpan<uint> p, Span<uint> z)
+        public static void Random(SecureRandom random, ReadOnlySpan<uint> p, Span<uint> z)
         {
             int len = p.Length;
             if (z.Length < len)
@@ -395,7 +408,7 @@ namespace Org.BouncyCastle.Math.Raw
 
             do
             {
-                RandomSource.NextBytes(bytes);
+                random.NextBytes(bytes);
                 Pack.BE_To_UInt32(bytes, s);
                 s[len - 1] &= m;
             }
diff --git a/crypto/src/math/raw/Nat.cs b/crypto/src/math/raw/Nat.cs
index 09c263f4d..3bc983430 100644
--- a/crypto/src/math/raw/Nat.cs
+++ b/crypto/src/math/raw/Nat.cs
@@ -1580,34 +1580,87 @@ namespace Org.BouncyCastle.Math.Raw
 
         public static uint ShiftUpBit(int len, uint[] z, uint c)
         {
-            for (int i = 0; i < len; ++i)
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBit(len, z.AsSpan(0, len), c);
+#else
+            int i = 0, limit4 = len - 4;
+            while (i <= limit4)
+            {
+                uint next0 = z[i + 0];
+                uint next1 = z[i + 1];
+                uint next2 = z[i + 2];
+                uint next3 = z[i + 3];
+                z[i + 0] = (next0 << 1) | (c     >> 31);
+                z[i + 1] = (next1 << 1) | (next0 >> 31);
+                z[i + 2] = (next2 << 1) | (next1 >> 31);
+                z[i + 3] = (next3 << 1) | (next2 >> 31);
+                c = next3;
+                i += 4;
+            }
+            while (i < len)
             {
                 uint next = z[i];
                 z[i] = (next << 1) | (c >> 31);
                 c = next;
+                ++i;
             }
             return c >> 31;
+#endif
         }
 
         public static uint ShiftUpBit(int len, uint[] z, int zOff, uint c)
         {
-            for (int i = 0; i < len; ++i)
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBit(len, z.AsSpan(zOff, len), c);
+#else
+            int i = 0, limit4 = len - 4;
+            while (i <= limit4)
+            {
+                uint next0 = z[zOff + i + 0];
+                uint next1 = z[zOff + i + 1];
+                uint next2 = z[zOff + i + 2];
+                uint next3 = z[zOff + i + 3];
+                z[zOff + i + 0] = (next0 << 1) | (c     >> 31);
+                z[zOff + i + 1] = (next1 << 1) | (next0 >> 31);
+                z[zOff + i + 2] = (next2 << 1) | (next1 >> 31);
+                z[zOff + i + 3] = (next3 << 1) | (next2 >> 31);
+                c = next3;
+                i += 4;
+            }
+            while (i < len)
             {
                 uint next = z[zOff + i];
                 z[zOff + i] = (next << 1) | (c >> 31);
                 c = next;
+                ++i;
             }
             return c >> 31;
+#endif
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         public static uint ShiftUpBit(int len, Span<uint> z, uint c)
         {
-            for (int i = 0; i < len; ++i)
+            int i = 0, limit4 = len - 4;
+            while (i <= limit4)
+            {
+                uint next0 = z[i + 0];
+                uint next1 = z[i + 1];
+                uint next2 = z[i + 2];
+                uint next3 = z[i + 3];
+                z[i + 0] = (next0 << 1) | (c     >> 31);
+                z[i + 1] = (next1 << 1) | (next0 >> 31);
+                z[i + 2] = (next2 << 1) | (next1 >> 31);
+                z[i + 3] = (next3 << 1) | (next2 >> 31);
+                c = next3;
+                i += 4;
+            }
+            while (i < len)
             {
                 uint next = z[i];
                 z[i] = (next << 1) | (c >> 31);
                 c = next;
+                ++i;
             }
             return c >> 31;
         }
@@ -1615,34 +1668,87 @@ namespace Org.BouncyCastle.Math.Raw
 
         public static uint ShiftUpBit(int len, uint[] x, uint c, uint[] z)
         {
-            for (int i = 0; i < len; ++i)
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBit(len, x.AsSpan(0, len), c, z.AsSpan(0, len));
+#else
+            int i = 0, limit4 = len - 4;
+            while (i <= limit4)
+            {
+                uint next0 = x[i + 0];
+                uint next1 = x[i + 1];
+                uint next2 = x[i + 2];
+                uint next3 = x[i + 3];
+                z[i + 0] = (next0 << 1) | (c     >> 31);
+                z[i + 1] = (next1 << 1) | (next0 >> 31);
+                z[i + 2] = (next2 << 1) | (next1 >> 31);
+                z[i + 3] = (next3 << 1) | (next2 >> 31);
+                c = next3;
+                i += 4;
+            }
+            while (i < len)
             {
                 uint next = x[i];
                 z[i] = (next << 1) | (c >> 31);
                 c = next;
+                ++i;
             }
             return c >> 31;
+#endif
         }
 
         public static uint ShiftUpBit(int len, uint[] x, int xOff, uint c, uint[] z, int zOff)
         {
-            for (int i = 0; i < len; ++i)
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBit(len, x.AsSpan(xOff, len), c, z.AsSpan(zOff, len));
+#else
+            int i = 0, limit4 = len - 4;
+            while (i <= limit4)
+            {
+                uint next0 = x[xOff + i + 0];
+                uint next1 = x[xOff + i + 1];
+                uint next2 = x[xOff + i + 2];
+                uint next3 = x[xOff + i + 3];
+                z[zOff + i + 0] = (next0 << 1) | (c     >> 31);
+                z[zOff + i + 1] = (next1 << 1) | (next0 >> 31);
+                z[zOff + i + 2] = (next2 << 1) | (next1 >> 31);
+                z[zOff + i + 3] = (next3 << 1) | (next2 >> 31);
+                c = next3;
+                i += 4;
+            }
+            while (i < len)
             {
                 uint next = x[xOff + i];
                 z[zOff + i] = (next << 1) | (c >> 31);
                 c = next;
+                ++i;
             }
             return c >> 31;
+#endif
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         public static uint ShiftUpBit(int len, ReadOnlySpan<uint> x, uint c, Span<uint> z)
         {
-            for (int i = 0; i < len; ++i)
+            int i = 0, limit4 = len - 4;
+            while (i <= limit4)
+            {
+                uint next0 = x[i + 0];
+                uint next1 = x[i + 1];
+                uint next2 = x[i + 2];
+                uint next3 = x[i + 3];
+                z[i + 0] = (next0 << 1) | (c     >> 31);
+                z[i + 1] = (next1 << 1) | (next0 >> 31);
+                z[i + 2] = (next2 << 1) | (next1 >> 31);
+                z[i + 3] = (next3 << 1) | (next2 >> 31);
+                c = next3;
+                i += 4;
+            }
+            while (i < len)
             {
                 uint next = x[i];
                 z[i] = (next << 1) | (c >> 31);
                 c = next;
+                ++i;
             }
             return c >> 31;
         }
@@ -1650,34 +1756,87 @@ namespace Org.BouncyCastle.Math.Raw
 
         public static ulong ShiftUpBit64(int len, ulong[] x, ulong c, ulong[] z)
         {
-            for (int i = 0; i < len; ++i)
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBit64(len, x.AsSpan(0, len), c, z.AsSpan(0, len));
+#else
+            int i = 0, limit4 = len - 4;
+            while (i <= limit4)
+            {
+                ulong next0 = x[i + 0];
+                ulong next1 = x[i + 1];
+                ulong next2 = x[i + 2];
+                ulong next3 = x[i + 3];
+                z[i + 0] = (next0 << 1) | (c     >> 63);
+                z[i + 1] = (next1 << 1) | (next0 >> 63);
+                z[i + 2] = (next2 << 1) | (next1 >> 63);
+                z[i + 3] = (next3 << 1) | (next2 >> 63);
+                c = next3;
+                i += 4;
+            }
+            while (i < len)
             {
                 ulong next = x[i];
                 z[i] = (next << 1) | (c >> 63);
                 c = next;
+                ++i;
             }
             return c >> 63;
+#endif
         }
 
         public static ulong ShiftUpBit64(int len, ulong[] x, int xOff, ulong c, ulong[] z, int zOff)
         {
-            for (int i = 0; i < len; ++i)
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBit64(len, x.AsSpan(xOff, len), c, z.AsSpan(zOff, len));
+#else
+            int i = 0, limit4 = len - 4;
+            while (i <= limit4)
+            {
+                ulong next0 = x[xOff + i + 0];
+                ulong next1 = x[xOff + i + 1];
+                ulong next2 = x[xOff + i + 2];
+                ulong next3 = x[xOff + i + 3];
+                z[zOff + i + 0] = (next0 << 1) | (c     >> 63);
+                z[zOff + i + 1] = (next1 << 1) | (next0 >> 63);
+                z[zOff + i + 2] = (next2 << 1) | (next1 >> 63);
+                z[zOff + i + 3] = (next3 << 1) | (next2 >> 63);
+                c = next3;
+                i += 4;
+            }
+            while (i < len)
             {
                 ulong next = x[xOff + i];
                 z[zOff + i] = (next << 1) | (c >> 63);
                 c = next;
+                ++i;
             }
             return c >> 63;
+#endif
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         public static ulong ShiftUpBit64(int len, ReadOnlySpan<ulong> x, ulong c, Span<ulong> z)
         {
-            for (int i = 0; i < len; ++i)
+            int i = 0, limit4 = len - 4;
+            while (i <= limit4)
+            {
+                ulong next0 = x[i + 0];
+                ulong next1 = x[i + 1];
+                ulong next2 = x[i + 2];
+                ulong next3 = x[i + 3];
+                z[i + 0] = (next0 << 1) | (c     >> 63);
+                z[i + 1] = (next1 << 1) | (next0 >> 63);
+                z[i + 2] = (next2 << 1) | (next1 >> 63);
+                z[i + 3] = (next3 << 1) | (next2 >> 63);
+                c = next3;
+                i += 4;
+            }
+            while (i < len)
             {
                 ulong next = x[i];
                 z[i] = (next << 1) | (c >> 63);
                 c = next;
+                ++i;
             }
             return c >> 63;
         }
@@ -1685,37 +1844,90 @@ namespace Org.BouncyCastle.Math.Raw
 
         public static uint ShiftUpBits(int len, uint[] z, int bits, uint c)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBits(len, z.AsSpan(0, len), bits, c);
+#else
             Debug.Assert(bits > 0 && bits < 32);
-            for (int i = 0; i < len; ++i)
+            int i = 0, limit4 = len - 4;
+            while (i <= limit4)
+            {
+                uint next0 = z[i + 0];
+                uint next1 = z[i + 1];
+                uint next2 = z[i + 2];
+                uint next3 = z[i + 3];
+                z[i + 0] = (next0 << bits) | (c     >> -bits);
+                z[i + 1] = (next1 << bits) | (next0 >> -bits);
+                z[i + 2] = (next2 << bits) | (next1 >> -bits);
+                z[i + 3] = (next3 << bits) | (next2 >> -bits);
+                c = next3;
+                i += 4;
+            }
+            while (i < len)
             {
                 uint next = z[i];
                 z[i] = (next << bits) | (c >> -bits);
                 c = next;
+                ++i;
             }
             return c >> -bits;
+#endif
         }
 
         public static uint ShiftUpBits(int len, uint[] z, int zOff, int bits, uint c)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBits(len, z.AsSpan(zOff, len), bits, c);
+#else
             Debug.Assert(bits > 0 && bits < 32);
-            for (int i = 0; i < len; ++i)
+            int i = 0, limit4 = len - 4;
+            while (i <= limit4)
+            {
+                uint next0 = z[zOff + i + 0];
+                uint next1 = z[zOff + i + 1];
+                uint next2 = z[zOff + i + 2];
+                uint next3 = z[zOff + i + 3];
+                z[zOff + i + 0] = (next0 << bits) | (c     >> -bits);
+                z[zOff + i + 1] = (next1 << bits) | (next0 >> -bits);
+                z[zOff + i + 2] = (next2 << bits) | (next1 >> -bits);
+                z[zOff + i + 3] = (next3 << bits) | (next2 >> -bits);
+                c = next3;
+                i += 4;
+            }
+            while (i < len)
             {
                 uint next = z[zOff + i];
                 z[zOff + i] = (next << bits) | (c >> -bits);
                 c = next;
+                ++i;
             }
             return c >> -bits;
+#endif
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         public static uint ShiftUpBits(int len, Span<uint> z, int bits, uint c)
         {
             Debug.Assert(bits > 0 && bits < 32);
-            for (int i = 0; i < len; ++i)
+            int i = 0, limit4 = len - 4;
+            while (i <= limit4)
+            {
+                uint next0 = z[i + 0];
+                uint next1 = z[i + 1];
+                uint next2 = z[i + 2];
+                uint next3 = z[i + 3];
+                z[i + 0] = (next0 << bits) | (c     >> -bits);
+                z[i + 1] = (next1 << bits) | (next0 >> -bits);
+                z[i + 2] = (next2 << bits) | (next1 >> -bits);
+                z[i + 3] = (next3 << bits) | (next2 >> -bits);
+                c = next3;
+                i += 4;
+            }
+            while (i < len)
             {
                 uint next = z[i];
                 z[i] = (next << bits) | (c >> -bits);
                 c = next;
+                ++i;
             }
             return c >> -bits;
         }
@@ -1723,37 +1935,90 @@ namespace Org.BouncyCastle.Math.Raw
 
         public static uint ShiftUpBits(int len, uint[] x, int bits, uint c, uint[] z)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBits(len, x.AsSpan(0, len), bits, c, z.AsSpan(0, len));
+#else
             Debug.Assert(bits > 0 && bits < 32);
-            for (int i = 0; i < len; ++i)
+            int i = 0, limit4 = len - 4;
+            while (i <= limit4)
+            {
+                uint next0 = x[i + 0];
+                uint next1 = x[i + 1];
+                uint next2 = x[i + 2];
+                uint next3 = x[i + 3];
+                z[i + 0] = (next0 << bits) | (c     >> -bits);
+                z[i + 1] = (next1 << bits) | (next0 >> -bits);
+                z[i + 2] = (next2 << bits) | (next1 >> -bits);
+                z[i + 3] = (next3 << bits) | (next2 >> -bits);
+                c = next3;
+                i += 4;
+            }
+            while (i < len)
             {
                 uint next = x[i];
                 z[i] = (next << bits) | (c >> -bits);
                 c = next;
+                ++i;
             }
             return c >> -bits;
+#endif
         }
 
         public static uint ShiftUpBits(int len, uint[] x, int xOff, int bits, uint c, uint[] z, int zOff)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBits(len, x.AsSpan(xOff, len), bits, c, z.AsSpan(zOff, len));
+#else
             Debug.Assert(bits > 0 && bits < 32);
-            for (int i = 0; i < len; ++i)
+            int i = 0, limit4 = len - 4;
+            while (i <= limit4)
+            {
+                uint next0 = x[xOff + i + 0];
+                uint next1 = x[xOff + i + 1];
+                uint next2 = x[xOff + i + 2];
+                uint next3 = x[xOff + i + 3];
+                z[zOff + i + 0] = (next0 << bits) | (c     >> -bits);
+                z[zOff + i + 1] = (next1 << bits) | (next0 >> -bits);
+                z[zOff + i + 2] = (next2 << bits) | (next1 >> -bits);
+                z[zOff + i + 3] = (next3 << bits) | (next2 >> -bits);
+                c = next3;
+                i += 4;
+            }
+            while (i < len)
             {
                 uint next = x[xOff + i];
                 z[zOff + i] = (next << bits) | (c >> -bits);
                 c = next;
+                ++i;
             }
             return c >> -bits;
+#endif
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         public static uint ShiftUpBits(int len, ReadOnlySpan<uint> x, int bits, uint c, Span<uint> z)
         {
             Debug.Assert(bits > 0 && bits < 32);
-            for (int i = 0; i < len; ++i)
+            int i = 0, limit4 = len - 4;
+            while (i <= limit4)
+            {
+                uint next0 = x[i + 0];
+                uint next1 = x[i + 1];
+                uint next2 = x[i + 2];
+                uint next3 = x[i + 3];
+                z[i + 0] = (next0 << bits) | (c     >> -bits);
+                z[i + 1] = (next1 << bits) | (next0 >> -bits);
+                z[i + 2] = (next2 << bits) | (next1 >> -bits);
+                z[i + 3] = (next3 << bits) | (next2 >> -bits);
+                c = next3;
+                i += 4;
+            }
+            while (i < len)
             {
                 uint next = x[i];
                 z[i] = (next << bits) | (c >> -bits);
                 c = next;
+                ++i;
             }
             return c >> -bits;
         }
@@ -1761,37 +2026,90 @@ namespace Org.BouncyCastle.Math.Raw
 
         public static ulong ShiftUpBits64(int len, ulong[] z, int bits, ulong c)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBits64(len, z.AsSpan(0, len), bits, c);
+#else
             Debug.Assert(bits > 0 && bits < 64);
-            for (int i = 0; i < len; ++i)
+            int i = 0, limit4 = len - 4;
+            while (i <= limit4)
+            {
+                ulong next0 = z[i + 0];
+                ulong next1 = z[i + 1];
+                ulong next2 = z[i + 2];
+                ulong next3 = z[i + 3];
+                z[i + 0] = (next0 << bits) | (c     >> -bits);
+                z[i + 1] = (next1 << bits) | (next0 >> -bits);
+                z[i + 2] = (next2 << bits) | (next1 >> -bits);
+                z[i + 3] = (next3 << bits) | (next2 >> -bits);
+                c = next3;
+                i += 4;
+            }
+            while (i < len)
             {
                 ulong next = z[i];
                 z[i] = (next << bits) | (c >> -bits);
                 c = next;
+                ++i;
             }
             return c >> -bits;
+#endif
         }
 
         public static ulong ShiftUpBits64(int len, ulong[] z, int zOff, int bits, ulong c)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBits64(len, z.AsSpan(zOff, len), bits, c);
+#else
             Debug.Assert(bits > 0 && bits < 64);
-            for (int i = 0; i < len; ++i)
+            int i = 0, limit4 = len - 4;
+            while (i <= limit4)
+            {
+                ulong next0 = z[zOff + i + 0];
+                ulong next1 = z[zOff + i + 1];
+                ulong next2 = z[zOff + i + 2];
+                ulong next3 = z[zOff + i + 3];
+                z[zOff + i + 0] = (next0 << bits) | (c     >> -bits);
+                z[zOff + i + 1] = (next1 << bits) | (next0 >> -bits);
+                z[zOff + i + 2] = (next2 << bits) | (next1 >> -bits);
+                z[zOff + i + 3] = (next3 << bits) | (next2 >> -bits);
+                c = next3;
+                i += 4;
+            }
+            while (i < len)
             {
                 ulong next = z[zOff + i];
                 z[zOff + i] = (next << bits) | (c >> -bits);
                 c = next;
+                ++i;
             }
             return c >> -bits;
+#endif
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         public static ulong ShiftUpBits64(int len, Span<ulong> z, int bits, ulong c)
         {
             Debug.Assert(bits > 0 && bits < 64);
-            for (int i = 0; i < len; ++i)
+            int i = 0, limit4 = len - 4;
+            while (i <= limit4)
+            {
+                ulong next0 = z[i + 0];
+                ulong next1 = z[i + 1];
+                ulong next2 = z[i + 2];
+                ulong next3 = z[i + 3];
+                z[i + 0] = (next0 << bits) | (c     >> -bits);
+                z[i + 1] = (next1 << bits) | (next0 >> -bits);
+                z[i + 2] = (next2 << bits) | (next1 >> -bits);
+                z[i + 3] = (next3 << bits) | (next2 >> -bits);
+                c = next3;
+                i += 4;
+            }
+            while (i < len)
             {
                 ulong next = z[i];
                 z[i] = (next << bits) | (c >> -bits);
                 c = next;
+                ++i;
             }
             return c >> -bits;
         }
@@ -1799,37 +2117,90 @@ namespace Org.BouncyCastle.Math.Raw
 
         public static ulong ShiftUpBits64(int len, ulong[] x, int bits, ulong c, ulong[] z)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBits64(len, x.AsSpan(0, len), bits, c, z.AsSpan(0, len));
+#else
             Debug.Assert(bits > 0 && bits < 64);
-            for (int i = 0; i < len; ++i)
+            int i = 0, limit4 = len - 4;
+            while (i <= limit4)
+            {
+                ulong next0 = x[i + 0];
+                ulong next1 = x[i + 1];
+                ulong next2 = x[i + 2];
+                ulong next3 = x[i + 3];
+                z[i + 0] = (next0 << bits) | (c     >> -bits);
+                z[i + 1] = (next1 << bits) | (next0 >> -bits);
+                z[i + 2] = (next2 << bits) | (next1 >> -bits);
+                z[i + 3] = (next3 << bits) | (next2 >> -bits);
+                c = next3;
+                i += 4;
+            }
+            while (i < len)
             {
                 ulong next = x[i];
                 z[i] = (next << bits) | (c >> -bits);
                 c = next;
+                ++i;
             }
             return c >> -bits;
+#endif
         }
 
         public static ulong ShiftUpBits64(int len, ulong[] x, int xOff, int bits, ulong c, ulong[] z, int zOff)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            return ShiftUpBits64(len, x.AsSpan(xOff, len), bits, c, z.AsSpan(zOff, len));
+#else
             Debug.Assert(bits > 0 && bits < 64);
-            for (int i = 0; i < len; ++i)
+            int i = 0, limit4 = len - 4;
+            while (i <= limit4)
+            {
+                ulong next0 = x[xOff + i + 0];
+                ulong next1 = x[xOff + i + 1];
+                ulong next2 = x[xOff + i + 2];
+                ulong next3 = x[xOff + i + 3];
+                z[zOff + i + 0] = (next0 << bits) | (c     >> -bits);
+                z[zOff + i + 1] = (next1 << bits) | (next0 >> -bits);
+                z[zOff + i + 2] = (next2 << bits) | (next1 >> -bits);
+                z[zOff + i + 3] = (next3 << bits) | (next2 >> -bits);
+                c = next3;
+                i += 4;
+            }
+            while (i < len)
             {
                 ulong next = x[xOff + i];
                 z[zOff + i] = (next << bits) | (c >> -bits);
                 c = next;
+                ++i;
             }
             return c >> -bits;
+#endif
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         public static ulong ShiftUpBits64(int len, ReadOnlySpan<ulong> x, int bits, ulong c, Span<ulong> z)
         {
             Debug.Assert(bits > 0 && bits < 64);
-            for (int i = 0; i < len; ++i)
+            int i = 0, limit4 = len - 4;
+            while (i <= limit4)
+            {
+                ulong next0 = x[i + 0];
+                ulong next1 = x[i + 1];
+                ulong next2 = x[i + 2];
+                ulong next3 = x[i + 3];
+                z[i + 0] = (next0 << bits) | (c     >> -bits);
+                z[i + 1] = (next1 << bits) | (next0 >> -bits);
+                z[i + 2] = (next2 << bits) | (next1 >> -bits);
+                z[i + 3] = (next3 << bits) | (next2 >> -bits);
+                c = next3;
+                i += 4;
+            }
+            while (i < len)
             {
                 ulong next = x[i];
                 z[i] = (next << bits) | (c >> -bits);
                 c = next;
+                ++i;
             }
             return c >> -bits;
         }
@@ -2325,39 +2696,205 @@ namespace Org.BouncyCastle.Math.Raw
         }
 #endif
 
-        public static void Zero(int len, uint[] z)
+        public static void Xor(int len, uint[] x, uint[] y, uint[] z)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Xor(len, x.AsSpan(0, len), y.AsSpan(0, len), z.AsSpan(0, len));
+#else
             for (int i = 0; i < len; ++i)
             {
-                z[i] = 0U;
+                z[i] = x[i] ^ y[i];
             }
+#endif
         }
 
+        public static void Xor(int len, uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        public static void Zero(int len, Span<uint> z)
+            Xor(len, x.AsSpan(xOff, len), y.AsSpan(yOff, len), z.AsSpan(zOff, len));
+#else
+            for (int i = 0; i < len; ++i)
+            {
+                z[zOff + i] = x[xOff + i] ^ y[yOff + i];
+            }
+#endif
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void Xor(int len, ReadOnlySpan<uint> x, ReadOnlySpan<uint> y, Span<uint> z)
+        {
+            int i = 0, limit16 = len - 16;
+            while (i <= limit16)
+            {
+                Nat512.Xor(x[i..], y[i..], z[i..]);
+                i += 16;
+            }
+            while (i < len)
+            {
+                z[i] = x[i] ^ y[i];
+                ++i;
+            }
+        }
+#endif
+
+        public static void Xor64(int len, ulong[] x, ulong[] y, ulong[] z)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Xor64(len, x.AsSpan(0, len), y.AsSpan(0, len), z.AsSpan(0, len));
+#else
             for (int i = 0; i < len; ++i)
             {
-                z[i] = 0U;
+                z[i] = x[i] ^ y[i];
+            }
+#endif
+        }
+
+        public static void Xor64(int len, ulong[] x, int xOff, ulong[] y, int yOff, ulong[] z, int zOff)
+        {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Xor64(len, x.AsSpan(xOff, len), y.AsSpan(yOff, len), z.AsSpan(zOff, len));
+#else
+            for (int i = 0; i < len; ++i)
+            {
+                z[zOff + i] = x[xOff + i] ^ y[yOff + i];
+            }
+#endif
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void Xor64(int len, ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z)
+        {
+            int i = 0, limit8 = len - 8;
+            while (i <= limit8)
+            {
+                Nat512.Xor64(x[i..], y[i..], z[i..]);
+                i += 8;
+            }
+            while (i < len)
+            {
+                z[i] = x[i] ^ y[i];
+                ++i;
             }
         }
 #endif
 
-        public static void Zero64(int len, ulong[] z)
+        public static void XorTo(int len, uint[] x, uint[] z)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            XorTo(len, x.AsSpan(0, len), z.AsSpan(0, len));
+#else
             for (int i = 0; i < len; ++i)
             {
-                z[i] = 0UL;
+                z[i] ^= x[i];
             }
+#endif
         }
 
+        public static void XorTo(int len, uint[] x, int xOff, uint[] z, int zOff)
+        {
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        public static void Zero64(int len, Span<ulong> z)
+            XorTo(len, x.AsSpan(xOff, len), z.AsSpan(zOff, len));
+#else
+            for (int i = 0; i < len; ++i)
+            {
+                z[zOff + i] ^= x[xOff + i];
+            }
+#endif
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void XorTo(int len, ReadOnlySpan<uint> x, Span<uint> z)
+        {
+            int i = 0, limit16 = len - 16;
+            while (i <= limit16)
+            {
+                Nat512.XorTo(x[i..], z[i..]);
+                i += 16;
+            }
+            while (i < len)
+            {
+                z[i] ^= x[i];
+                ++i;
+            }
+        }
+#endif
+
+        public static void XorTo64(int len, ulong[] x, ulong[] z)
+        {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            XorTo64(len, x.AsSpan(0, len), z.AsSpan(0, len));
+#else
+            for (int i = 0; i < len; ++i)
+            {
+                z[i] ^= x[i];
+            }
+#endif
+        }
+
+        public static void XorTo64(int len, ulong[] x, int xOff, ulong[] z, int zOff)
+        {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            XorTo64(len, x.AsSpan(xOff, len), z.AsSpan(zOff, len));
+#else
+            for (int i = 0; i < len; ++i)
+            {
+                z[zOff + i] ^= x[xOff + i];
+            }
+#endif
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void XorTo64(int len, ReadOnlySpan<ulong> x, Span<ulong> z)
+        {
+            int i = 0, limit8 = len - 8;
+            while (i <= limit8)
+            {
+                Nat512.XorTo64(x[i..], z[i..]);
+                i += 8;
+            }
+            while (i < len)
+            {
+                z[i] ^= x[i];
+                ++i;
+            }
+        }
+#endif
+
+        public static void Zero(int len, uint[] z)
+        {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            z.AsSpan(0, len).Fill(0U);
+#else
+            for (int i = 0; i < len; ++i)
+            {
+                z[i] = 0U;
+            }
+#endif
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void Zero(int len, Span<uint> z)
+        {
+            z[..len].Fill(0U);
+        }
+#endif
+
+        public static void Zero64(int len, ulong[] z)
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            z.AsSpan(0, len).Fill(0UL);
+#else
             for (int i = 0; i < len; ++i)
             {
                 z[i] = 0UL;
             }
+#endif
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void Zero64(int len, Span<ulong> z)
+        {
+            z[..len].Fill(0UL);
         }
 #endif
     }
diff --git a/crypto/src/math/raw/Nat256.cs b/crypto/src/math/raw/Nat256.cs
index 710060bee..47e0644f6 100644
--- a/crypto/src/math/raw/Nat256.cs
+++ b/crypto/src/math/raw/Nat256.cs
@@ -1,5 +1,11 @@
 using System;
 using System.Diagnostics;
+#if NETCOREAPP3_0_OR_GREATER
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
 
 using Org.BouncyCastle.Crypto.Utilities;
 
@@ -1364,6 +1370,71 @@ namespace Org.BouncyCastle.Math.Raw
             return new BigInteger(1, bs);
         }
 
+        /// <summary>
+        /// Computes z[zOff + i] = x[xOff + i] ^ y[yOff + i] for the 8 words (256 bits)
+        /// starting at the given offsets.
+        /// </summary>
+        public static void Xor(uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            // Span-capable targets: delegate to the span overload.
+            ReadOnlySpan<uint> xs = x.AsSpan(xOff);
+            ReadOnlySpan<uint> ys = y.AsSpan(yOff);
+            Span<uint> zs = z.AsSpan(zOff);
+            Xor(xs, ys, zs);
+#else
+            for (int word = 0; word < 8; ++word)
+            {
+                z[zOff + word] = x[xOff + word] ^ y[yOff + word];
+            }
+#endif
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        /// <summary>
+        /// Computes z[i] = x[i] ^ y[i] for the first 8 words (256 bits) of the operands.
+        /// </summary>
+        public static void Xor(ReadOnlySpan<uint> x, ReadOnlySpan<uint> y, Span<uint> z)
+        {
+#if NETCOREAPP3_0_OR_GREATER
+            if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32)
+            {
+                ReadOnlySpan<byte> xBytes = MemoryMarshal.AsBytes(x.Slice(0, 8));
+                ReadOnlySpan<byte> yBytes = MemoryMarshal.AsBytes(y.Slice(0, 8));
+                Span<byte> zBytes = MemoryMarshal.AsBytes(z.Slice(0, 8));
+
+                // A single 256-bit vector covers all 32 bytes.
+                Vector256<byte> xVec = MemoryMarshal.Read<Vector256<byte>>(xBytes.Slice(0x00, 0x20));
+                Vector256<byte> yVec = MemoryMarshal.Read<Vector256<byte>>(yBytes.Slice(0x00, 0x20));
+                Vector256<byte> zVec = Avx2.Xor(xVec, yVec);
+
+                MemoryMarshal.Write(zBytes.Slice(0x00, 0x20), ref zVec);
+                return;
+            }
+
+            if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16)
+            {
+                ReadOnlySpan<byte> xBytes = MemoryMarshal.AsBytes(x.Slice(0, 8));
+                ReadOnlySpan<byte> yBytes = MemoryMarshal.AsBytes(y.Slice(0, 8));
+                Span<byte> zBytes = MemoryMarshal.AsBytes(z.Slice(0, 8));
+
+                // Two 128-bit vectors per operand; every load precedes the first store.
+                Vector128<byte> xLo = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(0x00, 0x10));
+                Vector128<byte> xHi = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(0x10, 0x10));
+                Vector128<byte> yLo = MemoryMarshal.Read<Vector128<byte>>(yBytes.Slice(0x00, 0x10));
+                Vector128<byte> yHi = MemoryMarshal.Read<Vector128<byte>>(yBytes.Slice(0x10, 0x10));
+
+                Vector128<byte> zLo = Sse2.Xor(xLo, yLo);
+                Vector128<byte> zHi = Sse2.Xor(xHi, yHi);
+
+                MemoryMarshal.Write(zBytes.Slice(0x00, 0x10), ref zLo);
+                MemoryMarshal.Write(zBytes.Slice(0x10, 0x10), ref zHi);
+                return;
+            }
+#endif
+
+            // Scalar fallback, one word at a time in ascending order.
+            for (int word = 0; word < 8; ++word)
+            {
+                z[word] = x[word] ^ y[word];
+            }
+        }
+#endif
+
         public static void Zero(uint[] z)
         {
             z[0] = 0;
diff --git a/crypto/src/math/raw/Nat512.cs b/crypto/src/math/raw/Nat512.cs
index a9ef2b3b6..2312e1cf2 100644
--- a/crypto/src/math/raw/Nat512.cs
+++ b/crypto/src/math/raw/Nat512.cs
@@ -1,5 +1,10 @@
 using System;
-using System.Diagnostics;
+#if NETCOREAPP3_0_OR_GREATER
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
 
 namespace Org.BouncyCastle.Math.Raw
 {
@@ -42,5 +47,313 @@ namespace Org.BouncyCastle.Math.Raw
             c24 += (uint)Nat.SubFrom(16, m, 0, zz, 8);
             Nat.AddWordAt(32, c24, zz, 24); 
         }
+
+        /// <summary>
+        /// Computes z[zOff + i] = x[xOff + i] ^ y[yOff + i] for the 16 words (512 bits)
+        /// starting at the given offsets.
+        /// </summary>
+        public static void Xor(uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            // Span-capable targets: delegate to the span overload.
+            ReadOnlySpan<uint> xs = x.AsSpan(xOff);
+            ReadOnlySpan<uint> ys = y.AsSpan(yOff);
+            Span<uint> zs = z.AsSpan(zOff);
+            Xor(xs, ys, zs);
+#else
+            for (int word = 0; word < 16; ++word)
+            {
+                z[zOff + word] = x[xOff + word] ^ y[yOff + word];
+            }
+#endif
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        /// <summary>
+        /// Computes z[i] = x[i] ^ y[i] for the first 16 words (512 bits) of the operands.
+        /// </summary>
+        public static void Xor(ReadOnlySpan<uint> x, ReadOnlySpan<uint> y, Span<uint> z)
+        {
+#if NETCOREAPP3_0_OR_GREATER
+            if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32)
+            {
+                ReadOnlySpan<byte> xBytes = MemoryMarshal.AsBytes(x.Slice(0, 16));
+                ReadOnlySpan<byte> yBytes = MemoryMarshal.AsBytes(y.Slice(0, 16));
+                Span<byte> zBytes = MemoryMarshal.AsBytes(z.Slice(0, 16));
+
+                // Two 256-bit vectors per operand; every load precedes the first store.
+                Vector256<byte> xLo = MemoryMarshal.Read<Vector256<byte>>(xBytes.Slice(0x00, 0x20));
+                Vector256<byte> xHi = MemoryMarshal.Read<Vector256<byte>>(xBytes.Slice(0x20, 0x20));
+                Vector256<byte> yLo = MemoryMarshal.Read<Vector256<byte>>(yBytes.Slice(0x00, 0x20));
+                Vector256<byte> yHi = MemoryMarshal.Read<Vector256<byte>>(yBytes.Slice(0x20, 0x20));
+
+                Vector256<byte> zLo = Avx2.Xor(xLo, yLo);
+                Vector256<byte> zHi = Avx2.Xor(xHi, yHi);
+
+                MemoryMarshal.Write(zBytes.Slice(0x00, 0x20), ref zLo);
+                MemoryMarshal.Write(zBytes.Slice(0x20, 0x20), ref zHi);
+                return;
+            }
+
+            if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16)
+            {
+                ReadOnlySpan<byte> xBytes = MemoryMarshal.AsBytes(x.Slice(0, 16));
+                ReadOnlySpan<byte> yBytes = MemoryMarshal.AsBytes(y.Slice(0, 16));
+                Span<byte> zBytes = MemoryMarshal.AsBytes(z.Slice(0, 16));
+
+                // Four 128-bit vectors per operand; every load precedes the first store.
+                Vector128<byte> xq0 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(0x00, 0x10));
+                Vector128<byte> xq1 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(0x10, 0x10));
+                Vector128<byte> xq2 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(0x20, 0x10));
+                Vector128<byte> xq3 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(0x30, 0x10));
+
+                Vector128<byte> yq0 = MemoryMarshal.Read<Vector128<byte>>(yBytes.Slice(0x00, 0x10));
+                Vector128<byte> yq1 = MemoryMarshal.Read<Vector128<byte>>(yBytes.Slice(0x10, 0x10));
+                Vector128<byte> yq2 = MemoryMarshal.Read<Vector128<byte>>(yBytes.Slice(0x20, 0x10));
+                Vector128<byte> yq3 = MemoryMarshal.Read<Vector128<byte>>(yBytes.Slice(0x30, 0x10));
+
+                Vector128<byte> zq0 = Sse2.Xor(xq0, yq0);
+                Vector128<byte> zq1 = Sse2.Xor(xq1, yq1);
+                Vector128<byte> zq2 = Sse2.Xor(xq2, yq2);
+                Vector128<byte> zq3 = Sse2.Xor(xq3, yq3);
+
+                MemoryMarshal.Write(zBytes.Slice(0x00, 0x10), ref zq0);
+                MemoryMarshal.Write(zBytes.Slice(0x10, 0x10), ref zq1);
+                MemoryMarshal.Write(zBytes.Slice(0x20, 0x10), ref zq2);
+                MemoryMarshal.Write(zBytes.Slice(0x30, 0x10), ref zq3);
+                return;
+            }
+#endif
+
+            // Scalar fallback, one word at a time in ascending order.
+            for (int word = 0; word < 16; ++word)
+            {
+                z[word] = x[word] ^ y[word];
+            }
+        }
+#endif
+
+        /// <summary>
+        /// Computes z[zOff + i] ^= x[xOff + i] for the 16 words (512 bits) starting at the
+        /// given offsets.
+        /// </summary>
+        public static void XorTo(uint[] x, int xOff, uint[] z, int zOff)
+        {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            // Span-capable targets: delegate to the span overload.
+            ReadOnlySpan<uint> xs = x.AsSpan(xOff);
+            Span<uint> zs = z.AsSpan(zOff);
+            XorTo(xs, zs);
+#else
+            for (int word = 0; word < 16; ++word)
+            {
+                z[zOff + word] ^= x[xOff + word];
+            }
+#endif
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        /// <summary>
+        /// Computes z[i] ^= x[i] for the first 16 words (512 bits) of the operands.
+        /// </summary>
+        public static void XorTo(ReadOnlySpan<uint> x, Span<uint> z)
+        {
+#if NETCOREAPP3_0_OR_GREATER
+            if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32)
+            {
+                ReadOnlySpan<byte> xBytes = MemoryMarshal.AsBytes(x.Slice(0, 16));
+                Span<byte> zBytes = MemoryMarshal.AsBytes(z.Slice(0, 16));
+
+                // Two 256-bit vectors per operand; z is fully read before it is written.
+                Vector256<byte> xLo = MemoryMarshal.Read<Vector256<byte>>(xBytes.Slice(0x00, 0x20));
+                Vector256<byte> xHi = MemoryMarshal.Read<Vector256<byte>>(xBytes.Slice(0x20, 0x20));
+                Vector256<byte> oldLo = MemoryMarshal.Read<Vector256<byte>>(zBytes.Slice(0x00, 0x20));
+                Vector256<byte> oldHi = MemoryMarshal.Read<Vector256<byte>>(zBytes.Slice(0x20, 0x20));
+
+                Vector256<byte> newLo = Avx2.Xor(xLo, oldLo);
+                Vector256<byte> newHi = Avx2.Xor(xHi, oldHi);
+
+                MemoryMarshal.Write(zBytes.Slice(0x00, 0x20), ref newLo);
+                MemoryMarshal.Write(zBytes.Slice(0x20, 0x20), ref newHi);
+                return;
+            }
+
+            if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16)
+            {
+                ReadOnlySpan<byte> xBytes = MemoryMarshal.AsBytes(x.Slice(0, 16));
+                Span<byte> zBytes = MemoryMarshal.AsBytes(z.Slice(0, 16));
+
+                // Four 128-bit vectors per operand; z is fully read before it is written.
+                Vector128<byte> xq0 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(0x00, 0x10));
+                Vector128<byte> xq1 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(0x10, 0x10));
+                Vector128<byte> xq2 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(0x20, 0x10));
+                Vector128<byte> xq3 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(0x30, 0x10));
+
+                Vector128<byte> old0 = MemoryMarshal.Read<Vector128<byte>>(zBytes.Slice(0x00, 0x10));
+                Vector128<byte> old1 = MemoryMarshal.Read<Vector128<byte>>(zBytes.Slice(0x10, 0x10));
+                Vector128<byte> old2 = MemoryMarshal.Read<Vector128<byte>>(zBytes.Slice(0x20, 0x10));
+                Vector128<byte> old3 = MemoryMarshal.Read<Vector128<byte>>(zBytes.Slice(0x30, 0x10));
+
+                Vector128<byte> new0 = Sse2.Xor(xq0, old0);
+                Vector128<byte> new1 = Sse2.Xor(xq1, old1);
+                Vector128<byte> new2 = Sse2.Xor(xq2, old2);
+                Vector128<byte> new3 = Sse2.Xor(xq3, old3);
+
+                MemoryMarshal.Write(zBytes.Slice(0x00, 0x10), ref new0);
+                MemoryMarshal.Write(zBytes.Slice(0x10, 0x10), ref new1);
+                MemoryMarshal.Write(zBytes.Slice(0x20, 0x10), ref new2);
+                MemoryMarshal.Write(zBytes.Slice(0x30, 0x10), ref new3);
+                return;
+            }
+#endif
+
+            // Scalar fallback, one word at a time in ascending order.
+            for (int word = 0; word < 16; ++word)
+            {
+                z[word] ^= x[word];
+            }
+        }
+#endif
+
+        /// <summary>
+        /// Computes z[zOff + i] = x[xOff + i] ^ y[yOff + i] for the 8 64-bit words (512 bits)
+        /// starting at the given offsets.
+        /// </summary>
+        public static void Xor64(ulong[] x, int xOff, ulong[] y, int yOff, ulong[] z, int zOff)
+        {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            // Span-capable targets: delegate to the span overload.
+            ReadOnlySpan<ulong> xs = x.AsSpan(xOff);
+            ReadOnlySpan<ulong> ys = y.AsSpan(yOff);
+            Span<ulong> zs = z.AsSpan(zOff);
+            Xor64(xs, ys, zs);
+#else
+            for (int word = 0; word < 8; ++word)
+            {
+                z[zOff + word] = x[xOff + word] ^ y[yOff + word];
+            }
+#endif
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        /// <summary>
+        /// Computes z[i] = x[i] ^ y[i] for the first 8 64-bit words (512 bits) of the operands.
+        /// </summary>
+        public static void Xor64(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z)
+        {
+#if NETCOREAPP3_0_OR_GREATER
+            if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32)
+            {
+                ReadOnlySpan<byte> xBytes = MemoryMarshal.AsBytes(x.Slice(0, 8));
+                ReadOnlySpan<byte> yBytes = MemoryMarshal.AsBytes(y.Slice(0, 8));
+                Span<byte> zBytes = MemoryMarshal.AsBytes(z.Slice(0, 8));
+
+                // Two 256-bit vectors per operand; every load precedes the first store.
+                Vector256<byte> xLo = MemoryMarshal.Read<Vector256<byte>>(xBytes.Slice(0x00, 0x20));
+                Vector256<byte> xHi = MemoryMarshal.Read<Vector256<byte>>(xBytes.Slice(0x20, 0x20));
+                Vector256<byte> yLo = MemoryMarshal.Read<Vector256<byte>>(yBytes.Slice(0x00, 0x20));
+                Vector256<byte> yHi = MemoryMarshal.Read<Vector256<byte>>(yBytes.Slice(0x20, 0x20));
+
+                Vector256<byte> zLo = Avx2.Xor(xLo, yLo);
+                Vector256<byte> zHi = Avx2.Xor(xHi, yHi);
+
+                MemoryMarshal.Write(zBytes.Slice(0x00, 0x20), ref zLo);
+                MemoryMarshal.Write(zBytes.Slice(0x20, 0x20), ref zHi);
+                return;
+            }
+
+            if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16)
+            {
+                ReadOnlySpan<byte> xBytes = MemoryMarshal.AsBytes(x.Slice(0, 8));
+                ReadOnlySpan<byte> yBytes = MemoryMarshal.AsBytes(y.Slice(0, 8));
+                Span<byte> zBytes = MemoryMarshal.AsBytes(z.Slice(0, 8));
+
+                // Four 128-bit vectors per operand; every load precedes the first store.
+                Vector128<byte> xq0 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(0x00, 0x10));
+                Vector128<byte> xq1 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(0x10, 0x10));
+                Vector128<byte> xq2 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(0x20, 0x10));
+                Vector128<byte> xq3 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(0x30, 0x10));
+
+                Vector128<byte> yq0 = MemoryMarshal.Read<Vector128<byte>>(yBytes.Slice(0x00, 0x10));
+                Vector128<byte> yq1 = MemoryMarshal.Read<Vector128<byte>>(yBytes.Slice(0x10, 0x10));
+                Vector128<byte> yq2 = MemoryMarshal.Read<Vector128<byte>>(yBytes.Slice(0x20, 0x10));
+                Vector128<byte> yq3 = MemoryMarshal.Read<Vector128<byte>>(yBytes.Slice(0x30, 0x10));
+
+                Vector128<byte> zq0 = Sse2.Xor(xq0, yq0);
+                Vector128<byte> zq1 = Sse2.Xor(xq1, yq1);
+                Vector128<byte> zq2 = Sse2.Xor(xq2, yq2);
+                Vector128<byte> zq3 = Sse2.Xor(xq3, yq3);
+
+                MemoryMarshal.Write(zBytes.Slice(0x00, 0x10), ref zq0);
+                MemoryMarshal.Write(zBytes.Slice(0x10, 0x10), ref zq1);
+                MemoryMarshal.Write(zBytes.Slice(0x20, 0x10), ref zq2);
+                MemoryMarshal.Write(zBytes.Slice(0x30, 0x10), ref zq3);
+                return;
+            }
+#endif
+
+            // Scalar fallback, one word at a time in ascending order.
+            for (int word = 0; word < 8; ++word)
+            {
+                z[word] = x[word] ^ y[word];
+            }
+        }
+#endif
+
+        /// <summary>
+        /// Computes z[zOff + i] ^= x[xOff + i] for the 8 64-bit words (512 bits) starting at
+        /// the given offsets.
+        /// </summary>
+        public static void XorTo64(ulong[] x, int xOff, ulong[] z, int zOff)
+        {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            // Span-capable targets: delegate to the span overload.
+            ReadOnlySpan<ulong> xs = x.AsSpan(xOff);
+            Span<ulong> zs = z.AsSpan(zOff);
+            XorTo64(xs, zs);
+#else
+            for (int word = 0; word < 8; ++word)
+            {
+                z[zOff + word] ^= x[xOff + word];
+            }
+#endif
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        /// <summary>
+        /// Computes z[i] ^= x[i] for the first 8 64-bit words (512 bits) of the operands.
+        /// </summary>
+        public static void XorTo64(ReadOnlySpan<ulong> x, Span<ulong> z)
+        {
+#if NETCOREAPP3_0_OR_GREATER
+            if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32)
+            {
+                ReadOnlySpan<byte> xBytes = MemoryMarshal.AsBytes(x.Slice(0, 8));
+                Span<byte> zBytes = MemoryMarshal.AsBytes(z.Slice(0, 8));
+
+                // Two 256-bit vectors per operand; z is fully read before it is written.
+                Vector256<byte> xLo = MemoryMarshal.Read<Vector256<byte>>(xBytes.Slice(0x00, 0x20));
+                Vector256<byte> xHi = MemoryMarshal.Read<Vector256<byte>>(xBytes.Slice(0x20, 0x20));
+                Vector256<byte> oldLo = MemoryMarshal.Read<Vector256<byte>>(zBytes.Slice(0x00, 0x20));
+                Vector256<byte> oldHi = MemoryMarshal.Read<Vector256<byte>>(zBytes.Slice(0x20, 0x20));
+
+                Vector256<byte> newLo = Avx2.Xor(xLo, oldLo);
+                Vector256<byte> newHi = Avx2.Xor(xHi, oldHi);
+
+                MemoryMarshal.Write(zBytes.Slice(0x00, 0x20), ref newLo);
+                MemoryMarshal.Write(zBytes.Slice(0x20, 0x20), ref newHi);
+                return;
+            }
+
+            if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16)
+            {
+                ReadOnlySpan<byte> xBytes = MemoryMarshal.AsBytes(x.Slice(0, 8));
+                Span<byte> zBytes = MemoryMarshal.AsBytes(z.Slice(0, 8));
+
+                // Four 128-bit vectors per operand; z is fully read before it is written.
+                Vector128<byte> xq0 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(0x00, 0x10));
+                Vector128<byte> xq1 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(0x10, 0x10));
+                Vector128<byte> xq2 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(0x20, 0x10));
+                Vector128<byte> xq3 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(0x30, 0x10));
+
+                Vector128<byte> old0 = MemoryMarshal.Read<Vector128<byte>>(zBytes.Slice(0x00, 0x10));
+                Vector128<byte> old1 = MemoryMarshal.Read<Vector128<byte>>(zBytes.Slice(0x10, 0x10));
+                Vector128<byte> old2 = MemoryMarshal.Read<Vector128<byte>>(zBytes.Slice(0x20, 0x10));
+                Vector128<byte> old3 = MemoryMarshal.Read<Vector128<byte>>(zBytes.Slice(0x30, 0x10));
+
+                Vector128<byte> new0 = Sse2.Xor(xq0, old0);
+                Vector128<byte> new1 = Sse2.Xor(xq1, old1);
+                Vector128<byte> new2 = Sse2.Xor(xq2, old2);
+                Vector128<byte> new3 = Sse2.Xor(xq3, old3);
+
+                MemoryMarshal.Write(zBytes.Slice(0x00, 0x10), ref new0);
+                MemoryMarshal.Write(zBytes.Slice(0x10, 0x10), ref new1);
+                MemoryMarshal.Write(zBytes.Slice(0x20, 0x10), ref new2);
+                MemoryMarshal.Write(zBytes.Slice(0x30, 0x10), ref new3);
+                return;
+            }
+#endif
+
+            // Scalar fallback, one word at a time in ascending order.
+            for (int word = 0; word < 8; ++word)
+            {
+                z[word] ^= x[word];
+            }
+        }
+#endif
     }
 }