summary refs log tree commit diff
path: root/crypto/src/math/raw
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/src/math/raw')
-rw-r--r--crypto/src/math/raw/Interleave.cs32
-rw-r--r--crypto/src/math/raw/Mod.cs514
-rw-r--r--crypto/src/math/raw/Nat.cs105
-rw-r--r--crypto/src/math/raw/Nat256.cs6
-rw-r--r--crypto/src/math/raw/Nat512.cs24
5 files changed, 354 insertions, 327 deletions
diff --git a/crypto/src/math/raw/Interleave.cs b/crypto/src/math/raw/Interleave.cs
index 8082ce57c..e71f8e394 100644
--- a/crypto/src/math/raw/Interleave.cs
+++ b/crypto/src/math/raw/Interleave.cs
@@ -17,7 +17,7 @@ namespace Org.BouncyCastle.Math.Raw
             uint t = x;
 
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.IsEnabled)
             {
                 return Bmi2.ParallelBitDeposit(t, 0x55555555U);
             }
@@ -33,7 +33,7 @@ namespace Org.BouncyCastle.Math.Raw
             uint t = x;
 
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.IsEnabled)
             {
                 return Bmi2.ParallelBitDeposit(t, 0x55555555U);
             }
@@ -48,7 +48,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static ulong Expand32to64(uint x)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.IsEnabled)
             {
                 return (ulong)Bmi2.ParallelBitDeposit(x >> 16, 0x55555555U) << 32
                     |         Bmi2.ParallelBitDeposit(x      , 0x55555555U);
@@ -67,7 +67,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static void Expand64To128(ulong x, ulong[] z, int zOff)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 z[zOff    ] = Bmi2.X64.ParallelBitDeposit(x      , 0x5555555555555555UL);
                 z[zOff + 1] = Bmi2.X64.ParallelBitDeposit(x >> 32, 0x5555555555555555UL);
@@ -90,7 +90,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static void Expand64To128(ulong x, Span<ulong> z)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 z[0] = Bmi2.X64.ParallelBitDeposit(x      , 0x5555555555555555UL);
                 z[1] = Bmi2.X64.ParallelBitDeposit(x >> 32, 0x5555555555555555UL);
@@ -136,7 +136,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static ulong Expand64To128Rev(ulong x, out ulong low)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 low  = Bmi2.X64.ParallelBitDeposit(x >> 32, 0xAAAAAAAAAAAAAAAAUL);
                 return Bmi2.X64.ParallelBitDeposit(x      , 0xAAAAAAAAAAAAAAAAUL);
@@ -157,7 +157,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static uint Shuffle(uint x)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.IsEnabled)
             {
                 return Bmi2.ParallelBitDeposit(x >> 16, 0xAAAAAAAAU)
                     |  Bmi2.ParallelBitDeposit(x      , 0x55555555U);
@@ -175,7 +175,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static ulong Shuffle(ulong x)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 return Bmi2.X64.ParallelBitDeposit(x >> 32, 0xAAAAAAAAAAAAAAAAUL)
                     |  Bmi2.X64.ParallelBitDeposit(x      , 0x5555555555555555UL);
@@ -194,7 +194,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static uint Shuffle2(uint x)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.IsEnabled)
             {
                 return Bmi2.ParallelBitDeposit(x >> 24, 0x88888888U)
                     |  Bmi2.ParallelBitDeposit(x >> 16, 0x44444444U)
@@ -219,7 +219,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static ulong Shuffle2(ulong x)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 return Bmi2.X64.ParallelBitDeposit(x >> 48, 0x8888888888888888UL)
                     |  Bmi2.X64.ParallelBitDeposit(x >> 32, 0x4444444444444444UL)
@@ -242,7 +242,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static uint Unshuffle(uint x)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.IsEnabled)
             {
                 return Bmi2.ParallelBitExtract(x, 0xAAAAAAAAU) << 16
                     |  Bmi2.ParallelBitExtract(x, 0x55555555U);
@@ -260,7 +260,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static ulong Unshuffle(ulong x)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 return Bmi2.X64.ParallelBitExtract(x, 0xAAAAAAAAAAAAAAAAUL) << 32
                     |  Bmi2.X64.ParallelBitExtract(x, 0x5555555555555555UL);
@@ -279,7 +279,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static ulong Unshuffle(ulong x, out ulong even)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 even = Bmi2.X64.ParallelBitExtract(x, 0x5555555555555555UL);
                 return Bmi2.X64.ParallelBitExtract(x, 0xAAAAAAAAAAAAAAAAUL);
@@ -294,7 +294,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static ulong Unshuffle(ulong x0, ulong x1, out ulong even)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 even = Bmi2.X64.ParallelBitExtract(x0, 0x5555555555555555UL)
                     |  Bmi2.X64.ParallelBitExtract(x1, 0x5555555555555555UL) << 32;
@@ -312,7 +312,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static uint Unshuffle2(uint x)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.IsEnabled)
             {
                 return Bmi2.ParallelBitExtract(x, 0x88888888U) << 24
                     |  Bmi2.ParallelBitExtract(x, 0x44444444U) << 16
@@ -337,7 +337,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static ulong Unshuffle2(ulong x)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 return Bmi2.X64.ParallelBitExtract(x, 0x8888888888888888UL) << 48
                     |  Bmi2.X64.ParallelBitExtract(x, 0x4444444444444444UL) << 32
diff --git a/crypto/src/math/raw/Mod.cs b/crypto/src/math/raw/Mod.cs
index f1ca2ebf0..9059e479c 100644
--- a/crypto/src/math/raw/Mod.cs
+++ b/crypto/src/math/raw/Mod.cs
@@ -7,16 +7,21 @@ using Org.BouncyCastle.Utilities;
 
 namespace Org.BouncyCastle.Math.Raw
 {
-    /*
-     * Modular inversion as implemented in this class is based on the paper "Fast constant-time gcd
-     * computation and modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
-     */
-
+    /// <summary>
+    /// Modular inversion as implemented in this class is based on the paper "Fast constant-time gcd computation and
+    /// modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
+    /// </summary>
+    /// <remarks>
+    /// In some cases (when it is faster) we use the "half delta" variant of safegcd based on
+    /// <a href="https://github.com/sipa/safegcd-bounds">hddivsteps</a>. 
+    /// </remarks>
     internal static class Mod
     {
         private const int M30 = 0x3FFFFFFF;
         private const ulong M32UL = 0xFFFFFFFFUL;
 
+        private static readonly int MaxStackAlloc = Platform.Is64BitProcess ? 4096 : 1024;
+
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         public static void CheckedModOddInverse(ReadOnlySpan<uint> m, ReadOnlySpan<uint> x, Span<uint> z)
 #else
@@ -66,11 +71,12 @@ namespace Org.BouncyCastle.Math.Raw
             return x;
         }
 
-        public static uint ModOddInverse(uint[] m, uint[] x, uint[] z)
-        {
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-            return ModOddInverse(m.AsSpan(), x.AsSpan(), z.AsSpan());
+        public static uint ModOddInverse(ReadOnlySpan<uint> m, ReadOnlySpan<uint> x, Span<uint> z)
 #else
+        public static uint ModOddInverse(uint[] m, uint[] x, uint[] z)
+#endif
+        {
             int len32 = m.Length;
             Debug.Assert(len32 > 0);
             Debug.Assert((m[0] & 1) != 0);
@@ -79,25 +85,45 @@ namespace Org.BouncyCastle.Math.Raw
             int bits = (len32 << 5) - Integers.NumberOfLeadingZeros((int)m[len32 - 1]);
             int len30 = (bits + 29) / 30;
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            int allocSize = len30 * 5;
+            Span<int> alloc = (allocSize * Integers.NumBytes <= MaxStackAlloc)
+                ? stackalloc int[allocSize]
+                : new int[allocSize];
+
+            Span<int> t = stackalloc int[4];
+            Span<int> D = alloc[..len30]; alloc = alloc[len30..];
+            Span<int> E = alloc[..len30]; alloc = alloc[len30..];
+            Span<int> F = alloc[..len30]; alloc = alloc[len30..];
+            Span<int> G = alloc[..len30]; alloc = alloc[len30..];
+            Span<int> M = alloc[..len30];
+#else
             int[] t = new int[4];
             int[] D = new int[len30];
             int[] E = new int[len30];
             int[] F = new int[len30];
             int[] G = new int[len30];
             int[] M = new int[len30];
+#endif
 
             E[0] = 1;
-            Encode30(bits, x, 0, G, 0);
-            Encode30(bits, m, 0, M, 0);
+            Encode30(bits, x, G);
+            Encode30(bits, m, M);
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            M.CopyTo(F);
+#else
             Array.Copy(M, 0, F, 0, len30);
+#endif
 
-            int delta = 0;
+            // We use the "half delta" variant here, with theta == delta - 1/2
+            int theta = 0;
             int m0Inv32 = (int)Inverse32((uint)M[0]);
-            int maxDivsteps = GetMaximumDivsteps(bits);
+            int maxDivsteps = GetMaximumHDDivsteps(bits);
 
             for (int divSteps = 0; divSteps < maxDivsteps; divSteps += 30)
             {
-                delta = Divsteps30(delta, F[0], G[0], t);
+                theta = HDDivsteps30(theta, F[0], G[0], t);
                 UpdateDE30(len30, D, E, t, m0Inv32, M);
                 UpdateFG30(len30, F, G, t);
             }
@@ -107,15 +133,17 @@ namespace Org.BouncyCastle.Math.Raw
 
             CNormalize30(len30, signF, D, M);
 
-            Decode30(bits, D, 0, z, 0);
+            Decode30(bits, D, z);
             Debug.Assert(0 != Nat.LessThan(m.Length, z, m));
 
-            return (uint)(EqualTo(len30, F, 1) & EqualToZero(len30, G));
-#endif
+            return (uint)(EqualTo(len30, F, 1) & EqualTo(len30, G, 0));
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        public static uint ModOddInverse(ReadOnlySpan<uint> m, ReadOnlySpan<uint> x, Span<uint> z)
+        public static bool ModOddInverseVar(ReadOnlySpan<uint> m, ReadOnlySpan<uint> x, Span<uint> z)
+#else
+        public static bool ModOddInverseVar(uint[] m, uint[] x, uint[] z)
+#endif
         {
             int len32 = m.Length;
             Debug.Assert(len32 > 0);
@@ -125,9 +153,14 @@ namespace Org.BouncyCastle.Math.Raw
             int bits = (len32 << 5) - Integers.NumberOfLeadingZeros((int)m[len32 - 1]);
             int len30 = (bits + 29) / 30;
 
-            Span<int> alloc = len30 <= 50
-                ? stackalloc int[len30 * 5]
-                : new int[len30 * 5];
+            int clz = bits - Nat.GetBitLength(len32, x);
+            Debug.Assert(clz >= 0);
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            int allocSize = len30 * 5;
+            Span<int> alloc = (allocSize * Integers.NumBytes <= MaxStackAlloc)
+                ? stackalloc int[allocSize]
+                : new int[allocSize];
 
             Span<int> t = stackalloc int[4];
             Span<int> D = alloc[..len30]; alloc = alloc[len30..];
@@ -135,68 +168,34 @@ namespace Org.BouncyCastle.Math.Raw
             Span<int> F = alloc[..len30]; alloc = alloc[len30..];
             Span<int> G = alloc[..len30]; alloc = alloc[len30..];
             Span<int> M = alloc[..len30];
-
-            E[0] = 1;
-            Encode30(bits, x, G);
-            Encode30(bits, m, M);
-            M.CopyTo(F);
-
-            int delta = 0;
-            int m0Inv32 = (int)Inverse32((uint)M[0]);
-            int maxDivsteps = GetMaximumDivsteps(bits);
-
-            for (int divSteps = 0; divSteps < maxDivsteps; divSteps += 30)
-            {
-                delta = Divsteps30(delta, F[0], G[0], t);
-                UpdateDE30(len30, D, E, t, m0Inv32, M);
-                UpdateFG30(len30, F, G, t);
-            }
-
-            int signF = F[len30 - 1] >> 31;
-            CNegate30(len30, signF, F);
-
-            CNormalize30(len30, signF, D, M);
-
-            Decode30(bits, D, z);
-            Debug.Assert(0 != Nat.LessThan(m.Length, z, m));
-
-            return (uint)(EqualTo(len30, F, 1) & EqualToZero(len30, G));
-        }
-#endif
-
-        public static bool ModOddInverseVar(uint[] m, uint[] x, uint[] z)
-        {
-#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-            return ModOddInverseVar(m.AsSpan(), x.AsSpan(), z.AsSpan());
 #else
-            int len32 = m.Length;
-            Debug.Assert(len32 > 0);
-            Debug.Assert((m[0] & 1) != 0);
-            Debug.Assert(m[len32 - 1] != 0);
-
-            int bits = (len32 << 5) - Integers.NumberOfLeadingZeros((int)m[len32 - 1]);
-            int len30 = (bits + 29) / 30;
-
             int[] t = new int[4];
             int[] D = new int[len30];
             int[] E = new int[len30];
             int[] F = new int[len30];
             int[] G = new int[len30];
             int[] M = new int[len30];
+#endif
 
             E[0] = 1;
-            Encode30(bits, x, 0, G, 0);
-            Encode30(bits, m, 0, M, 0);
+            Encode30(bits, x, G);
+            Encode30(bits, m, M);
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            M.CopyTo(F);
+#else
             Array.Copy(M, 0, F, 0, len30);
+#endif
 
-            int clzG = Integers.NumberOfLeadingZeros(G[len30 - 1] | 1) - (len30 * 30 + 2 - bits);
-            int eta = -1 - clzG;
+            // We use the original safegcd here, with eta == 1 - delta
+            // For shorter x, configure as if low zeros of x had been shifted away by divsteps
+            int eta = -clz;
             int lenDE = len30, lenFG = len30;
             int m0Inv32 = (int)Inverse32((uint)M[0]);
             int maxDivsteps = GetMaximumDivsteps(bits);
 
-            int divsteps = 0;
-            while (!EqualToZeroVar_Unlikely(lenFG, G))
+            int divsteps = clz;
+            while (!EqualToVar(lenFG, G, 0))
             {
                 if (divsteps >= maxDivsteps)
                     return false;
@@ -206,20 +205,7 @@ namespace Org.BouncyCastle.Math.Raw
                 eta = Divsteps30Var(eta, F[0], G[0], t);
                 UpdateDE30(lenDE, D, E, t, m0Inv32, M);
                 UpdateFG30(lenFG, F, G, t);
-
-                int fn = F[lenFG - 1];
-                int gn = G[lenFG - 1];
-
-                int cond = (lenFG - 2) >> 31;
-                cond |= fn ^ (fn >> 31);
-                cond |= gn ^ (gn >> 31);
-
-                if (cond == 0)
-                {
-                    F[lenFG - 2] |= fn << 30;
-                    G[lenFG - 2] |= gn << 30;
-                    --lenFG;
-                }
+                lenFG = TrimFG30Var(lenFG, F, G);
             }
 
             int signF = F[lenFG - 1] >> 31;
@@ -241,7 +227,7 @@ namespace Org.BouncyCastle.Math.Raw
             }
             Debug.Assert(0 == signF);
 
-            if (!EqualToOneVar_Expected(lenFG, F))
+            if (!EqualToVar(lenFG, F, 1))
                 return false;
 
             if (signD < 0)
@@ -250,15 +236,73 @@ namespace Org.BouncyCastle.Math.Raw
             }
             Debug.Assert(0 == signD);
 
-            Decode30(bits, D, 0, z, 0);
+            Decode30(bits, D, z);
             Debug.Assert(!Nat.Gte(m.Length, z, m));
 
             return true;
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static uint ModOddIsCoprime(ReadOnlySpan<uint> m, ReadOnlySpan<uint> x)
+#else
+        public static uint ModOddIsCoprime(uint[] m, uint[] x)
+#endif
+        {
+            int len32 = m.Length;
+            Debug.Assert(len32 > 0);
+            Debug.Assert((m[0] & 1) != 0);
+            Debug.Assert(m[len32 - 1] != 0);
+
+            int bits = (len32 << 5) - Integers.NumberOfLeadingZeros((int)m[len32 - 1]);
+            int len30 = (bits + 29) / 30;
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            int allocSize = len30 * 3;
+            Span<int> alloc = (allocSize * Integers.NumBytes <= MaxStackAlloc)
+                ? stackalloc int[allocSize]
+                : new int[allocSize];
+
+            Span<int> t = stackalloc int[4];
+            Span<int> F = alloc[..len30]; alloc = alloc[len30..];
+            Span<int> G = alloc[..len30]; alloc = alloc[len30..];
+            Span<int> M = alloc[..len30];
+#else
+            int[] t = new int[4];
+            int[] F = new int[len30];
+            int[] G = new int[len30];
+            int[] M = new int[len30];
 #endif
+
+            Encode30(bits, x, G);
+            Encode30(bits, m, M);
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            M.CopyTo(F);
+#else
+            Array.Copy(M, 0, F, 0, len30);
+#endif
+
+            // We use the "half delta" variant here, with theta == delta - 1/2
+            int theta = 0;
+            int maxDivsteps = GetMaximumHDDivsteps(bits);
+
+            for (int divSteps = 0; divSteps < maxDivsteps; divSteps += 30)
+            {
+                theta = HDDivsteps30(theta, F[0], G[0], t);
+                UpdateFG30(len30, F, G, t);
+            }
+
+            int signF = F[len30 - 1] >> 31;
+            CNegate30(len30, signF, F);
+
+            return (uint)(EqualTo(len30, F, 1) & EqualTo(len30, G, 0));
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        public static bool ModOddInverseVar(ReadOnlySpan<uint> m, ReadOnlySpan<uint> x, Span<uint> z)
+        public static bool ModOddIsCoprimeVar(ReadOnlySpan<uint> m, ReadOnlySpan<uint> x)
+#else
+        public static bool ModOddIsCoprimeVar(uint[] m, uint[] x)
+#endif
         {
             int len32 = m.Length;
             Debug.Assert(len32 > 0);
@@ -268,30 +312,43 @@ namespace Org.BouncyCastle.Math.Raw
             int bits = (len32 << 5) - Integers.NumberOfLeadingZeros((int)m[len32 - 1]);
             int len30 = (bits + 29) / 30;
 
-            Span<int> alloc = len30 <= 50
-                ? stackalloc int[len30 * 5]
-                : new int[len30 * 5];
+            int clz = bits - Nat.GetBitLength(len32, x);
+            Debug.Assert(clz >= 0);
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            int allocSize = len30 * 3;
+            Span<int> alloc = (allocSize * Integers.NumBytes <= MaxStackAlloc)
+                ? stackalloc int[allocSize]
+                : new int[allocSize];
 
             Span<int> t = stackalloc int[4];
-            Span<int> D = alloc[..len30]; alloc = alloc[len30..];
-            Span<int> E = alloc[..len30]; alloc = alloc[len30..];
             Span<int> F = alloc[..len30]; alloc = alloc[len30..];
             Span<int> G = alloc[..len30]; alloc = alloc[len30..];
             Span<int> M = alloc[..len30];
+#else
+            int[] t = new int[4];
+            int[] F = new int[len30];
+            int[] G = new int[len30];
+            int[] M = new int[len30];
+#endif
 
-            E[0] = 1;
             Encode30(bits, x, G);
             Encode30(bits, m, M);
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
             M.CopyTo(F);
+#else
+            Array.Copy(M, 0, F, 0, len30);
+#endif
 
-            int clzG = Integers.NumberOfLeadingZeros(G[len30 - 1] | 1) - (len30 * 30 + 2 - bits);
-            int eta = -1 - clzG;
-            int lenDE = len30, lenFG = len30;
-            int m0Inv32 = (int)Inverse32((uint)M[0]);
+            // We use the original safegcd here, with eta == 1 - delta
+            // For shorter x, configure as if low zeros of x had been shifted away by divsteps
+            int eta = -clz;
+            int lenFG = len30;
             int maxDivsteps = GetMaximumDivsteps(bits);
 
-            int divsteps = 0;
-            while (!EqualToZeroVar_Unlikely(lenFG, G))
+            int divsteps = clz;
+            while (!EqualToVar(lenFG, G, 0))
             {
                 if (divsteps >= maxDivsteps)
                     return false;
@@ -299,58 +356,19 @@ namespace Org.BouncyCastle.Math.Raw
                 divsteps += 30;
 
                 eta = Divsteps30Var(eta, F[0], G[0], t);
-                UpdateDE30(lenDE, D, E, t, m0Inv32, M);
                 UpdateFG30(lenFG, F, G, t);
-
-                int fn = F[lenFG - 1];
-                int gn = G[lenFG - 1];
-
-                int cond = (lenFG - 2) >> 31;
-                cond |= fn ^ (fn >> 31);
-                cond |= gn ^ (gn >> 31);
-
-                if (cond == 0)
-                {
-                    F[lenFG - 2] |= fn << 30;
-                    G[lenFG - 2] |= gn << 30;
-                    --lenFG;
-                }
+                lenFG = TrimFG30Var(lenFG, F, G);
             }
 
             int signF = F[lenFG - 1] >> 31;
-
-            /*
-             * D is in the range (-2.M, M). First, conditionally add M if D is negative, to bring it
-             * into the range (-M, M). Then normalize by conditionally negating (according to signF)
-             * and/or then adding M, to bring it into the range [0, M).
-             */
-            int signD = D[lenDE - 1] >> 31;
-            if (signD < 0)
-            {
-                signD = Add30(lenDE, D, M);
-            }
             if (signF < 0)
             {
-                signD = Negate30(lenDE, D);
                 signF = Negate30(lenFG, F);
             }
             Debug.Assert(0 == signF);
 
-            if (!EqualToOneVar_Expected(lenFG, F))
-                return false;
-
-            if (signD < 0)
-            {
-                signD = Add30(lenDE, D, M);
-            }
-            Debug.Assert(0 == signD);
-
-            Decode30(bits, D, z);
-            Debug.Assert(!Nat.Gte(m.Length, z, m));
-
-            return true;
+            return EqualToVar(lenFG, F, 1);
         }
-#endif
 
         public static uint[] Random(SecureRandom random, uint[] p)
         {
@@ -392,9 +410,10 @@ namespace Org.BouncyCastle.Math.Raw
             m |= m >> 8;
             m |= m >> 16;
 
-            Span<byte> bytes = len <= 256
-                ? stackalloc byte[len << 2]
-                : new byte[len << 2];
+            int allocSize = len * Integers.NumBytes;
+            Span<byte> bytes = allocSize <= MaxStackAlloc
+                ? stackalloc byte[allocSize]
+                : new byte[allocSize];
 
             do
             {
@@ -496,34 +515,16 @@ namespace Org.BouncyCastle.Math.Raw
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         private static void Decode30(int bits, ReadOnlySpan<int> x, Span<uint> z)
-        {
-            Debug.Assert(bits > 0);
-
-            int avail = 0;
-            ulong data = 0L;
-
-            int xOff = 0, zOff = 0;
-            while (bits > 0)
-            {
-                while (avail < System.Math.Min(32, bits))
-                {
-                    data |= (ulong)x[xOff++] << avail;
-                    avail += 30;
-                }
-
-                z[zOff++] = (uint)data; data >>= 32;
-                avail -= 32;
-                bits -= 32;
-            }
-        }
 #else
-        private static void Decode30(int bits, int[] x, int xOff, uint[] z, int zOff)
+        private static void Decode30(int bits, int[] x, uint[] z)
+#endif
         {
             Debug.Assert(bits > 0);
 
             int avail = 0;
-            ulong data = 0L;
+            ulong data = 0UL;
 
+            int xOff = 0, zOff = 0;
             while (bits > 0)
             {
                 while (avail < System.Math.Min(32, bits))
@@ -537,53 +538,6 @@ namespace Org.BouncyCastle.Math.Raw
                 bits -= 32;
             }
         }
-#endif
-
-#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        private static int Divsteps30(int delta, int f0, int g0, Span<int> t)
-#else
-        private static int Divsteps30(int delta, int f0, int g0, int[] t)
-#endif
-        {
-            int u = 1 << 30, v = 0, q = 0, r = 1 << 30;
-            int f = f0, g = g0;
-
-            for (int i = 0; i < 30; ++i)
-            {
-                Debug.Assert((f & 1) == 1);
-                Debug.Assert(((u >> (30 - i)) * f0 + (v >> (30 - i)) * g0) == f << i);
-                Debug.Assert(((q >> (30 - i)) * f0 + (r >> (30 - i)) * g0) == g << i);
-
-                int c1 = delta >> 31;
-                int c2 = -(g & 1);
-
-                int x = f ^ c1;
-                int y = u ^ c1;
-                int z = v ^ c1;
-
-                g -= x & c2;
-                q -= y & c2;
-                r -= z & c2;
-
-                c2 &= ~c1;
-                delta = (delta ^ c2) - (c2 - 1);
-
-                f += g & c2;
-                u += q & c2;
-                v += r & c2;
-
-                g >>= 1;
-                q >>= 1;
-                r >>= 1;
-            }
-
-            t[0] = u;
-            t[1] = v;
-            t[2] = q;
-            t[3] = r;
-
-            return delta;
-        }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         private static int Divsteps30Var(int eta, int f0, int g0, Span<int> t)
@@ -595,7 +549,7 @@ namespace Org.BouncyCastle.Math.Raw
             int f = f0, g = g0, m, w, x, y, z;
             int i = 30, limit, zeros;
 
-            for (; ; )
+            for (;;)
             {
                 // Use a sentinel bit to count zeros only up to i.
                 zeros = Integers.NumberOfTrailingZeros(g | (-1 << i));
@@ -614,15 +568,15 @@ namespace Org.BouncyCastle.Math.Raw
                 Debug.Assert((u * f0 + v * g0) == f << (30 - i));
                 Debug.Assert((q * f0 + r * g0) == g << (30 - i));
 
-                if (eta < 0)
+                if (eta <= 0)
                 {
-                    eta = -eta;
+                    eta = 2 - eta;
                     x = f; f = g; g = -x;
                     y = u; u = q; q = -y;
                     z = v; v = r; r = -z;
 
                     // Handle up to 6 divsteps at once, subject to eta and i.
-                    limit = (eta + 1) > i ? i : (eta + 1);
+                    limit = eta > i ? i : eta;
                     m = (int)((uint.MaxValue >> (32 - limit)) & 63U);
 
                     w = (f * g * (f * f - 2)) & m;
@@ -630,11 +584,11 @@ namespace Org.BouncyCastle.Math.Raw
                 else
                 {
                     // Handle up to 4 divsteps at once, subject to eta and i.
-                    limit = (eta + 1) > i ? i : (eta + 1);
+                    limit = eta > i ? i : eta;
                     m = (int)((uint.MaxValue >> (32 - limit)) & 15U);
 
                     w = f + (((f + 1) & 4) << 1);
-                    w = (-w * g) & m;
+                    w = (w * -g) & m;
                 }
 
                 g += f * w;
@@ -654,34 +608,16 @@ namespace Org.BouncyCastle.Math.Raw
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         private static void Encode30(int bits, ReadOnlySpan<uint> x, Span<int> z)
-        {
-            Debug.Assert(bits > 0);
-
-            int avail = 0;
-            ulong data = 0UL;
-
-            int xOff = 0, zOff = 0;
-            while (bits > 0)
-            {
-                if (avail < System.Math.Min(30, bits))
-                {
-                    data |= (x[xOff++] & M32UL) << avail;
-                    avail += 32;
-                }
-
-                z[zOff++] = (int)data & M30; data >>= 30;
-                avail -= 30;
-                bits -= 30;
-            }
-        }
 #else
-        private static void Encode30(int bits, uint[] x, int xOff, int[] z, int zOff)
+        private static void Encode30(int bits, uint[] x, int[] z)
+#endif
         {
             Debug.Assert(bits > 0);
 
             int avail = 0;
             ulong data = 0UL;
 
+            int xOff = 0, zOff = 0;
             while (bits > 0)
             {
                 if (avail < System.Math.Min(30, bits))
@@ -695,7 +631,6 @@ namespace Org.BouncyCastle.Math.Raw
                 bits -= 30;
             }
         }
-#endif
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         private static int EqualTo(int len, ReadOnlySpan<int> x, int y)
@@ -713,12 +648,15 @@ namespace Org.BouncyCastle.Math.Raw
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        private static bool EqualToOneVar_Expected(int len, ReadOnlySpan<int> x)
+        private static bool EqualToVar(int len, ReadOnlySpan<int> x, int y)
 #else
-        private static bool EqualToOneVar_Expected(int len, int[] x)
+        private static bool EqualToVar(int len, int[] x, int y)
 #endif
         {
-            int d = x[0] ^ 1;
+            int d = x[0] ^ y;
+            if (d != 0)
+                return false;
+
             for (int i = 1; i < len; ++i)
             {
                 d |= x[i];
@@ -726,41 +664,62 @@ namespace Org.BouncyCastle.Math.Raw
             return d == 0;
         }
 
-#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        private static int EqualToZero(int len, ReadOnlySpan<int> x)
-#else
-        private static int EqualToZero(int len, int[] x)
-#endif
+        private static int GetMaximumDivsteps(int bits)
         {
-            int d = 0;
-            for (int i = 0; i < len; ++i)
-            {
-                d |= x[i];
-            }
-            d = (int)((uint)d >> 1) | (d & 1);
-            return (d - 1) >> 31;
+            //return (49 * bits + (bits < 46 ? 80 : 47)) / 17;
+            return (int)((188898L * bits + (bits < 46 ? 308405 : 181188)) >> 16);
+        }
+
+        private static int GetMaximumHDDivsteps(int bits)
+        {
+            //return (int)((45907L * bits + 30179) / 19929);
+            return (int)((150964L * bits + 99243) >> 16);
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        private static bool EqualToZeroVar_Unlikely(int len, ReadOnlySpan<int> x)
+        private static int HDDivsteps30(int theta, int f0, int g0, Span<int> t)
 #else
-        private static bool EqualToZeroVar_Unlikely(int len, int[] x)
+        private static int HDDivsteps30(int theta, int f0, int g0, int[] t)
 #endif
         {
-            int d = x[0];
-            if (d != 0)
-                return false;
+            int u = 1 << 30, v = 0, q = 0, r = 1 << 30;
+            int f = f0, g = g0;
 
-            for (int i = 1; i < len; ++i)
+            for (int i = 0; i < 30; ++i)
             {
-                d |= x[i];
+                Debug.Assert((f & 1) == 1);
+                Debug.Assert(((u >> (30 - i)) * f0 + (v >> (30 - i)) * g0) == f << i);
+                Debug.Assert(((q >> (30 - i)) * f0 + (r >> (30 - i)) * g0) == g << i);
+
+                int c1 = theta >> 31;
+                int c2 = -(g & 1);
+
+                int x = f ^ c1;
+                int y = u ^ c1;
+                int z = v ^ c1;
+
+                g -= x & c2;
+                q -= y & c2;
+                r -= z & c2;
+
+                int c3 = c2 & ~c1;
+                theta = (theta ^ c3) + 1;
+
+                f += g & c3;
+                u += q & c3;
+                v += r & c3;
+
+                g >>= 1;
+                q >>= 1;
+                r >>= 1;
             }
-            return d == 0;
-        }
 
-        private static int GetMaximumDivsteps(int bits)
-        {
-            return (49 * bits + (bits < 46 ? 80 : 47)) / 17;
+            t[0] = u;
+            t[1] = v;
+            t[2] = q;
+            t[3] = r;
+
+            return theta;
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
@@ -784,6 +743,33 @@ namespace Org.BouncyCastle.Math.Raw
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        private static int TrimFG30Var(int len30, Span<int> F, Span<int> G)
+#else
+        private static int TrimFG30Var(int len30, int[] F, int[] G)
+#endif
+        {
+            Debug.Assert(len30 > 0);
+            Debug.Assert(F.Length >= len30);
+            Debug.Assert(G.Length >= len30);
+
+            int fn = F[len30 - 1];
+            int gn = G[len30 - 1];
+
+            int cond = (len30 - 2) >> 31;
+            cond |= fn ^ (fn >> 31);
+            cond |= gn ^ (gn >> 31);
+
+            if (cond == 0)
+            {
+                F[len30 - 2] |= fn << 30;
+                G[len30 - 2] |= gn << 30;
+                --len30;
+            }
+
+            return len30;
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         private static void UpdateDE30(int len30, Span<int> D, Span<int> E, ReadOnlySpan<int> t, int m0Inv32,
             ReadOnlySpan<int> M)
 #else
diff --git a/crypto/src/math/raw/Nat.cs b/crypto/src/math/raw/Nat.cs
index d748e04c5..b524750d8 100644
--- a/crypto/src/math/raw/Nat.cs
+++ b/crypto/src/math/raw/Nat.cs
@@ -6,6 +6,7 @@ using System.Runtime.InteropServices;
 #endif
 
 using Org.BouncyCastle.Crypto.Utilities;
+using Org.BouncyCastle.Utilities;
 
 namespace Org.BouncyCastle.Math.Raw
 {
@@ -400,6 +401,36 @@ namespace Org.BouncyCastle.Math.Raw
         }
 #endif
 
+        public static uint CAddTo(int len, int mask, uint[] x, uint[] z)
+        {
+            uint MASK = (uint)-(mask & 1);
+
+            ulong c = 0;
+            for (int i = 0; i < len; ++i)
+            {
+                c += (ulong)z[i] + (x[i] & MASK);
+                z[i] = (uint)c;
+                c >>= 32;
+            }
+            return (uint)c;
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static uint CAddTo(int len, int mask, ReadOnlySpan<uint> x, Span<uint> z)
+        {
+            uint MASK = (uint)-(mask & 1);
+
+            ulong c = 0;
+            for (int i = 0; i < len; ++i)
+            {
+                c += (ulong)z[i] + (x[i] & MASK);
+                z[i] = (uint)c;
+                c >>= 32;
+            }
+            return (uint)c;
+        }
+#endif
+
         public static void CMov(int len, int mask, uint[] x, int xOff, uint[] z, int zOff)
         {
             uint MASK = (uint)-(mask & 1);
@@ -712,7 +743,11 @@ namespace Org.BouncyCastle.Math.Raw
         }
 #endif
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static uint EqualTo(int len, ReadOnlySpan<uint> x, uint y)
+#else
         public static uint EqualTo(int len, uint[] x, uint y)
+#endif
         {
             uint d = x[0] ^ y;
             for (int i = 1; i < len; ++i)
@@ -735,19 +770,10 @@ namespace Org.BouncyCastle.Math.Raw
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        public static uint EqualTo(int len, ReadOnlySpan<uint> x, uint y)
-        {
-            uint d = x[0] ^ y;
-            for (int i = 1; i < len; ++i)
-            {
-                d |= x[i];
-            }
-            d = (d >> 1) | (d & 1);
-            return (uint)(((int)d - 1) >> 31);
-        }
-#endif
-
+        public static uint EqualTo(int len, ReadOnlySpan<uint> x, ReadOnlySpan<uint> y)
+#else
         public static uint EqualTo(int len, uint[] x, uint[] y)
+#endif
         {
             uint d = 0;
             for (int i = 0; i < len; ++i)
@@ -769,20 +795,12 @@ namespace Org.BouncyCastle.Math.Raw
             return (uint)(((int)d - 1) >> 31);
         }
 
-#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        public static uint EqualTo(int len, ReadOnlySpan<uint> x, ReadOnlySpan<uint> y)
-        {
-            uint d = 0;
-            for (int i = 0; i < len; ++i)
-            {
-                d |= x[i] ^ y[i];
-            }
-            d = (d >> 1) | (d & 1);
-            return (uint)(((int)d - 1) >> 31);
-        }
-#endif
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static uint EqualToZero(int len, ReadOnlySpan<uint> x)
+#else
         public static uint EqualToZero(int len, uint[] x)
+#endif
         {
             uint d = 0;
             for (int i = 0; i < len; ++i)
@@ -804,19 +822,6 @@ namespace Org.BouncyCastle.Math.Raw
             return (uint)(((int)d - 1) >> 31);
         }
 
-#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        public static uint EqualToZero(int len, ReadOnlySpan<uint> x)
-        {
-            uint d = 0;
-            for (int i = 0; i < len; ++i)
-            {
-                d |= x[i];
-            }
-            d = (d >> 1) | (d & 1);
-            return (uint)(((int)d - 1) >> 31);
-        }
-#endif
-
         public static uint[] FromBigInteger(int bits, BigInteger x)
         {
             if (x.SignValue < 0 || x.BitLength > bits)
@@ -924,6 +929,32 @@ namespace Org.BouncyCastle.Math.Raw
         }
 #endif
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static int GetBitLength(int len, ReadOnlySpan<uint> x)
+#else
+        public static int GetBitLength(int len, uint[] x)
+#endif
+        {
+            for (int i = len - 1; i >= 0; --i)
+            {
+                uint x_i = x[i];
+                if (x_i != 0)
+                    return i * 32 + 32 - Integers.NumberOfLeadingZeros((int)x_i);
+            }
+            return 0;
+        }
+
+        public static int GetBitLength(int len, uint[] x, int xOff)
+        {
+            for (int i = len - 1; i >= 0; --i)
+            {
+                uint x_i = x[xOff + i];
+                if (x_i != 0)
+                    return i * 32 + 32 - Integers.NumberOfLeadingZeros((int)x_i);
+            }
+            return 0;
+        }
+
         public static int GetLengthForBits(int bits)
         {
             if (bits < 1)
diff --git a/crypto/src/math/raw/Nat256.cs b/crypto/src/math/raw/Nat256.cs
index 59039d3fa..49adf04af 100644
--- a/crypto/src/math/raw/Nat256.cs
+++ b/crypto/src/math/raw/Nat256.cs
@@ -1865,7 +1865,8 @@ namespace Org.BouncyCastle.Math.Raw
         public static void Xor(ReadOnlySpan<uint> x, ReadOnlySpan<uint> y, Span<uint> z)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked)
             {
                 var X = MemoryMarshal.AsBytes(x[..8]);
                 var Y = MemoryMarshal.AsBytes(y[..8]);
@@ -1880,7 +1881,8 @@ namespace Org.BouncyCastle.Math.Raw
                 return;
             }
 
-            if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked)
             {
                 var X = MemoryMarshal.AsBytes(x[..8]);
                 var Y = MemoryMarshal.AsBytes(y[..8]);
diff --git a/crypto/src/math/raw/Nat512.cs b/crypto/src/math/raw/Nat512.cs
index 56fa9a2c9..71b53214c 100644
--- a/crypto/src/math/raw/Nat512.cs
+++ b/crypto/src/math/raw/Nat512.cs
@@ -67,7 +67,8 @@ namespace Org.BouncyCastle.Math.Raw
         public static void Xor(ReadOnlySpan<uint> x, ReadOnlySpan<uint> y, Span<uint> z)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked)
             {
                 var X = MemoryMarshal.AsBytes(x[..16]);
                 var Y = MemoryMarshal.AsBytes(y[..16]);
@@ -87,7 +88,8 @@ namespace Org.BouncyCastle.Math.Raw
                 return;
             }
 
-            if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked)
             {
                 var X = MemoryMarshal.AsBytes(x[..16]);
                 var Y = MemoryMarshal.AsBytes(y[..16]);
@@ -145,7 +147,8 @@ namespace Org.BouncyCastle.Math.Raw
         public static void XorTo(ReadOnlySpan<uint> x, Span<uint> z)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked)
             {
                 var X = MemoryMarshal.AsBytes(x[..16]);
                 var Z = MemoryMarshal.AsBytes(z[..16]);
@@ -164,7 +167,8 @@ namespace Org.BouncyCastle.Math.Raw
                 return;
             }
 
-            if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked)
             {
                 var X = MemoryMarshal.AsBytes(x[..16]);
                 var Z = MemoryMarshal.AsBytes(z[..16]);
@@ -221,7 +225,8 @@ namespace Org.BouncyCastle.Math.Raw
         public static void Xor64(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked)
             {
                 var X = MemoryMarshal.AsBytes(x[..8]);
                 var Y = MemoryMarshal.AsBytes(y[..8]);
@@ -241,7 +246,8 @@ namespace Org.BouncyCastle.Math.Raw
                 return;
             }
 
-            if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked)
             {
                 var X = MemoryMarshal.AsBytes(x[..8]);
                 var Y = MemoryMarshal.AsBytes(y[..8]);
@@ -299,7 +305,8 @@ namespace Org.BouncyCastle.Math.Raw
         public static void XorTo64(ReadOnlySpan<ulong> x, Span<ulong> z)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked)
             {
                 var X = MemoryMarshal.AsBytes(x[..8]);
                 var Z = MemoryMarshal.AsBytes(z[..8]);
@@ -318,7 +325,8 @@ namespace Org.BouncyCastle.Math.Raw
                 return;
             }
 
-            if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked)
             {
                 var X = MemoryMarshal.AsBytes(x[..8]);
                 var Z = MemoryMarshal.AsBytes(z[..8]);