39 files changed, 1367 insertions, 599 deletions
diff --git a/crypto/src/math/BigInteger.cs b/crypto/src/math/BigInteger.cs
index 7da886c4f..42b5b5089 100644
--- a/crypto/src/math/BigInteger.cs
+++ b/crypto/src/math/BigInteger.cs
@@ -139,6 +139,8 @@ namespace Org.BouncyCastle.Math
         public static readonly BigInteger Two;
         public static readonly BigInteger Three;
         public static readonly BigInteger Four;
+        public static readonly BigInteger Five;
+        public static readonly BigInteger Six;
         public static readonly BigInteger Ten;
 
 #if !NETCOREAPP3_0_OR_GREATER
@@ -181,27 +183,34 @@ namespace Org.BouncyCastle.Math
         static BigInteger()
         {
             Zero = new BigInteger(0, ZeroMagnitude, false);
-            Zero.nBits = 0; Zero.nBitLength = 0;
+            Zero.nBits = 0;
+            Zero.nBitLength = 0;
 
             SMALL_CONSTANTS[0] = Zero;
             for (uint i = 1; i < SMALL_CONSTANTS.Length; ++i)
             {
-                SMALL_CONSTANTS[i] = CreateUValueOf(i);
+                var sc = CreateUValueOf(i);
+                sc.nBits = Integers.PopCount(i);
+                sc.nBitLength = BitLen(i);
+
+                SMALL_CONSTANTS[i] = sc;
             }
 
             One = SMALL_CONSTANTS[1];
             Two = SMALL_CONSTANTS[2];
             Three = SMALL_CONSTANTS[3];
             Four = SMALL_CONSTANTS[4];
+            Five = SMALL_CONSTANTS[5];
+            Six = SMALL_CONSTANTS[6];
             Ten = SMALL_CONSTANTS[10];
 
-            radix2 = ValueOf(2);
+            radix2 = Two;
             radix2E = radix2.Pow(chunk2);
 
             radix8 = ValueOf(8);
             radix8E = radix8.Pow(chunk8);
 
-            radix10 = ValueOf(10);
+            radix10 = Ten;
             radix10E = radix10.Pow(chunk10);
 
             radix16 = ValueOf(16);
@@ -1171,7 +1180,7 @@ namespace Org.BouncyCastle.Math
                 ?  1
                 :  sign == 0
                 ?  0
-                :  sign * CompareNoLeadingZeroes(0, magnitude, 0, other.magnitude);
+                :  sign * CompareNoLeadingZeros(0, magnitude, 0, other.magnitude);
         }
 
         /**
@@ -1190,10 +1199,10 @@ namespace Org.BouncyCastle.Math
                 yIndx++;
             }
 
-            return CompareNoLeadingZeroes(xIndx, x, yIndx, y);
+            return CompareNoLeadingZeros(xIndx, x, yIndx, y);
         }
 
-        private static int CompareNoLeadingZeroes(int xIndx, uint[] x, int yIndx, uint[] y)
+        private static int CompareNoLeadingZeros(int xIndx, uint[] x, int yIndx, uint[] y)
         {
             int diff = (x.Length - y.Length) - (xIndx - yIndx);
 
@@ -1234,7 +1243,7 @@ namespace Org.BouncyCastle.Math
 
             Debug.Assert(yStart < y.Length);
 
-            int xyCmp = CompareNoLeadingZeroes(xStart, x, yStart, y);
+            int xyCmp = CompareNoLeadingZeros(xStart, x, yStart, y);
             uint[] count;
 
             if (xyCmp > 0)
@@ -1271,7 +1280,7 @@ namespace Org.BouncyCastle.Math
                 for (;;)
                 {
                     if (cBitLength < xBitLength
-                        || CompareNoLeadingZeroes(xStart, x, cStart, c) >= 0)
+                        || CompareNoLeadingZeros(xStart, x, cStart, c) >= 0)
                     {
                         Subtract(xStart, x, cStart, c);
                         AddMagnitudes(count, iCount);
@@ -1289,7 +1298,7 @@ namespace Org.BouncyCastle.Math
                             if (xBitLength < yBitLength)
                                 return count;
 
-                            xyCmp = CompareNoLeadingZeroes(xStart, x, yStart, y);
+                            xyCmp = CompareNoLeadingZeros(xStart, x, yStart, y);
 
                             if (xyCmp <= 0)
                                 break;
@@ -1623,6 +1632,8 @@ namespace Org.BouncyCastle.Math
             BigInteger montRadix = One.ShiftLeft(32 * n.magnitude.Length).Remainder(n);
             BigInteger minusMontRadix = n.Subtract(montRadix);
 
+            uint[] yAccum = new uint[n.magnitude.Length + 1];
+
             do
             {
                 BigInteger a;
@@ -1633,7 +1644,7 @@ namespace Org.BouncyCastle.Math
                 while (a.sign == 0 || a.CompareTo(n) >= 0
                     || a.IsEqualMagnitude(montRadix) || a.IsEqualMagnitude(minusMontRadix));
 
-                BigInteger y = ModPowMonty(a, r, n, false);
+                BigInteger y = ModPowMonty(yAccum, a, r, n, false);
 
                 if (!y.Equals(montRadix))
                 {
@@ -1643,7 +1654,7 @@ namespace Org.BouncyCastle.Math
                         if (++j == s)
                             return false;
 
-                        y = ModPowMonty(y, Two, n, false);
+                        y = ModSquareMonty(yAccum, y, n);
 
                         if (y.Equals(montRadix))
                             return false;
@@ -1725,12 +1736,12 @@ namespace Org.BouncyCastle.Math
 //				for (;;)
 //				{
 //					// While F is even, do F=F/u, C=C*u, k=k+1.
-//					int zeroes = F.GetLowestSetBit();
-//					if (zeroes > 0)
+//					int zeros = F.GetLowestSetBit();
+//					if (zeros > 0)
 //					{
-//						F = F.ShiftRight(zeroes);
-//						C = C.ShiftLeft(zeroes);
-//						k += zeroes;
+//						F = F.ShiftRight(zeros);
+//						C = C.ShiftLeft(zeros);
+//						k += zeros;
 //					}
 //
 //					// If F = 1, then return B,k.
@@ -1891,7 +1902,8 @@ namespace Org.BouncyCastle.Math
                 }
                 else
                 {
-                    result = ModPowMonty(result, e, m, true);
+                    uint[] yAccum = new uint[m.magnitude.Length + 1];
+                    result = ModPowMonty(yAccum, result, e, m, true);
                 }
             }
 
@@ -1925,17 +1937,17 @@ namespace Org.BouncyCastle.Math
                 oddPowers[i] = ReduceBarrett(oddPowers[i - 1].Multiply(b2), m, mr, yu);
             }
 
-            int[] windowList = GetWindowList(e.magnitude, extraBits);
+            uint[] windowList = GetWindowList(e.magnitude, extraBits);
             Debug.Assert(windowList.Length > 0);
 
-            int window = windowList[0];
-            int mult = window & 0xFF, lastZeroes = window >> 8;
+            uint window = windowList[0];
+            uint mult = window & 0xFFU, lastZeros = window >> 8;
 
             BigInteger y;
             if (mult == 1)
             {
                 y = b2;
-                --lastZeroes;
+                --lastZeros;
             }
             else
             {
@@ -1943,11 +1955,11 @@ namespace Org.BouncyCastle.Math
             }
 
             int windowPos = 1;
-            while ((window = windowList[windowPos++]) != -1)
+            while ((window = windowList[windowPos++]) != uint.MaxValue)
             {
                 mult = window & 0xFF;
 
-                int bits = lastZeroes + BitLen((byte)mult);
+                int bits = (int)lastZeros + BitLen((byte)mult);
                 for (int j = 0; j < bits; ++j)
                 {
                     y = ReduceBarrett(y.Square(), m, mr, yu);
@@ -1955,10 +1967,10 @@ namespace Org.BouncyCastle.Math
 
                 y = ReduceBarrett(y.Multiply(oddPowers[mult >> 1]), m, mr, yu);
 
-                lastZeroes = window >> 8;
+                lastZeros = window >> 8;
             }
 
-            for (int i = 0; i < lastZeroes; ++i)
+            for (int i = 0; i < lastZeros; ++i)
             {
                 y = ReduceBarrett(y.Square(), m, mr, yu);
             }
@@ -1999,7 +2011,7 @@ namespace Org.BouncyCastle.Math
             return x;
         }
 
-        private static BigInteger ModPowMonty(BigInteger b, BigInteger e, BigInteger m, bool convert)
+        private static BigInteger ModPowMonty(uint[] yAccum, BigInteger b, BigInteger e, BigInteger m, bool convert)
         {
             int n = m.magnitude.Length;
             int powR = 32 * n;
@@ -2012,7 +2024,7 @@ namespace Org.BouncyCastle.Math
                 b = b.ShiftLeft(powR).Remainder(m);
             }
 
-            uint[] yAccum = new uint[n + 1];
+            Debug.Assert(yAccum.Length == n + 1);
 
             uint[] zVal = b.magnitude;
             Debug.Assert(zVal.Length <= n);
@@ -2050,17 +2062,17 @@ namespace Org.BouncyCastle.Math
                 MultiplyMonty(yAccum, oddPowers[i], zSquared, m.magnitude, mDash, smallMontyModulus);
             }
 
-            int[] windowList = GetWindowList(e.magnitude, extraBits);
+            uint[] windowList = GetWindowList(e.magnitude, extraBits);
             Debug.Assert(windowList.Length > 1);
 
-            int window = windowList[0];
-            int mult = window & 0xFF, lastZeroes = window >> 8;
+            uint window = windowList[0];
+            uint mult = window & 0xFF, lastZeros = window >> 8;
 
             uint[] yVal;
             if (mult == 1)
             {
                 yVal = zSquared;
-                --lastZeroes;
+                --lastZeros;
             }
             else
             {
@@ -2068,11 +2080,11 @@ namespace Org.BouncyCastle.Math
             }
 
             int windowPos = 1;
-            while ((window = windowList[windowPos++]) != -1)
+            while ((window = windowList[windowPos++]) != uint.MaxValue)
             {
                 mult = window & 0xFF;
 
-                int bits = lastZeroes + BitLen((byte)mult);
+                int bits = (int)lastZeros + BitLen((byte)mult);
                 for (int j = 0; j < bits; ++j)
                 {
                     SquareMonty(yAccum, yVal, m.magnitude, mDash, smallMontyModulus);
@@ -2080,10 +2092,10 @@ namespace Org.BouncyCastle.Math
 
                 MultiplyMonty(yAccum, yVal, oddPowers[mult >> 1], m.magnitude, mDash, smallMontyModulus);
 
-                lastZeroes = window >> 8;
+                lastZeros = window >> 8;
             }
 
-            for (int i = 0; i < lastZeroes; ++i)
+            for (int i = 0; i < lastZeros; ++i)
             {
                 SquareMonty(yAccum, yVal, m.magnitude, mDash, smallMontyModulus);
             }
@@ -2101,22 +2113,49 @@ namespace Org.BouncyCastle.Math
             return new BigInteger(1, yVal, true);
         }
 
-        private static int[] GetWindowList(uint[] mag, int extraBits)
+        private static BigInteger ModSquareMonty(uint[] yAccum, BigInteger b, BigInteger m)
+        {
+            int n = m.magnitude.Length;
+            int powR = 32 * n;
+            bool smallMontyModulus = m.BitLength + 2 <= powR;
+            uint mDash = m.GetMQuote();
+
+            Debug.Assert(yAccum.Length == n + 1);
+
+            uint[] zVal = b.magnitude;
+            Debug.Assert(zVal.Length <= n);
+
+            uint[] yVal = new uint[n];
+            zVal.CopyTo(yVal, n - zVal.Length);
+
+            SquareMonty(yAccum, yVal, m.magnitude, mDash, smallMontyModulus);
+
+            if (smallMontyModulus && CompareTo(0, yVal, 0, m.magnitude) >= 0)
+            {
+                Subtract(0, yVal, 0, m.magnitude);
+            }
+
+            return new BigInteger(1, yVal, true);
+        }
+
+        private static uint[] GetWindowList(uint[] mag, int extraBits)
         {
-            int v = (int)mag[0];
-            Debug.Assert(v != 0);
+            uint v = mag[0];
+            Debug.Assert(v != 0U);
 
-            int leadingBits = BitLen((uint)v);
+            int leadingBits = BitLen(v);
+            int totalBits = ((mag.Length - 1) << 5) + leadingBits;
 
-            int resultSize = (((mag.Length - 1) << 5) + leadingBits) / (1 + extraBits) + 2;
-            int[] result = new int[resultSize];
+            int resultSize = (totalBits + extraBits) / (1 + extraBits) + 1;
+            uint[] result = new uint[resultSize];
             int resultPos = 0;
 
             int bitPos = 33 - leadingBits;
             v <<= bitPos;
 
-            int mult = 1, multLimit = 1 << extraBits;
-            int zeroes = 0;
+            uint mult = 1U;
+            uint multLimit = 1U << extraBits;
+            uint zeros = 0U;
 
             int i = 0;
             for (;;)
@@ -2125,17 +2164,17 @@ namespace Org.BouncyCastle.Math
                 {
                     if (mult < multLimit)
                     {
-                        mult = (mult << 1) | (int)((uint)v >> 31);
+                        mult = (mult << 1) | (v >> 31);
                     }
-                    else if (v < 0)
+                    else if ((int)v < 0)
                     {
-                        result[resultPos++] = CreateWindowEntry(mult, zeroes);
-                        mult = 1;
-                        zeroes = 0;
+                        result[resultPos++] = CreateWindowEntry(mult, zeros);
+                        mult = 1U;
+                        zeros = 0U;
                     }
                     else
                     {
-                        ++zeroes;
+                        ++zeros;
                     }
 
                     v <<= 1;
@@ -2143,35 +2182,35 @@ namespace Org.BouncyCastle.Math
 
                 if (++i == mag.Length)
                 {
-                    result[resultPos++] = CreateWindowEntry(mult, zeroes);
+                    result[resultPos++] = CreateWindowEntry(mult, zeros);
                     break;
                 }
 
-                v = (int)mag[i];
+                v = mag[i];
                 bitPos = 0;
             }
 
-            result[resultPos] = -1;
+            result[resultPos] = uint.MaxValue; // Sentinel value
             return result;
         }
 
-        private static int CreateWindowEntry(int mult, int zeroes)
+        private static uint CreateWindowEntry(uint mult, uint zeros)
         {
             Debug.Assert(mult > 0);
 
 #if NETCOREAPP3_0_OR_GREATER
             int tz = BitOperations.TrailingZeroCount(mult);
             mult >>= tz;
-            zeroes += tz;
+            zeros += (uint)tz;
 #else
-            while ((mult & 1) == 0)
+            while ((mult & 1U) == 0U)
             {
                 mult >>= 1;
-                ++zeroes;
+                ++zeros;
             }
 #endif
 
-            return mult | (zeroes << 8);
+            return mult | (zeros << 8);
         }
 
         /**
@@ -2682,7 +2721,7 @@ namespace Org.BouncyCastle.Math
 
             Debug.Assert(yStart < y.Length);
 
-            int xyCmp = CompareNoLeadingZeroes(xStart, x, yStart, y);
+            int xyCmp = CompareNoLeadingZeros(xStart, x, yStart, y);
 
             if (xyCmp > 0)
             {
@@ -2709,7 +2748,7 @@ namespace Org.BouncyCastle.Math
                 for (;;)
                 {
                     if (cBitLength < xBitLength
-                        || CompareNoLeadingZeroes(xStart, x, cStart, c) >= 0)
+                        || CompareNoLeadingZeros(xStart, x, cStart, c) >= 0)
                     {
                         Subtract(xStart, x, cStart, c);
 
@@ -2726,7 +2765,7 @@ namespace Org.BouncyCastle.Math
                             if (xBitLength < yBitLength)
                                 return x;
 
-                            xyCmp = CompareNoLeadingZeroes(xStart, x, yStart, y);
+                            xyCmp = CompareNoLeadingZeros(xStart, x, yStart, y);
 
                             if (xyCmp <= 0)
                                 break;
@@ -2799,7 +2838,7 @@ namespace Org.BouncyCastle.Math
                 }
             }
 
-            if (CompareNoLeadingZeroes(0, magnitude, 0, n.magnitude) < 0)
+            if (CompareNoLeadingZeros(0, magnitude, 0, n.magnitude) < 0)
                 return this;
 
             uint[] result;
@@ -3094,7 +3133,7 @@ namespace Org.BouncyCastle.Math
             if (this.sign != n.sign)
                 return Add(n.Negate());
 
-            int compare = CompareNoLeadingZeroes(0, magnitude, 0, n.magnitude);
+            int compare = CompareNoLeadingZeros(0, magnitude, 0, n.magnitude);
             if (compare == 0)
                 return Zero;
 
@@ -3607,47 +3646,55 @@ namespace Org.BouncyCastle.Math
             sb.Append(s);
         }
 
+        private static BigInteger CreateUValueOf(uint value)
+        {
+            if (value == 0)
+                return Zero;
+
+            return new BigInteger(1, new uint[]{ value }, false);
+        }
+
         private static BigInteger CreateUValueOf(ulong value)
         {
             uint msw = (uint)(value >> 32);
             uint lsw = (uint)value;
 
-            if (msw != 0)
-                return new BigInteger(1, new uint[]{ msw, lsw }, false);
-
-            if (lsw != 0)
-            {
-                BigInteger n = new BigInteger(1, new uint[]{ lsw }, false);
-                // Check for a power of two
-                if ((lsw & -lsw) == lsw)
-                {
-                    n.nBits = 1;
-                }
-                return n;
-            }
+            if (msw == 0)
+                return CreateUValueOf(lsw);
 
-            return Zero;
+            return new BigInteger(1, new uint[]{ msw, lsw }, false);
         }
 
-        private static BigInteger CreateValueOf(long value)
+        public static BigInteger ValueOf(int value)
         {
-            if (value < 0)
+            if (value >= 0)
             {
-                if (value == long.MinValue)
-                    return CreateValueOf(~value).Not();
+                if (value < SMALL_CONSTANTS.Length)
+                    return SMALL_CONSTANTS[value];
 
-                return CreateValueOf(-value).Negate();
+                return CreateUValueOf((uint)value);
             }
 
-            return CreateUValueOf((ulong)value);
+            if (value == int.MinValue)
+                return CreateUValueOf((uint)~value).Not();
+
+            return ValueOf(-value).Negate();
         }
 
         public static BigInteger ValueOf(long value)
         {
-            if (value >= 0 && value < SMALL_CONSTANTS.Length)
-                return SMALL_CONSTANTS[value];
+            if (value >= 0L)
+            {
+                if (value < SMALL_CONSTANTS.Length)
+                    return SMALL_CONSTANTS[value];
+
+                return CreateUValueOf((ulong)value);
+            }
+
+            if (value == long.MinValue)
+                return CreateUValueOf((ulong)~value).Not();
 
-            return CreateValueOf(value);
+            return ValueOf(-value).Negate();
         }
 
         public int GetLowestSetBit()
diff --git a/crypto/src/math/ec/ECCurve.cs b/crypto/src/math/ec/ECCurve.cs
index 624495051..245ca1941 100644
--- a/crypto/src/math/ec/ECCurve.cs
+++ b/crypto/src/math/ec/ECCurve.cs
@@ -1,7 +1,7 @@
 using System;
+using System.Collections.Concurrent;
 using System.Collections.Generic;
 
-using Org.BouncyCastle.Math.EC.Abc;
 using Org.BouncyCastle.Math.EC.Endo;
 using Org.BouncyCastle.Math.EC.Multiplier;
 using Org.BouncyCastle.Math.Field;
@@ -675,40 +675,26 @@ namespace Org.BouncyCastle.Math.EC
     public abstract class AbstractFpCurve
         : ECCurve
     {
-        private static readonly HashSet<BigInteger> KnownQs = new HashSet<BigInteger>();
+        private static readonly ConcurrentDictionary<BigInteger, bool> KnownPrimes =
+            new ConcurrentDictionary<BigInteger, bool>();
 
         protected AbstractFpCurve(BigInteger q)
-            : this(q, false)
+            : this(q, isInternal: false)
         {
         }
 
         internal AbstractFpCurve(BigInteger q, bool isInternal)
             : base(FiniteFields.GetPrimeField(q))
         {
-            if (!isInternal)
+            if (isInternal)
             {
-                bool unknownQ;
-                lock (KnownQs) unknownQ = !KnownQs.Contains(q);
-
-                if (unknownQ)
-                {
-                    int maxBitLength = ImplGetInteger("Org.BouncyCastle.EC.Fp_MaxSize", 1042); // 2 * 521
-                    int certainty = ImplGetInteger("Org.BouncyCastle.EC.Fp_Certainty", 100);
-
-                    int qBitLength = q.BitLength;
-                    if (maxBitLength < qBitLength)
-                        throw new ArgumentException("Fp q value out of range");
-
-                    if (Primes.HasAnySmallFactors(q) ||
-                        !Primes.IsMRProbablePrime(q, SecureRandom.ArbitraryRandom,
-                            ImplGetNumberOfIterations(qBitLength, certainty)))
-                    {
-                        throw new ArgumentException("Fp q value not prime");
-                    }
-                }
+                KnownPrimes.AddOrUpdate(q, true, (key, value) => true);
+            }
+            else if (!KnownPrimes.ContainsKey(q))
+            {
+                ImplCheckQ(q);
+                KnownPrimes.TryAdd(q, false);
             }
-
-            lock (KnownQs) KnownQs.Add(q);
         }
 
         public override bool IsValidFieldElement(BigInteger x)
@@ -761,16 +747,24 @@ namespace Org.BouncyCastle.Math.EC
             return CreateRawPoint(x, y);
         }
 
+        private static void ImplCheckQ(BigInteger q)
+        {
+            int maxBitLength = ImplGetInteger("Org.BouncyCastle.EC.Fp_MaxSize", 1042); // 2 * 521
+            if (q.BitLength > maxBitLength)
+                throw new ArgumentException("Fp q value out of range");
+
+            if (!ImplIsPrime(q))
+                throw new ArgumentException("Fp q value not prime");
+        }
+
         private static int ImplGetInteger(string envVariable, int defaultValue)
         {
-            string v = Platform.GetEnvironmentVariable(envVariable);
-            if (v == null)
-                return defaultValue;
+            string property = Platform.GetEnvironmentVariable(envVariable);
 
-            return int.Parse(v);
+            return int.TryParse(property, out int value) ? value : defaultValue;
         }
 
-        private static int ImplGetNumberOfIterations(int bits, int certainty)
+        private static int ImplGetIterations(int bits, int certainty)
         {
             /*
              * NOTE: We enforce a minimum 'certainty' of 100 for bits >= 1024 (else 80). Where the
@@ -802,6 +796,17 @@ namespace Org.BouncyCastle.Math.EC
             }
         }
 
+        private static bool ImplIsPrime(BigInteger q)
+        {
+            if (Primes.HasAnySmallFactors(q))
+                return false;
+
+            int certainty = ImplGetInteger("Org.BouncyCastle.EC.Fp_Certainty", 100);
+            int iterations = ImplGetIterations(q.BitLength, certainty);
+
+            return Primes.IsMRProbablePrime(q, SecureRandom.ArbitraryRandom, iterations);
+        }
+
         private static BigInteger ImplRandomFieldElement(SecureRandom r, BigInteger p)
         {
             BigInteger x;
@@ -843,15 +848,15 @@ namespace Org.BouncyCastle.Math.EC
         }
 
         public FpCurve(BigInteger q, BigInteger a, BigInteger b, BigInteger order, BigInteger cofactor)
-            : this(q, a, b, order, cofactor, false)
+            : this(q, a, b, order, cofactor, isInternal: false)
         {
         }
 
-        internal FpCurve(BigInteger q, BigInteger a, BigInteger b, BigInteger order, BigInteger cofactor, bool isInternal)
+        internal FpCurve(BigInteger q, BigInteger a, BigInteger b, BigInteger order, BigInteger cofactor,
+            bool isInternal)
             : base(q, isInternal)
         {
             this.m_q = q;
-
             this.m_r = FpFieldElement.CalculateResidue(q);
             this.m_infinity = new FpPoint(this, null, null);
 
@@ -864,7 +869,7 @@ namespace Org.BouncyCastle.Math.EC
 
         internal FpCurve(BigInteger q, BigInteger r, ECFieldElement a, ECFieldElement b, BigInteger order,
             BigInteger cofactor)
-            : base(q, true)
+            : base(q, isInternal: true)
         {
             this.m_q = q;
             this.m_r = r;
diff --git a/crypto/src/math/ec/abc/Tnaf.cs b/crypto/src/math/ec/abc/Tnaf.cs
index 88a4eeb96..d8e9b6ae0 100644
--- a/crypto/src/math/ec/abc/Tnaf.cs
+++ b/crypto/src/math/ec/abc/Tnaf.cs
@@ -500,12 +500,12 @@ namespace Org.BouncyCastle.Math.EC.Abc
             {
                 if (mu == 1)
                 {
-                    return BigInteger.ValueOf(6);
+                    return BigInteger.Six;
                 }
                 else
                 {
                     // mu == -1
-                    return BigInteger.ValueOf(10);
+                    return BigInteger.Ten;
                 }
             }
             else
diff --git a/crypto/src/math/ec/custom/gm/SM2P256V1Curve.cs b/crypto/src/math/ec/custom/gm/SM2P256V1Curve.cs
index 3147ccf98..6456120a6 100644
--- a/crypto/src/math/ec/custom/gm/SM2P256V1Curve.cs
+++ b/crypto/src/math/ec/custom/gm/SM2P256V1Curve.cs
@@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.GM
         protected readonly SM2P256V1Point m_infinity;
 
         public SM2P256V1Curve()
-            : base(q, true)
+            : base(q, isInternal: true)
         {
             this.m_infinity = new SM2P256V1Point(this, null, null);
 
diff --git a/crypto/src/math/ec/custom/sec/SecP128R1Curve.cs b/crypto/src/math/ec/custom/sec/SecP128R1Curve.cs
index 5fa18d470..b96fa75d0 100644
--- a/crypto/src/math/ec/custom/sec/SecP128R1Curve.cs
+++ b/crypto/src/math/ec/custom/sec/SecP128R1Curve.cs
@@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
         protected readonly SecP128R1Point m_infinity;
 
         public SecP128R1Curve()
-            : base(q, true)
+            : base(q, isInternal: true)
         {
             this.m_infinity = new SecP128R1Point(this, null, null);
 
diff --git a/crypto/src/math/ec/custom/sec/SecP160K1Curve.cs b/crypto/src/math/ec/custom/sec/SecP160K1Curve.cs
index b757659d2..471f7f992 100644
--- a/crypto/src/math/ec/custom/sec/SecP160K1Curve.cs
+++ b/crypto/src/math/ec/custom/sec/SecP160K1Curve.cs
@@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
         protected readonly SecP160K1Point m_infinity;
 
         public SecP160K1Curve()
-            : base(q, true)
+            : base(q, isInternal: true)
         {
             this.m_infinity = new SecP160K1Point(this, null, null);
 
diff --git a/crypto/src/math/ec/custom/sec/SecP160R1Curve.cs b/crypto/src/math/ec/custom/sec/SecP160R1Curve.cs
index 3b7e1aa06..491b10fd3 100644
--- a/crypto/src/math/ec/custom/sec/SecP160R1Curve.cs
+++ b/crypto/src/math/ec/custom/sec/SecP160R1Curve.cs
@@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
         protected readonly SecP160R1Point m_infinity;
 
         public SecP160R1Curve()
-            : base(q, true)
+            : base(q, isInternal: true)
         {
             this.m_infinity = new SecP160R1Point(this, null, null);
 
diff --git a/crypto/src/math/ec/custom/sec/SecP160R2Curve.cs b/crypto/src/math/ec/custom/sec/SecP160R2Curve.cs
index 0f226ad19..97d8b6d00 100644
--- a/crypto/src/math/ec/custom/sec/SecP160R2Curve.cs
+++ b/crypto/src/math/ec/custom/sec/SecP160R2Curve.cs
@@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
         protected readonly SecP160R2Point m_infinity;
 
         public SecP160R2Curve()
-            : base(q, true)
+            : base(q, isInternal: true)
         {
             this.m_infinity = new SecP160R2Point(this, null, null);
 
diff --git a/crypto/src/math/ec/custom/sec/SecP192K1Curve.cs b/crypto/src/math/ec/custom/sec/SecP192K1Curve.cs
index b9ff71ac8..b4a884e83 100644
--- a/crypto/src/math/ec/custom/sec/SecP192K1Curve.cs
+++ b/crypto/src/math/ec/custom/sec/SecP192K1Curve.cs
@@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
         protected readonly SecP192K1Point m_infinity;
 
         public SecP192K1Curve()
-            : base(q, true)
+            : base(q, isInternal: true)
         {
             this.m_infinity = new SecP192K1Point(this, null, null);
 
diff --git a/crypto/src/math/ec/custom/sec/SecP192R1Curve.cs b/crypto/src/math/ec/custom/sec/SecP192R1Curve.cs
index 77524b362..accb5a786 100644
--- a/crypto/src/math/ec/custom/sec/SecP192R1Curve.cs
+++ b/crypto/src/math/ec/custom/sec/SecP192R1Curve.cs
@@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
         protected readonly SecP192R1Point m_infinity;
 
         public SecP192R1Curve()
-            : base(q, true)
+            : base(q, isInternal: true)
         {
             this.m_infinity = new SecP192R1Point(this, null, null);
 
diff --git a/crypto/src/math/ec/custom/sec/SecP224K1Curve.cs b/crypto/src/math/ec/custom/sec/SecP224K1Curve.cs
index 04be47202..7f828bc87 100644
--- a/crypto/src/math/ec/custom/sec/SecP224K1Curve.cs
+++ b/crypto/src/math/ec/custom/sec/SecP224K1Curve.cs
@@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
         protected readonly SecP224K1Point m_infinity;
 
         public SecP224K1Curve()
-            : base(q, true)
+            : base(q, isInternal: true)
         {
             this.m_infinity = new SecP224K1Point(this, null, null);
 
diff --git a/crypto/src/math/ec/custom/sec/SecP224R1Curve.cs b/crypto/src/math/ec/custom/sec/SecP224R1Curve.cs
index 8cd2b7272..ca2b876af 100644
--- a/crypto/src/math/ec/custom/sec/SecP224R1Curve.cs
+++ b/crypto/src/math/ec/custom/sec/SecP224R1Curve.cs
@@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
         protected readonly SecP224R1Point m_infinity;
 
         public SecP224R1Curve()
-            : base(q, true)
+            : base(q, isInternal: true)
         {
             this.m_infinity = new SecP224R1Point(this, null, null);
 
diff --git a/crypto/src/math/ec/custom/sec/SecP256K1Curve.cs b/crypto/src/math/ec/custom/sec/SecP256K1Curve.cs
index 804b65d60..391ac7b17 100644
--- a/crypto/src/math/ec/custom/sec/SecP256K1Curve.cs
+++ b/crypto/src/math/ec/custom/sec/SecP256K1Curve.cs
@@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
         protected readonly SecP256K1Point m_infinity;
 
         public SecP256K1Curve()
-            : base(q, true)
+            : base(q, isInternal: true)
         {
             this.m_infinity = new SecP256K1Point(this, null, null);
 
diff --git a/crypto/src/math/ec/custom/sec/SecP256R1Curve.cs b/crypto/src/math/ec/custom/sec/SecP256R1Curve.cs
index dd2b964c6..a9d1a4a2b 100644
--- a/crypto/src/math/ec/custom/sec/SecP256R1Curve.cs
+++ b/crypto/src/math/ec/custom/sec/SecP256R1Curve.cs
@@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
         protected readonly SecP256R1Point m_infinity;
 
         public SecP256R1Curve()
-            : base(q, true)
+            : base(q, isInternal: true)
         {
             this.m_infinity = new SecP256R1Point(this, null, null);
 
diff --git a/crypto/src/math/ec/custom/sec/SecP384R1Curve.cs b/crypto/src/math/ec/custom/sec/SecP384R1Curve.cs
index f54dd44c2..4704bb16f 100644
--- a/crypto/src/math/ec/custom/sec/SecP384R1Curve.cs
+++ b/crypto/src/math/ec/custom/sec/SecP384R1Curve.cs
@@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
         protected readonly SecP384R1Point m_infinity;
 
         public SecP384R1Curve()
-            : base(q, true)
+            : base(q, isInternal: true)
         {
             this.m_infinity = new SecP384R1Point(this, null, null);
 
diff --git a/crypto/src/math/ec/custom/sec/SecP521R1Curve.cs b/crypto/src/math/ec/custom/sec/SecP521R1Curve.cs
index a5f4cf957..136af8a1a 100644
--- a/crypto/src/math/ec/custom/sec/SecP521R1Curve.cs
+++ b/crypto/src/math/ec/custom/sec/SecP521R1Curve.cs
@@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
         protected readonly SecP521R1Point m_infinity;
 
         public SecP521R1Curve()
-            : base(q, true)
+            : base(q, isInternal: true)
         {
             this.m_infinity = new SecP521R1Point(this, null, null);
 
diff --git a/crypto/src/math/ec/custom/sec/SecT113Field.cs b/crypto/src/math/ec/custom/sec/SecT113Field.cs
index 596d8070b..2477b9c78 100644
--- a/crypto/src/math/ec/custom/sec/SecT113Field.cs
+++ b/crypto/src/math/ec/custom/sec/SecT113Field.cs
@@ -287,7 +287,8 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
         private static void ImplMultiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Pclmulqdq.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<ulong>>() == 16)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian)
             {
                 var X01 = Vector128.Create(x[0], x[1]);
                 var Y01 = Vector128.Create(y[0], y[1]);
@@ -424,7 +425,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 #endif
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 zz[3] = Bmi2.X64.ParallelBitDeposit(x[1] >> 32, 0x5555555555555555UL);
                 zz[2] = Bmi2.X64.ParallelBitDeposit(x[1]      , 0x5555555555555555UL);
diff --git a/crypto/src/math/ec/custom/sec/SecT131Field.cs b/crypto/src/math/ec/custom/sec/SecT131Field.cs
index 743fa6a5d..49d504afb 100644
--- a/crypto/src/math/ec/custom/sec/SecT131Field.cs
+++ b/crypto/src/math/ec/custom/sec/SecT131Field.cs
@@ -324,7 +324,8 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
         private static void ImplMultiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Pclmulqdq.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<ulong>>() == 16)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian)
             {
                 var X01 = Vector128.Create(x[0], x[1]);
                 var X2_ = Vector128.CreateScalar(x[2]);
@@ -620,7 +621,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             zz[4] = Interleave.Expand8to16((byte)x[2]);
 
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 zz[3] = Bmi2.X64.ParallelBitDeposit(x[1] >> 32, 0x5555555555555555UL);
                 zz[2] = Bmi2.X64.ParallelBitDeposit(x[1]      , 0x5555555555555555UL);
diff --git a/crypto/src/math/ec/custom/sec/SecT163Field.cs b/crypto/src/math/ec/custom/sec/SecT163Field.cs
index d0f09cd8b..e4fda48ac 100644
--- a/crypto/src/math/ec/custom/sec/SecT163Field.cs
+++ b/crypto/src/math/ec/custom/sec/SecT163Field.cs
@@ -335,7 +335,8 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
         private static void ImplMultiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Pclmulqdq.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<ulong>>() == 16)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian)
             {
                 var X01 = Vector128.Create(x[0], x[1]);
                 var X2_ = Vector128.CreateScalar(x[2]);
@@ -623,7 +624,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 #endif
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 zz[5] = Bmi2.X64.ParallelBitDeposit(x[2] >> 32, 0x5555555555555555UL);
                 zz[4] = Bmi2.X64.ParallelBitDeposit(x[2]      , 0x5555555555555555UL);
diff --git a/crypto/src/math/ec/custom/sec/SecT193Field.cs b/crypto/src/math/ec/custom/sec/SecT193Field.cs
index b610bf554..7eebc29c6 100644
--- a/crypto/src/math/ec/custom/sec/SecT193Field.cs
+++ b/crypto/src/math/ec/custom/sec/SecT193Field.cs
@@ -360,7 +360,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
         private static void ImplMultiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Pclmulqdq.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled)
             {
                 var X01 = Vector128.Create(x[0], x[1]);
                 var X2_ = Vector128.CreateScalar(x[2]);
@@ -545,7 +545,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             zz[6] = x[3] & M01;
 
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 zz[5] = Bmi2.X64.ParallelBitDeposit(x[2] >> 32, 0x5555555555555555UL);
                 zz[4] = Bmi2.X64.ParallelBitDeposit(x[2]      , 0x5555555555555555UL);
diff --git a/crypto/src/math/ec/custom/sec/SecT233Field.cs b/crypto/src/math/ec/custom/sec/SecT233Field.cs
index 00bbc0635..bf7b33139 100644
--- a/crypto/src/math/ec/custom/sec/SecT233Field.cs
+++ b/crypto/src/math/ec/custom/sec/SecT233Field.cs
@@ -378,7 +378,8 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
         private static void ImplMultiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Pclmulqdq.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<ulong>>() == 16)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian)
             {
                 var X01 = Vector128.Create(x[0], x[1]);
                 var X23 = Vector128.Create(x[2], x[3]);
@@ -576,7 +577,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 #endif
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 ulong x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3];
                 zz[7] = Bmi2.X64.ParallelBitDeposit(x3 >> 32, 0x5555555555555555UL);
diff --git a/crypto/src/math/ec/custom/sec/SecT239Field.cs b/crypto/src/math/ec/custom/sec/SecT239Field.cs
index b90867b76..a6f3c9e4d 100644
--- a/crypto/src/math/ec/custom/sec/SecT239Field.cs
+++ b/crypto/src/math/ec/custom/sec/SecT239Field.cs
@@ -387,7 +387,8 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
         private static void ImplMultiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Pclmulqdq.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<ulong>>() == 16)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian)
             {
                 var X01 = Vector128.Create(x[0], x[1]);
                 var X23 = Vector128.Create(x[2], x[3]);
@@ -587,7 +588,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 #endif
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 ulong x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3];
                 zz[7] = Bmi2.X64.ParallelBitDeposit(x3 >> 32, 0x5555555555555555UL);
diff --git a/crypto/src/math/ec/custom/sec/SecT283Field.cs b/crypto/src/math/ec/custom/sec/SecT283Field.cs
index 498a72c81..92f8ea385 100644
--- a/crypto/src/math/ec/custom/sec/SecT283Field.cs
+++ b/crypto/src/math/ec/custom/sec/SecT283Field.cs
@@ -386,7 +386,8 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
         private static void ImplMultiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Pclmulqdq.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<ulong>>() == 16)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian)
             {
                 var X01 = Vector128.Create(x[0], x[1]);
                 var X23 = Vector128.Create(x[2], x[3]);
@@ -746,7 +747,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             zz[8] = Interleave.Expand32to64((uint)x[4]);
 
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 zz[7] = Bmi2.X64.ParallelBitDeposit(x[3] >> 32, 0x5555555555555555UL);
                 zz[6] = Bmi2.X64.ParallelBitDeposit(x[3]      , 0x5555555555555555UL);
diff --git a/crypto/src/math/ec/custom/sec/SecT409Field.cs b/crypto/src/math/ec/custom/sec/SecT409Field.cs
index 6a5afb0dc..a8a39a575 100644
--- a/crypto/src/math/ec/custom/sec/SecT409Field.cs
+++ b/crypto/src/math/ec/custom/sec/SecT409Field.cs
@@ -551,7 +551,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             Debug.Assert(y >> 59 == 0);
 
 #if NETCOREAPP3_0_OR_GREATER
-            if (Pclmulqdq.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled)
             {
                 var X = Vector128.CreateScalar(x);
                 var Y = Vector128.CreateScalar(y);
@@ -607,7 +607,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             zz[12] = Interleave.Expand32to64((uint)x[6]);
 
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 zz[11] = Bmi2.X64.ParallelBitDeposit(x[5] >> 32, 0x5555555555555555UL);
                 zz[10] = Bmi2.X64.ParallelBitDeposit(x[5]      , 0x5555555555555555UL);
diff --git a/crypto/src/math/ec/custom/sec/SecT571Field.cs b/crypto/src/math/ec/custom/sec/SecT571Field.cs
index e970027a5..47f157dc3 100644
--- a/crypto/src/math/ec/custom/sec/SecT571Field.cs
+++ b/crypto/src/math/ec/custom/sec/SecT571Field.cs
@@ -654,7 +654,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 #endif
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Pclmulqdq.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled)
             {
                 var X = Vector128.CreateScalar(x);
                 var Y = Vector128.CreateScalar(y);
@@ -711,7 +711,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 #endif
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 zz[17] = Bmi2.X64.ParallelBitDeposit(x[8] >> 32, 0x5555555555555555UL);
                 zz[16] = Bmi2.X64.ParallelBitDeposit(x[8]      , 0x5555555555555555UL);
diff --git a/crypto/src/math/ec/rfc7748/X25519.cs b/crypto/src/math/ec/rfc7748/X25519.cs
index ffddd4376..0b2be4af0 100644
--- a/crypto/src/math/ec/rfc7748/X25519.cs
+++ b/crypto/src/math/ec/rfc7748/X25519.cs
@@ -269,6 +269,12 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
             ScalarMultBase(k.AsSpan(kOff), r.AsSpan(rOff));
 #else
+            // Equivalent (but much slower)
+            //byte[] u = new byte[PointSize];
+            //u[0] = 9;
+
+            //ScalarMult(k, kOff, u, 0, r, rOff);
+
             int[] y = F.Create();
             int[] z = F.Create();
 
@@ -287,6 +293,12 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         public static void ScalarMultBase(ReadOnlySpan<byte> k, Span<byte> r)
         {
+            // Equivalent (but much slower)
+            //Span<byte> u = stackalloc byte[PointSize];
+            //u[0] = 9;
+
+            //ScalarMult(k, u, r);
+
             int[] y = F.Create();
             int[] z = F.Create();
 
diff --git a/crypto/src/math/ec/rfc7748/X25519Field.cs b/crypto/src/math/ec/rfc7748/X25519Field.cs
index 079e673a8..ffe468f73 100644
--- a/crypto/src/math/ec/rfc7748/X25519Field.cs
+++ b/crypto/src/math/ec/rfc7748/X25519Field.cs
@@ -24,10 +24,10 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
         private const int M25 = 0x01FFFFFF;
         private const int M26 = 0x03FFFFFF;
 
-        private static readonly uint[] P32 = new uint[]{ 0xFFFFFFEDU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU,
-            0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0x7FFFFFFFU };
-        private static readonly int[] RootNegOne = { 0x020EA0B0, 0x0386C9D2, 0x00478C4E, 0x0035697F, 0x005E8630,
-            0x01FBD7A7, 0x0340264F, 0x01F0B2B4, 0x00027E0E, 0x00570649 };
+        private static readonly uint[] P32 = { 0xFFFFFFEDU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU,
+            0xFFFFFFFFU, 0xFFFFFFFFU, 0x7FFFFFFFU };
+        private static readonly int[] RootNegOne = { -0x01F15F50, -0x0079362D, 0x00478C4F, 0x0035697F, 0x005E8630,
+            0x01FBD7A7, -0x00BFD9B1, -0x000F4D4B, 0x00027E0F, 0x00570649 };
 
 #if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -35,7 +35,8 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
         public static void Add(int[] x, int[] y, int[] z)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Avx2.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector256<int>>() == 32)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian)
             {
                 var X = MemoryMarshal.AsBytes(x.AsSpan(0, 8));
                 var Y = MemoryMarshal.AsBytes(y.AsSpan(0, 8));
@@ -54,7 +55,8 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
                 return;
             }
 
-            if (Sse2.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<int>>() == 16)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian)
             {
                 var X = MemoryMarshal.AsBytes(x.AsSpan(0, 8));
                 var Y = MemoryMarshal.AsBytes(y.AsSpan(0, 8));
@@ -105,7 +107,8 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
         public static void Apm(int[] x, int[] y, int[] zp, int[] zm)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Avx2.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector256<int>>() == 32)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian)
             {
                 var X = MemoryMarshal.AsBytes(x.AsSpan(0, 8));
                 var Y = MemoryMarshal.AsBytes(y.AsSpan(0, 8));
@@ -132,7 +135,8 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
                 return;
             }
 
-            if (Sse2.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<int>>() == 16)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian)
             {
                 var X = MemoryMarshal.AsBytes(x.AsSpan(0, 8));
                 var Y = MemoryMarshal.AsBytes(y.AsSpan(0, 8));
@@ -833,7 +837,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
 
         public static void Normalize(int[] z)
         {
-            int x = (z[9] >> 23) & 1;
+            int x = (z[9] >> (24 - 1)) & 1;
             Reduce(z, x);
             Reduce(z, -x);
             Debug.Assert(z[9] >> 24 == 0);
@@ -842,7 +846,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         public static void Normalize(Span<int> z)
         {
-            int x = (z[9] >> 23) & 1;
+            int x = (z[9] >> (24 - 1)) & 1;
             Reduce(z, x);
             Reduce(z, -x);
             Debug.Assert(z[9] >> 24 == 0);
@@ -1101,7 +1105,8 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
         public static void Sub(int[] x, int[] y, int[] z)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Avx2.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector256<int>>() == 32)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian)
             {
                 var X = MemoryMarshal.AsBytes(x.AsSpan(0, 8));
                 var Y = MemoryMarshal.AsBytes(y.AsSpan(0, 8));
@@ -1120,7 +1125,8 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
                 return;
             }
 
-            if (Sse2.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<int>>() == 16)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian)
             {
                 var X = MemoryMarshal.AsBytes(x.AsSpan(0, 8));
                 var Y = MemoryMarshal.AsBytes(y.AsSpan(0, 8));
diff --git a/crypto/src/math/ec/rfc7748/X448.cs b/crypto/src/math/ec/rfc7748/X448.cs
index 7e078c5c6..0019f53fc 100644
--- a/crypto/src/math/ec/rfc7748/X448.cs
+++ b/crypto/src/math/ec/rfc7748/X448.cs
@@ -282,6 +282,12 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
             ScalarMultBase(k.AsSpan(kOff), r.AsSpan(rOff));
 #else
+            // Equivalent (but much slower)
+            //byte[] u = new byte[PointSize];
+            //u[0] = 5;
+
+            //ScalarMult(k, kOff, u, 0, r, rOff);
+
             uint[] x = F.Create();
             uint[] y = F.Create();
 
@@ -299,6 +305,12 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         public static void ScalarMultBase(ReadOnlySpan<byte> k, Span<byte> r)
         {
+            // Equivalent (but much slower)
+            //Span<byte> u = stackalloc byte[PointSize];
+            //u[0] = 5;
+
+            //ScalarMult(k, u, r);
+
             uint[] x = F.Create();
             uint[] y = F.Create();
 
diff --git a/crypto/src/math/ec/rfc7748/X448Field.cs b/crypto/src/math/ec/rfc7748/X448Field.cs
index f3fe71114..1b9fbb839 100644
--- a/crypto/src/math/ec/rfc7748/X448Field.cs
+++ b/crypto/src/math/ec/rfc7748/X448Field.cs
@@ -24,9 +24,9 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
 
         private const uint M28 = 0x0FFFFFFFU;
 
-        private static readonly uint[] P32 = new uint[]{ 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU,
-            0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFEU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU,
-            0xFFFFFFFFU, 0xFFFFFFFFU };
+        private static readonly uint[] P32 = { 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU,
+            0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFEU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU,
+            0xFFFFFFFFU };
 
 #if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -1458,7 +1458,8 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748
         public static void Sub(ReadOnlySpan<uint> x, ReadOnlySpan<uint> y, Span<uint> z)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Avx2.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector256<uint>>() == 32)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian)
             {
                 var ControlCarry = Vector256.Create(7U, 0U, 1U, 2U, 3U, 4U, 5U, 6U);
                 var Mask28 = Vector256.Create(M28);
diff --git a/crypto/src/math/ec/rfc8032/Ed25519.cs b/crypto/src/math/ec/rfc8032/Ed25519.cs
index fd2d5fe93..7318a8a7e 100644
--- a/crypto/src/math/ec/rfc8032/Ed25519.cs
+++ b/crypto/src/math/ec/rfc8032/Ed25519.cs
@@ -55,9 +55,9 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
         public static readonly int SignatureSize = PointBytes + ScalarBytes;
 
         // "SigEd25519 no Ed25519 collisions"
-        private static readonly byte[] Dom2Prefix = new byte[]{ 0x53, 0x69, 0x67, 0x45, 0x64, 0x32, 0x35, 0x35, 0x31,
-            0x39, 0x20, 0x6e, 0x6f, 0x20, 0x45, 0x64, 0x32, 0x35, 0x35, 0x31, 0x39, 0x20, 0x63, 0x6f, 0x6c, 0x6c, 0x69,
-            0x73, 0x69, 0x6f, 0x6e, 0x73 };
+        private static readonly byte[] Dom2Prefix = { 0x53, 0x69, 0x67, 0x45, 0x64, 0x32, 0x35, 0x35, 0x31, 0x39, 0x20,
+            0x6e, 0x6f, 0x20, 0x45, 0x64, 0x32, 0x35, 0x35, 0x31, 0x39, 0x20, 0x63, 0x6f, 0x6c, 0x6c, 0x69, 0x73, 0x69,
+            0x6f, 0x6e, 0x73 };
 
         private static readonly uint[] P = { 0xFFFFFFEDU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU,
             0xFFFFFFFFU, 0xFFFFFFFFU, 0x7FFFFFFFU };
@@ -149,7 +149,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
 
             byte[] result = new byte[ScalarBytes * 2];
             Codec.Encode32(t, 0, t.Length, result, 0);
-            return Scalar25519.Reduce(result);
+            return Scalar25519.Reduce512(result);
         }
 
         private static bool CheckContextVar(byte[] ctx, byte phflag)
@@ -167,13 +167,14 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             F.Sqr(p.x, u);
             F.Sqr(p.y, v);
             F.Mul(u, v, t);
-            F.Sub(v, u, v);
+            F.Sub(u, v, u);
             F.Mul(t, C_d, t);
             F.AddOne(t);
-            F.Sub(t, v, t);
+            F.Add(t, u, t);
             F.Normalize(t);
+            F.Normalize(v);
 
-            return F.IsZero(t);
+            return F.IsZero(t) & ~F.IsZero(v);
         }
 
         private static int CheckPoint(PointAccum p)
@@ -187,15 +188,17 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             F.Sqr(p.y, v);
             F.Sqr(p.z, w);
             F.Mul(u, v, t);
-            F.Sub(v, u, v);
-            F.Mul(v, w, v);
+            F.Sub(u, v, u);
+            F.Mul(u, w, u);
             F.Sqr(w, w);
             F.Mul(t, C_d, t);
             F.Add(t, w, t);
-            F.Sub(t, v, t);
+            F.Add(t, u, t);
             F.Normalize(t);
+            F.Normalize(v);
+            F.Normalize(w);
 
-            return F.IsZero(t);
+            return F.IsZero(t) & ~F.IsZero(v) & ~F.IsZero(w);
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
@@ -592,7 +595,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.DoFinal(h, 0);
 
-            byte[] r = Scalar25519.Reduce(h);
+            byte[] r = Scalar25519.Reduce512(h);
             byte[] R = new byte[PointBytes];
             ScalarMultBaseEncoded(r, R, 0);
 
@@ -605,7 +608,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.DoFinal(h, 0);
 
-            byte[] k = Scalar25519.Reduce(h);
+            byte[] k = Scalar25519.Reduce512(h);
             byte[] S = CalculateS(r, k, s);
 
             Array.Copy(R, 0, sig, sigOff, PointBytes);
@@ -697,7 +700,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.DoFinal(h);
 
             Span<byte> k = stackalloc byte[ScalarBytes];
-            Scalar25519.Reduce(h, k);
+            Scalar25519.Reduce512(h, k);
 
             Span<uint> nA = stackalloc uint[ScalarUints];
             Scalar25519.Decode(k, nA);
@@ -739,7 +742,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.DoFinal(h, 0);
 
-            byte[] k = Scalar25519.Reduce(h);
+            byte[] k = Scalar25519.Reduce512(h);
 
             uint[] nA = new uint[ScalarUints];
             Scalar25519.Decode(k, nA);
@@ -799,7 +802,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.DoFinal(h);
 
             Span<byte> k = stackalloc byte[ScalarBytes];
-            Scalar25519.Reduce(h, k);
+            Scalar25519.Reduce512(h, k);
 
             Span<uint> nA = stackalloc uint[ScalarUints];
             Scalar25519.Decode(k, nA);
@@ -840,7 +843,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.DoFinal(h, 0);
 
-            byte[] k = Scalar25519.Reduce(h);
+            byte[] k = Scalar25519.Reduce512(h);
 
             uint[] nA = new uint[ScalarUints];
             Scalar25519.Decode(k, nA);
@@ -950,7 +953,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             F.Normalize(p.y);
             F.Normalize(p.z);
 
-            return F.IsZeroVar(p.x) && F.AreEqualVar(p.y, p.z);
+            return F.IsZeroVar(p.x) && !F.IsZeroVar(p.y) && F.AreEqualVar(p.y, p.z);
         }
 
         private static void PointAdd(ref PointExtended p, ref PointExtended q, ref PointExtended r, ref PointTemp t)
@@ -1496,7 +1499,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
 #endif
 
             Scalar25519.Decode(k, n);
-            Scalar25519.ToSignedDigits(256, n, n);
+            Scalar25519.ToSignedDigits(256, n);
 
             Init(out PointPrecompZ q);
             Init(out PointTemp t);
@@ -1541,7 +1544,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
 #endif
 
             Scalar25519.Decode(k, n);
-            Scalar25519.ToSignedDigits(PrecompRange, n, n);
+            Scalar25519.ToSignedDigits(PrecompRange, n);
             GroupCombBits(n);
 
             Init(out PointPrecomp p);
@@ -1714,6 +1717,12 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             int bit = 128;
             while (--bit >= 0)
             {
+                if (((int)ws_b[bit] | (int)ws_b[128 + bit] | (int)ws_p[bit] | (int)ws_q[bit]) != 0)
+                    break;
+            }
+
+            for (; bit >= 0; --bit)
+            {
                 int wb = ws_b[bit];
                 if (wb != 0)
                 {
diff --git a/crypto/src/math/ec/rfc8032/Ed448.cs b/crypto/src/math/ec/rfc8032/Ed448.cs
index 08b64ddf2..aff9b5460 100644
--- a/crypto/src/math/ec/rfc8032/Ed448.cs
+++ b/crypto/src/math/ec/rfc8032/Ed448.cs
@@ -52,7 +52,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
         public static readonly int SignatureSize = PointBytes + ScalarBytes;
 
         // "SigEd448"
-        private static readonly byte[] Dom4Prefix = new byte[]{ 0x53, 0x69, 0x67, 0x45, 0x64, 0x34, 0x34, 0x38 };
+        private static readonly byte[] Dom4Prefix = { 0x53, 0x69, 0x67, 0x45, 0x64, 0x34, 0x34, 0x38 };
 
         private static readonly uint[] P = { 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU,
             0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFEU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU,
@@ -73,7 +73,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             0x03AC222BU, 0x0304DB8EU, 0x083EE319U, 0x05E5DB0BU, 0x0ECA503BU, 0x0B1C6539U, 0x078A8DCEU, 0x02D256BCU,
             0x04A8B05EU, 0x0BD9FD57U, 0x0A1C3CB8U };
 
-        private const int C_d = -39081;
+        private const uint C_d = 39081U;
 
         //private const int WnafWidth = 6;
         private const int WnafWidth225 = 5;
@@ -118,7 +118,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
 
             byte[] result = new byte[ScalarBytes * 2];
             Codec.Encode32(t, 0, t.Length, result, 0);
-            return Scalar448.Reduce(result);
+            return Scalar448.Reduce912(result);
         }
 
         private static bool CheckContextVar(byte[] ctx)
@@ -136,12 +136,13 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             F.Sqr(p.y, v);
             F.Mul(u, v, t);
             F.Add(u, v, u);
-            F.Mul(t, -C_d, t);
+            F.Mul(t, C_d, t);
             F.SubOne(t);
             F.Add(t, u, t);
             F.Normalize(t);
+            F.Normalize(v);
 
-            return F.IsZero(t);
+            return F.IsZero(t) & ~F.IsZero(v);
         }
 
         private static int CheckPoint(PointProjective p)
@@ -158,12 +159,14 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             F.Add(u, v, u);
             F.Mul(u, w, u);
             F.Sqr(w, w);
-            F.Mul(t, -C_d, t);
+            F.Mul(t, C_d, t);
             F.Sub(t, w, t);
             F.Add(t, u, t);
             F.Normalize(t);
+            F.Normalize(v);
+            F.Normalize(w);
 
-            return F.IsZero(t);
+            return F.IsZero(t) & ~F.IsZero(v) & ~F.IsZero(w);
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
@@ -310,7 +313,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             uint[] v = F.Create();
 
             F.Sqr(r.y, u);
-            F.Mul(u, (uint)-C_d, v);
+            F.Mul(u, C_d, v);
             F.Negate(u, u);
             F.AddOne(u);
             F.AddOne(v);
@@ -545,7 +548,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.OutputFinal(h, 0, h.Length);
 
-            byte[] r = Scalar448.Reduce(h);
+            byte[] r = Scalar448.Reduce912(h);
             byte[] R = new byte[PointBytes];
             ScalarMultBaseEncoded(r, R, 0);
 
@@ -555,7 +558,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.OutputFinal(h, 0, h.Length);
 
-            byte[] k = Scalar448.Reduce(h);
+            byte[] k = Scalar448.Reduce912(h);
             byte[] S = CalculateS(r, k, s);
 
             Array.Copy(R, 0, sig, sigOff, PointBytes);
@@ -644,7 +647,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.OutputFinal(h);
 
             Span<byte> k = stackalloc byte[ScalarBytes];
-            Scalar448.Reduce(h, k);
+            Scalar448.Reduce912(h, k);
 
             Span<uint> nA = stackalloc uint[ScalarUints];
             Scalar448.Decode(k, nA);
@@ -683,7 +686,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.OutputFinal(h, 0, h.Length);
 
-            byte[] k = Scalar448.Reduce(h);
+            byte[] k = Scalar448.Reduce912(h);
 
             uint[] nA = new uint[ScalarUints];
             Scalar448.Decode(k, nA);
@@ -740,7 +743,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.OutputFinal(h);
 
             Span<byte> k = stackalloc byte[ScalarBytes];
-            Scalar448.Reduce(h, k);
+            Scalar448.Reduce912(h, k);
 
             Span<uint> nA = stackalloc uint[ScalarUints];
             Scalar448.Decode(k, nA);
@@ -778,7 +781,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.OutputFinal(h, 0, h.Length);
 
-            byte[] k = Scalar448.Reduce(h);
+            byte[] k = Scalar448.Reduce912(h);
 
             uint[] nA = new uint[ScalarUints];
             Scalar448.Decode(k, nA);
@@ -868,7 +871,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             F.Normalize(p.y);
             F.Normalize(p.z);
 
-            return F.IsZeroVar(p.x) && F.AreEqualVar(p.y, p.z);
+            return F.IsZeroVar(p.x) && !F.IsZeroVar(p.y) && F.AreEqualVar(p.y, p.z);
         }
 
         private static void PointAdd(ref PointAffine p, ref PointProjective r, ref PointTemp t)
@@ -885,7 +888,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             F.Mul(p.x, r.x, c);
             F.Mul(p.y, r.y, d);
             F.Mul(c, d, e);
-            F.Mul(e, -C_d, e);
+            F.Mul(e, C_d, e);
             //F.Apm(b, e, f, g);
             F.Add(b, e, f);
             F.Sub(b, e, g);
@@ -920,7 +923,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             F.Mul(p.x, r.x, c);
             F.Mul(p.y, r.y, d);
             F.Mul(c, d, e);
-            F.Mul(e, -C_d, e);
+            F.Mul(e, C_d, e);
             //F.Apm(b, e, f, g);
             F.Add(b, e, f);
             F.Sub(b, e, g);
@@ -965,7 +968,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             F.Mul(p.x, r.x, c);
             F.Mul(p.y, r.y, d);
             F.Mul(c, d, e);
-            F.Mul(e, -C_d, e);
+            F.Mul(e, C_d, e);
             //F.Apm(b, e, nf, ng);
             F.Add(b, e, nf);
             F.Sub(b, e, ng);
@@ -1011,7 +1014,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             F.Mul(p.x, r.x, c);
             F.Mul(p.y, r.y, d);
             F.Mul(c, d, e);
-            F.Mul(e, -C_d, e);
+            F.Mul(e, C_d, e);
             //F.Apm(b, e, nf, ng);
             F.Add(b, e, nf);
             F.Sub(b, e, ng);
@@ -1149,7 +1152,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             PointCopy(ref p, ref q);
 
             Init(out PointProjective d);
-            PointCopy(ref q, ref d);
+            PointCopy(ref p, ref d);
             PointDouble(ref d, ref t);
 
             uint[] table = F.CreateTable(count * 3);
@@ -1581,6 +1584,12 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             int bit = 225;
             while (--bit >= 0)
             {
+                if (((int)ws_b[bit] | (int)ws_b[225 + bit] | (int)ws_p[bit] | (int)ws_q[bit]) != 0)
+                    break;
+            }
+
+            for (; bit >= 0; --bit)
+            {
                 int wb = ws_b[bit];
                 if (wb != 0)
                 {
diff --git a/crypto/src/math/ec/rfc8032/Scalar25519.cs b/crypto/src/math/ec/rfc8032/Scalar25519.cs
index 00dcd49a1..67eee6155 100644
--- a/crypto/src/math/ec/rfc8032/Scalar25519.cs
+++ b/crypto/src/math/ec/rfc8032/Scalar25519.cs
@@ -14,9 +14,9 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
     {
         internal const int Size = 8;
 
-        private const long M08L = 0x000000FFL;
+        private const int ScalarBytes = Size * 4;
+
         private const long M28L = 0x0FFFFFFFL;
-        private const long M32L = 0xFFFFFFFFL;
 
         private const int TargetLength = 254;
 
@@ -70,7 +70,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         internal static void Multiply128Var(ReadOnlySpan<uint> x, ReadOnlySpan<uint> y128, Span<uint> z)
         {
-            Span<uint> tt = stackalloc uint[16];
+            Span<uint> tt = stackalloc uint[12];
             Nat256.Mul128(x, y128, tt);
 
             if ((int)y128[3] < 0)
@@ -79,9 +79,20 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
                 Nat256.SubFrom(x, tt[4..], 0);
             }
 
-            Span<byte> r = MemoryMarshal.AsBytes(tt);
-            Reduce(r, r);
-            tt[..Size].CopyTo(z);
+            if (BitConverter.IsLittleEndian)
+            {
+                Span<byte> r = MemoryMarshal.AsBytes(tt);
+                Reduce384(r, r);
+                tt[..Size].CopyTo(z);
+            }
+            else
+            {
+                Span<byte> r = stackalloc byte[48];
+                Codec.Encode32(tt, r);
+
+                Reduce384(r, r);
+                Decode(r, z);
+            }
         }
 #else
         internal static void Multiply128Var(uint[] x, uint[] y128, uint[] z)
@@ -95,40 +106,242 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
                 Nat256.SubFrom(x, 0, tt, 4, 0);
             }
 
-            byte[] bytes = new byte[64];
+            byte[] bytes = new byte[48];
             Codec.Encode32(tt, 0, 12, bytes, 0);
 
-            byte[] r = Reduce(bytes);
+            byte[] r = Reduce384(bytes);
             Decode(r, z);
         }
 #endif
 
-        internal static byte[] Reduce(byte[] n)
+        internal static byte[] Reduce384(byte[] n)
+        {
+            byte[] r = new byte[ScalarBytes];
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Reduce384(n, r);
+#else
+            long x00 =  Codec.Decode32(n,  0);          // x00:32/--
+            long x01 = (Codec.Decode24(n,  4) << 4);    // x01:28/--
+            long x02 =  Codec.Decode32(n,  7);          // x02:32/--
+            long x03 = (Codec.Decode24(n, 11) << 4);    // x03:28/--
+            long x04 =  Codec.Decode32(n, 14);          // x04:32/--
+            long x05 = (Codec.Decode24(n, 18) << 4);    // x05:28/--
+            long x06 =  Codec.Decode32(n, 21);          // x06:32/--
+            long x07 = (Codec.Decode24(n, 25) << 4);    // x07:28/--
+            long x08 =  Codec.Decode32(n, 28);          // x08:32/--
+            long x09 = (Codec.Decode24(n, 32) << 4);    // x09:28/--
+            long x10 =  Codec.Decode32(n, 35);          // x10:32/--
+            long x11 = (Codec.Decode24(n, 39) << 4);    // x11:28/--
+            long x12 =  Codec.Decode32(n, 42);          // x12:32/--
+            long x13 = (Codec.Decode16(n, 46) << 4);    // x13:20/--
+            long t;
+
+            // TODO Fix bounds calculations which were copied from Reduce512
+
+            x13 += (x12 >> 28); x12 &= M28L;            // x13:28/22, x12:28/--
+            x04 -= x13 * L0;                            // x04:54/49
+            x05 -= x13 * L1;                            // x05:54/53
+            x06 -= x13 * L2;                            // x06:56/--
+            x07 -= x13 * L3;                            // x07:56/52
+            x08 -= x13 * L4;                            // x08:56/52
+
+            x12 += (x11 >> 28); x11 &= M28L;            // x12:28/24, x11:28/--
+            x03 -= x12 * L0;                            // x03:54/49
+            x04 -= x12 * L1;                            // x04:54/51
+            x05 -= x12 * L2;                            // x05:56/--
+            x06 -= x12 * L3;                            // x06:56/52
+            x07 -= x12 * L4;                            // x07:56/53
+
+            x11 += (x10 >> 28); x10 &= M28L;            // x11:29/--, x10:28/--
+            x02 -= x11 * L0;                            // x02:55/32
+            x03 -= x11 * L1;                            // x03:55/--
+            x04 -= x11 * L2;                            // x04:56/55
+            x05 -= x11 * L3;                            // x05:56/52
+            x06 -= x11 * L4;                            // x06:56/53
+
+            x10 += (x09 >> 28); x09 &= M28L;            // x10:29/--, x09:28/--
+            x01 -= x10 * L0;                            // x01:55/28
+            x02 -= x10 * L1;                            // x02:55/54
+            x03 -= x10 * L2;                            // x03:56/55
+            x04 -= x10 * L3;                            // x04:57/--
+            x05 -= x10 * L4;                            // x05:56/53
+
+            x08 += (x07 >> 28); x07 &= M28L;            // x08:56/53, x07:28/--
+            x09 += (x08 >> 28); x08 &= M28L;            // x09:29/25, x08:28/--
+
+            t    = (x08 >> 27) & 1L;
+            x09 += t;                                   // x09:29/26
+
+            x00 -= x09 * L0;                            // x00:55/53
+            x01 -= x09 * L1;                            // x01:55/54
+            x02 -= x09 * L2;                            // x02:57/--
+            x03 -= x09 * L3;                            // x03:57/--
+            x04 -= x09 * L4;                            // x04:57/42
+
+            x01 += (x00 >> 28); x00 &= M28L;
+            x02 += (x01 >> 28); x01 &= M28L;
+            x03 += (x02 >> 28); x02 &= M28L;
+            x04 += (x03 >> 28); x03 &= M28L;
+            x05 += (x04 >> 28); x04 &= M28L;
+            x06 += (x05 >> 28); x05 &= M28L;
+            x07 += (x06 >> 28); x06 &= M28L;
+            x08 += (x07 >> 28); x07 &= M28L;
+            x09  = (x08 >> 28); x08 &= M28L;
+
+            x09 -= t;
+
+            Debug.Assert(x09 == 0L || x09 == -1L);
+
+            x00 += x09 & L0;
+            x01 += x09 & L1;
+            x02 += x09 & L2;
+            x03 += x09 & L3;
+            x04 += x09 & L4;
+
+            x01 += (x00 >> 28); x00 &= M28L;
+            x02 += (x01 >> 28); x01 &= M28L;
+            x03 += (x02 >> 28); x02 &= M28L;
+            x04 += (x03 >> 28); x03 &= M28L;
+            x05 += (x04 >> 28); x04 &= M28L;
+            x06 += (x05 >> 28); x05 &= M28L;
+            x07 += (x06 >> 28); x06 &= M28L;
+            x08 += (x07 >> 28); x07 &= M28L;
+
+            Codec.Encode56((ulong)(x00 | (x01 << 28)), r, 0);
+            Codec.Encode56((ulong)(x02 | (x03 << 28)), r, 7);
+            Codec.Encode56((ulong)(x04 | (x05 << 28)), r, 14);
+            Codec.Encode56((ulong)(x06 | (x07 << 28)), r, 21);
+            Codec.Encode32((uint)x08, r, 28);
+#endif
+
+            return r;
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        internal static void Reduce384(ReadOnlySpan<byte> n, Span<byte> r)
+        {
+            long x00 =  Codec.Decode32(n[ 0..]);        // x00:32/--
+            long x01 = (Codec.Decode24(n[ 4..]) << 4);  // x01:28/--
+            long x02 =  Codec.Decode32(n[ 7..]);        // x02:32/--
+            long x03 = (Codec.Decode24(n[11..]) << 4);  // x03:28/--
+            long x04 =  Codec.Decode32(n[14..]);        // x04:32/--
+            long x05 = (Codec.Decode24(n[18..]) << 4);  // x05:28/--
+            long x06 =  Codec.Decode32(n[21..]);        // x06:32/--
+            long x07 = (Codec.Decode24(n[25..]) << 4);  // x07:28/--
+            long x08 =  Codec.Decode32(n[28..]);        // x08:32/--
+            long x09 = (Codec.Decode24(n[32..]) << 4);  // x09:28/--
+            long x10 =  Codec.Decode32(n[35..]);        // x10:32/--
+            long x11 = (Codec.Decode24(n[39..]) << 4);  // x11:28/--
+            long x12 =  Codec.Decode32(n[42..]);        // x12:32/--
+            long x13 = (Codec.Decode16(n[46..]) << 4);  // x13:20/--
+            long t;
+
+            // TODO Fix bounds calculations which were copied from Reduce512
+
+            x13 += (x12 >> 28); x12 &= M28L;            // x13:28/22, x12:28/--
+            x04 -= x13 * L0;                            // x04:54/49
+            x05 -= x13 * L1;                            // x05:54/53
+            x06 -= x13 * L2;                            // x06:56/--
+            x07 -= x13 * L3;                            // x07:56/52
+            x08 -= x13 * L4;                            // x08:56/52
+
+            x12 += (x11 >> 28); x11 &= M28L;            // x12:28/24, x11:28/--
+            x03 -= x12 * L0;                            // x03:54/49
+            x04 -= x12 * L1;                            // x04:54/51
+            x05 -= x12 * L2;                            // x05:56/--
+            x06 -= x12 * L3;                            // x06:56/52
+            x07 -= x12 * L4;                            // x07:56/53
+
+            x11 += (x10 >> 28); x10 &= M28L;            // x11:29/--, x10:28/--
+            x02 -= x11 * L0;                            // x02:55/32
+            x03 -= x11 * L1;                            // x03:55/--
+            x04 -= x11 * L2;                            // x04:56/55
+            x05 -= x11 * L3;                            // x05:56/52
+            x06 -= x11 * L4;                            // x06:56/53
+
+            x10 += (x09 >> 28); x09 &= M28L;            // x10:29/--, x09:28/--
+            x01 -= x10 * L0;                            // x01:55/28
+            x02 -= x10 * L1;                            // x02:55/54
+            x03 -= x10 * L2;                            // x03:56/55
+            x04 -= x10 * L3;                            // x04:57/--
+            x05 -= x10 * L4;                            // x05:56/53
+
+            x08 += (x07 >> 28); x07 &= M28L;            // x08:56/53, x07:28/--
+            x09 += (x08 >> 28); x08 &= M28L;            // x09:29/25, x08:28/--
+
+            t    = (x08 >> 27) & 1L;
+            x09 += t;                                   // x09:29/26
+
+            x00 -= x09 * L0;                            // x00:55/53
+            x01 -= x09 * L1;                            // x01:55/54
+            x02 -= x09 * L2;                            // x02:57/--
+            x03 -= x09 * L3;                            // x03:57/--
+            x04 -= x09 * L4;                            // x04:57/42
+
+            x01 += (x00 >> 28); x00 &= M28L;
+            x02 += (x01 >> 28); x01 &= M28L;
+            x03 += (x02 >> 28); x02 &= M28L;
+            x04 += (x03 >> 28); x03 &= M28L;
+            x05 += (x04 >> 28); x04 &= M28L;
+            x06 += (x05 >> 28); x05 &= M28L;
+            x07 += (x06 >> 28); x06 &= M28L;
+            x08 += (x07 >> 28); x07 &= M28L;
+            x09  = (x08 >> 28); x08 &= M28L;
+
+            x09 -= t;
+
+            Debug.Assert(x09 == 0L || x09 == -1L);
+
+            x00 += x09 & L0;
+            x01 += x09 & L1;
+            x02 += x09 & L2;
+            x03 += x09 & L3;
+            x04 += x09 & L4;
+
+            x01 += (x00 >> 28); x00 &= M28L;
+            x02 += (x01 >> 28); x01 &= M28L;
+            x03 += (x02 >> 28); x02 &= M28L;
+            x04 += (x03 >> 28); x03 &= M28L;
+            x05 += (x04 >> 28); x04 &= M28L;
+            x06 += (x05 >> 28); x05 &= M28L;
+            x07 += (x06 >> 28); x06 &= M28L;
+            x08 += (x07 >> 28); x07 &= M28L;
+
+            Codec.Encode56((ulong)(x00 | (x01 << 28)), r);
+            Codec.Encode56((ulong)(x02 | (x03 << 28)), r[7..]);
+            Codec.Encode56((ulong)(x04 | (x05 << 28)), r[14..]);
+            Codec.Encode56((ulong)(x06 | (x07 << 28)), r[21..]);
+            Codec.Encode32((uint)x08, r[28..]);
+        }
+#endif
+
+        internal static byte[] Reduce512(byte[] n)
         {
-            byte[] r = new byte[64];
+            byte[] r = new byte[ScalarBytes];
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-            Reduce(n, r);
+            Reduce512(n, r);
 #else
-            long x00 =  Codec.Decode32(n,  0)       & M32L;         // x00:32/--
-            long x01 = (Codec.Decode24(n,  4) << 4) & M32L;         // x01:28/--
-            long x02 =  Codec.Decode32(n,  7)       & M32L;         // x02:32/--
-            long x03 = (Codec.Decode24(n, 11) << 4) & M32L;         // x03:28/--
-            long x04 =  Codec.Decode32(n, 14)       & M32L;         // x04:32/--
-            long x05 = (Codec.Decode24(n, 18) << 4) & M32L;         // x05:28/--
-            long x06 =  Codec.Decode32(n, 21)       & M32L;         // x06:32/--
-            long x07 = (Codec.Decode24(n, 25) << 4) & M32L;         // x07:28/--
-            long x08 =  Codec.Decode32(n, 28)       & M32L;         // x08:32/--
-            long x09 = (Codec.Decode24(n, 32) << 4) & M32L;         // x09:28/--
-            long x10 =  Codec.Decode32(n, 35)       & M32L;         // x10:32/--
-            long x11 = (Codec.Decode24(n, 39) << 4) & M32L;         // x11:28/--
-            long x12 =  Codec.Decode32(n, 42)       & M32L;         // x12:32/--
-            long x13 = (Codec.Decode24(n, 46) << 4) & M32L;         // x13:28/--
-            long x14 =  Codec.Decode32(n, 49)       & M32L;         // x14:32/--
-            long x15 = (Codec.Decode24(n, 53) << 4) & M32L;         // x15:28/--
-            long x16 =  Codec.Decode32(n, 56)       & M32L;         // x16:32/--
-            long x17 = (Codec.Decode24(n, 60) << 4) & M32L;         // x17:28/--
-            long x18 =                 n[63]        & M08L;         // x18:08/--
+            long x00 =  Codec.Decode32(n,  0);          // x00:32/--
+            long x01 = (Codec.Decode24(n,  4) << 4);    // x01:28/--
+            long x02 =  Codec.Decode32(n,  7);          // x02:32/--
+            long x03 = (Codec.Decode24(n, 11) << 4);    // x03:28/--
+            long x04 =  Codec.Decode32(n, 14);          // x04:32/--
+            long x05 = (Codec.Decode24(n, 18) << 4);    // x05:28/--
+            long x06 =  Codec.Decode32(n, 21);          // x06:32/--
+            long x07 = (Codec.Decode24(n, 25) << 4);    // x07:28/--
+            long x08 =  Codec.Decode32(n, 28);          // x08:32/--
+            long x09 = (Codec.Decode24(n, 32) << 4);    // x09:28/--
+            long x10 =  Codec.Decode32(n, 35);          // x10:32/--
+            long x11 = (Codec.Decode24(n, 39) << 4);    // x11:28/--
+            long x12 =  Codec.Decode32(n, 42);          // x12:32/--
+            long x13 = (Codec.Decode24(n, 46) << 4);    // x13:28/--
+            long x14 =  Codec.Decode32(n, 49);          // x14:32/--
+            long x15 = (Codec.Decode24(n, 53) << 4);    // x15:28/--
+            long x16 =  Codec.Decode32(n, 56);          // x16:32/--
+            long x17 = (Codec.Decode24(n, 60) << 4);    // x17:28/--
+            long x18 =                 n[63];           // x18:08/--
             long t;
 
             //x18 += (x17 >> 28); x17 &= M28L;
@@ -246,27 +459,27 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        internal static void Reduce(ReadOnlySpan<byte> n, Span<byte> r)
+        internal static void Reduce512(ReadOnlySpan<byte> n, Span<byte> r)
         {
-            long x00 =  Codec.Decode32(n[ 0..])       & M32L;       // x00:32/--
-            long x01 = (Codec.Decode24(n[ 4..]) << 4) & M32L;       // x01:28/--
-            long x02 =  Codec.Decode32(n[ 7..])       & M32L;       // x02:32/--
-            long x03 = (Codec.Decode24(n[11..]) << 4) & M32L;       // x03:28/--
-            long x04 =  Codec.Decode32(n[14..])       & M32L;       // x04:32/--
-            long x05 = (Codec.Decode24(n[18..]) << 4) & M32L;       // x05:28/--
-            long x06 =  Codec.Decode32(n[21..])       & M32L;       // x06:32/--
-            long x07 = (Codec.Decode24(n[25..]) << 4) & M32L;       // x07:28/--
-            long x08 =  Codec.Decode32(n[28..])       & M32L;       // x08:32/--
-            long x09 = (Codec.Decode24(n[32..]) << 4) & M32L;       // x09:28/--
-            long x10 =  Codec.Decode32(n[35..])       & M32L;       // x10:32/--
-            long x11 = (Codec.Decode24(n[39..]) << 4) & M32L;       // x11:28/--
-            long x12 =  Codec.Decode32(n[42..])       & M32L;       // x12:32/--
-            long x13 = (Codec.Decode24(n[46..]) << 4) & M32L;       // x13:28/--
-            long x14 =  Codec.Decode32(n[49..])       & M32L;       // x14:32/--
-            long x15 = (Codec.Decode24(n[53..]) << 4) & M32L;       // x15:28/--
-            long x16 =  Codec.Decode32(n[56..])       & M32L;       // x16:32/--
-            long x17 = (Codec.Decode24(n[60..]) << 4) & M32L;       // x17:28/--
-            long x18 =                 n[63]          & M08L;       // x18:08/--
+            long x00 =  Codec.Decode32(n[ 0..]);        // x00:32/--
+            long x01 = (Codec.Decode24(n[ 4..]) << 4);  // x01:28/--
+            long x02 =  Codec.Decode32(n[ 7..]);        // x02:32/--
+            long x03 = (Codec.Decode24(n[11..]) << 4);  // x03:28/--
+            long x04 =  Codec.Decode32(n[14..]);        // x04:32/--
+            long x05 = (Codec.Decode24(n[18..]) << 4);  // x05:28/--
+            long x06 =  Codec.Decode32(n[21..]);        // x06:32/--
+            long x07 = (Codec.Decode24(n[25..]) << 4);  // x07:28/--
+            long x08 =  Codec.Decode32(n[28..]);        // x08:32/--
+            long x09 = (Codec.Decode24(n[32..]) << 4);  // x09:28/--
+            long x10 =  Codec.Decode32(n[35..]);        // x10:32/--
+            long x11 = (Codec.Decode24(n[39..]) << 4);  // x11:28/--
+            long x12 =  Codec.Decode32(n[42..]);        // x12:32/--
+            long x13 = (Codec.Decode24(n[46..]) << 4);  // x13:28/--
+            long x14 =  Codec.Decode32(n[49..]);        // x14:32/--
+            long x15 = (Codec.Decode24(n[53..]) << 4);  // x15:28/--
+            long x16 =  Codec.Decode32(n[56..]);        // x16:32/--
+            long x17 = (Codec.Decode24(n[60..]) << 4);  // x17:28/--
+            long x18 =                 n[63];           // x18:08/--
             long t;
 
             //x18 += (x17 >> 28); x17 &= M28L;
@@ -488,15 +701,15 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
 #endif
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        internal static void ToSignedDigits(int bits, ReadOnlySpan<uint> x, Span<uint> z)
+        internal static void ToSignedDigits(int bits, Span<uint> z)
 #else
-        internal static void ToSignedDigits(int bits, uint[] x, uint[] z)
+        internal static void ToSignedDigits(int bits, uint[] z)
 #endif
         {
             Debug.Assert(bits == 256);
             Debug.Assert(z.Length >= Size);
 
-            uint c1 = Nat.CAdd(Size, ~(int)x[0] & 1, x, L, z);  Debug.Assert(c1 == 0U);
+            uint c1 = Nat.CAddTo(Size, ~(int)z[0] & 1, L, z);   Debug.Assert(c1 == 0U);
             uint c2 = Nat.ShiftDownBit(Size, z, 1U);            Debug.Assert(c2 == (1U << 31));
         }
     }
diff --git a/crypto/src/math/ec/rfc8032/Scalar448.cs b/crypto/src/math/ec/rfc8032/Scalar448.cs
index 4afe1d2d6..124b91250 100644
--- a/crypto/src/math/ec/rfc8032/Scalar448.cs
+++ b/crypto/src/math/ec/rfc8032/Scalar448.cs
@@ -97,7 +97,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
         {
             Debug.Assert((int)y225[7] >> 31 == (int)y225[7]);
 
-            Span<uint> tt = stackalloc uint[29];
+            Span<uint> tt = stackalloc uint[22];
             Nat.Mul(y225, x, tt);
 
             if ((int)y225[7] < 0)
@@ -106,9 +106,20 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
                 Nat.SubFrom(Size, x, tt[8..]);
             }
 
-            Span<byte> r = MemoryMarshal.AsBytes(tt);
-            Reduce(r, r);
-            tt[..Size].CopyTo(z);
+            if (BitConverter.IsLittleEndian)
+            {
+                Span<byte> r = MemoryMarshal.AsBytes(tt);
+                Reduce704(r, r);
+                tt[..Size].CopyTo(z);
+            }
+            else
+            {
+                Span<byte> r = stackalloc byte[88];
+                Codec.Encode32(tt, r);
+
+                Reduce704(r, r);
+                Decode(r, z);
+            }
         }
 #else
         internal static void Multiply225Var(uint[] x, uint[] y225, uint[] z)
@@ -124,20 +135,430 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
                 Nat.SubFrom(Size, x, 0, tt, 8);
             }
 
-            byte[] bytes = new byte[114];
+            byte[] bytes = new byte[88];
             Codec.Encode32(tt, 0, 22, bytes, 0);
 
-            byte[] r = Reduce(bytes);
+            byte[] r = Reduce704(bytes);
             Decode(r, z);
         }
 #endif
 
-        internal static byte[] Reduce(byte[] n)
+        internal static byte[] Reduce704(byte[] n)
+        {
+            byte[] r = new byte[ScalarBytes];
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Reduce704(n, r);
+#else
+            ulong x00 =  Codec.Decode32(n,   0);                // x00:32/--
+            ulong x01 = (Codec.Decode24(n,   4) << 4);          // x01:28/--
+            ulong x02 =  Codec.Decode32(n,   7);                // x02:32/--
+            ulong x03 = (Codec.Decode24(n,  11) << 4);          // x03:28/--
+            ulong x04 =  Codec.Decode32(n,  14);                // x04:32/--
+            ulong x05 = (Codec.Decode24(n,  18) << 4);          // x05:28/--
+            ulong x06 =  Codec.Decode32(n,  21);                // x06:32/--
+            ulong x07 = (Codec.Decode24(n,  25) << 4);          // x07:28/--
+            ulong x08 =  Codec.Decode32(n,  28);                // x08:32/--
+            ulong x09 = (Codec.Decode24(n,  32) << 4);          // x09:28/--
+            ulong x10 =  Codec.Decode32(n,  35);                // x10:32/--
+            ulong x11 = (Codec.Decode24(n,  39) << 4);          // x11:28/--
+            ulong x12 =  Codec.Decode32(n,  42);                // x12:32/--
+            ulong x13 = (Codec.Decode24(n,  46) << 4);          // x13:28/--
+            ulong x14 =  Codec.Decode32(n,  49);                // x14:32/--
+            ulong x15 = (Codec.Decode24(n,  53) << 4);          // x15:28/--
+            ulong x16 =  Codec.Decode32(n,  56);                // x16:32/--
+            ulong x17 = (Codec.Decode24(n,  60) << 4);          // x17:28/--
+            ulong x18 =  Codec.Decode32(n,  63);                // x18:32/--
+            ulong x19 = (Codec.Decode24(n,  67) << 4);          // x19:28/--
+            ulong x20 =  Codec.Decode32(n,  70);                // x20:32/--
+            ulong x21 = (Codec.Decode24(n,  74) << 4);          // x21:28/--
+            ulong x22 =  Codec.Decode32(n,  77);                // x22:32/--
+            ulong x23 = (Codec.Decode24(n,  81) << 4);          // x23:28/--
+            ulong x24 =  Codec.Decode32(n,  84);                // x24:32/--
+            ulong x25 = 0UL;
+
+            // TODO Fix bounds calculations which were copied from Reduce912
+
+            x25 += (x24 >> 28); x24 &= M28UL;           // x25:28/--, x24:28/--
+            x09 += x25 * L4_0;                          // x09:54/--
+            x10 += x25 * L4_1;                          // x10:54/53
+            x11 += x25 * L4_2;                          // x11:56/--
+            x12 += x25 * L4_3;                          // x12:57/--
+            x13 += x25 * L4_4;                          // x13:57/55
+            x14 += x25 * L4_5;                          // x14:58/--
+            x15 += x25 * L4_6;                          // x15:58/56
+            x16 += x25 * L4_7;                          // x16:59/--
+
+            x21 += (x20 >> 28); x20 &= M28UL;           // x21:58/--, x20:28/--
+            x22 += (x21 >> 28); x21 &= M28UL;           // x22:57/54, x21:28/--
+            x23 += (x22 >> 28); x22 &= M28UL;           // x23:45/42, x22:28/--
+            x24 += (x23 >> 28); x23 &= M28UL;           // x24:28/18, x23:28/--
+
+            x08 += x24 * L4_0;                          // x08:54/--
+            x09 += x24 * L4_1;                          // x09:55/--
+            x10 += x24 * L4_2;                          // x10:56/46
+            x11 += x24 * L4_3;                          // x11:57/46
+            x12 += x24 * L4_4;                          // x12:57/55
+            x13 += x24 * L4_5;                          // x13:58/--
+            x14 += x24 * L4_6;                          // x14:58/56
+            x15 += x24 * L4_7;                          // x15:59/--
+
+            x07 += x23 * L4_0;                          // x07:54/--
+            x08 += x23 * L4_1;                          // x08:54/53
+            x09 += x23 * L4_2;                          // x09:56/53
+            x10 += x23 * L4_3;                          // x10:57/46
+            x11 += x23 * L4_4;                          // x11:57/55
+            x12 += x23 * L4_5;                          // x12:58/--
+            x13 += x23 * L4_6;                          // x13:58/56
+            x14 += x23 * L4_7;                          // x14:59/--
+
+            x06 += x22 * L4_0;                          // x06:54/--
+            x07 += x22 * L4_1;                          // x07:54/53
+            x08 += x22 * L4_2;                          // x08:56/--
+            x09 += x22 * L4_3;                          // x09:57/53
+            x10 += x22 * L4_4;                          // x10:57/55
+            x11 += x22 * L4_5;                          // x11:58/--
+            x12 += x22 * L4_6;                          // x12:58/56
+            x13 += x22 * L4_7;                          // x13:59/--
+
+            x18 += (x17 >> 28); x17 &= M28UL;           // x18:59/31, x17:28/--
+            x19 += (x18 >> 28); x18 &= M28UL;           // x19:58/54, x18:28/--
+            x20 += (x19 >> 28); x19 &= M28UL;           // x20:30/29, x19:28/--
+            x21 += (x20 >> 28); x20 &= M28UL;           // x21:28/03, x20:28/--
+
+            x05 += x21 * L4_0;                          // x05:54/--
+            x06 += x21 * L4_1;                          // x06:55/--
+            x07 += x21 * L4_2;                          // x07:56/31
+            x08 += x21 * L4_3;                          // x08:57/31
+            x09 += x21 * L4_4;                          // x09:57/56
+            x10 += x21 * L4_5;                          // x10:58/--
+            x11 += x21 * L4_6;                          // x11:58/56
+            x12 += x21 * L4_7;                          // x12:59/--
+
+            x04 += x20 * L4_0;                          // x04:54/--
+            x05 += x20 * L4_1;                          // x05:54/53
+            x06 += x20 * L4_2;                          // x06:56/53
+            x07 += x20 * L4_3;                          // x07:57/31
+            x08 += x20 * L4_4;                          // x08:57/55
+            x09 += x20 * L4_5;                          // x09:58/--
+            x10 += x20 * L4_6;                          // x10:58/56
+            x11 += x20 * L4_7;                          // x11:59/--
+
+            x03 += x19 * L4_0;                          // x03:54/--
+            x04 += x19 * L4_1;                          // x04:54/53
+            x05 += x19 * L4_2;                          // x05:56/--
+            x06 += x19 * L4_3;                          // x06:57/53
+            x07 += x19 * L4_4;                          // x07:57/55
+            x08 += x19 * L4_5;                          // x08:58/--
+            x09 += x19 * L4_6;                          // x09:58/56
+            x10 += x19 * L4_7;                          // x10:59/--
+
+            x15 += (x14 >> 28); x14 &= M28UL;           // x15:59/31, x14:28/--
+            x16 += (x15 >> 28); x15 &= M28UL;           // x16:59/32, x15:28/--
+            x17 += (x16 >> 28); x16 &= M28UL;           // x17:31/29, x16:28/--
+            x18 += (x17 >> 28); x17 &= M28UL;           // x18:28/04, x17:28/--
+
+            x02 += x18 * L4_0;                          // x02:54/--
+            x03 += x18 * L4_1;                          // x03:55/--
+            x04 += x18 * L4_2;                          // x04:56/32
+            x05 += x18 * L4_3;                          // x05:57/32
+            x06 += x18 * L4_4;                          // x06:57/56
+            x07 += x18 * L4_5;                          // x07:58/--
+            x08 += x18 * L4_6;                          // x08:58/56
+            x09 += x18 * L4_7;                          // x09:59/--
+
+            x01 += x17 * L4_0;                          // x01:54/--
+            x02 += x17 * L4_1;                          // x02:54/53
+            x03 += x17 * L4_2;                          // x03:56/53
+            x04 += x17 * L4_3;                          // x04:57/32
+            x05 += x17 * L4_4;                          // x05:57/55
+            x06 += x17 * L4_5;                          // x06:58/--
+            x07 += x17 * L4_6;                          // x07:58/56
+            x08 += x17 * L4_7;                          // x08:59/--
+
+            x16 *= 4;
+            x16 += (x15 >> 26); x15 &= M26UL;
+            x16 += 1;                                   // x16:30/01
+
+            x00 += x16 * L_0;
+            x01 += x16 * L_1;
+            x02 += x16 * L_2;
+            x03 += x16 * L_3;
+            x04 += x16 * L_4;
+            x05 += x16 * L_5;
+            x06 += x16 * L_6;
+            x07 += x16 * L_7;
+
+            x01 += (x00 >> 28); x00 &= M28UL;
+            x02 += (x01 >> 28); x01 &= M28UL;
+            x03 += (x02 >> 28); x02 &= M28UL;
+            x04 += (x03 >> 28); x03 &= M28UL;
+            x05 += (x04 >> 28); x04 &= M28UL;
+            x06 += (x05 >> 28); x05 &= M28UL;
+            x07 += (x06 >> 28); x06 &= M28UL;
+            x08 += (x07 >> 28); x07 &= M28UL;
+            x09 += (x08 >> 28); x08 &= M28UL;
+            x10 += (x09 >> 28); x09 &= M28UL;
+            x11 += (x10 >> 28); x10 &= M28UL;
+            x12 += (x11 >> 28); x11 &= M28UL;
+            x13 += (x12 >> 28); x12 &= M28UL;
+            x14 += (x13 >> 28); x13 &= M28UL;
+            x15 += (x14 >> 28); x14 &= M28UL;
+            x16  = (x15 >> 26); x15 &= M26UL;
+
+            x16 -= 1;
+
+            Debug.Assert(x16 == 0UL || x16 == ulong.MaxValue);
+
+            x00 -= x16 & L_0;
+            x01 -= x16 & L_1;
+            x02 -= x16 & L_2;
+            x03 -= x16 & L_3;
+            x04 -= x16 & L_4;
+            x05 -= x16 & L_5;
+            x06 -= x16 & L_6;
+            x07 -= x16 & L_7;
+
+            x01 += (ulong)((long)x00 >> 28); x00 &= M28UL;
+            x02 += (ulong)((long)x01 >> 28); x01 &= M28UL;
+            x03 += (ulong)((long)x02 >> 28); x02 &= M28UL;
+            x04 += (ulong)((long)x03 >> 28); x03 &= M28UL;
+            x05 += (ulong)((long)x04 >> 28); x04 &= M28UL;
+            x06 += (ulong)((long)x05 >> 28); x05 &= M28UL;
+            x07 += (ulong)((long)x06 >> 28); x06 &= M28UL;
+            x08 += (ulong)((long)x07 >> 28); x07 &= M28UL;
+            x09 += (ulong)((long)x08 >> 28); x08 &= M28UL;
+            x10 += (ulong)((long)x09 >> 28); x09 &= M28UL;
+            x11 += (ulong)((long)x10 >> 28); x10 &= M28UL;
+            x12 += (ulong)((long)x11 >> 28); x11 &= M28UL;
+            x13 += (ulong)((long)x12 >> 28); x12 &= M28UL;
+            x14 += (ulong)((long)x13 >> 28); x13 &= M28UL;
+            x15 += (ulong)((long)x14 >> 28); x14 &= M28UL;
+
+            Debug.Assert(x15 >> 26 == 0UL);
+
+            Codec.Encode56(x00 | (x01 << 28), r,  0);
+            Codec.Encode56(x02 | (x03 << 28), r,  7);
+            Codec.Encode56(x04 | (x05 << 28), r, 14);
+            Codec.Encode56(x06 | (x07 << 28), r, 21);
+            Codec.Encode56(x08 | (x09 << 28), r, 28);
+            Codec.Encode56(x10 | (x11 << 28), r, 35);
+            Codec.Encode56(x12 | (x13 << 28), r, 42);
+            Codec.Encode56(x14 | (x15 << 28), r, 49);
+            //r[ScalarBytes - 1] = 0;
+#endif
+
+            return r;
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        internal static void Reduce704(ReadOnlySpan<byte> n, Span<byte> r)
+        {
+            ulong x00 =  Codec.Decode32(n[  0..]);              // x00:32/--
+            ulong x01 = (Codec.Decode24(n[  4..]) << 4);        // x01:28/--
+            ulong x02 =  Codec.Decode32(n[  7..]);              // x02:32/--
+            ulong x03 = (Codec.Decode24(n[ 11..]) << 4);        // x03:28/--
+            ulong x04 =  Codec.Decode32(n[ 14..]);              // x04:32/--
+            ulong x05 = (Codec.Decode24(n[ 18..]) << 4);        // x05:28/--
+            ulong x06 =  Codec.Decode32(n[ 21..]);              // x06:32/--
+            ulong x07 = (Codec.Decode24(n[ 25..]) << 4);        // x07:28/--
+            ulong x08 =  Codec.Decode32(n[ 28..]);              // x08:32/--
+            ulong x09 = (Codec.Decode24(n[ 32..]) << 4);        // x09:28/--
+            ulong x10 =  Codec.Decode32(n[ 35..]);              // x10:32/--
+            ulong x11 = (Codec.Decode24(n[ 39..]) << 4);        // x11:28/--
+            ulong x12 =  Codec.Decode32(n[ 42..]);              // x12:32/--
+            ulong x13 = (Codec.Decode24(n[ 46..]) << 4);        // x13:28/--
+            ulong x14 =  Codec.Decode32(n[ 49..]);              // x14:32/--
+            ulong x15 = (Codec.Decode24(n[ 53..]) << 4);        // x15:28/--
+            ulong x16 =  Codec.Decode32(n[ 56..]);              // x16:32/--
+            ulong x17 = (Codec.Decode24(n[ 60..]) << 4);        // x17:28/--
+            ulong x18 =  Codec.Decode32(n[ 63..]);              // x18:32/--
+            ulong x19 = (Codec.Decode24(n[ 67..]) << 4);        // x19:28/--
+            ulong x20 =  Codec.Decode32(n[ 70..]);              // x20:32/--
+            ulong x21 = (Codec.Decode24(n[ 74..]) << 4);        // x21:28/--
+            ulong x22 =  Codec.Decode32(n[ 77..]);              // x22:32/--
+            ulong x23 = (Codec.Decode24(n[ 81..]) << 4);        // x23:28/--
+            ulong x24 =  Codec.Decode32(n[ 84..]);              // x24:32/--
+            ulong x25 = 0UL;
+
+            // TODO Fix bounds calculations which were copied from Reduce912
+
+            x25 += (x24 >> 28); x24 &= M28UL;           // x25:28/--, x24:28/--
+            x09 += x25 * L4_0;                          // x09:54/--
+            x10 += x25 * L4_1;                          // x10:54/53
+            x11 += x25 * L4_2;                          // x11:56/--
+            x12 += x25 * L4_3;                          // x12:57/--
+            x13 += x25 * L4_4;                          // x13:57/55
+            x14 += x25 * L4_5;                          // x14:58/--
+            x15 += x25 * L4_6;                          // x15:58/56
+            x16 += x25 * L4_7;                          // x16:59/--
+
+            x21 += (x20 >> 28); x20 &= M28UL;           // x21:58/--, x20:28/--
+            x22 += (x21 >> 28); x21 &= M28UL;           // x22:57/54, x21:28/--
+            x23 += (x22 >> 28); x22 &= M28UL;           // x23:45/42, x22:28/--
+            x24 += (x23 >> 28); x23 &= M28UL;           // x24:28/18, x23:28/--
+
+            x08 += x24 * L4_0;                          // x08:54/--
+            x09 += x24 * L4_1;                          // x09:55/--
+            x10 += x24 * L4_2;                          // x10:56/46
+            x11 += x24 * L4_3;                          // x11:57/46
+            x12 += x24 * L4_4;                          // x12:57/55
+            x13 += x24 * L4_5;                          // x13:58/--
+            x14 += x24 * L4_6;                          // x14:58/56
+            x15 += x24 * L4_7;                          // x15:59/--
+
+            x07 += x23 * L4_0;                          // x07:54/--
+            x08 += x23 * L4_1;                          // x08:54/53
+            x09 += x23 * L4_2;                          // x09:56/53
+            x10 += x23 * L4_3;                          // x10:57/46
+            x11 += x23 * L4_4;                          // x11:57/55
+            x12 += x23 * L4_5;                          // x12:58/--
+            x13 += x23 * L4_6;                          // x13:58/56
+            x14 += x23 * L4_7;                          // x14:59/--
+
+            x06 += x22 * L4_0;                          // x06:54/--
+            x07 += x22 * L4_1;                          // x07:54/53
+            x08 += x22 * L4_2;                          // x08:56/--
+            x09 += x22 * L4_3;                          // x09:57/53
+            x10 += x22 * L4_4;                          // x10:57/55
+            x11 += x22 * L4_5;                          // x11:58/--
+            x12 += x22 * L4_6;                          // x12:58/56
+            x13 += x22 * L4_7;                          // x13:59/--
+
+            x18 += (x17 >> 28); x17 &= M28UL;           // x18:59/31, x17:28/--
+            x19 += (x18 >> 28); x18 &= M28UL;           // x19:58/54, x18:28/--
+            x20 += (x19 >> 28); x19 &= M28UL;           // x20:30/29, x19:28/--
+            x21 += (x20 >> 28); x20 &= M28UL;           // x21:28/03, x20:28/--
+
+            x05 += x21 * L4_0;                          // x05:54/--
+            x06 += x21 * L4_1;                          // x06:55/--
+            x07 += x21 * L4_2;                          // x07:56/31
+            x08 += x21 * L4_3;                          // x08:57/31
+            x09 += x21 * L4_4;                          // x09:57/56
+            x10 += x21 * L4_5;                          // x10:58/--
+            x11 += x21 * L4_6;                          // x11:58/56
+            x12 += x21 * L4_7;                          // x12:59/--
+
+            x04 += x20 * L4_0;                          // x04:54/--
+            x05 += x20 * L4_1;                          // x05:54/53
+            x06 += x20 * L4_2;                          // x06:56/53
+            x07 += x20 * L4_3;                          // x07:57/31
+            x08 += x20 * L4_4;                          // x08:57/55
+            x09 += x20 * L4_5;                          // x09:58/--
+            x10 += x20 * L4_6;                          // x10:58/56
+            x11 += x20 * L4_7;                          // x11:59/--
+
+            x03 += x19 * L4_0;                          // x03:54/--
+            x04 += x19 * L4_1;                          // x04:54/53
+            x05 += x19 * L4_2;                          // x05:56/--
+            x06 += x19 * L4_3;                          // x06:57/53
+            x07 += x19 * L4_4;                          // x07:57/55
+            x08 += x19 * L4_5;                          // x08:58/--
+            x09 += x19 * L4_6;                          // x09:58/56
+            x10 += x19 * L4_7;                          // x10:59/--
+
+            x15 += (x14 >> 28); x14 &= M28UL;           // x15:59/31, x14:28/--
+            x16 += (x15 >> 28); x15 &= M28UL;           // x16:59/32, x15:28/--
+            x17 += (x16 >> 28); x16 &= M28UL;           // x17:31/29, x16:28/--
+            x18 += (x17 >> 28); x17 &= M28UL;           // x18:28/04, x17:28/--
+
+            x02 += x18 * L4_0;                          // x02:54/--
+            x03 += x18 * L4_1;                          // x03:55/--
+            x04 += x18 * L4_2;                          // x04:56/32
+            x05 += x18 * L4_3;                          // x05:57/32
+            x06 += x18 * L4_4;                          // x06:57/56
+            x07 += x18 * L4_5;                          // x07:58/--
+            x08 += x18 * L4_6;                          // x08:58/56
+            x09 += x18 * L4_7;                          // x09:59/--
+
+            x01 += x17 * L4_0;                          // x01:54/--
+            x02 += x17 * L4_1;                          // x02:54/53
+            x03 += x17 * L4_2;                          // x03:56/53
+            x04 += x17 * L4_3;                          // x04:57/32
+            x05 += x17 * L4_4;                          // x05:57/55
+            x06 += x17 * L4_5;                          // x06:58/--
+            x07 += x17 * L4_6;                          // x07:58/56
+            x08 += x17 * L4_7;                          // x08:59/--
+
+            x16 *= 4;
+            x16 += (x15 >> 26); x15 &= M26UL;
+            x16 += 1;                                   // x16:30/01
+
+            x00 += x16 * L_0;
+            x01 += x16 * L_1;
+            x02 += x16 * L_2;
+            x03 += x16 * L_3;
+            x04 += x16 * L_4;
+            x05 += x16 * L_5;
+            x06 += x16 * L_6;
+            x07 += x16 * L_7;
+
+            x01 += (x00 >> 28); x00 &= M28UL;
+            x02 += (x01 >> 28); x01 &= M28UL;
+            x03 += (x02 >> 28); x02 &= M28UL;
+            x04 += (x03 >> 28); x03 &= M28UL;
+            x05 += (x04 >> 28); x04 &= M28UL;
+            x06 += (x05 >> 28); x05 &= M28UL;
+            x07 += (x06 >> 28); x06 &= M28UL;
+            x08 += (x07 >> 28); x07 &= M28UL;
+            x09 += (x08 >> 28); x08 &= M28UL;
+            x10 += (x09 >> 28); x09 &= M28UL;
+            x11 += (x10 >> 28); x10 &= M28UL;
+            x12 += (x11 >> 28); x11 &= M28UL;
+            x13 += (x12 >> 28); x12 &= M28UL;
+            x14 += (x13 >> 28); x13 &= M28UL;
+            x15 += (x14 >> 28); x14 &= M28UL;
+            x16  = (x15 >> 26); x15 &= M26UL;
+
+            x16 -= 1;
+
+            Debug.Assert(x16 == 0UL || x16 == ulong.MaxValue);
+
+            x00 -= x16 & L_0;
+            x01 -= x16 & L_1;
+            x02 -= x16 & L_2;
+            x03 -= x16 & L_3;
+            x04 -= x16 & L_4;
+            x05 -= x16 & L_5;
+            x06 -= x16 & L_6;
+            x07 -= x16 & L_7;
+
+            x01 += (ulong)((long)x00 >> 28); x00 &= M28UL;
+            x02 += (ulong)((long)x01 >> 28); x01 &= M28UL;
+            x03 += (ulong)((long)x02 >> 28); x02 &= M28UL;
+            x04 += (ulong)((long)x03 >> 28); x03 &= M28UL;
+            x05 += (ulong)((long)x04 >> 28); x04 &= M28UL;
+            x06 += (ulong)((long)x05 >> 28); x05 &= M28UL;
+            x07 += (ulong)((long)x06 >> 28); x06 &= M28UL;
+            x08 += (ulong)((long)x07 >> 28); x07 &= M28UL;
+            x09 += (ulong)((long)x08 >> 28); x08 &= M28UL;
+            x10 += (ulong)((long)x09 >> 28); x09 &= M28UL;
+            x11 += (ulong)((long)x10 >> 28); x10 &= M28UL;
+            x12 += (ulong)((long)x11 >> 28); x11 &= M28UL;
+            x13 += (ulong)((long)x12 >> 28); x12 &= M28UL;
+            x14 += (ulong)((long)x13 >> 28); x13 &= M28UL;
+            x15 += (ulong)((long)x14 >> 28); x14 &= M28UL;
+
+            Debug.Assert(x15 >> 26 == 0UL);
+
+            Codec.Encode56(x00 | (x01 << 28), r);
+            Codec.Encode56(x02 | (x03 << 28), r[7..]);
+            Codec.Encode56(x04 | (x05 << 28), r[14..]);
+            Codec.Encode56(x06 | (x07 << 28), r[21..]);
+            Codec.Encode56(x08 | (x09 << 28), r[28..]);
+            Codec.Encode56(x10 | (x11 << 28), r[35..]);
+            Codec.Encode56(x12 | (x13 << 28), r[42..]);
+            Codec.Encode56(x14 | (x15 << 28), r[49..]);
+            r[ScalarBytes - 1] = 0;
+        }
+#endif
+
+        internal static byte[] Reduce912(byte[] n)
         {
             byte[] r = new byte[ScalarBytes];
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-            Reduce(n, r);
+            Reduce912(n, r);
 #else
             ulong x00 =  Codec.Decode32(n,   0);                // x00:32/--
             ulong x01 = (Codec.Decode24(n,   4) << 4);          // x01:28/--
@@ -416,7 +837,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        internal static void Reduce(ReadOnlySpan<byte> n, Span<byte> r)
+        internal static void Reduce912(ReadOnlySpan<byte> n, Span<byte> r)
         {
             ulong x00 =  Codec.Decode32(n[  0..]);              // x00:32/--
             ulong x01 = (Codec.Decode24(n[  4..]) << 4);        // x01:28/--
diff --git a/crypto/src/math/ec/rfc8032/Wnaf.cs b/crypto/src/math/ec/rfc8032/Wnaf.cs
index 88319f405..209934031 100644
--- a/crypto/src/math/ec/rfc8032/Wnaf.cs
+++ b/crypto/src/math/ec/rfc8032/Wnaf.cs
@@ -42,7 +42,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
                 {
                     int word16 = (int)(word >> j);
 
-                    int skip = Integers.NumberOfTrailingZeros((sign ^ word16) | 0x00010000);
+                    int skip = Integers.NumberOfTrailingZeros((sign ^ word16) | (1 << 16));
                     if (skip > 0)
                     {
                         j += skip;
diff --git a/crypto/src/math/raw/Interleave.cs b/crypto/src/math/raw/Interleave.cs
index 8082ce57c..e71f8e394 100644
--- a/crypto/src/math/raw/Interleave.cs
+++ b/crypto/src/math/raw/Interleave.cs
@@ -17,7 +17,7 @@ namespace Org.BouncyCastle.Math.Raw
             uint t = x;
 
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.IsEnabled)
             {
                 return Bmi2.ParallelBitDeposit(t, 0x55555555U);
             }
@@ -33,7 +33,7 @@ namespace Org.BouncyCastle.Math.Raw
             uint t = x;
 
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.IsEnabled)
             {
                 return Bmi2.ParallelBitDeposit(t, 0x55555555U);
             }
@@ -48,7 +48,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static ulong Expand32to64(uint x)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.IsEnabled)
             {
                 return (ulong)Bmi2.ParallelBitDeposit(x >> 16, 0x55555555U) << 32
                     |         Bmi2.ParallelBitDeposit(x      , 0x55555555U);
@@ -67,7 +67,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static void Expand64To128(ulong x, ulong[] z, int zOff)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 z[zOff    ] = Bmi2.X64.ParallelBitDeposit(x      , 0x5555555555555555UL);
                 z[zOff + 1] = Bmi2.X64.ParallelBitDeposit(x >> 32, 0x5555555555555555UL);
@@ -90,7 +90,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static void Expand64To128(ulong x, Span<ulong> z)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 z[0] = Bmi2.X64.ParallelBitDeposit(x      , 0x5555555555555555UL);
                 z[1] = Bmi2.X64.ParallelBitDeposit(x >> 32, 0x5555555555555555UL);
@@ -136,7 +136,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static ulong Expand64To128Rev(ulong x, out ulong low)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 low  = Bmi2.X64.ParallelBitDeposit(x >> 32, 0xAAAAAAAAAAAAAAAAUL);
                 return Bmi2.X64.ParallelBitDeposit(x      , 0xAAAAAAAAAAAAAAAAUL);
@@ -157,7 +157,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static uint Shuffle(uint x)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.IsEnabled)
             {
                 return Bmi2.ParallelBitDeposit(x >> 16, 0xAAAAAAAAU)
                     |  Bmi2.ParallelBitDeposit(x      , 0x55555555U);
@@ -175,7 +175,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static ulong Shuffle(ulong x)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 return Bmi2.X64.ParallelBitDeposit(x >> 32, 0xAAAAAAAAAAAAAAAAUL)
                     |  Bmi2.X64.ParallelBitDeposit(x      , 0x5555555555555555UL);
@@ -194,7 +194,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static uint Shuffle2(uint x)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.IsEnabled)
             {
                 return Bmi2.ParallelBitDeposit(x >> 24, 0x88888888U)
                     |  Bmi2.ParallelBitDeposit(x >> 16, 0x44444444U)
@@ -219,7 +219,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static ulong Shuffle2(ulong x)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 return Bmi2.X64.ParallelBitDeposit(x >> 48, 0x8888888888888888UL)
                     |  Bmi2.X64.ParallelBitDeposit(x >> 32, 0x4444444444444444UL)
@@ -242,7 +242,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static uint Unshuffle(uint x)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.IsEnabled)
             {
                 return Bmi2.ParallelBitExtract(x, 0xAAAAAAAAU) << 16
                     |  Bmi2.ParallelBitExtract(x, 0x55555555U);
@@ -260,7 +260,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static ulong Unshuffle(ulong x)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 return Bmi2.X64.ParallelBitExtract(x, 0xAAAAAAAAAAAAAAAAUL) << 32
                     |  Bmi2.X64.ParallelBitExtract(x, 0x5555555555555555UL);
@@ -279,7 +279,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static ulong Unshuffle(ulong x, out ulong even)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 even = Bmi2.X64.ParallelBitExtract(x, 0x5555555555555555UL);
                 return Bmi2.X64.ParallelBitExtract(x, 0xAAAAAAAAAAAAAAAAUL);
@@ -294,7 +294,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static ulong Unshuffle(ulong x0, ulong x1, out ulong even)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 even = Bmi2.X64.ParallelBitExtract(x0, 0x5555555555555555UL)
                     |  Bmi2.X64.ParallelBitExtract(x1, 0x5555555555555555UL) << 32;
@@ -312,7 +312,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static uint Unshuffle2(uint x)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.IsEnabled)
             {
                 return Bmi2.ParallelBitExtract(x, 0x88888888U) << 24
                     |  Bmi2.ParallelBitExtract(x, 0x44444444U) << 16
@@ -337,7 +337,7 @@ namespace Org.BouncyCastle.Math.Raw
         internal static ulong Unshuffle2(ulong x)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Bmi2.X64.IsSupported)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled)
             {
                 return Bmi2.X64.ParallelBitExtract(x, 0x8888888888888888UL) << 48
                     |  Bmi2.X64.ParallelBitExtract(x, 0x4444444444444444UL) << 32
diff --git a/crypto/src/math/raw/Mod.cs b/crypto/src/math/raw/Mod.cs
index f1ca2ebf0..9059e479c 100644
--- a/crypto/src/math/raw/Mod.cs
+++ b/crypto/src/math/raw/Mod.cs
@@ -7,16 +7,21 @@ using Org.BouncyCastle.Utilities;
 
 namespace Org.BouncyCastle.Math.Raw
 {
-    /*
-     * Modular inversion as implemented in this class is based on the paper "Fast constant-time gcd
-     * computation and modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
-     */
-
+    /// <summary>
+    /// Modular inversion as implemented in this class is based on the paper "Fast constant-time gcd computation and
+    /// modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
+    /// </summary>
+    /// <remarks>
+    /// In some cases (when it is faster) we use the "half delta" variant of safegcd based on
+    /// <a href="https://github.com/sipa/safegcd-bounds">hddivsteps</a>. 
+    /// </remarks>
     internal static class Mod
     {
         private const int M30 = 0x3FFFFFFF;
         private const ulong M32UL = 0xFFFFFFFFUL;
 
+        private static readonly int MaxStackAlloc = Platform.Is64BitProcess ? 4096 : 1024;
+
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         public static void CheckedModOddInverse(ReadOnlySpan<uint> m, ReadOnlySpan<uint> x, Span<uint> z)
 #else
@@ -66,11 +71,12 @@ namespace Org.BouncyCastle.Math.Raw
             return x;
         }
 
-        public static uint ModOddInverse(uint[] m, uint[] x, uint[] z)
-        {
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-            return ModOddInverse(m.AsSpan(), x.AsSpan(), z.AsSpan());
+        public static uint ModOddInverse(ReadOnlySpan<uint> m, ReadOnlySpan<uint> x, Span<uint> z)
 #else
+        public static uint ModOddInverse(uint[] m, uint[] x, uint[] z)
+#endif
+        {
             int len32 = m.Length;
             Debug.Assert(len32 > 0);
             Debug.Assert((m[0] & 1) != 0);
@@ -79,25 +85,45 @@ namespace Org.BouncyCastle.Math.Raw
             int bits = (len32 << 5) - Integers.NumberOfLeadingZeros((int)m[len32 - 1]);
             int len30 = (bits + 29) / 30;
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            int allocSize = len30 * 5;
+            Span<int> alloc = (allocSize * Integers.NumBytes <= MaxStackAlloc)
+                ? stackalloc int[allocSize]
+                : new int[allocSize];
+
+            Span<int> t = stackalloc int[4];
+            Span<int> D = alloc[..len30]; alloc = alloc[len30..];
+            Span<int> E = alloc[..len30]; alloc = alloc[len30..];
+            Span<int> F = alloc[..len30]; alloc = alloc[len30..];
+            Span<int> G = alloc[..len30]; alloc = alloc[len30..];
+            Span<int> M = alloc[..len30];
+#else
             int[] t = new int[4];
             int[] D = new int[len30];
             int[] E = new int[len30];
             int[] F = new int[len30];
             int[] G = new int[len30];
             int[] M = new int[len30];
+#endif
 
             E[0] = 1;
-            Encode30(bits, x, 0, G, 0);
-            Encode30(bits, m, 0, M, 0);
+            Encode30(bits, x, G);
+            Encode30(bits, m, M);
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            M.CopyTo(F);
+#else
             Array.Copy(M, 0, F, 0, len30);
+#endif
 
-            int delta = 0;
+            // We use the "half delta" variant here, with theta == delta - 1/2
+            int theta = 0;
             int m0Inv32 = (int)Inverse32((uint)M[0]);
-            int maxDivsteps = GetMaximumDivsteps(bits);
+            int maxDivsteps = GetMaximumHDDivsteps(bits);
 
             for (int divSteps = 0; divSteps < maxDivsteps; divSteps += 30)
             {
-                delta = Divsteps30(delta, F[0], G[0], t);
+                theta = HDDivsteps30(theta, F[0], G[0], t);
                 UpdateDE30(len30, D, E, t, m0Inv32, M);
                 UpdateFG30(len30, F, G, t);
             }
@@ -107,15 +133,17 @@ namespace Org.BouncyCastle.Math.Raw
 
             CNormalize30(len30, signF, D, M);
 
-            Decode30(bits, D, 0, z, 0);
+            Decode30(bits, D, z);
             Debug.Assert(0 != Nat.LessThan(m.Length, z, m));
 
-            return (uint)(EqualTo(len30, F, 1) & EqualToZero(len30, G));
-#endif
+            return (uint)(EqualTo(len30, F, 1) & EqualTo(len30, G, 0));
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        public static uint ModOddInverse(ReadOnlySpan<uint> m, ReadOnlySpan<uint> x, Span<uint> z)
+        public static bool ModOddInverseVar(ReadOnlySpan<uint> m, ReadOnlySpan<uint> x, Span<uint> z)
+#else
+        public static bool ModOddInverseVar(uint[] m, uint[] x, uint[] z)
+#endif
         {
             int len32 = m.Length;
             Debug.Assert(len32 > 0);
@@ -125,9 +153,14 @@ namespace Org.BouncyCastle.Math.Raw
             int bits = (len32 << 5) - Integers.NumberOfLeadingZeros((int)m[len32 - 1]);
             int len30 = (bits + 29) / 30;
 
-            Span<int> alloc = len30 <= 50
-                ? stackalloc int[len30 * 5]
-                : new int[len30 * 5];
+            int clz = bits - Nat.GetBitLength(len32, x);
+            Debug.Assert(clz >= 0);
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            int allocSize = len30 * 5;
+            Span<int> alloc = (allocSize * Integers.NumBytes <= MaxStackAlloc)
+                ? stackalloc int[allocSize]
+                : new int[allocSize];
 
             Span<int> t = stackalloc int[4];
             Span<int> D = alloc[..len30]; alloc = alloc[len30..];
@@ -135,68 +168,34 @@ namespace Org.BouncyCastle.Math.Raw
             Span<int> F = alloc[..len30]; alloc = alloc[len30..];
             Span<int> G = alloc[..len30]; alloc = alloc[len30..];
             Span<int> M = alloc[..len30];
-
-            E[0] = 1;
-            Encode30(bits, x, G);
-            Encode30(bits, m, M);
-            M.CopyTo(F);
-
-            int delta = 0;
-            int m0Inv32 = (int)Inverse32((uint)M[0]);
-            int maxDivsteps = GetMaximumDivsteps(bits);
-
-            for (int divSteps = 0; divSteps < maxDivsteps; divSteps += 30)
-            {
-                delta = Divsteps30(delta, F[0], G[0], t);
-                UpdateDE30(len30, D, E, t, m0Inv32, M);
-                UpdateFG30(len30, F, G, t);
-            }
-
-            int signF = F[len30 - 1] >> 31;
-            CNegate30(len30, signF, F);
-
-            CNormalize30(len30, signF, D, M);
-
-            Decode30(bits, D, z);
-            Debug.Assert(0 != Nat.LessThan(m.Length, z, m));
-
-            return (uint)(EqualTo(len30, F, 1) & EqualToZero(len30, G));
-        }
-#endif
-
-        public static bool ModOddInverseVar(uint[] m, uint[] x, uint[] z)
-        {
-#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-            return ModOddInverseVar(m.AsSpan(), x.AsSpan(), z.AsSpan());
 #else
-            int len32 = m.Length;
-            Debug.Assert(len32 > 0);
-            Debug.Assert((m[0] & 1) != 0);
-            Debug.Assert(m[len32 - 1] != 0);
-
-            int bits = (len32 << 5) - Integers.NumberOfLeadingZeros((int)m[len32 - 1]);
-            int len30 = (bits + 29) / 30;
-
             int[] t = new int[4];
             int[] D = new int[len30];
             int[] E = new int[len30];
             int[] F = new int[len30];
             int[] G = new int[len30];
             int[] M = new int[len30];
+#endif
 
             E[0] = 1;
-            Encode30(bits, x, 0, G, 0);
-            Encode30(bits, m, 0, M, 0);
+            Encode30(bits, x, G);
+            Encode30(bits, m, M);
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            M.CopyTo(F);
+#else
             Array.Copy(M, 0, F, 0, len30);
+#endif
 
-            int clzG = Integers.NumberOfLeadingZeros(G[len30 - 1] | 1) - (len30 * 30 + 2 - bits);
-            int eta = -1 - clzG;
+            // We use the original safegcd here, with eta == 1 - delta
+            // For shorter x, configure as if low zeros of x had been shifted away by divsteps
+            int eta = -clz;
             int lenDE = len30, lenFG = len30;
             int m0Inv32 = (int)Inverse32((uint)M[0]);
             int maxDivsteps = GetMaximumDivsteps(bits);
 
-            int divsteps = 0;
-            while (!EqualToZeroVar_Unlikely(lenFG, G))
+            int divsteps = clz;
+            while (!EqualToVar(lenFG, G, 0))
             {
                 if (divsteps >= maxDivsteps)
                     return false;
@@ -206,20 +205,7 @@ namespace Org.BouncyCastle.Math.Raw
                 eta = Divsteps30Var(eta, F[0], G[0], t);
                 UpdateDE30(lenDE, D, E, t, m0Inv32, M);
                 UpdateFG30(lenFG, F, G, t);
-
-                int fn = F[lenFG - 1];
-                int gn = G[lenFG - 1];
-
-                int cond = (lenFG - 2) >> 31;
-                cond |= fn ^ (fn >> 31);
-                cond |= gn ^ (gn >> 31);
-
-                if (cond == 0)
-                {
-                    F[lenFG - 2] |= fn << 30;
-                    G[lenFG - 2] |= gn << 30;
-                    --lenFG;
-                }
+                lenFG = TrimFG30Var(lenFG, F, G);
             }
 
             int signF = F[lenFG - 1] >> 31;
@@ -241,7 +227,7 @@ namespace Org.BouncyCastle.Math.Raw
             }
             Debug.Assert(0 == signF);
 
-            if (!EqualToOneVar_Expected(lenFG, F))
+            if (!EqualToVar(lenFG, F, 1))
                 return false;
 
             if (signD < 0)
@@ -250,15 +236,73 @@ namespace Org.BouncyCastle.Math.Raw
             }
             Debug.Assert(0 == signD);
 
-            Decode30(bits, D, 0, z, 0);
+            Decode30(bits, D, z);
             Debug.Assert(!Nat.Gte(m.Length, z, m));
 
             return true;
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static uint ModOddIsCoprime(ReadOnlySpan<uint> m, ReadOnlySpan<uint> x)
+#else
+        public static uint ModOddIsCoprime(uint[] m, uint[] x)
+#endif
+        {
+            int len32 = m.Length;
+            Debug.Assert(len32 > 0);
+            Debug.Assert((m[0] & 1) != 0);
+            Debug.Assert(m[len32 - 1] != 0);
+
+            int bits = (len32 << 5) - Integers.NumberOfLeadingZeros((int)m[len32 - 1]);
+            int len30 = (bits + 29) / 30;
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            int allocSize = len30 * 3;
+            Span<int> alloc = (allocSize * Integers.NumBytes <= MaxStackAlloc)
+                ? stackalloc int[allocSize]
+                : new int[allocSize];
+
+            Span<int> t = stackalloc int[4];
+            Span<int> F = alloc[..len30]; alloc = alloc[len30..];
+            Span<int> G = alloc[..len30]; alloc = alloc[len30..];
+            Span<int> M = alloc[..len30];
+#else
+            int[] t = new int[4];
+            int[] F = new int[len30];
+            int[] G = new int[len30];
+            int[] M = new int[len30];
 #endif
+
+            Encode30(bits, x, G);
+            Encode30(bits, m, M);
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            M.CopyTo(F);
+#else
+            Array.Copy(M, 0, F, 0, len30);
+#endif
+
+            // We use the "half delta" variant here, with theta == delta - 1/2
+            int theta = 0;
+            int maxDivsteps = GetMaximumHDDivsteps(bits);
+
+            for (int divSteps = 0; divSteps < maxDivsteps; divSteps += 30)
+            {
+                theta = HDDivsteps30(theta, F[0], G[0], t);
+                UpdateFG30(len30, F, G, t);
+            }
+
+            int signF = F[len30 - 1] >> 31;
+            CNegate30(len30, signF, F);
+
+            return (uint)(EqualTo(len30, F, 1) & EqualTo(len30, G, 0));
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        public static bool ModOddInverseVar(ReadOnlySpan<uint> m, ReadOnlySpan<uint> x, Span<uint> z)
+        public static bool ModOddIsCoprimeVar(ReadOnlySpan<uint> m, ReadOnlySpan<uint> x)
+#else
+        public static bool ModOddIsCoprimeVar(uint[] m, uint[] x)
+#endif
         {
             int len32 = m.Length;
             Debug.Assert(len32 > 0);
@@ -268,30 +312,43 @@ namespace Org.BouncyCastle.Math.Raw
             int bits = (len32 << 5) - Integers.NumberOfLeadingZeros((int)m[len32 - 1]);
             int len30 = (bits + 29) / 30;
 
-            Span<int> alloc = len30 <= 50
-                ? stackalloc int[len30 * 5]
-                : new int[len30 * 5];
+            int clz = bits - Nat.GetBitLength(len32, x);
+            Debug.Assert(clz >= 0);
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            int allocSize = len30 * 3;
+            Span<int> alloc = (allocSize * Integers.NumBytes <= MaxStackAlloc)
+                ? stackalloc int[allocSize]
+                : new int[allocSize];
 
             Span<int> t = stackalloc int[4];
-            Span<int> D = alloc[..len30]; alloc = alloc[len30..];
-            Span<int> E = alloc[..len30]; alloc = alloc[len30..];
             Span<int> F = alloc[..len30]; alloc = alloc[len30..];
             Span<int> G = alloc[..len30]; alloc = alloc[len30..];
             Span<int> M = alloc[..len30];
+#else
+            int[] t = new int[4];
+            int[] F = new int[len30];
+            int[] G = new int[len30];
+            int[] M = new int[len30];
+#endif
 
-            E[0] = 1;
             Encode30(bits, x, G);
             Encode30(bits, m, M);
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
             M.CopyTo(F);
+#else
+            Array.Copy(M, 0, F, 0, len30);
+#endif
 
-            int clzG = Integers.NumberOfLeadingZeros(G[len30 - 1] | 1) - (len30 * 30 + 2 - bits);
-            int eta = -1 - clzG;
-            int lenDE = len30, lenFG = len30;
-            int m0Inv32 = (int)Inverse32((uint)M[0]);
+            // We use the original safegcd here, with eta == 1 - delta
+            // For shorter x, configure as if low zeros of x had been shifted away by divsteps
+            int eta = -clz;
+            int lenFG = len30;
             int maxDivsteps = GetMaximumDivsteps(bits);
 
-            int divsteps = 0;
-            while (!EqualToZeroVar_Unlikely(lenFG, G))
+            int divsteps = clz;
+            while (!EqualToVar(lenFG, G, 0))
             {
                 if (divsteps >= maxDivsteps)
                     return false;
@@ -299,58 +356,19 @@ namespace Org.BouncyCastle.Math.Raw
                 divsteps += 30;
 
                 eta = Divsteps30Var(eta, F[0], G[0], t);
-                UpdateDE30(lenDE, D, E, t, m0Inv32, M);
                 UpdateFG30(lenFG, F, G, t);
-
-                int fn = F[lenFG - 1];
-                int gn = G[lenFG - 1];
-
-                int cond = (lenFG - 2) >> 31;
-                cond |= fn ^ (fn >> 31);
-                cond |= gn ^ (gn >> 31);
-
-                if (cond == 0)
-                {
-                    F[lenFG - 2] |= fn << 30;
-                    G[lenFG - 2] |= gn << 30;
-                    --lenFG;
-                }
+                lenFG = TrimFG30Var(lenFG, F, G);
             }
 
             int signF = F[lenFG - 1] >> 31;
-
-            /*
-             * D is in the range (-2.M, M). First, conditionally add M if D is negative, to bring it
-             * into the range (-M, M). Then normalize by conditionally negating (according to signF)
-             * and/or then adding M, to bring it into the range [0, M).
-             */
-            int signD = D[lenDE - 1] >> 31;
-            if (signD < 0)
-            {
-                signD = Add30(lenDE, D, M);
-            }
             if (signF < 0)
             {
-                signD = Negate30(lenDE, D);
                 signF = Negate30(lenFG, F);
             }
             Debug.Assert(0 == signF);
 
-            if (!EqualToOneVar_Expected(lenFG, F))
-                return false;
-
-            if (signD < 0)
-            {
-                signD = Add30(lenDE, D, M);
-            }
-            Debug.Assert(0 == signD);
-
-            Decode30(bits, D, z);
-            Debug.Assert(!Nat.Gte(m.Length, z, m));
-
-            return true;
+            return EqualToVar(lenFG, F, 1);
         }
-#endif
 
         public static uint[] Random(SecureRandom random, uint[] p)
         {
@@ -392,9 +410,10 @@ namespace Org.BouncyCastle.Math.Raw
             m |= m >> 8;
             m |= m >> 16;
 
-            Span<byte> bytes = len <= 256
-                ? stackalloc byte[len << 2]
-                : new byte[len << 2];
+            int allocSize = len * Integers.NumBytes;
+            Span<byte> bytes = allocSize <= MaxStackAlloc
+                ? stackalloc byte[allocSize]
+                : new byte[allocSize];
 
             do
             {
@@ -496,34 +515,16 @@ namespace Org.BouncyCastle.Math.Raw
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         private static void Decode30(int bits, ReadOnlySpan<int> x, Span<uint> z)
-        {
-            Debug.Assert(bits > 0);
-
-            int avail = 0;
-            ulong data = 0L;
-
-            int xOff = 0, zOff = 0;
-            while (bits > 0)
-            {
-                while (avail < System.Math.Min(32, bits))
-                {
-                    data |= (ulong)x[xOff++] << avail;
-                    avail += 30;
-                }
-
-                z[zOff++] = (uint)data; data >>= 32;
-                avail -= 32;
-                bits -= 32;
-            }
-        }
 #else
-        private static void Decode30(int bits, int[] x, int xOff, uint[] z, int zOff)
+        private static void Decode30(int bits, int[] x, uint[] z)
+#endif
         {
             Debug.Assert(bits > 0);
 
             int avail = 0;
-            ulong data = 0L;
+            ulong data = 0UL;
 
+            int xOff = 0, zOff = 0;
             while (bits > 0)
             {
                 while (avail < System.Math.Min(32, bits))
@@ -537,53 +538,6 @@ namespace Org.BouncyCastle.Math.Raw
                 bits -= 32;
             }
         }
-#endif
-
-#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        private static int Divsteps30(int delta, int f0, int g0, Span<int> t)
-#else
-        private static int Divsteps30(int delta, int f0, int g0, int[] t)
-#endif
-        {
-            int u = 1 << 30, v = 0, q = 0, r = 1 << 30;
-            int f = f0, g = g0;
-
-            for (int i = 0; i < 30; ++i)
-            {
-                Debug.Assert((f & 1) == 1);
-                Debug.Assert(((u >> (30 - i)) * f0 + (v >> (30 - i)) * g0) == f << i);
-                Debug.Assert(((q >> (30 - i)) * f0 + (r >> (30 - i)) * g0) == g << i);
-
-                int c1 = delta >> 31;
-                int c2 = -(g & 1);
-
-                int x = f ^ c1;
-                int y = u ^ c1;
-                int z = v ^ c1;
-
-                g -= x & c2;
-                q -= y & c2;
-                r -= z & c2;
-
-                c2 &= ~c1;
-                delta = (delta ^ c2) - (c2 - 1);
-
-                f += g & c2;
-                u += q & c2;
-                v += r & c2;
-
-                g >>= 1;
-                q >>= 1;
-                r >>= 1;
-            }
-
-            t[0] = u;
-            t[1] = v;
-            t[2] = q;
-            t[3] = r;
-
-            return delta;
-        }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         private static int Divsteps30Var(int eta, int f0, int g0, Span<int> t)
@@ -595,7 +549,7 @@ namespace Org.BouncyCastle.Math.Raw
             int f = f0, g = g0, m, w, x, y, z;
             int i = 30, limit, zeros;
 
-            for (; ; )
+            for (;;)
             {
                 // Use a sentinel bit to count zeros only up to i.
                 zeros = Integers.NumberOfTrailingZeros(g | (-1 << i));
@@ -614,15 +568,15 @@ namespace Org.BouncyCastle.Math.Raw
                 Debug.Assert((u * f0 + v * g0) == f << (30 - i));
                 Debug.Assert((q * f0 + r * g0) == g << (30 - i));
 
-                if (eta < 0)
+                if (eta <= 0)
                 {
-                    eta = -eta;
+                    eta = 2 - eta;
                     x = f; f = g; g = -x;
                     y = u; u = q; q = -y;
                     z = v; v = r; r = -z;
 
                     // Handle up to 6 divsteps at once, subject to eta and i.
-                    limit = (eta + 1) > i ? i : (eta + 1);
+                    limit = eta > i ? i : eta;
                     m = (int)((uint.MaxValue >> (32 - limit)) & 63U);
 
                     w = (f * g * (f * f - 2)) & m;
@@ -630,11 +584,11 @@ namespace Org.BouncyCastle.Math.Raw
                 else
                 {
                     // Handle up to 4 divsteps at once, subject to eta and i.
-                    limit = (eta + 1) > i ? i : (eta + 1);
+                    limit = eta > i ? i : eta;
                     m = (int)((uint.MaxValue >> (32 - limit)) & 15U);
 
                     w = f + (((f + 1) & 4) << 1);
-                    w = (-w * g) & m;
+                    w = (w * -g) & m;
                 }
 
                 g += f * w;
@@ -654,34 +608,16 @@ namespace Org.BouncyCastle.Math.Raw
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         private static void Encode30(int bits, ReadOnlySpan<uint> x, Span<int> z)
-        {
-            Debug.Assert(bits > 0);
-
-            int avail = 0;
-            ulong data = 0UL;
-
-            int xOff = 0, zOff = 0;
-            while (bits > 0)
-            {
-                if (avail < System.Math.Min(30, bits))
-                {
-                    data |= (x[xOff++] & M32UL) << avail;
-                    avail += 32;
-                }
-
-                z[zOff++] = (int)data & M30; data >>= 30;
-                avail -= 30;
-                bits -= 30;
-            }
-        }
 #else
-        private static void Encode30(int bits, uint[] x, int xOff, int[] z, int zOff)
+        private static void Encode30(int bits, uint[] x, int[] z)
+#endif
         {
             Debug.Assert(bits > 0);
 
             int avail = 0;
             ulong data = 0UL;
 
+            int xOff = 0, zOff = 0;
             while (bits > 0)
             {
                 if (avail < System.Math.Min(30, bits))
@@ -695,7 +631,6 @@ namespace Org.BouncyCastle.Math.Raw
                 bits -= 30;
             }
         }
-#endif
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         private static int EqualTo(int len, ReadOnlySpan<int> x, int y)
@@ -713,12 +648,15 @@ namespace Org.BouncyCastle.Math.Raw
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        private static bool EqualToOneVar_Expected(int len, ReadOnlySpan<int> x)
+        private static bool EqualToVar(int len, ReadOnlySpan<int> x, int y)
 #else
-        private static bool EqualToOneVar_Expected(int len, int[] x)
+        private static bool EqualToVar(int len, int[] x, int y)
 #endif
         {
-            int d = x[0] ^ 1;
+            int d = x[0] ^ y;
+            if (d != 0)
+                return false;
+
             for (int i = 1; i < len; ++i)
             {
                 d |= x[i];
@@ -726,41 +664,62 @@ namespace Org.BouncyCastle.Math.Raw
             return d == 0;
         }
 
-#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        private static int EqualToZero(int len, ReadOnlySpan<int> x)
-#else
-        private static int EqualToZero(int len, int[] x)
-#endif
+        private static int GetMaximumDivsteps(int bits)
         {
-            int d = 0;
-            for (int i = 0; i < len; ++i)
-            {
-                d |= x[i];
-            }
-            d = (int)((uint)d >> 1) | (d & 1);
-            return (d - 1) >> 31;
+            //return (49 * bits + (bits < 46 ? 80 : 47)) / 17;
+            return (int)((188898L * bits + (bits < 46 ? 308405 : 181188)) >> 16);
+        }
+
+        private static int GetMaximumHDDivsteps(int bits)
+        {
+            //return (int)((45907L * bits + 30179) / 19929);
+            return (int)((150964L * bits + 99243) >> 16);
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        private static bool EqualToZeroVar_Unlikely(int len, ReadOnlySpan<int> x)
+        private static int HDDivsteps30(int theta, int f0, int g0, Span<int> t)
 #else
-        private static bool EqualToZeroVar_Unlikely(int len, int[] x)
+        private static int HDDivsteps30(int theta, int f0, int g0, int[] t)
 #endif
         {
-            int d = x[0];
-            if (d != 0)
-                return false;
+            int u = 1 << 30, v = 0, q = 0, r = 1 << 30;
+            int f = f0, g = g0;
 
-            for (int i = 1; i < len; ++i)
+            for (int i = 0; i < 30; ++i)
             {
-                d |= x[i];
+                Debug.Assert((f & 1) == 1);
+                Debug.Assert(((u >> (30 - i)) * f0 + (v >> (30 - i)) * g0) == f << i);
+                Debug.Assert(((q >> (30 - i)) * f0 + (r >> (30 - i)) * g0) == g << i);
+
+                int c1 = theta >> 31;
+                int c2 = -(g & 1);
+
+                int x = f ^ c1;
+                int y = u ^ c1;
+                int z = v ^ c1;
+
+                g -= x & c2;
+                q -= y & c2;
+                r -= z & c2;
+
+                int c3 = c2 & ~c1;
+                theta = (theta ^ c3) + 1;
+
+                f += g & c3;
+                u += q & c3;
+                v += r & c3;
+
+                g >>= 1;
+                q >>= 1;
+                r >>= 1;
             }
-            return d == 0;
-        }
 
-        private static int GetMaximumDivsteps(int bits)
-        {
-            return (49 * bits + (bits < 46 ? 80 : 47)) / 17;
+            t[0] = u;
+            t[1] = v;
+            t[2] = q;
+            t[3] = r;
+
+            return theta;
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
@@ -784,6 +743,33 @@ namespace Org.BouncyCastle.Math.Raw
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        private static int TrimFG30Var(int len30, Span<int> F, Span<int> G)
+#else
+        private static int TrimFG30Var(int len30, int[] F, int[] G)
+#endif
+        {
+            Debug.Assert(len30 > 0);
+            Debug.Assert(F.Length >= len30);
+            Debug.Assert(G.Length >= len30);
+
+            int fn = F[len30 - 1];
+            int gn = G[len30 - 1];
+
+            int cond = (len30 - 2) >> 31;
+            cond |= fn ^ (fn >> 31);
+            cond |= gn ^ (gn >> 31);
+
+            if (cond == 0)
+            {
+                F[len30 - 2] |= fn << 30;
+                G[len30 - 2] |= gn << 30;
+                --len30;
+            }
+
+            return len30;
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         private static void UpdateDE30(int len30, Span<int> D, Span<int> E, ReadOnlySpan<int> t, int m0Inv32,
             ReadOnlySpan<int> M)
 #else
diff --git a/crypto/src/math/raw/Nat.cs b/crypto/src/math/raw/Nat.cs
index d748e04c5..b524750d8 100644
--- a/crypto/src/math/raw/Nat.cs
+++ b/crypto/src/math/raw/Nat.cs
@@ -6,6 +6,7 @@ using System.Runtime.InteropServices;
 #endif
 
 using Org.BouncyCastle.Crypto.Utilities;
+using Org.BouncyCastle.Utilities;
 
 namespace Org.BouncyCastle.Math.Raw
 {
@@ -400,6 +401,36 @@ namespace Org.BouncyCastle.Math.Raw
         }
 #endif
 
+        public static uint CAddTo(int len, int mask, uint[] x, uint[] z)
+        {
+            uint MASK = (uint)-(mask & 1);
+
+            ulong c = 0;
+            for (int i = 0; i < len; ++i)
+            {
+                c += (ulong)z[i] + (x[i] & MASK);
+                z[i] = (uint)c;
+                c >>= 32;
+            }
+            return (uint)c;
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static uint CAddTo(int len, int mask, ReadOnlySpan<uint> x, Span<uint> z)
+        {
+            uint MASK = (uint)-(mask & 1);
+
+            ulong c = 0;
+            for (int i = 0; i < len; ++i)
+            {
+                c += (ulong)z[i] + (x[i] & MASK);
+                z[i] = (uint)c;
+                c >>= 32;
+            }
+            return (uint)c;
+        }
+#endif
+
         public static void CMov(int len, int mask, uint[] x, int xOff, uint[] z, int zOff)
         {
             uint MASK = (uint)-(mask & 1);
@@ -712,7 +743,11 @@ namespace Org.BouncyCastle.Math.Raw
         }
 #endif
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static uint EqualTo(int len, ReadOnlySpan<uint> x, uint y)
+#else
         public static uint EqualTo(int len, uint[] x, uint y)
+#endif
         {
             uint d = x[0] ^ y;
             for (int i = 1; i < len; ++i)
@@ -735,19 +770,10 @@ namespace Org.BouncyCastle.Math.Raw
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        public static uint EqualTo(int len, ReadOnlySpan<uint> x, uint y)
-        {
-            uint d = x[0] ^ y;
-            for (int i = 1; i < len; ++i)
-            {
-                d |= x[i];
-            }
-            d = (d >> 1) | (d & 1);
-            return (uint)(((int)d - 1) >> 31);
-        }
-#endif
-
+        public static uint EqualTo(int len, ReadOnlySpan<uint> x, ReadOnlySpan<uint> y)
+#else
         public static uint EqualTo(int len, uint[] x, uint[] y)
+#endif
         {
             uint d = 0;
             for (int i = 0; i < len; ++i)
@@ -769,20 +795,12 @@ namespace Org.BouncyCastle.Math.Raw
             return (uint)(((int)d - 1) >> 31);
         }
 
-#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        public static uint EqualTo(int len, ReadOnlySpan<uint> x, ReadOnlySpan<uint> y)
-        {
-            uint d = 0;
-            for (int i = 0; i < len; ++i)
-            {
-                d |= x[i] ^ y[i];
-            }
-            d = (d >> 1) | (d & 1);
-            return (uint)(((int)d - 1) >> 31);
-        }
-#endif
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static uint EqualToZero(int len, ReadOnlySpan<uint> x)
+#else
         public static uint EqualToZero(int len, uint[] x)
+#endif
         {
             uint d = 0;
             for (int i = 0; i < len; ++i)
@@ -804,19 +822,6 @@ namespace Org.BouncyCastle.Math.Raw
             return (uint)(((int)d - 1) >> 31);
         }
 
-#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        public static uint EqualToZero(int len, ReadOnlySpan<uint> x)
-        {
-            uint d = 0;
-            for (int i = 0; i < len; ++i)
-            {
-                d |= x[i];
-            }
-            d = (d >> 1) | (d & 1);
-            return (uint)(((int)d - 1) >> 31);
-        }
-#endif
-
         public static uint[] FromBigInteger(int bits, BigInteger x)
         {
             if (x.SignValue < 0 || x.BitLength > bits)
@@ -924,6 +929,32 @@ namespace Org.BouncyCastle.Math.Raw
         }
 #endif
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static int GetBitLength(int len, ReadOnlySpan<uint> x)
+#else
+        public static int GetBitLength(int len, uint[] x)
+#endif
+        {
+            for (int i = len - 1; i >= 0; --i)
+            {
+                uint x_i = x[i];
+                if (x_i != 0)
+                    return i * 32 + 32 - Integers.NumberOfLeadingZeros((int)x_i);
+            }
+            return 0;
+        }
+
+        public static int GetBitLength(int len, uint[] x, int xOff)
+        {
+            for (int i = len - 1; i >= 0; --i)
+            {
+                uint x_i = x[xOff + i];
+                if (x_i != 0)
+                    return i * 32 + 32 - Integers.NumberOfLeadingZeros((int)x_i);
+            }
+            return 0;
+        }
+
         public static int GetLengthForBits(int bits)
         {
             if (bits < 1)
diff --git a/crypto/src/math/raw/Nat256.cs b/crypto/src/math/raw/Nat256.cs
index 59039d3fa..49adf04af 100644
--- a/crypto/src/math/raw/Nat256.cs
+++ b/crypto/src/math/raw/Nat256.cs
@@ -1865,7 +1865,8 @@ namespace Org.BouncyCastle.Math.Raw
         public static void Xor(ReadOnlySpan<uint> x, ReadOnlySpan<uint> y, Span<uint> z)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked)
             {
                 var X = MemoryMarshal.AsBytes(x[..8]);
                 var Y = MemoryMarshal.AsBytes(y[..8]);
@@ -1880,7 +1881,8 @@ namespace Org.BouncyCastle.Math.Raw
                 return;
             }
 
-            if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked)
             {
                 var X = MemoryMarshal.AsBytes(x[..8]);
                 var Y = MemoryMarshal.AsBytes(y[..8]);
diff --git a/crypto/src/math/raw/Nat512.cs b/crypto/src/math/raw/Nat512.cs
index 56fa9a2c9..71b53214c 100644
--- a/crypto/src/math/raw/Nat512.cs
+++ b/crypto/src/math/raw/Nat512.cs
@@ -67,7 +67,8 @@ namespace Org.BouncyCastle.Math.Raw
         public static void Xor(ReadOnlySpan<uint> x, ReadOnlySpan<uint> y, Span<uint> z)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked)
             {
                 var X = MemoryMarshal.AsBytes(x[..16]);
                 var Y = MemoryMarshal.AsBytes(y[..16]);
@@ -87,7 +88,8 @@ namespace Org.BouncyCastle.Math.Raw
                 return;
             }
 
-            if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked)
             {
                 var X = MemoryMarshal.AsBytes(x[..16]);
                 var Y = MemoryMarshal.AsBytes(y[..16]);
@@ -145,7 +147,8 @@ namespace Org.BouncyCastle.Math.Raw
         public static void XorTo(ReadOnlySpan<uint> x, Span<uint> z)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked)
             {
                 var X = MemoryMarshal.AsBytes(x[..16]);
                 var Z = MemoryMarshal.AsBytes(z[..16]);
@@ -164,7 +167,8 @@ namespace Org.BouncyCastle.Math.Raw
                 return;
             }
 
-            if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked)
             {
                 var X = MemoryMarshal.AsBytes(x[..16]);
                 var Z = MemoryMarshal.AsBytes(z[..16]);
@@ -221,7 +225,8 @@ namespace Org.BouncyCastle.Math.Raw
         public static void Xor64(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked)
             {
                 var X = MemoryMarshal.AsBytes(x[..8]);
                 var Y = MemoryMarshal.AsBytes(y[..8]);
@@ -241,7 +246,8 @@ namespace Org.BouncyCastle.Math.Raw
                 return;
             }
 
-            if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked)
             {
                 var X = MemoryMarshal.AsBytes(x[..8]);
                 var Y = MemoryMarshal.AsBytes(y[..8]);
@@ -299,7 +305,8 @@ namespace Org.BouncyCastle.Math.Raw
         public static void XorTo64(ReadOnlySpan<ulong> x, Span<ulong> z)
         {
 #if NETCOREAPP3_0_OR_GREATER
-            if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked)
             {
                 var X = MemoryMarshal.AsBytes(x[..8]);
                 var Z = MemoryMarshal.AsBytes(z[..8]);
@@ -318,7 +325,8 @@ namespace Org.BouncyCastle.Math.Raw
                 return;
             }
 
-            if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16)
+            if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled &&
+                Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked)
             {
                 var X = MemoryMarshal.AsBytes(x[..8]);
                 var Z = MemoryMarshal.AsBytes(z[..8]);