summary refs log tree commit diff
path: root/crypto
diff options
context:
space:
mode:
Diffstat (limited to 'crypto')
-rw-r--r-- crypto/src/math/ec/rfc8032/Ed25519.cs 14
-rw-r--r-- crypto/src/math/ec/rfc8032/Ed448.cs 14
-rw-r--r-- crypto/src/math/ec/rfc8032/Scalar25519.cs 309
-rw-r--r-- crypto/src/math/ec/rfc8032/Scalar448.cs 439
4 files changed, 704 insertions, 72 deletions
diff --git a/crypto/src/math/ec/rfc8032/Ed25519.cs b/crypto/src/math/ec/rfc8032/Ed25519.cs
index 766ccb393..8add3c48b 100644
--- a/crypto/src/math/ec/rfc8032/Ed25519.cs
+++ b/crypto/src/math/ec/rfc8032/Ed25519.cs
@@ -149,7 +149,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
 
             byte[] result = new byte[ScalarBytes * 2];
             Codec.Encode32(t, 0, t.Length, result, 0);
-            return Scalar25519.Reduce(result);
+            return Scalar25519.Reduce512(result);
         }
 
         private static bool CheckContextVar(byte[] ctx, byte phflag)
@@ -592,7 +592,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.DoFinal(h, 0);
 
-            byte[] r = Scalar25519.Reduce(h);
+            byte[] r = Scalar25519.Reduce512(h);
             byte[] R = new byte[PointBytes];
             ScalarMultBaseEncoded(r, R, 0);
 
@@ -605,7 +605,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.DoFinal(h, 0);
 
-            byte[] k = Scalar25519.Reduce(h);
+            byte[] k = Scalar25519.Reduce512(h);
             byte[] S = CalculateS(r, k, s);
 
             Array.Copy(R, 0, sig, sigOff, PointBytes);
@@ -697,7 +697,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.DoFinal(h);
 
             Span<byte> k = stackalloc byte[ScalarBytes];
-            Scalar25519.Reduce(h, k);
+            Scalar25519.Reduce512(h, k);
 
             Span<uint> nA = stackalloc uint[ScalarUints];
             Scalar25519.Decode(k, nA);
@@ -739,7 +739,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.DoFinal(h, 0);
 
-            byte[] k = Scalar25519.Reduce(h);
+            byte[] k = Scalar25519.Reduce512(h);
 
             uint[] nA = new uint[ScalarUints];
             Scalar25519.Decode(k, nA);
@@ -799,7 +799,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.DoFinal(h);
 
             Span<byte> k = stackalloc byte[ScalarBytes];
-            Scalar25519.Reduce(h, k);
+            Scalar25519.Reduce512(h, k);
 
             Span<uint> nA = stackalloc uint[ScalarUints];
             Scalar25519.Decode(k, nA);
@@ -840,7 +840,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.DoFinal(h, 0);
 
-            byte[] k = Scalar25519.Reduce(h);
+            byte[] k = Scalar25519.Reduce512(h);
 
             uint[] nA = new uint[ScalarUints];
             Scalar25519.Decode(k, nA);
diff --git a/crypto/src/math/ec/rfc8032/Ed448.cs b/crypto/src/math/ec/rfc8032/Ed448.cs
index bde1461e6..6eee639fe 100644
--- a/crypto/src/math/ec/rfc8032/Ed448.cs
+++ b/crypto/src/math/ec/rfc8032/Ed448.cs
@@ -118,7 +118,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
 
             byte[] result = new byte[ScalarBytes * 2];
             Codec.Encode32(t, 0, t.Length, result, 0);
-            return Scalar448.Reduce(result);
+            return Scalar448.Reduce912(result);
         }
 
         private static bool CheckContextVar(byte[] ctx)
@@ -545,7 +545,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.OutputFinal(h, 0, h.Length);
 
-            byte[] r = Scalar448.Reduce(h);
+            byte[] r = Scalar448.Reduce912(h);
             byte[] R = new byte[PointBytes];
             ScalarMultBaseEncoded(r, R, 0);
 
@@ -555,7 +555,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.OutputFinal(h, 0, h.Length);
 
-            byte[] k = Scalar448.Reduce(h);
+            byte[] k = Scalar448.Reduce912(h);
             byte[] S = CalculateS(r, k, s);
 
             Array.Copy(R, 0, sig, sigOff, PointBytes);
@@ -644,7 +644,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.OutputFinal(h);
 
             Span<byte> k = stackalloc byte[ScalarBytes];
-            Scalar448.Reduce(h, k);
+            Scalar448.Reduce912(h, k);
 
             Span<uint> nA = stackalloc uint[ScalarUints];
             Scalar448.Decode(k, nA);
@@ -683,7 +683,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.OutputFinal(h, 0, h.Length);
 
-            byte[] k = Scalar448.Reduce(h);
+            byte[] k = Scalar448.Reduce912(h);
 
             uint[] nA = new uint[ScalarUints];
             Scalar448.Decode(k, nA);
@@ -740,7 +740,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.OutputFinal(h);
 
             Span<byte> k = stackalloc byte[ScalarBytes];
-            Scalar448.Reduce(h, k);
+            Scalar448.Reduce912(h, k);
 
             Span<uint> nA = stackalloc uint[ScalarUints];
             Scalar448.Decode(k, nA);
@@ -778,7 +778,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.OutputFinal(h, 0, h.Length);
 
-            byte[] k = Scalar448.Reduce(h);
+            byte[] k = Scalar448.Reduce912(h);
 
             uint[] nA = new uint[ScalarUints];
             Scalar448.Decode(k, nA);
diff --git a/crypto/src/math/ec/rfc8032/Scalar25519.cs b/crypto/src/math/ec/rfc8032/Scalar25519.cs
index df31929cd..67eee6155 100644
--- a/crypto/src/math/ec/rfc8032/Scalar25519.cs
+++ b/crypto/src/math/ec/rfc8032/Scalar25519.cs
@@ -16,9 +16,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
 
         private const int ScalarBytes = Size * 4;
 
-        private const long M08L = 0x000000FFL;
         private const long M28L = 0x0FFFFFFFL;
-        private const long M32L = 0xFFFFFFFFL;
 
         private const int TargetLength = 254;
 
@@ -72,7 +70,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         internal static void Multiply128Var(ReadOnlySpan<uint> x, ReadOnlySpan<uint> y128, Span<uint> z)
         {
-            Span<uint> tt = stackalloc uint[16];
+            Span<uint> tt = stackalloc uint[12];
             Nat256.Mul128(x, y128, tt);
 
             if ((int)y128[3] < 0)
@@ -81,9 +79,20 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
                 Nat256.SubFrom(x, tt[4..], 0);
             }
 
-            Span<byte> r = MemoryMarshal.AsBytes(tt);
-            Reduce(r, r);
-            tt[..Size].CopyTo(z);
+            if (BitConverter.IsLittleEndian)
+            {
+                Span<byte> r = MemoryMarshal.AsBytes(tt);
+                Reduce384(r, r);
+                tt[..Size].CopyTo(z);
+            }
+            else
+            {
+                Span<byte> r = stackalloc byte[48];
+                Codec.Encode32(tt, r);
+
+                Reduce384(r, r);
+                Decode(r, z);
+            }
         }
 #else
         internal static void Multiply128Var(uint[] x, uint[] y128, uint[] z)
@@ -97,40 +106,242 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
                 Nat256.SubFrom(x, 0, tt, 4, 0);
             }
 
-            byte[] bytes = new byte[64];
+            byte[] bytes = new byte[48];
             Codec.Encode32(tt, 0, 12, bytes, 0);
 
-            byte[] r = Reduce(bytes);
+            byte[] r = Reduce384(bytes);
             Decode(r, z);
         }
 #endif
 
-        internal static byte[] Reduce(byte[] n)
+        internal static byte[] Reduce384(byte[] n)
+        {
+            byte[] r = new byte[ScalarBytes];
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Reduce384(n, r);
+#else
+            long x00 =  Codec.Decode32(n,  0);          // x00:32/--
+            long x01 = (Codec.Decode24(n,  4) << 4);    // x01:28/--
+            long x02 =  Codec.Decode32(n,  7);          // x02:32/--
+            long x03 = (Codec.Decode24(n, 11) << 4);    // x03:28/--
+            long x04 =  Codec.Decode32(n, 14);          // x04:32/--
+            long x05 = (Codec.Decode24(n, 18) << 4);    // x05:28/--
+            long x06 =  Codec.Decode32(n, 21);          // x06:32/--
+            long x07 = (Codec.Decode24(n, 25) << 4);    // x07:28/--
+            long x08 =  Codec.Decode32(n, 28);          // x08:32/--
+            long x09 = (Codec.Decode24(n, 32) << 4);    // x09:28/--
+            long x10 =  Codec.Decode32(n, 35);          // x10:32/--
+            long x11 = (Codec.Decode24(n, 39) << 4);    // x11:28/--
+            long x12 =  Codec.Decode32(n, 42);          // x12:32/--
+            long x13 = (Codec.Decode16(n, 46) << 4);    // x13:20/--
+            long t;
+
+            // TODO Fix bounds calculations which were copied from Reduce512
+
+            x13 += (x12 >> 28); x12 &= M28L;            // x13:28/22, x12:28/--
+            x04 -= x13 * L0;                            // x04:54/49
+            x05 -= x13 * L1;                            // x05:54/53
+            x06 -= x13 * L2;                            // x06:56/--
+            x07 -= x13 * L3;                            // x07:56/52
+            x08 -= x13 * L4;                            // x08:56/52
+
+            x12 += (x11 >> 28); x11 &= M28L;            // x12:28/24, x11:28/--
+            x03 -= x12 * L0;                            // x03:54/49
+            x04 -= x12 * L1;                            // x04:54/51
+            x05 -= x12 * L2;                            // x05:56/--
+            x06 -= x12 * L3;                            // x06:56/52
+            x07 -= x12 * L4;                            // x07:56/53
+
+            x11 += (x10 >> 28); x10 &= M28L;            // x11:29/--, x10:28/--
+            x02 -= x11 * L0;                            // x02:55/32
+            x03 -= x11 * L1;                            // x03:55/--
+            x04 -= x11 * L2;                            // x04:56/55
+            x05 -= x11 * L3;                            // x05:56/52
+            x06 -= x11 * L4;                            // x06:56/53
+
+            x10 += (x09 >> 28); x09 &= M28L;            // x10:29/--, x09:28/--
+            x01 -= x10 * L0;                            // x01:55/28
+            x02 -= x10 * L1;                            // x02:55/54
+            x03 -= x10 * L2;                            // x03:56/55
+            x04 -= x10 * L3;                            // x04:57/--
+            x05 -= x10 * L4;                            // x05:56/53
+
+            x08 += (x07 >> 28); x07 &= M28L;            // x08:56/53, x07:28/--
+            x09 += (x08 >> 28); x08 &= M28L;            // x09:29/25, x08:28/--
+
+            t    = (x08 >> 27) & 1L;
+            x09 += t;                                   // x09:29/26
+
+            x00 -= x09 * L0;                            // x00:55/53
+            x01 -= x09 * L1;                            // x01:55/54
+            x02 -= x09 * L2;                            // x02:57/--
+            x03 -= x09 * L3;                            // x03:57/--
+            x04 -= x09 * L4;                            // x04:57/42
+
+            x01 += (x00 >> 28); x00 &= M28L;
+            x02 += (x01 >> 28); x01 &= M28L;
+            x03 += (x02 >> 28); x02 &= M28L;
+            x04 += (x03 >> 28); x03 &= M28L;
+            x05 += (x04 >> 28); x04 &= M28L;
+            x06 += (x05 >> 28); x05 &= M28L;
+            x07 += (x06 >> 28); x06 &= M28L;
+            x08 += (x07 >> 28); x07 &= M28L;
+            x09  = (x08 >> 28); x08 &= M28L;
+
+            x09 -= t;
+
+            Debug.Assert(x09 == 0L || x09 == -1L);
+
+            x00 += x09 & L0;
+            x01 += x09 & L1;
+            x02 += x09 & L2;
+            x03 += x09 & L3;
+            x04 += x09 & L4;
+
+            x01 += (x00 >> 28); x00 &= M28L;
+            x02 += (x01 >> 28); x01 &= M28L;
+            x03 += (x02 >> 28); x02 &= M28L;
+            x04 += (x03 >> 28); x03 &= M28L;
+            x05 += (x04 >> 28); x04 &= M28L;
+            x06 += (x05 >> 28); x05 &= M28L;
+            x07 += (x06 >> 28); x06 &= M28L;
+            x08 += (x07 >> 28); x07 &= M28L;
+
+            Codec.Encode56((ulong)(x00 | (x01 << 28)), r, 0);
+            Codec.Encode56((ulong)(x02 | (x03 << 28)), r, 7);
+            Codec.Encode56((ulong)(x04 | (x05 << 28)), r, 14);
+            Codec.Encode56((ulong)(x06 | (x07 << 28)), r, 21);
+            Codec.Encode32((uint)x08, r, 28);
+#endif
+
+            return r;
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        internal static void Reduce384(ReadOnlySpan<byte> n, Span<byte> r)
+        {
+            long x00 =  Codec.Decode32(n[ 0..]);        // x00:32/--
+            long x01 = (Codec.Decode24(n[ 4..]) << 4);  // x01:28/--
+            long x02 =  Codec.Decode32(n[ 7..]);        // x02:32/--
+            long x03 = (Codec.Decode24(n[11..]) << 4);  // x03:28/--
+            long x04 =  Codec.Decode32(n[14..]);        // x04:32/--
+            long x05 = (Codec.Decode24(n[18..]) << 4);  // x05:28/--
+            long x06 =  Codec.Decode32(n[21..]);        // x06:32/--
+            long x07 = (Codec.Decode24(n[25..]) << 4);  // x07:28/--
+            long x08 =  Codec.Decode32(n[28..]);        // x08:32/--
+            long x09 = (Codec.Decode24(n[32..]) << 4);  // x09:28/--
+            long x10 =  Codec.Decode32(n[35..]);        // x10:32/--
+            long x11 = (Codec.Decode24(n[39..]) << 4);  // x11:28/--
+            long x12 =  Codec.Decode32(n[42..]);        // x12:32/--
+            long x13 = (Codec.Decode16(n[46..]) << 4);  // x13:20/--
+            long t;
+
+            // TODO Fix bounds calculations which were copied from Reduce512
+
+            x13 += (x12 >> 28); x12 &= M28L;            // x13:28/22, x12:28/--
+            x04 -= x13 * L0;                            // x04:54/49
+            x05 -= x13 * L1;                            // x05:54/53
+            x06 -= x13 * L2;                            // x06:56/--
+            x07 -= x13 * L3;                            // x07:56/52
+            x08 -= x13 * L4;                            // x08:56/52
+
+            x12 += (x11 >> 28); x11 &= M28L;            // x12:28/24, x11:28/--
+            x03 -= x12 * L0;                            // x03:54/49
+            x04 -= x12 * L1;                            // x04:54/51
+            x05 -= x12 * L2;                            // x05:56/--
+            x06 -= x12 * L3;                            // x06:56/52
+            x07 -= x12 * L4;                            // x07:56/53
+
+            x11 += (x10 >> 28); x10 &= M28L;            // x11:29/--, x10:28/--
+            x02 -= x11 * L0;                            // x02:55/32
+            x03 -= x11 * L1;                            // x03:55/--
+            x04 -= x11 * L2;                            // x04:56/55
+            x05 -= x11 * L3;                            // x05:56/52
+            x06 -= x11 * L4;                            // x06:56/53
+
+            x10 += (x09 >> 28); x09 &= M28L;            // x10:29/--, x09:28/--
+            x01 -= x10 * L0;                            // x01:55/28
+            x02 -= x10 * L1;                            // x02:55/54
+            x03 -= x10 * L2;                            // x03:56/55
+            x04 -= x10 * L3;                            // x04:57/--
+            x05 -= x10 * L4;                            // x05:56/53
+
+            x08 += (x07 >> 28); x07 &= M28L;            // x08:56/53, x07:28/--
+            x09 += (x08 >> 28); x08 &= M28L;            // x09:29/25, x08:28/--
+
+            t    = (x08 >> 27) & 1L;
+            x09 += t;                                   // x09:29/26
+
+            x00 -= x09 * L0;                            // x00:55/53
+            x01 -= x09 * L1;                            // x01:55/54
+            x02 -= x09 * L2;                            // x02:57/--
+            x03 -= x09 * L3;                            // x03:57/--
+            x04 -= x09 * L4;                            // x04:57/42
+
+            x01 += (x00 >> 28); x00 &= M28L;
+            x02 += (x01 >> 28); x01 &= M28L;
+            x03 += (x02 >> 28); x02 &= M28L;
+            x04 += (x03 >> 28); x03 &= M28L;
+            x05 += (x04 >> 28); x04 &= M28L;
+            x06 += (x05 >> 28); x05 &= M28L;
+            x07 += (x06 >> 28); x06 &= M28L;
+            x08 += (x07 >> 28); x07 &= M28L;
+            x09  = (x08 >> 28); x08 &= M28L;
+
+            x09 -= t;
+
+            Debug.Assert(x09 == 0L || x09 == -1L);
+
+            x00 += x09 & L0;
+            x01 += x09 & L1;
+            x02 += x09 & L2;
+            x03 += x09 & L3;
+            x04 += x09 & L4;
+
+            x01 += (x00 >> 28); x00 &= M28L;
+            x02 += (x01 >> 28); x01 &= M28L;
+            x03 += (x02 >> 28); x02 &= M28L;
+            x04 += (x03 >> 28); x03 &= M28L;
+            x05 += (x04 >> 28); x04 &= M28L;
+            x06 += (x05 >> 28); x05 &= M28L;
+            x07 += (x06 >> 28); x06 &= M28L;
+            x08 += (x07 >> 28); x07 &= M28L;
+
+            Codec.Encode56((ulong)(x00 | (x01 << 28)), r);
+            Codec.Encode56((ulong)(x02 | (x03 << 28)), r[7..]);
+            Codec.Encode56((ulong)(x04 | (x05 << 28)), r[14..]);
+            Codec.Encode56((ulong)(x06 | (x07 << 28)), r[21..]);
+            Codec.Encode32((uint)x08, r[28..]);
+        }
+#endif
+
+        internal static byte[] Reduce512(byte[] n)
         {
             byte[] r = new byte[ScalarBytes];
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-            Reduce(n, r);
+            Reduce512(n, r);
 #else
-            long x00 =  Codec.Decode32(n,  0)       & M32L;         // x00:32/--
-            long x01 = (Codec.Decode24(n,  4) << 4) & M32L;         // x01:28/--
-            long x02 =  Codec.Decode32(n,  7)       & M32L;         // x02:32/--
-            long x03 = (Codec.Decode24(n, 11) << 4) & M32L;         // x03:28/--
-            long x04 =  Codec.Decode32(n, 14)       & M32L;         // x04:32/--
-            long x05 = (Codec.Decode24(n, 18) << 4) & M32L;         // x05:28/--
-            long x06 =  Codec.Decode32(n, 21)       & M32L;         // x06:32/--
-            long x07 = (Codec.Decode24(n, 25) << 4) & M32L;         // x07:28/--
-            long x08 =  Codec.Decode32(n, 28)       & M32L;         // x08:32/--
-            long x09 = (Codec.Decode24(n, 32) << 4) & M32L;         // x09:28/--
-            long x10 =  Codec.Decode32(n, 35)       & M32L;         // x10:32/--
-            long x11 = (Codec.Decode24(n, 39) << 4) & M32L;         // x11:28/--
-            long x12 =  Codec.Decode32(n, 42)       & M32L;         // x12:32/--
-            long x13 = (Codec.Decode24(n, 46) << 4) & M32L;         // x13:28/--
-            long x14 =  Codec.Decode32(n, 49)       & M32L;         // x14:32/--
-            long x15 = (Codec.Decode24(n, 53) << 4) & M32L;         // x15:28/--
-            long x16 =  Codec.Decode32(n, 56)       & M32L;         // x16:32/--
-            long x17 = (Codec.Decode24(n, 60) << 4) & M32L;         // x17:28/--
-            long x18 =                 n[63]        & M08L;         // x18:08/--
+            long x00 =  Codec.Decode32(n,  0);          // x00:32/--
+            long x01 = (Codec.Decode24(n,  4) << 4);    // x01:28/--
+            long x02 =  Codec.Decode32(n,  7);          // x02:32/--
+            long x03 = (Codec.Decode24(n, 11) << 4);    // x03:28/--
+            long x04 =  Codec.Decode32(n, 14);          // x04:32/--
+            long x05 = (Codec.Decode24(n, 18) << 4);    // x05:28/--
+            long x06 =  Codec.Decode32(n, 21);          // x06:32/--
+            long x07 = (Codec.Decode24(n, 25) << 4);    // x07:28/--
+            long x08 =  Codec.Decode32(n, 28);          // x08:32/--
+            long x09 = (Codec.Decode24(n, 32) << 4);    // x09:28/--
+            long x10 =  Codec.Decode32(n, 35);          // x10:32/--
+            long x11 = (Codec.Decode24(n, 39) << 4);    // x11:28/--
+            long x12 =  Codec.Decode32(n, 42);          // x12:32/--
+            long x13 = (Codec.Decode24(n, 46) << 4);    // x13:28/--
+            long x14 =  Codec.Decode32(n, 49);          // x14:32/--
+            long x15 = (Codec.Decode24(n, 53) << 4);    // x15:28/--
+            long x16 =  Codec.Decode32(n, 56);          // x16:32/--
+            long x17 = (Codec.Decode24(n, 60) << 4);    // x17:28/--
+            long x18 =                 n[63];           // x18:08/--
             long t;
 
             //x18 += (x17 >> 28); x17 &= M28L;
@@ -248,27 +459,27 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        internal static void Reduce(ReadOnlySpan<byte> n, Span<byte> r)
+        internal static void Reduce512(ReadOnlySpan<byte> n, Span<byte> r)
         {
-            long x00 =  Codec.Decode32(n[ 0..])       & M32L;       // x00:32/--
-            long x01 = (Codec.Decode24(n[ 4..]) << 4) & M32L;       // x01:28/--
-            long x02 =  Codec.Decode32(n[ 7..])       & M32L;       // x02:32/--
-            long x03 = (Codec.Decode24(n[11..]) << 4) & M32L;       // x03:28/--
-            long x04 =  Codec.Decode32(n[14..])       & M32L;       // x04:32/--
-            long x05 = (Codec.Decode24(n[18..]) << 4) & M32L;       // x05:28/--
-            long x06 =  Codec.Decode32(n[21..])       & M32L;       // x06:32/--
-            long x07 = (Codec.Decode24(n[25..]) << 4) & M32L;       // x07:28/--
-            long x08 =  Codec.Decode32(n[28..])       & M32L;       // x08:32/--
-            long x09 = (Codec.Decode24(n[32..]) << 4) & M32L;       // x09:28/--
-            long x10 =  Codec.Decode32(n[35..])       & M32L;       // x10:32/--
-            long x11 = (Codec.Decode24(n[39..]) << 4) & M32L;       // x11:28/--
-            long x12 =  Codec.Decode32(n[42..])       & M32L;       // x12:32/--
-            long x13 = (Codec.Decode24(n[46..]) << 4) & M32L;       // x13:28/--
-            long x14 =  Codec.Decode32(n[49..])       & M32L;       // x14:32/--
-            long x15 = (Codec.Decode24(n[53..]) << 4) & M32L;       // x15:28/--
-            long x16 =  Codec.Decode32(n[56..])       & M32L;       // x16:32/--
-            long x17 = (Codec.Decode24(n[60..]) << 4) & M32L;       // x17:28/--
-            long x18 =                 n[63]          & M08L;       // x18:08/--
+            long x00 =  Codec.Decode32(n[ 0..]);        // x00:32/--
+            long x01 = (Codec.Decode24(n[ 4..]) << 4);  // x01:28/--
+            long x02 =  Codec.Decode32(n[ 7..]);        // x02:32/--
+            long x03 = (Codec.Decode24(n[11..]) << 4);  // x03:28/--
+            long x04 =  Codec.Decode32(n[14..]);        // x04:32/--
+            long x05 = (Codec.Decode24(n[18..]) << 4);  // x05:28/--
+            long x06 =  Codec.Decode32(n[21..]);        // x06:32/--
+            long x07 = (Codec.Decode24(n[25..]) << 4);  // x07:28/--
+            long x08 =  Codec.Decode32(n[28..]);        // x08:32/--
+            long x09 = (Codec.Decode24(n[32..]) << 4);  // x09:28/--
+            long x10 =  Codec.Decode32(n[35..]);        // x10:32/--
+            long x11 = (Codec.Decode24(n[39..]) << 4);  // x11:28/--
+            long x12 =  Codec.Decode32(n[42..]);        // x12:32/--
+            long x13 = (Codec.Decode24(n[46..]) << 4);  // x13:28/--
+            long x14 =  Codec.Decode32(n[49..]);        // x14:32/--
+            long x15 = (Codec.Decode24(n[53..]) << 4);  // x15:28/--
+            long x16 =  Codec.Decode32(n[56..]);        // x16:32/--
+            long x17 = (Codec.Decode24(n[60..]) << 4);  // x17:28/--
+            long x18 =                 n[63];           // x18:08/--
             long t;
 
             //x18 += (x17 >> 28); x17 &= M28L;
diff --git a/crypto/src/math/ec/rfc8032/Scalar448.cs b/crypto/src/math/ec/rfc8032/Scalar448.cs
index 4afe1d2d6..124b91250 100644
--- a/crypto/src/math/ec/rfc8032/Scalar448.cs
+++ b/crypto/src/math/ec/rfc8032/Scalar448.cs
@@ -97,7 +97,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
         {
             Debug.Assert((int)y225[7] >> 31 == (int)y225[7]);
 
-            Span<uint> tt = stackalloc uint[29];
+            Span<uint> tt = stackalloc uint[22];
             Nat.Mul(y225, x, tt);
 
             if ((int)y225[7] < 0)
@@ -106,9 +106,20 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
                 Nat.SubFrom(Size, x, tt[8..]);
             }
 
-            Span<byte> r = MemoryMarshal.AsBytes(tt);
-            Reduce(r, r);
-            tt[..Size].CopyTo(z);
+            if (BitConverter.IsLittleEndian)
+            {
+                Span<byte> r = MemoryMarshal.AsBytes(tt);
+                Reduce704(r, r);
+                tt[..Size].CopyTo(z);
+            }
+            else
+            {
+                Span<byte> r = stackalloc byte[88];
+                Codec.Encode32(tt, r);
+
+                Reduce704(r, r);
+                Decode(r, z);
+            }
         }
 #else
         internal static void Multiply225Var(uint[] x, uint[] y225, uint[] z)
@@ -124,20 +135,430 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
                 Nat.SubFrom(Size, x, 0, tt, 8);
             }
 
-            byte[] bytes = new byte[114];
+            byte[] bytes = new byte[88];
             Codec.Encode32(tt, 0, 22, bytes, 0);
 
-            byte[] r = Reduce(bytes);
+            byte[] r = Reduce704(bytes);
             Decode(r, z);
         }
 #endif
 
-        internal static byte[] Reduce(byte[] n)
+        internal static byte[] Reduce704(byte[] n) // Reduces the 88-byte (704-bit) input n; returns a newly allocated array of ScalarBytes bytes.
+        {
+            byte[] r = new byte[ScalarBytes];
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Reduce704(n, r);                            // delegate to the span overload compiled under the same #if condition
+#else
+            ulong x00 =  Codec.Decode32(n,   0);                // x00:32/--
+            ulong x01 = (Codec.Decode24(n,   4) << 4);          // x01:28/--
+            ulong x02 =  Codec.Decode32(n,   7);                // x02:32/--
+            ulong x03 = (Codec.Decode24(n,  11) << 4);          // x03:28/--
+            ulong x04 =  Codec.Decode32(n,  14);                // x04:32/--
+            ulong x05 = (Codec.Decode24(n,  18) << 4);          // x05:28/--
+            ulong x06 =  Codec.Decode32(n,  21);                // x06:32/--
+            ulong x07 = (Codec.Decode24(n,  25) << 4);          // x07:28/--
+            ulong x08 =  Codec.Decode32(n,  28);                // x08:32/--
+            ulong x09 = (Codec.Decode24(n,  32) << 4);          // x09:28/--
+            ulong x10 =  Codec.Decode32(n,  35);                // x10:32/--
+            ulong x11 = (Codec.Decode24(n,  39) << 4);          // x11:28/--
+            ulong x12 =  Codec.Decode32(n,  42);                // x12:32/--
+            ulong x13 = (Codec.Decode24(n,  46) << 4);          // x13:28/--
+            ulong x14 =  Codec.Decode32(n,  49);                // x14:32/--
+            ulong x15 = (Codec.Decode24(n,  53) << 4);          // x15:28/--
+            ulong x16 =  Codec.Decode32(n,  56);                // x16:32/--
+            ulong x17 = (Codec.Decode24(n,  60) << 4);          // x17:28/--
+            ulong x18 =  Codec.Decode32(n,  63);                // x18:32/--
+            ulong x19 = (Codec.Decode24(n,  67) << 4);          // x19:28/--
+            ulong x20 =  Codec.Decode32(n,  70);                // x20:32/--
+            ulong x21 = (Codec.Decode24(n,  74) << 4);          // x21:28/--
+            ulong x22 =  Codec.Decode32(n,  77);                // x22:32/--
+            ulong x23 = (Codec.Decode24(n,  81) << 4);          // x23:28/--
+            ulong x24 =  Codec.Decode32(n,  84);                // x24:32/--
+            ulong x25 = 0UL;                                    // receives whatever x24 holds above bit 28
+            // Limb layout: every 7 input bytes give an even limb (read as 32 bits) and an odd limb (24 bits << 4); the '>> 28' carries below move the even limb's top 4 bits up, leaving 28-bit limbs.
+            // TODO Fix bounds calculations which were copied from Reduce912 (so the "xNN:bits/bits" notes below are unverified for this 704-bit variant)
+            // Fold step used throughout: a high limb xK (K >= 16) adds xK * L4_j into limb x(K-16+j), j = 0..7. L4_* / L_* are defined elsewhere in this file (presumably derived from the group order; confirm).
+            x25 += (x24 >> 28); x24 &= M28UL;           // x25:28/--, x24:28/--
+            x09 += x25 * L4_0;                          // x09:54/--
+            x10 += x25 * L4_1;                          // x10:54/53
+            x11 += x25 * L4_2;                          // x11:56/--
+            x12 += x25 * L4_3;                          // x12:57/--
+            x13 += x25 * L4_4;                          // x13:57/55
+            x14 += x25 * L4_5;                          // x14:58/--
+            x15 += x25 * L4_6;                          // x15:58/56
+            x16 += x25 * L4_7;                          // x16:59/--
+            // Propagate carries upward from x20 through x24, then fold x24, x23 and x22.
+            x21 += (x20 >> 28); x20 &= M28UL;           // x21:58/--, x20:28/--
+            x22 += (x21 >> 28); x21 &= M28UL;           // x22:57/54, x21:28/--
+            x23 += (x22 >> 28); x22 &= M28UL;           // x23:45/42, x22:28/--
+            x24 += (x23 >> 28); x23 &= M28UL;           // x24:28/18, x23:28/--
+
+            x08 += x24 * L4_0;                          // x08:54/--
+            x09 += x24 * L4_1;                          // x09:55/--
+            x10 += x24 * L4_2;                          // x10:56/46
+            x11 += x24 * L4_3;                          // x11:57/46
+            x12 += x24 * L4_4;                          // x12:57/55
+            x13 += x24 * L4_5;                          // x13:58/--
+            x14 += x24 * L4_6;                          // x14:58/56
+            x15 += x24 * L4_7;                          // x15:59/--
+
+            x07 += x23 * L4_0;                          // x07:54/--
+            x08 += x23 * L4_1;                          // x08:54/53
+            x09 += x23 * L4_2;                          // x09:56/53
+            x10 += x23 * L4_3;                          // x10:57/46
+            x11 += x23 * L4_4;                          // x11:57/55
+            x12 += x23 * L4_5;                          // x12:58/--
+            x13 += x23 * L4_6;                          // x13:58/56
+            x14 += x23 * L4_7;                          // x14:59/--
+
+            x06 += x22 * L4_0;                          // x06:54/--
+            x07 += x22 * L4_1;                          // x07:54/53
+            x08 += x22 * L4_2;                          // x08:56/--
+            x09 += x22 * L4_3;                          // x09:57/53
+            x10 += x22 * L4_4;                          // x10:57/55
+            x11 += x22 * L4_5;                          // x11:58/--
+            x12 += x22 * L4_6;                          // x12:58/56
+            x13 += x22 * L4_7;                          // x13:59/--
+            // Propagate carries upward from x17 through x21, then fold x21, x20 and x19.
+            x18 += (x17 >> 28); x17 &= M28UL;           // x18:59/31, x17:28/--
+            x19 += (x18 >> 28); x18 &= M28UL;           // x19:58/54, x18:28/--
+            x20 += (x19 >> 28); x19 &= M28UL;           // x20:30/29, x19:28/--
+            x21 += (x20 >> 28); x20 &= M28UL;           // x21:28/03, x20:28/--
+
+            x05 += x21 * L4_0;                          // x05:54/--
+            x06 += x21 * L4_1;                          // x06:55/--
+            x07 += x21 * L4_2;                          // x07:56/31
+            x08 += x21 * L4_3;                          // x08:57/31
+            x09 += x21 * L4_4;                          // x09:57/56
+            x10 += x21 * L4_5;                          // x10:58/--
+            x11 += x21 * L4_6;                          // x11:58/56
+            x12 += x21 * L4_7;                          // x12:59/--
+
+            x04 += x20 * L4_0;                          // x04:54/--
+            x05 += x20 * L4_1;                          // x05:54/53
+            x06 += x20 * L4_2;                          // x06:56/53
+            x07 += x20 * L4_3;                          // x07:57/31
+            x08 += x20 * L4_4;                          // x08:57/55
+            x09 += x20 * L4_5;                          // x09:58/--
+            x10 += x20 * L4_6;                          // x10:58/56
+            x11 += x20 * L4_7;                          // x11:59/--
+
+            x03 += x19 * L4_0;                          // x03:54/--
+            x04 += x19 * L4_1;                          // x04:54/53
+            x05 += x19 * L4_2;                          // x05:56/--
+            x06 += x19 * L4_3;                          // x06:57/53
+            x07 += x19 * L4_4;                          // x07:57/55
+            x08 += x19 * L4_5;                          // x08:58/--
+            x09 += x19 * L4_6;                          // x09:58/56
+            x10 += x19 * L4_7;                          // x10:59/--
+            // Propagate carries upward from x14 through x18, then fold x18 and x17.
+            x15 += (x14 >> 28); x14 &= M28UL;           // x15:59/31, x14:28/--
+            x16 += (x15 >> 28); x15 &= M28UL;           // x16:59/32, x15:28/--
+            x17 += (x16 >> 28); x16 &= M28UL;           // x17:31/29, x16:28/--
+            x18 += (x17 >> 28); x17 &= M28UL;           // x18:28/04, x17:28/--
+
+            x02 += x18 * L4_0;                          // x02:54/--
+            x03 += x18 * L4_1;                          // x03:55/--
+            x04 += x18 * L4_2;                          // x04:56/32
+            x05 += x18 * L4_3;                          // x05:57/32
+            x06 += x18 * L4_4;                          // x06:57/56
+            x07 += x18 * L4_5;                          // x07:58/--
+            x08 += x18 * L4_6;                          // x08:58/56
+            x09 += x18 * L4_7;                          // x09:59/--
+
+            x01 += x17 * L4_0;                          // x01:54/--
+            x02 += x17 * L4_1;                          // x02:54/53
+            x03 += x17 * L4_2;                          // x03:56/53
+            x04 += x17 * L4_3;                          // x04:57/32
+            x05 += x17 * L4_4;                          // x05:57/55
+            x06 += x17 * L4_5;                          // x06:58/--
+            x07 += x17 * L4_6;                          // x07:58/56
+            x08 += x17 * L4_7;                          // x08:59/--
+            // Final fold: x16 is scaled by 4 and joined with the bits of x15 above bit 26, then multiplied into x00..x07 via L_*. The extra +1 adds one more multiple, which the masked subtraction further down takes back when it was not needed.
+            x16 *= 4;
+            x16 += (x15 >> 26); x15 &= M26UL;
+            x16 += 1;                                   // x16:30/01
+
+            x00 += x16 * L_0;
+            x01 += x16 * L_1;
+            x02 += x16 * L_2;
+            x03 += x16 * L_3;
+            x04 += x16 * L_4;
+            x05 += x16 * L_5;
+            x06 += x16 * L_6;
+            x07 += x16 * L_7;
+            // Full carry propagation x00 -> x15; x16 is overwritten with whatever leaves x15 above bit 26.
+            x01 += (x00 >> 28); x00 &= M28UL;
+            x02 += (x01 >> 28); x01 &= M28UL;
+            x03 += (x02 >> 28); x02 &= M28UL;
+            x04 += (x03 >> 28); x03 &= M28UL;
+            x05 += (x04 >> 28); x04 &= M28UL;
+            x06 += (x05 >> 28); x05 &= M28UL;
+            x07 += (x06 >> 28); x06 &= M28UL;
+            x08 += (x07 >> 28); x07 &= M28UL;
+            x09 += (x08 >> 28); x08 &= M28UL;
+            x10 += (x09 >> 28); x09 &= M28UL;
+            x11 += (x10 >> 28); x10 &= M28UL;
+            x12 += (x11 >> 28); x11 &= M28UL;
+            x13 += (x12 >> 28); x12 &= M28UL;
+            x14 += (x13 >> 28); x13 &= M28UL;
+            x15 += (x14 >> 28); x14 &= M28UL;
+            x16  = (x15 >> 26); x15 &= M26UL;
+
+            x16 -= 1;                                   // turns the carry-out into a mask: 0 if it was 1, all-ones if it was 0 (asserted next)
+
+            Debug.Assert(x16 == 0UL || x16 == ulong.MaxValue);
+            // Branch-free conditional subtraction: L_* is subtracted from x00..x07 only where the mask is all-ones.
+            x00 -= x16 & L_0;
+            x01 -= x16 & L_1;
+            x02 -= x16 & L_2;
+            x03 -= x16 & L_3;
+            x04 -= x16 & L_4;
+            x05 -= x16 & L_5;
+            x06 -= x16 & L_6;
+            x07 -= x16 & L_7;
+            // The subtraction can wrap a limb below zero, so these carries use a signed (arithmetic) shift via the long cast.
+            x01 += (ulong)((long)x00 >> 28); x00 &= M28UL;
+            x02 += (ulong)((long)x01 >> 28); x01 &= M28UL;
+            x03 += (ulong)((long)x02 >> 28); x02 &= M28UL;
+            x04 += (ulong)((long)x03 >> 28); x03 &= M28UL;
+            x05 += (ulong)((long)x04 >> 28); x04 &= M28UL;
+            x06 += (ulong)((long)x05 >> 28); x05 &= M28UL;
+            x07 += (ulong)((long)x06 >> 28); x06 &= M28UL;
+            x08 += (ulong)((long)x07 >> 28); x07 &= M28UL;
+            x09 += (ulong)((long)x08 >> 28); x08 &= M28UL;
+            x10 += (ulong)((long)x09 >> 28); x09 &= M28UL;
+            x11 += (ulong)((long)x10 >> 28); x10 &= M28UL;
+            x12 += (ulong)((long)x11 >> 28); x11 &= M28UL;
+            x13 += (ulong)((long)x12 >> 28); x12 &= M28UL;
+            x14 += (ulong)((long)x13 >> 28); x13 &= M28UL;
+            x15 += (ulong)((long)x14 >> 28); x14 &= M28UL;
+
+            Debug.Assert(x15 >> 26 == 0UL);
+            // Pack adjacent limb pairs (low | high << 28) and write each pair at a 7-byte stride into r.
+            Codec.Encode56(x00 | (x01 << 28), r,  0);
+            Codec.Encode56(x02 | (x03 << 28), r,  7);
+            Codec.Encode56(x04 | (x05 << 28), r, 14);
+            Codec.Encode56(x06 | (x07 << 28), r, 21);
+            Codec.Encode56(x08 | (x09 << 28), r, 28);
+            Codec.Encode56(x10 | (x11 << 28), r, 35);
+            Codec.Encode56(x12 | (x13 << 28), r, 42);
+            Codec.Encode56(x14 | (x15 << 28), r, 49);
+            //r[ScalarBytes - 1] = 0;   (left disabled in this path: r was freshly allocated above, so its last byte is already zero)
+#endif
+
+            return r;
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        internal static void Reduce704(ReadOnlySpan<byte> n, Span<byte> r) // Reduces the 88-byte (704-bit) input n into r. All of n is read into locals before r is written, so n and r may be the same buffer (Multiply225Var calls it that way).
+        {
+            ulong x00 =  Codec.Decode32(n[  0..]);              // x00:32/--
+            ulong x01 = (Codec.Decode24(n[  4..]) << 4);        // x01:28/--
+            ulong x02 =  Codec.Decode32(n[  7..]);              // x02:32/--
+            ulong x03 = (Codec.Decode24(n[ 11..]) << 4);        // x03:28/--
+            ulong x04 =  Codec.Decode32(n[ 14..]);              // x04:32/--
+            ulong x05 = (Codec.Decode24(n[ 18..]) << 4);        // x05:28/--
+            ulong x06 =  Codec.Decode32(n[ 21..]);              // x06:32/--
+            ulong x07 = (Codec.Decode24(n[ 25..]) << 4);        // x07:28/--
+            ulong x08 =  Codec.Decode32(n[ 28..]);              // x08:32/--
+            ulong x09 = (Codec.Decode24(n[ 32..]) << 4);        // x09:28/--
+            ulong x10 =  Codec.Decode32(n[ 35..]);              // x10:32/--
+            ulong x11 = (Codec.Decode24(n[ 39..]) << 4);        // x11:28/--
+            ulong x12 =  Codec.Decode32(n[ 42..]);              // x12:32/--
+            ulong x13 = (Codec.Decode24(n[ 46..]) << 4);        // x13:28/--
+            ulong x14 =  Codec.Decode32(n[ 49..]);              // x14:32/--
+            ulong x15 = (Codec.Decode24(n[ 53..]) << 4);        // x15:28/--
+            ulong x16 =  Codec.Decode32(n[ 56..]);              // x16:32/--
+            ulong x17 = (Codec.Decode24(n[ 60..]) << 4);        // x17:28/--
+            ulong x18 =  Codec.Decode32(n[ 63..]);              // x18:32/--
+            ulong x19 = (Codec.Decode24(n[ 67..]) << 4);        // x19:28/--
+            ulong x20 =  Codec.Decode32(n[ 70..]);              // x20:32/--
+            ulong x21 = (Codec.Decode24(n[ 74..]) << 4);        // x21:28/--
+            ulong x22 =  Codec.Decode32(n[ 77..]);              // x22:32/--
+            ulong x23 = (Codec.Decode24(n[ 81..]) << 4);        // x23:28/--
+            ulong x24 =  Codec.Decode32(n[ 84..]);              // x24:32/--
+            ulong x25 = 0UL;                                    // receives whatever x24 holds above bit 28
+            // Limb layout: every 7 input bytes give an even limb (read as 32 bits) and an odd limb (24 bits << 4); the '>> 28' carries below move the even limb's top 4 bits up, leaving 28-bit limbs.
+            // TODO Fix bounds calculations which were copied from Reduce912 (so the "xNN:bits/bits" notes below are unverified for this 704-bit variant)
+            // Fold step used throughout: a high limb xK (K >= 16) adds xK * L4_j into limb x(K-16+j), j = 0..7. L4_* / L_* are defined elsewhere in this file (presumably derived from the group order; confirm).
+            x25 += (x24 >> 28); x24 &= M28UL;           // x25:28/--, x24:28/--
+            x09 += x25 * L4_0;                          // x09:54/--
+            x10 += x25 * L4_1;                          // x10:54/53
+            x11 += x25 * L4_2;                          // x11:56/--
+            x12 += x25 * L4_3;                          // x12:57/--
+            x13 += x25 * L4_4;                          // x13:57/55
+            x14 += x25 * L4_5;                          // x14:58/--
+            x15 += x25 * L4_6;                          // x15:58/56
+            x16 += x25 * L4_7;                          // x16:59/--
+            // Propagate carries upward from x20 through x24, then fold x24, x23 and x22.
+            x21 += (x20 >> 28); x20 &= M28UL;           // x21:58/--, x20:28/--
+            x22 += (x21 >> 28); x21 &= M28UL;           // x22:57/54, x21:28/--
+            x23 += (x22 >> 28); x22 &= M28UL;           // x23:45/42, x22:28/--
+            x24 += (x23 >> 28); x23 &= M28UL;           // x24:28/18, x23:28/--
+
+            x08 += x24 * L4_0;                          // x08:54/--
+            x09 += x24 * L4_1;                          // x09:55/--
+            x10 += x24 * L4_2;                          // x10:56/46
+            x11 += x24 * L4_3;                          // x11:57/46
+            x12 += x24 * L4_4;                          // x12:57/55
+            x13 += x24 * L4_5;                          // x13:58/--
+            x14 += x24 * L4_6;                          // x14:58/56
+            x15 += x24 * L4_7;                          // x15:59/--
+
+            x07 += x23 * L4_0;                          // x07:54/--
+            x08 += x23 * L4_1;                          // x08:54/53
+            x09 += x23 * L4_2;                          // x09:56/53
+            x10 += x23 * L4_3;                          // x10:57/46
+            x11 += x23 * L4_4;                          // x11:57/55
+            x12 += x23 * L4_5;                          // x12:58/--
+            x13 += x23 * L4_6;                          // x13:58/56
+            x14 += x23 * L4_7;                          // x14:59/--
+
+            x06 += x22 * L4_0;                          // x06:54/--
+            x07 += x22 * L4_1;                          // x07:54/53
+            x08 += x22 * L4_2;                          // x08:56/--
+            x09 += x22 * L4_3;                          // x09:57/53
+            x10 += x22 * L4_4;                          // x10:57/55
+            x11 += x22 * L4_5;                          // x11:58/--
+            x12 += x22 * L4_6;                          // x12:58/56
+            x13 += x22 * L4_7;                          // x13:59/--
+            // Propagate carries upward from x17 through x21, then fold x21, x20 and x19.
+            x18 += (x17 >> 28); x17 &= M28UL;           // x18:59/31, x17:28/--
+            x19 += (x18 >> 28); x18 &= M28UL;           // x19:58/54, x18:28/--
+            x20 += (x19 >> 28); x19 &= M28UL;           // x20:30/29, x19:28/--
+            x21 += (x20 >> 28); x20 &= M28UL;           // x21:28/03, x20:28/--
+
+            x05 += x21 * L4_0;                          // x05:54/--
+            x06 += x21 * L4_1;                          // x06:55/--
+            x07 += x21 * L4_2;                          // x07:56/31
+            x08 += x21 * L4_3;                          // x08:57/31
+            x09 += x21 * L4_4;                          // x09:57/56
+            x10 += x21 * L4_5;                          // x10:58/--
+            x11 += x21 * L4_6;                          // x11:58/56
+            x12 += x21 * L4_7;                          // x12:59/--
+
+            x04 += x20 * L4_0;                          // x04:54/--
+            x05 += x20 * L4_1;                          // x05:54/53
+            x06 += x20 * L4_2;                          // x06:56/53
+            x07 += x20 * L4_3;                          // x07:57/31
+            x08 += x20 * L4_4;                          // x08:57/55
+            x09 += x20 * L4_5;                          // x09:58/--
+            x10 += x20 * L4_6;                          // x10:58/56
+            x11 += x20 * L4_7;                          // x11:59/--
+
+            x03 += x19 * L4_0;                          // x03:54/--
+            x04 += x19 * L4_1;                          // x04:54/53
+            x05 += x19 * L4_2;                          // x05:56/--
+            x06 += x19 * L4_3;                          // x06:57/53
+            x07 += x19 * L4_4;                          // x07:57/55
+            x08 += x19 * L4_5;                          // x08:58/--
+            x09 += x19 * L4_6;                          // x09:58/56
+            x10 += x19 * L4_7;                          // x10:59/--
+            // Propagate carries upward from x14 through x18, then fold x18 and x17.
+            x15 += (x14 >> 28); x14 &= M28UL;           // x15:59/31, x14:28/--
+            x16 += (x15 >> 28); x15 &= M28UL;           // x16:59/32, x15:28/--
+            x17 += (x16 >> 28); x16 &= M28UL;           // x17:31/29, x16:28/--
+            x18 += (x17 >> 28); x17 &= M28UL;           // x18:28/04, x17:28/--
+
+            x02 += x18 * L4_0;                          // x02:54/--
+            x03 += x18 * L4_1;                          // x03:55/--
+            x04 += x18 * L4_2;                          // x04:56/32
+            x05 += x18 * L4_3;                          // x05:57/32
+            x06 += x18 * L4_4;                          // x06:57/56
+            x07 += x18 * L4_5;                          // x07:58/--
+            x08 += x18 * L4_6;                          // x08:58/56
+            x09 += x18 * L4_7;                          // x09:59/--
+
+            x01 += x17 * L4_0;                          // x01:54/--
+            x02 += x17 * L4_1;                          // x02:54/53
+            x03 += x17 * L4_2;                          // x03:56/53
+            x04 += x17 * L4_3;                          // x04:57/32
+            x05 += x17 * L4_4;                          // x05:57/55
+            x06 += x17 * L4_5;                          // x06:58/--
+            x07 += x17 * L4_6;                          // x07:58/56
+            x08 += x17 * L4_7;                          // x08:59/--
+            // Final fold: x16 is scaled by 4 and joined with the bits of x15 above bit 26, then multiplied into x00..x07 via L_*. The extra +1 adds one more multiple, which the masked subtraction further down takes back when it was not needed.
+            x16 *= 4;
+            x16 += (x15 >> 26); x15 &= M26UL;
+            x16 += 1;                                   // x16:30/01
+
+            x00 += x16 * L_0;
+            x01 += x16 * L_1;
+            x02 += x16 * L_2;
+            x03 += x16 * L_3;
+            x04 += x16 * L_4;
+            x05 += x16 * L_5;
+            x06 += x16 * L_6;
+            x07 += x16 * L_7;
+            // Full carry propagation x00 -> x15; x16 is overwritten with whatever leaves x15 above bit 26.
+            x01 += (x00 >> 28); x00 &= M28UL;
+            x02 += (x01 >> 28); x01 &= M28UL;
+            x03 += (x02 >> 28); x02 &= M28UL;
+            x04 += (x03 >> 28); x03 &= M28UL;
+            x05 += (x04 >> 28); x04 &= M28UL;
+            x06 += (x05 >> 28); x05 &= M28UL;
+            x07 += (x06 >> 28); x06 &= M28UL;
+            x08 += (x07 >> 28); x07 &= M28UL;
+            x09 += (x08 >> 28); x08 &= M28UL;
+            x10 += (x09 >> 28); x09 &= M28UL;
+            x11 += (x10 >> 28); x10 &= M28UL;
+            x12 += (x11 >> 28); x11 &= M28UL;
+            x13 += (x12 >> 28); x12 &= M28UL;
+            x14 += (x13 >> 28); x13 &= M28UL;
+            x15 += (x14 >> 28); x14 &= M28UL;
+            x16  = (x15 >> 26); x15 &= M26UL;
+
+            x16 -= 1;                                   // turns the carry-out into a mask: 0 if it was 1, all-ones if it was 0 (asserted next)
+
+            Debug.Assert(x16 == 0UL || x16 == ulong.MaxValue);
+            // Branch-free conditional subtraction: L_* is subtracted from x00..x07 only where the mask is all-ones.
+            x00 -= x16 & L_0;
+            x01 -= x16 & L_1;
+            x02 -= x16 & L_2;
+            x03 -= x16 & L_3;
+            x04 -= x16 & L_4;
+            x05 -= x16 & L_5;
+            x06 -= x16 & L_6;
+            x07 -= x16 & L_7;
+            // The subtraction can wrap a limb below zero, so these carries use a signed (arithmetic) shift via the long cast.
+            x01 += (ulong)((long)x00 >> 28); x00 &= M28UL;
+            x02 += (ulong)((long)x01 >> 28); x01 &= M28UL;
+            x03 += (ulong)((long)x02 >> 28); x02 &= M28UL;
+            x04 += (ulong)((long)x03 >> 28); x03 &= M28UL;
+            x05 += (ulong)((long)x04 >> 28); x04 &= M28UL;
+            x06 += (ulong)((long)x05 >> 28); x05 &= M28UL;
+            x07 += (ulong)((long)x06 >> 28); x06 &= M28UL;
+            x08 += (ulong)((long)x07 >> 28); x07 &= M28UL;
+            x09 += (ulong)((long)x08 >> 28); x08 &= M28UL;
+            x10 += (ulong)((long)x09 >> 28); x09 &= M28UL;
+            x11 += (ulong)((long)x10 >> 28); x10 &= M28UL;
+            x12 += (ulong)((long)x11 >> 28); x11 &= M28UL;
+            x13 += (ulong)((long)x12 >> 28); x12 &= M28UL;
+            x14 += (ulong)((long)x13 >> 28); x13 &= M28UL;
+            x15 += (ulong)((long)x14 >> 28); x14 &= M28UL;
+
+            Debug.Assert(x15 >> 26 == 0UL);
+            // Pack adjacent limb pairs (low | high << 28) and write each pair at a 7-byte stride into r.
+            Codec.Encode56(x00 | (x01 << 28), r);
+            Codec.Encode56(x02 | (x03 << 28), r[7..]);
+            Codec.Encode56(x04 | (x05 << 28), r[14..]);
+            Codec.Encode56(x06 | (x07 << 28), r[21..]);
+            Codec.Encode56(x08 | (x09 << 28), r[28..]);
+            Codec.Encode56(x10 | (x11 << 28), r[35..]);
+            Codec.Encode56(x12 | (x13 << 28), r[42..]);
+            Codec.Encode56(x14 | (x15 << 28), r[49..]);
+            r[ScalarBytes - 1] = 0;                     // r is caller-supplied (it may even alias n), so the last scalar byte is cleared explicitly
+        }
+#endif
+
+        internal static byte[] Reduce912(byte[] n)
         {
             byte[] r = new byte[ScalarBytes];
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-            Reduce(n, r);
+            Reduce912(n, r);
 #else
             ulong x00 =  Codec.Decode32(n,   0);                // x00:32/--
             ulong x01 = (Codec.Decode24(n,   4) << 4);          // x01:28/--
@@ -416,7 +837,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        internal static void Reduce(ReadOnlySpan<byte> n, Span<byte> r)
+        internal static void Reduce912(ReadOnlySpan<byte> n, Span<byte> r)
         {
             ulong x00 =  Codec.Decode32(n[  0..]);              // x00:32/--
             ulong x01 = (Codec.Decode24(n[  4..]) << 4);        // x01:28/--