summary refs log tree commit diff
path: root/crypto/src/math/ec/rfc8032
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/src/math/ec/rfc8032')
-rw-r--r-- crypto/src/math/ec/rfc8032/Ed25519.cs | 49
-rw-r--r-- crypto/src/math/ec/rfc8032/Ed448.cs | 49
-rw-r--r-- crypto/src/math/ec/rfc8032/Scalar25519.cs | 319
-rw-r--r-- crypto/src/math/ec/rfc8032/Scalar448.cs | 439
-rw-r--r-- crypto/src/math/ec/rfc8032/Wnaf.cs | 2
5 files changed, 755 insertions, 103 deletions
diff --git a/crypto/src/math/ec/rfc8032/Ed25519.cs b/crypto/src/math/ec/rfc8032/Ed25519.cs
index fd2d5fe93..7318a8a7e 100644
--- a/crypto/src/math/ec/rfc8032/Ed25519.cs
+++ b/crypto/src/math/ec/rfc8032/Ed25519.cs
@@ -55,9 +55,9 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
         public static readonly int SignatureSize = PointBytes + ScalarBytes;
 
         // "SigEd25519 no Ed25519 collisions"
-        private static readonly byte[] Dom2Prefix = new byte[]{ 0x53, 0x69, 0x67, 0x45, 0x64, 0x32, 0x35, 0x35, 0x31,
-            0x39, 0x20, 0x6e, 0x6f, 0x20, 0x45, 0x64, 0x32, 0x35, 0x35, 0x31, 0x39, 0x20, 0x63, 0x6f, 0x6c, 0x6c, 0x69,
-            0x73, 0x69, 0x6f, 0x6e, 0x73 };
+        private static readonly byte[] Dom2Prefix = { 0x53, 0x69, 0x67, 0x45, 0x64, 0x32, 0x35, 0x35, 0x31, 0x39, 0x20,
+            0x6e, 0x6f, 0x20, 0x45, 0x64, 0x32, 0x35, 0x35, 0x31, 0x39, 0x20, 0x63, 0x6f, 0x6c, 0x6c, 0x69, 0x73, 0x69,
+            0x6f, 0x6e, 0x73 };
 
         private static readonly uint[] P = { 0xFFFFFFEDU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU,
             0xFFFFFFFFU, 0xFFFFFFFFU, 0x7FFFFFFFU };
@@ -149,7 +149,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
 
             byte[] result = new byte[ScalarBytes * 2];
             Codec.Encode32(t, 0, t.Length, result, 0);
-            return Scalar25519.Reduce(result);
+            return Scalar25519.Reduce512(result);
         }
 
         private static bool CheckContextVar(byte[] ctx, byte phflag)
@@ -167,13 +167,14 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             F.Sqr(p.x, u);
             F.Sqr(p.y, v);
             F.Mul(u, v, t);
-            F.Sub(v, u, v);
+            F.Sub(u, v, u);
             F.Mul(t, C_d, t);
             F.AddOne(t);
-            F.Sub(t, v, t);
+            F.Add(t, u, t);
             F.Normalize(t);
+            F.Normalize(v);
 
-            return F.IsZero(t);
+            return F.IsZero(t) & ~F.IsZero(v);
         }
 
         private static int CheckPoint(PointAccum p)
@@ -187,15 +188,17 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             F.Sqr(p.y, v);
             F.Sqr(p.z, w);
             F.Mul(u, v, t);
-            F.Sub(v, u, v);
-            F.Mul(v, w, v);
+            F.Sub(u, v, u);
+            F.Mul(u, w, u);
             F.Sqr(w, w);
             F.Mul(t, C_d, t);
             F.Add(t, w, t);
-            F.Sub(t, v, t);
+            F.Add(t, u, t);
             F.Normalize(t);
+            F.Normalize(v);
+            F.Normalize(w);
 
-            return F.IsZero(t);
+            return F.IsZero(t) & ~F.IsZero(v) & ~F.IsZero(w);
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
@@ -592,7 +595,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.DoFinal(h, 0);
 
-            byte[] r = Scalar25519.Reduce(h);
+            byte[] r = Scalar25519.Reduce512(h);
             byte[] R = new byte[PointBytes];
             ScalarMultBaseEncoded(r, R, 0);
 
@@ -605,7 +608,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.DoFinal(h, 0);
 
-            byte[] k = Scalar25519.Reduce(h);
+            byte[] k = Scalar25519.Reduce512(h);
             byte[] S = CalculateS(r, k, s);
 
             Array.Copy(R, 0, sig, sigOff, PointBytes);
@@ -697,7 +700,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.DoFinal(h);
 
             Span<byte> k = stackalloc byte[ScalarBytes];
-            Scalar25519.Reduce(h, k);
+            Scalar25519.Reduce512(h, k);
 
             Span<uint> nA = stackalloc uint[ScalarUints];
             Scalar25519.Decode(k, nA);
@@ -739,7 +742,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.DoFinal(h, 0);
 
-            byte[] k = Scalar25519.Reduce(h);
+            byte[] k = Scalar25519.Reduce512(h);
 
             uint[] nA = new uint[ScalarUints];
             Scalar25519.Decode(k, nA);
@@ -799,7 +802,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.DoFinal(h);
 
             Span<byte> k = stackalloc byte[ScalarBytes];
-            Scalar25519.Reduce(h, k);
+            Scalar25519.Reduce512(h, k);
 
             Span<uint> nA = stackalloc uint[ScalarUints];
             Scalar25519.Decode(k, nA);
@@ -840,7 +843,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.DoFinal(h, 0);
 
-            byte[] k = Scalar25519.Reduce(h);
+            byte[] k = Scalar25519.Reduce512(h);
 
             uint[] nA = new uint[ScalarUints];
             Scalar25519.Decode(k, nA);
@@ -950,7 +953,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             F.Normalize(p.y);
             F.Normalize(p.z);
 
-            return F.IsZeroVar(p.x) && F.AreEqualVar(p.y, p.z);
+            return F.IsZeroVar(p.x) && !F.IsZeroVar(p.y) && F.AreEqualVar(p.y, p.z);
         }
 
         private static void PointAdd(ref PointExtended p, ref PointExtended q, ref PointExtended r, ref PointTemp t)
@@ -1496,7 +1499,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
 #endif
 
             Scalar25519.Decode(k, n);
-            Scalar25519.ToSignedDigits(256, n, n);
+            Scalar25519.ToSignedDigits(256, n);
 
             Init(out PointPrecompZ q);
             Init(out PointTemp t);
@@ -1541,7 +1544,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
 #endif
 
             Scalar25519.Decode(k, n);
-            Scalar25519.ToSignedDigits(PrecompRange, n, n);
+            Scalar25519.ToSignedDigits(PrecompRange, n);
             GroupCombBits(n);
 
             Init(out PointPrecomp p);
@@ -1714,6 +1717,12 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             int bit = 128;
             while (--bit >= 0)
             {
+                if (((int)ws_b[bit] | (int)ws_b[128 + bit] | (int)ws_p[bit] | (int)ws_q[bit]) != 0)
+                    break;
+            }
+
+            for (; bit >= 0; --bit)
+            {
                 int wb = ws_b[bit];
                 if (wb != 0)
                 {
diff --git a/crypto/src/math/ec/rfc8032/Ed448.cs b/crypto/src/math/ec/rfc8032/Ed448.cs
index 08b64ddf2..aff9b5460 100644
--- a/crypto/src/math/ec/rfc8032/Ed448.cs
+++ b/crypto/src/math/ec/rfc8032/Ed448.cs
@@ -52,7 +52,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
         public static readonly int SignatureSize = PointBytes + ScalarBytes;
 
         // "SigEd448"
-        private static readonly byte[] Dom4Prefix = new byte[]{ 0x53, 0x69, 0x67, 0x45, 0x64, 0x34, 0x34, 0x38 };
+        private static readonly byte[] Dom4Prefix = { 0x53, 0x69, 0x67, 0x45, 0x64, 0x34, 0x34, 0x38 };
 
         private static readonly uint[] P = { 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU,
             0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFEU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU,
@@ -73,7 +73,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             0x03AC222BU, 0x0304DB8EU, 0x083EE319U, 0x05E5DB0BU, 0x0ECA503BU, 0x0B1C6539U, 0x078A8DCEU, 0x02D256BCU,
             0x04A8B05EU, 0x0BD9FD57U, 0x0A1C3CB8U };
 
-        private const int C_d = -39081;
+        private const uint C_d = 39081U;
 
         //private const int WnafWidth = 6;
         private const int WnafWidth225 = 5;
@@ -118,7 +118,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
 
             byte[] result = new byte[ScalarBytes * 2];
             Codec.Encode32(t, 0, t.Length, result, 0);
-            return Scalar448.Reduce(result);
+            return Scalar448.Reduce912(result);
         }
 
         private static bool CheckContextVar(byte[] ctx)
@@ -136,12 +136,13 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             F.Sqr(p.y, v);
             F.Mul(u, v, t);
             F.Add(u, v, u);
-            F.Mul(t, -C_d, t);
+            F.Mul(t, C_d, t);
             F.SubOne(t);
             F.Add(t, u, t);
             F.Normalize(t);
+            F.Normalize(v);
 
-            return F.IsZero(t);
+            return F.IsZero(t) & ~F.IsZero(v);
         }
 
         private static int CheckPoint(PointProjective p)
@@ -158,12 +159,14 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             F.Add(u, v, u);
             F.Mul(u, w, u);
             F.Sqr(w, w);
-            F.Mul(t, -C_d, t);
+            F.Mul(t, C_d, t);
             F.Sub(t, w, t);
             F.Add(t, u, t);
             F.Normalize(t);
+            F.Normalize(v);
+            F.Normalize(w);
 
-            return F.IsZero(t);
+            return F.IsZero(t) & ~F.IsZero(v) & ~F.IsZero(w);
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
@@ -310,7 +313,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             uint[] v = F.Create();
 
             F.Sqr(r.y, u);
-            F.Mul(u, (uint)-C_d, v);
+            F.Mul(u, C_d, v);
             F.Negate(u, u);
             F.AddOne(u);
             F.AddOne(v);
@@ -545,7 +548,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.OutputFinal(h, 0, h.Length);
 
-            byte[] r = Scalar448.Reduce(h);
+            byte[] r = Scalar448.Reduce912(h);
             byte[] R = new byte[PointBytes];
             ScalarMultBaseEncoded(r, R, 0);
 
@@ -555,7 +558,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.OutputFinal(h, 0, h.Length);
 
-            byte[] k = Scalar448.Reduce(h);
+            byte[] k = Scalar448.Reduce912(h);
             byte[] S = CalculateS(r, k, s);
 
             Array.Copy(R, 0, sig, sigOff, PointBytes);
@@ -644,7 +647,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.OutputFinal(h);
 
             Span<byte> k = stackalloc byte[ScalarBytes];
-            Scalar448.Reduce(h, k);
+            Scalar448.Reduce912(h, k);
 
             Span<uint> nA = stackalloc uint[ScalarUints];
             Scalar448.Decode(k, nA);
@@ -683,7 +686,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.OutputFinal(h, 0, h.Length);
 
-            byte[] k = Scalar448.Reduce(h);
+            byte[] k = Scalar448.Reduce912(h);
 
             uint[] nA = new uint[ScalarUints];
             Scalar448.Decode(k, nA);
@@ -740,7 +743,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.OutputFinal(h);
 
             Span<byte> k = stackalloc byte[ScalarBytes];
-            Scalar448.Reduce(h, k);
+            Scalar448.Reduce912(h, k);
 
             Span<uint> nA = stackalloc uint[ScalarUints];
             Scalar448.Decode(k, nA);
@@ -778,7 +781,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             d.BlockUpdate(m, mOff, mLen);
             d.OutputFinal(h, 0, h.Length);
 
-            byte[] k = Scalar448.Reduce(h);
+            byte[] k = Scalar448.Reduce912(h);
 
             uint[] nA = new uint[ScalarUints];
             Scalar448.Decode(k, nA);
@@ -868,7 +871,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             F.Normalize(p.y);
             F.Normalize(p.z);
 
-            return F.IsZeroVar(p.x) && F.AreEqualVar(p.y, p.z);
+            return F.IsZeroVar(p.x) && !F.IsZeroVar(p.y) && F.AreEqualVar(p.y, p.z);
         }
 
         private static void PointAdd(ref PointAffine p, ref PointProjective r, ref PointTemp t)
@@ -885,7 +888,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             F.Mul(p.x, r.x, c);
             F.Mul(p.y, r.y, d);
             F.Mul(c, d, e);
-            F.Mul(e, -C_d, e);
+            F.Mul(e, C_d, e);
             //F.Apm(b, e, f, g);
             F.Add(b, e, f);
             F.Sub(b, e, g);
@@ -920,7 +923,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             F.Mul(p.x, r.x, c);
             F.Mul(p.y, r.y, d);
             F.Mul(c, d, e);
-            F.Mul(e, -C_d, e);
+            F.Mul(e, C_d, e);
             //F.Apm(b, e, f, g);
             F.Add(b, e, f);
             F.Sub(b, e, g);
@@ -965,7 +968,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             F.Mul(p.x, r.x, c);
             F.Mul(p.y, r.y, d);
             F.Mul(c, d, e);
-            F.Mul(e, -C_d, e);
+            F.Mul(e, C_d, e);
             //F.Apm(b, e, nf, ng);
             F.Add(b, e, nf);
             F.Sub(b, e, ng);
@@ -1011,7 +1014,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             F.Mul(p.x, r.x, c);
             F.Mul(p.y, r.y, d);
             F.Mul(c, d, e);
-            F.Mul(e, -C_d, e);
+            F.Mul(e, C_d, e);
             //F.Apm(b, e, nf, ng);
             F.Add(b, e, nf);
             F.Sub(b, e, ng);
@@ -1149,7 +1152,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             PointCopy(ref p, ref q);
 
             Init(out PointProjective d);
-            PointCopy(ref q, ref d);
+            PointCopy(ref p, ref d);
             PointDouble(ref d, ref t);
 
             uint[] table = F.CreateTable(count * 3);
@@ -1581,6 +1584,12 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             int bit = 225;
             while (--bit >= 0)
             {
+                if (((int)ws_b[bit] | (int)ws_b[225 + bit] | (int)ws_p[bit] | (int)ws_q[bit]) != 0)
+                    break;
+            }
+
+            for (; bit >= 0; --bit)
+            {
                 int wb = ws_b[bit];
                 if (wb != 0)
                 {
diff --git a/crypto/src/math/ec/rfc8032/Scalar25519.cs b/crypto/src/math/ec/rfc8032/Scalar25519.cs
index 00dcd49a1..67eee6155 100644
--- a/crypto/src/math/ec/rfc8032/Scalar25519.cs
+++ b/crypto/src/math/ec/rfc8032/Scalar25519.cs
@@ -14,9 +14,9 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
     {
         internal const int Size = 8;
 
-        private const long M08L = 0x000000FFL;
+        private const int ScalarBytes = Size * 4;
+
         private const long M28L = 0x0FFFFFFFL;
-        private const long M32L = 0xFFFFFFFFL;
 
         private const int TargetLength = 254;
 
@@ -70,7 +70,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
         internal static void Multiply128Var(ReadOnlySpan<uint> x, ReadOnlySpan<uint> y128, Span<uint> z)
         {
-            Span<uint> tt = stackalloc uint[16];
+            Span<uint> tt = stackalloc uint[12];
             Nat256.Mul128(x, y128, tt);
 
             if ((int)y128[3] < 0)
@@ -79,9 +79,20 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
                 Nat256.SubFrom(x, tt[4..], 0);
             }
 
-            Span<byte> r = MemoryMarshal.AsBytes(tt);
-            Reduce(r, r);
-            tt[..Size].CopyTo(z);
+            if (BitConverter.IsLittleEndian)
+            {
+                Span<byte> r = MemoryMarshal.AsBytes(tt);
+                Reduce384(r, r);
+                tt[..Size].CopyTo(z);
+            }
+            else
+            {
+                Span<byte> r = stackalloc byte[48];
+                Codec.Encode32(tt, r);
+
+                Reduce384(r, r);
+                Decode(r, z);
+            }
         }
 #else
         internal static void Multiply128Var(uint[] x, uint[] y128, uint[] z)
@@ -95,40 +106,242 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
                 Nat256.SubFrom(x, 0, tt, 4, 0);
             }
 
-            byte[] bytes = new byte[64];
+            byte[] bytes = new byte[48];
             Codec.Encode32(tt, 0, 12, bytes, 0);
 
-            byte[] r = Reduce(bytes);
+            byte[] r = Reduce384(bytes);
             Decode(r, z);
         }
 #endif
 
-        internal static byte[] Reduce(byte[] n)
+        internal static byte[] Reduce384(byte[] n)
+        {
+            byte[] r = new byte[ScalarBytes];
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Reduce384(n, r);
+#else
+            long x00 =  Codec.Decode32(n,  0);          // x00:32/--
+            long x01 = (Codec.Decode24(n,  4) << 4);    // x01:28/--
+            long x02 =  Codec.Decode32(n,  7);          // x02:32/--
+            long x03 = (Codec.Decode24(n, 11) << 4);    // x03:28/--
+            long x04 =  Codec.Decode32(n, 14);          // x04:32/--
+            long x05 = (Codec.Decode24(n, 18) << 4);    // x05:28/--
+            long x06 =  Codec.Decode32(n, 21);          // x06:32/--
+            long x07 = (Codec.Decode24(n, 25) << 4);    // x07:28/--
+            long x08 =  Codec.Decode32(n, 28);          // x08:32/--
+            long x09 = (Codec.Decode24(n, 32) << 4);    // x09:28/--
+            long x10 =  Codec.Decode32(n, 35);          // x10:32/--
+            long x11 = (Codec.Decode24(n, 39) << 4);    // x11:28/--
+            long x12 =  Codec.Decode32(n, 42);          // x12:32/--
+            long x13 = (Codec.Decode16(n, 46) << 4);    // x13:20/--
+            long t;
+
+            // TODO Fix bounds calculations which were copied from Reduce512
+
+            x13 += (x12 >> 28); x12 &= M28L;            // x13:28/22, x12:28/--
+            x04 -= x13 * L0;                            // x04:54/49
+            x05 -= x13 * L1;                            // x05:54/53
+            x06 -= x13 * L2;                            // x06:56/--
+            x07 -= x13 * L3;                            // x07:56/52
+            x08 -= x13 * L4;                            // x08:56/52
+
+            x12 += (x11 >> 28); x11 &= M28L;            // x12:28/24, x11:28/--
+            x03 -= x12 * L0;                            // x03:54/49
+            x04 -= x12 * L1;                            // x04:54/51
+            x05 -= x12 * L2;                            // x05:56/--
+            x06 -= x12 * L3;                            // x06:56/52
+            x07 -= x12 * L4;                            // x07:56/53
+
+            x11 += (x10 >> 28); x10 &= M28L;            // x11:29/--, x10:28/--
+            x02 -= x11 * L0;                            // x02:55/32
+            x03 -= x11 * L1;                            // x03:55/--
+            x04 -= x11 * L2;                            // x04:56/55
+            x05 -= x11 * L3;                            // x05:56/52
+            x06 -= x11 * L4;                            // x06:56/53
+
+            x10 += (x09 >> 28); x09 &= M28L;            // x10:29/--, x09:28/--
+            x01 -= x10 * L0;                            // x01:55/28
+            x02 -= x10 * L1;                            // x02:55/54
+            x03 -= x10 * L2;                            // x03:56/55
+            x04 -= x10 * L3;                            // x04:57/--
+            x05 -= x10 * L4;                            // x05:56/53
+
+            x08 += (x07 >> 28); x07 &= M28L;            // x08:56/53, x07:28/--
+            x09 += (x08 >> 28); x08 &= M28L;            // x09:29/25, x08:28/--
+
+            t    = (x08 >> 27) & 1L;
+            x09 += t;                                   // x09:29/26
+
+            x00 -= x09 * L0;                            // x00:55/53
+            x01 -= x09 * L1;                            // x01:55/54
+            x02 -= x09 * L2;                            // x02:57/--
+            x03 -= x09 * L3;                            // x03:57/--
+            x04 -= x09 * L4;                            // x04:57/42
+
+            x01 += (x00 >> 28); x00 &= M28L;
+            x02 += (x01 >> 28); x01 &= M28L;
+            x03 += (x02 >> 28); x02 &= M28L;
+            x04 += (x03 >> 28); x03 &= M28L;
+            x05 += (x04 >> 28); x04 &= M28L;
+            x06 += (x05 >> 28); x05 &= M28L;
+            x07 += (x06 >> 28); x06 &= M28L;
+            x08 += (x07 >> 28); x07 &= M28L;
+            x09  = (x08 >> 28); x08 &= M28L;
+
+            x09 -= t;
+
+            Debug.Assert(x09 == 0L || x09 == -1L);
+
+            x00 += x09 & L0;
+            x01 += x09 & L1;
+            x02 += x09 & L2;
+            x03 += x09 & L3;
+            x04 += x09 & L4;
+
+            x01 += (x00 >> 28); x00 &= M28L;
+            x02 += (x01 >> 28); x01 &= M28L;
+            x03 += (x02 >> 28); x02 &= M28L;
+            x04 += (x03 >> 28); x03 &= M28L;
+            x05 += (x04 >> 28); x04 &= M28L;
+            x06 += (x05 >> 28); x05 &= M28L;
+            x07 += (x06 >> 28); x06 &= M28L;
+            x08 += (x07 >> 28); x07 &= M28L;
+
+            Codec.Encode56((ulong)(x00 | (x01 << 28)), r, 0);
+            Codec.Encode56((ulong)(x02 | (x03 << 28)), r, 7);
+            Codec.Encode56((ulong)(x04 | (x05 << 28)), r, 14);
+            Codec.Encode56((ulong)(x06 | (x07 << 28)), r, 21);
+            Codec.Encode32((uint)x08, r, 28);
+#endif
+
+            return r;
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        internal static void Reduce384(ReadOnlySpan<byte> n, Span<byte> r)
+        {
+            long x00 =  Codec.Decode32(n[ 0..]);        // x00:32/--
+            long x01 = (Codec.Decode24(n[ 4..]) << 4);  // x01:28/--
+            long x02 =  Codec.Decode32(n[ 7..]);        // x02:32/--
+            long x03 = (Codec.Decode24(n[11..]) << 4);  // x03:28/--
+            long x04 =  Codec.Decode32(n[14..]);        // x04:32/--
+            long x05 = (Codec.Decode24(n[18..]) << 4);  // x05:28/--
+            long x06 =  Codec.Decode32(n[21..]);        // x06:32/--
+            long x07 = (Codec.Decode24(n[25..]) << 4);  // x07:28/--
+            long x08 =  Codec.Decode32(n[28..]);        // x08:32/--
+            long x09 = (Codec.Decode24(n[32..]) << 4);  // x09:28/--
+            long x10 =  Codec.Decode32(n[35..]);        // x10:32/--
+            long x11 = (Codec.Decode24(n[39..]) << 4);  // x11:28/--
+            long x12 =  Codec.Decode32(n[42..]);        // x12:32/--
+            long x13 = (Codec.Decode16(n[46..]) << 4);  // x13:20/--
+            long t;
+
+            // TODO Fix bounds calculations which were copied from Reduce512
+
+            x13 += (x12 >> 28); x12 &= M28L;            // x13:28/22, x12:28/--
+            x04 -= x13 * L0;                            // x04:54/49
+            x05 -= x13 * L1;                            // x05:54/53
+            x06 -= x13 * L2;                            // x06:56/--
+            x07 -= x13 * L3;                            // x07:56/52
+            x08 -= x13 * L4;                            // x08:56/52
+
+            x12 += (x11 >> 28); x11 &= M28L;            // x12:28/24, x11:28/--
+            x03 -= x12 * L0;                            // x03:54/49
+            x04 -= x12 * L1;                            // x04:54/51
+            x05 -= x12 * L2;                            // x05:56/--
+            x06 -= x12 * L3;                            // x06:56/52
+            x07 -= x12 * L4;                            // x07:56/53
+
+            x11 += (x10 >> 28); x10 &= M28L;            // x11:29/--, x10:28/--
+            x02 -= x11 * L0;                            // x02:55/32
+            x03 -= x11 * L1;                            // x03:55/--
+            x04 -= x11 * L2;                            // x04:56/55
+            x05 -= x11 * L3;                            // x05:56/52
+            x06 -= x11 * L4;                            // x06:56/53
+
+            x10 += (x09 >> 28); x09 &= M28L;            // x10:29/--, x09:28/--
+            x01 -= x10 * L0;                            // x01:55/28
+            x02 -= x10 * L1;                            // x02:55/54
+            x03 -= x10 * L2;                            // x03:56/55
+            x04 -= x10 * L3;                            // x04:57/--
+            x05 -= x10 * L4;                            // x05:56/53
+
+            x08 += (x07 >> 28); x07 &= M28L;            // x08:56/53, x07:28/--
+            x09 += (x08 >> 28); x08 &= M28L;            // x09:29/25, x08:28/--
+
+            t    = (x08 >> 27) & 1L;
+            x09 += t;                                   // x09:29/26
+
+            x00 -= x09 * L0;                            // x00:55/53
+            x01 -= x09 * L1;                            // x01:55/54
+            x02 -= x09 * L2;                            // x02:57/--
+            x03 -= x09 * L3;                            // x03:57/--
+            x04 -= x09 * L4;                            // x04:57/42
+
+            x01 += (x00 >> 28); x00 &= M28L;
+            x02 += (x01 >> 28); x01 &= M28L;
+            x03 += (x02 >> 28); x02 &= M28L;
+            x04 += (x03 >> 28); x03 &= M28L;
+            x05 += (x04 >> 28); x04 &= M28L;
+            x06 += (x05 >> 28); x05 &= M28L;
+            x07 += (x06 >> 28); x06 &= M28L;
+            x08 += (x07 >> 28); x07 &= M28L;
+            x09  = (x08 >> 28); x08 &= M28L;
+
+            x09 -= t;
+
+            Debug.Assert(x09 == 0L || x09 == -1L);
+
+            x00 += x09 & L0;
+            x01 += x09 & L1;
+            x02 += x09 & L2;
+            x03 += x09 & L3;
+            x04 += x09 & L4;
+
+            x01 += (x00 >> 28); x00 &= M28L;
+            x02 += (x01 >> 28); x01 &= M28L;
+            x03 += (x02 >> 28); x02 &= M28L;
+            x04 += (x03 >> 28); x03 &= M28L;
+            x05 += (x04 >> 28); x04 &= M28L;
+            x06 += (x05 >> 28); x05 &= M28L;
+            x07 += (x06 >> 28); x06 &= M28L;
+            x08 += (x07 >> 28); x07 &= M28L;
+
+            Codec.Encode56((ulong)(x00 | (x01 << 28)), r);
+            Codec.Encode56((ulong)(x02 | (x03 << 28)), r[7..]);
+            Codec.Encode56((ulong)(x04 | (x05 << 28)), r[14..]);
+            Codec.Encode56((ulong)(x06 | (x07 << 28)), r[21..]);
+            Codec.Encode32((uint)x08, r[28..]);
+        }
+#endif
+
+        internal static byte[] Reduce512(byte[] n)
         {
-            byte[] r = new byte[64];
+            byte[] r = new byte[ScalarBytes];
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-            Reduce(n, r);
+            Reduce512(n, r);
 #else
-            long x00 =  Codec.Decode32(n,  0)       & M32L;         // x00:32/--
-            long x01 = (Codec.Decode24(n,  4) << 4) & M32L;         // x01:28/--
-            long x02 =  Codec.Decode32(n,  7)       & M32L;         // x02:32/--
-            long x03 = (Codec.Decode24(n, 11) << 4) & M32L;         // x03:28/--
-            long x04 =  Codec.Decode32(n, 14)       & M32L;         // x04:32/--
-            long x05 = (Codec.Decode24(n, 18) << 4) & M32L;         // x05:28/--
-            long x06 =  Codec.Decode32(n, 21)       & M32L;         // x06:32/--
-            long x07 = (Codec.Decode24(n, 25) << 4) & M32L;         // x07:28/--
-            long x08 =  Codec.Decode32(n, 28)       & M32L;         // x08:32/--
-            long x09 = (Codec.Decode24(n, 32) << 4) & M32L;         // x09:28/--
-            long x10 =  Codec.Decode32(n, 35)       & M32L;         // x10:32/--
-            long x11 = (Codec.Decode24(n, 39) << 4) & M32L;         // x11:28/--
-            long x12 =  Codec.Decode32(n, 42)       & M32L;         // x12:32/--
-            long x13 = (Codec.Decode24(n, 46) << 4) & M32L;         // x13:28/--
-            long x14 =  Codec.Decode32(n, 49)       & M32L;         // x14:32/--
-            long x15 = (Codec.Decode24(n, 53) << 4) & M32L;         // x15:28/--
-            long x16 =  Codec.Decode32(n, 56)       & M32L;         // x16:32/--
-            long x17 = (Codec.Decode24(n, 60) << 4) & M32L;         // x17:28/--
-            long x18 =                 n[63]        & M08L;         // x18:08/--
+            long x00 =  Codec.Decode32(n,  0);          // x00:32/--
+            long x01 = (Codec.Decode24(n,  4) << 4);    // x01:28/--
+            long x02 =  Codec.Decode32(n,  7);          // x02:32/--
+            long x03 = (Codec.Decode24(n, 11) << 4);    // x03:28/--
+            long x04 =  Codec.Decode32(n, 14);          // x04:32/--
+            long x05 = (Codec.Decode24(n, 18) << 4);    // x05:28/--
+            long x06 =  Codec.Decode32(n, 21);          // x06:32/--
+            long x07 = (Codec.Decode24(n, 25) << 4);    // x07:28/--
+            long x08 =  Codec.Decode32(n, 28);          // x08:32/--
+            long x09 = (Codec.Decode24(n, 32) << 4);    // x09:28/--
+            long x10 =  Codec.Decode32(n, 35);          // x10:32/--
+            long x11 = (Codec.Decode24(n, 39) << 4);    // x11:28/--
+            long x12 =  Codec.Decode32(n, 42);          // x12:32/--
+            long x13 = (Codec.Decode24(n, 46) << 4);    // x13:28/--
+            long x14 =  Codec.Decode32(n, 49);          // x14:32/--
+            long x15 = (Codec.Decode24(n, 53) << 4);    // x15:28/--
+            long x16 =  Codec.Decode32(n, 56);          // x16:32/--
+            long x17 = (Codec.Decode24(n, 60) << 4);    // x17:28/--
+            long x18 =                 n[63];           // x18:08/--
             long t;
 
             //x18 += (x17 >> 28); x17 &= M28L;
@@ -246,27 +459,27 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        internal static void Reduce(ReadOnlySpan<byte> n, Span<byte> r)
+        internal static void Reduce512(ReadOnlySpan<byte> n, Span<byte> r)
         {
-            long x00 =  Codec.Decode32(n[ 0..])       & M32L;       // x00:32/--
-            long x01 = (Codec.Decode24(n[ 4..]) << 4) & M32L;       // x01:28/--
-            long x02 =  Codec.Decode32(n[ 7..])       & M32L;       // x02:32/--
-            long x03 = (Codec.Decode24(n[11..]) << 4) & M32L;       // x03:28/--
-            long x04 =  Codec.Decode32(n[14..])       & M32L;       // x04:32/--
-            long x05 = (Codec.Decode24(n[18..]) << 4) & M32L;       // x05:28/--
-            long x06 =  Codec.Decode32(n[21..])       & M32L;       // x06:32/--
-            long x07 = (Codec.Decode24(n[25..]) << 4) & M32L;       // x07:28/--
-            long x08 =  Codec.Decode32(n[28..])       & M32L;       // x08:32/--
-            long x09 = (Codec.Decode24(n[32..]) << 4) & M32L;       // x09:28/--
-            long x10 =  Codec.Decode32(n[35..])       & M32L;       // x10:32/--
-            long x11 = (Codec.Decode24(n[39..]) << 4) & M32L;       // x11:28/--
-            long x12 =  Codec.Decode32(n[42..])       & M32L;       // x12:32/--
-            long x13 = (Codec.Decode24(n[46..]) << 4) & M32L;       // x13:28/--
-            long x14 =  Codec.Decode32(n[49..])       & M32L;       // x14:32/--
-            long x15 = (Codec.Decode24(n[53..]) << 4) & M32L;       // x15:28/--
-            long x16 =  Codec.Decode32(n[56..])       & M32L;       // x16:32/--
-            long x17 = (Codec.Decode24(n[60..]) << 4) & M32L;       // x17:28/--
-            long x18 =                 n[63]          & M08L;       // x18:08/--
+            long x00 =  Codec.Decode32(n[ 0..]);        // x00:32/--
+            long x01 = (Codec.Decode24(n[ 4..]) << 4);  // x01:28/--
+            long x02 =  Codec.Decode32(n[ 7..]);        // x02:32/--
+            long x03 = (Codec.Decode24(n[11..]) << 4);  // x03:28/--
+            long x04 =  Codec.Decode32(n[14..]);        // x04:32/--
+            long x05 = (Codec.Decode24(n[18..]) << 4);  // x05:28/--
+            long x06 =  Codec.Decode32(n[21..]);        // x06:32/--
+            long x07 = (Codec.Decode24(n[25..]) << 4);  // x07:28/--
+            long x08 =  Codec.Decode32(n[28..]);        // x08:32/--
+            long x09 = (Codec.Decode24(n[32..]) << 4);  // x09:28/--
+            long x10 =  Codec.Decode32(n[35..]);        // x10:32/--
+            long x11 = (Codec.Decode24(n[39..]) << 4);  // x11:28/--
+            long x12 =  Codec.Decode32(n[42..]);        // x12:32/--
+            long x13 = (Codec.Decode24(n[46..]) << 4);  // x13:28/--
+            long x14 =  Codec.Decode32(n[49..]);        // x14:32/--
+            long x15 = (Codec.Decode24(n[53..]) << 4);  // x15:28/--
+            long x16 =  Codec.Decode32(n[56..]);        // x16:32/--
+            long x17 = (Codec.Decode24(n[60..]) << 4);  // x17:28/--
+            long x18 =                 n[63];           // x18:08/--
             long t;
 
             //x18 += (x17 >> 28); x17 &= M28L;
@@ -488,15 +701,15 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
 #endif
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        internal static void ToSignedDigits(int bits, ReadOnlySpan<uint> x, Span<uint> z)
+        internal static void ToSignedDigits(int bits, Span<uint> z)
 #else
-        internal static void ToSignedDigits(int bits, uint[] x, uint[] z)
+        internal static void ToSignedDigits(int bits, uint[] z)
 #endif
         {
             Debug.Assert(bits == 256);
             Debug.Assert(z.Length >= Size);
 
-            uint c1 = Nat.CAdd(Size, ~(int)x[0] & 1, x, L, z);  Debug.Assert(c1 == 0U);
+            uint c1 = Nat.CAddTo(Size, ~(int)z[0] & 1, L, z);   Debug.Assert(c1 == 0U);
             uint c2 = Nat.ShiftDownBit(Size, z, 1U);            Debug.Assert(c2 == (1U << 31));
         }
     }
diff --git a/crypto/src/math/ec/rfc8032/Scalar448.cs b/crypto/src/math/ec/rfc8032/Scalar448.cs
index 4afe1d2d6..124b91250 100644
--- a/crypto/src/math/ec/rfc8032/Scalar448.cs
+++ b/crypto/src/math/ec/rfc8032/Scalar448.cs
@@ -97,7 +97,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
         {
             Debug.Assert((int)y225[7] >> 31 == (int)y225[7]);
 
-            Span<uint> tt = stackalloc uint[29];
+            Span<uint> tt = stackalloc uint[22];
             Nat.Mul(y225, x, tt);
 
             if ((int)y225[7] < 0)
@@ -106,9 +106,20 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
                 Nat.SubFrom(Size, x, tt[8..]);
             }
 
-            Span<byte> r = MemoryMarshal.AsBytes(tt);
-            Reduce(r, r);
-            tt[..Size].CopyTo(z);
+            if (BitConverter.IsLittleEndian)
+            {
+                Span<byte> r = MemoryMarshal.AsBytes(tt);
+                Reduce704(r, r);
+                tt[..Size].CopyTo(z);
+            }
+            else
+            {
+                Span<byte> r = stackalloc byte[88];
+                Codec.Encode32(tt, r);
+
+                Reduce704(r, r);
+                Decode(r, z);
+            }
         }
 #else
         internal static void Multiply225Var(uint[] x, uint[] y225, uint[] z)
@@ -124,20 +135,430 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
                 Nat.SubFrom(Size, x, 0, tt, 8);
             }
 
-            byte[] bytes = new byte[114];
+            byte[] bytes = new byte[88];
             Codec.Encode32(tt, 0, 22, bytes, 0);
 
-            byte[] r = Reduce(bytes);
+            byte[] r = Reduce704(bytes);
             Decode(r, z);
         }
 #endif
 
-        internal static byte[] Reduce(byte[] n)
+        internal static byte[] Reduce704(byte[] n)
+        {
+            byte[] r = new byte[ScalarBytes];
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Reduce704(n, r);
+#else
+            ulong x00 =  Codec.Decode32(n,   0);                // x00:32/--
+            ulong x01 = (Codec.Decode24(n,   4) << 4);          // x01:28/--
+            ulong x02 =  Codec.Decode32(n,   7);                // x02:32/--
+            ulong x03 = (Codec.Decode24(n,  11) << 4);          // x03:28/--
+            ulong x04 =  Codec.Decode32(n,  14);                // x04:32/--
+            ulong x05 = (Codec.Decode24(n,  18) << 4);          // x05:28/--
+            ulong x06 =  Codec.Decode32(n,  21);                // x06:32/--
+            ulong x07 = (Codec.Decode24(n,  25) << 4);          // x07:28/--
+            ulong x08 =  Codec.Decode32(n,  28);                // x08:32/--
+            ulong x09 = (Codec.Decode24(n,  32) << 4);          // x09:28/--
+            ulong x10 =  Codec.Decode32(n,  35);                // x10:32/--
+            ulong x11 = (Codec.Decode24(n,  39) << 4);          // x11:28/--
+            ulong x12 =  Codec.Decode32(n,  42);                // x12:32/--
+            ulong x13 = (Codec.Decode24(n,  46) << 4);          // x13:28/--
+            ulong x14 =  Codec.Decode32(n,  49);                // x14:32/--
+            ulong x15 = (Codec.Decode24(n,  53) << 4);          // x15:28/--
+            ulong x16 =  Codec.Decode32(n,  56);                // x16:32/--
+            ulong x17 = (Codec.Decode24(n,  60) << 4);          // x17:28/--
+            ulong x18 =  Codec.Decode32(n,  63);                // x18:32/--
+            ulong x19 = (Codec.Decode24(n,  67) << 4);          // x19:28/--
+            ulong x20 =  Codec.Decode32(n,  70);                // x20:32/--
+            ulong x21 = (Codec.Decode24(n,  74) << 4);          // x21:28/--
+            ulong x22 =  Codec.Decode32(n,  77);                // x22:32/--
+            ulong x23 = (Codec.Decode24(n,  81) << 4);          // x23:28/--
+            ulong x24 =  Codec.Decode32(n,  84);                // x24:32/--
+            ulong x25 = 0UL;
+
+            // TODO Fix the "xNN:bits/bits" bound annotations in the comments below, which were copied from Reduce912
+
+            x25 += (x24 >> 28); x24 &= M28UL;           // x25:28/--, x24:28/--
+            x09 += x25 * L4_0;                          // x09:54/--
+            x10 += x25 * L4_1;                          // x10:54/53
+            x11 += x25 * L4_2;                          // x11:56/--
+            x12 += x25 * L4_3;                          // x12:57/--
+            x13 += x25 * L4_4;                          // x13:57/55
+            x14 += x25 * L4_5;                          // x14:58/--
+            x15 += x25 * L4_6;                          // x15:58/56
+            x16 += x25 * L4_7;                          // x16:59/--
+
+            x21 += (x20 >> 28); x20 &= M28UL;           // x21:58/--, x20:28/--
+            x22 += (x21 >> 28); x21 &= M28UL;           // x22:57/54, x21:28/--
+            x23 += (x22 >> 28); x22 &= M28UL;           // x23:45/42, x22:28/--
+            x24 += (x23 >> 28); x23 &= M28UL;           // x24:28/18, x23:28/--
+
+            x08 += x24 * L4_0;                          // x08:54/--
+            x09 += x24 * L4_1;                          // x09:55/--
+            x10 += x24 * L4_2;                          // x10:56/46
+            x11 += x24 * L4_3;                          // x11:57/46
+            x12 += x24 * L4_4;                          // x12:57/55
+            x13 += x24 * L4_5;                          // x13:58/--
+            x14 += x24 * L4_6;                          // x14:58/56
+            x15 += x24 * L4_7;                          // x15:59/--
+
+            x07 += x23 * L4_0;                          // x07:54/--
+            x08 += x23 * L4_1;                          // x08:54/53
+            x09 += x23 * L4_2;                          // x09:56/53
+            x10 += x23 * L4_3;                          // x10:57/46
+            x11 += x23 * L4_4;                          // x11:57/55
+            x12 += x23 * L4_5;                          // x12:58/--
+            x13 += x23 * L4_6;                          // x13:58/56
+            x14 += x23 * L4_7;                          // x14:59/--
+
+            x06 += x22 * L4_0;                          // x06:54/--
+            x07 += x22 * L4_1;                          // x07:54/53
+            x08 += x22 * L4_2;                          // x08:56/--
+            x09 += x22 * L4_3;                          // x09:57/53
+            x10 += x22 * L4_4;                          // x10:57/55
+            x11 += x22 * L4_5;                          // x11:58/--
+            x12 += x22 * L4_6;                          // x12:58/56
+            x13 += x22 * L4_7;                          // x13:59/--
+
+            x18 += (x17 >> 28); x17 &= M28UL;           // x18:59/31, x17:28/--
+            x19 += (x18 >> 28); x18 &= M28UL;           // x19:58/54, x18:28/--
+            x20 += (x19 >> 28); x19 &= M28UL;           // x20:30/29, x19:28/--
+            x21 += (x20 >> 28); x20 &= M28UL;           // x21:28/03, x20:28/--
+
+            x05 += x21 * L4_0;                          // x05:54/--
+            x06 += x21 * L4_1;                          // x06:55/--
+            x07 += x21 * L4_2;                          // x07:56/31
+            x08 += x21 * L4_3;                          // x08:57/31
+            x09 += x21 * L4_4;                          // x09:57/56
+            x10 += x21 * L4_5;                          // x10:58/--
+            x11 += x21 * L4_6;                          // x11:58/56
+            x12 += x21 * L4_7;                          // x12:59/--
+
+            x04 += x20 * L4_0;                          // x04:54/--
+            x05 += x20 * L4_1;                          // x05:54/53
+            x06 += x20 * L4_2;                          // x06:56/53
+            x07 += x20 * L4_3;                          // x07:57/31
+            x08 += x20 * L4_4;                          // x08:57/55
+            x09 += x20 * L4_5;                          // x09:58/--
+            x10 += x20 * L4_6;                          // x10:58/56
+            x11 += x20 * L4_7;                          // x11:59/--
+
+            x03 += x19 * L4_0;                          // x03:54/--
+            x04 += x19 * L4_1;                          // x04:54/53
+            x05 += x19 * L4_2;                          // x05:56/--
+            x06 += x19 * L4_3;                          // x06:57/53
+            x07 += x19 * L4_4;                          // x07:57/55
+            x08 += x19 * L4_5;                          // x08:58/--
+            x09 += x19 * L4_6;                          // x09:58/56
+            x10 += x19 * L4_7;                          // x10:59/--
+
+            x15 += (x14 >> 28); x14 &= M28UL;           // x15:59/31, x14:28/--
+            x16 += (x15 >> 28); x15 &= M28UL;           // x16:59/32, x15:28/--
+            x17 += (x16 >> 28); x16 &= M28UL;           // x17:31/29, x16:28/--
+            x18 += (x17 >> 28); x17 &= M28UL;           // x18:28/04, x17:28/--
+
+            x02 += x18 * L4_0;                          // x02:54/--
+            x03 += x18 * L4_1;                          // x03:55/--
+            x04 += x18 * L4_2;                          // x04:56/32
+            x05 += x18 * L4_3;                          // x05:57/32
+            x06 += x18 * L4_4;                          // x06:57/56
+            x07 += x18 * L4_5;                          // x07:58/--
+            x08 += x18 * L4_6;                          // x08:58/56
+            x09 += x18 * L4_7;                          // x09:59/--
+
+            x01 += x17 * L4_0;                          // x01:54/--
+            x02 += x17 * L4_1;                          // x02:54/53
+            x03 += x17 * L4_2;                          // x03:56/53
+            x04 += x17 * L4_3;                          // x04:57/32
+            x05 += x17 * L4_4;                          // x05:57/55
+            x06 += x17 * L4_5;                          // x06:58/--
+            x07 += x17 * L4_6;                          // x07:58/56
+            x08 += x17 * L4_7;                          // x08:59/--
+
+            x16 *= 4;
+            x16 += (x15 >> 26); x15 &= M26UL;
+            x16 += 1;                                   // x16:30/01
+
+            x00 += x16 * L_0;
+            x01 += x16 * L_1;
+            x02 += x16 * L_2;
+            x03 += x16 * L_3;
+            x04 += x16 * L_4;
+            x05 += x16 * L_5;
+            x06 += x16 * L_6;
+            x07 += x16 * L_7;
+
+            x01 += (x00 >> 28); x00 &= M28UL;
+            x02 += (x01 >> 28); x01 &= M28UL;
+            x03 += (x02 >> 28); x02 &= M28UL;
+            x04 += (x03 >> 28); x03 &= M28UL;
+            x05 += (x04 >> 28); x04 &= M28UL;
+            x06 += (x05 >> 28); x05 &= M28UL;
+            x07 += (x06 >> 28); x06 &= M28UL;
+            x08 += (x07 >> 28); x07 &= M28UL;
+            x09 += (x08 >> 28); x08 &= M28UL;
+            x10 += (x09 >> 28); x09 &= M28UL;
+            x11 += (x10 >> 28); x10 &= M28UL;
+            x12 += (x11 >> 28); x11 &= M28UL;
+            x13 += (x12 >> 28); x12 &= M28UL;
+            x14 += (x13 >> 28); x13 &= M28UL;
+            x15 += (x14 >> 28); x14 &= M28UL;
+            x16  = (x15 >> 26); x15 &= M26UL;
+
+            x16 -= 1;
+
+            Debug.Assert(x16 == 0UL || x16 == ulong.MaxValue);
+
+            x00 -= x16 & L_0;
+            x01 -= x16 & L_1;
+            x02 -= x16 & L_2;
+            x03 -= x16 & L_3;
+            x04 -= x16 & L_4;
+            x05 -= x16 & L_5;
+            x06 -= x16 & L_6;
+            x07 -= x16 & L_7;
+
+            x01 += (ulong)((long)x00 >> 28); x00 &= M28UL;
+            x02 += (ulong)((long)x01 >> 28); x01 &= M28UL;
+            x03 += (ulong)((long)x02 >> 28); x02 &= M28UL;
+            x04 += (ulong)((long)x03 >> 28); x03 &= M28UL;
+            x05 += (ulong)((long)x04 >> 28); x04 &= M28UL;
+            x06 += (ulong)((long)x05 >> 28); x05 &= M28UL;
+            x07 += (ulong)((long)x06 >> 28); x06 &= M28UL;
+            x08 += (ulong)((long)x07 >> 28); x07 &= M28UL;
+            x09 += (ulong)((long)x08 >> 28); x08 &= M28UL;
+            x10 += (ulong)((long)x09 >> 28); x09 &= M28UL;
+            x11 += (ulong)((long)x10 >> 28); x10 &= M28UL;
+            x12 += (ulong)((long)x11 >> 28); x11 &= M28UL;
+            x13 += (ulong)((long)x12 >> 28); x12 &= M28UL;
+            x14 += (ulong)((long)x13 >> 28); x13 &= M28UL;
+            x15 += (ulong)((long)x14 >> 28); x14 &= M28UL;
+
+            Debug.Assert(x15 >> 26 == 0UL);
+
+            Codec.Encode56(x00 | (x01 << 28), r,  0);
+            Codec.Encode56(x02 | (x03 << 28), r,  7);
+            Codec.Encode56(x04 | (x05 << 28), r, 14);
+            Codec.Encode56(x06 | (x07 << 28), r, 21);
+            Codec.Encode56(x08 | (x09 << 28), r, 28);
+            Codec.Encode56(x10 | (x11 << 28), r, 35);
+            Codec.Encode56(x12 | (x13 << 28), r, 42);
+            Codec.Encode56(x14 | (x15 << 28), r, 49);
+            //r[ScalarBytes - 1] = 0;  // not needed here: r was just allocated, so its last byte is already zero
+#endif
+
+            return r;
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        internal static void Reduce704(ReadOnlySpan<byte> n, Span<byte> r)
+        {
+            ulong x00 =  Codec.Decode32(n[  0..]);              // x00:32/--
+            ulong x01 = (Codec.Decode24(n[  4..]) << 4);        // x01:28/--
+            ulong x02 =  Codec.Decode32(n[  7..]);              // x02:32/--
+            ulong x03 = (Codec.Decode24(n[ 11..]) << 4);        // x03:28/--
+            ulong x04 =  Codec.Decode32(n[ 14..]);              // x04:32/--
+            ulong x05 = (Codec.Decode24(n[ 18..]) << 4);        // x05:28/--
+            ulong x06 =  Codec.Decode32(n[ 21..]);              // x06:32/--
+            ulong x07 = (Codec.Decode24(n[ 25..]) << 4);        // x07:28/--
+            ulong x08 =  Codec.Decode32(n[ 28..]);              // x08:32/--
+            ulong x09 = (Codec.Decode24(n[ 32..]) << 4);        // x09:28/--
+            ulong x10 =  Codec.Decode32(n[ 35..]);              // x10:32/--
+            ulong x11 = (Codec.Decode24(n[ 39..]) << 4);        // x11:28/--
+            ulong x12 =  Codec.Decode32(n[ 42..]);              // x12:32/--
+            ulong x13 = (Codec.Decode24(n[ 46..]) << 4);        // x13:28/--
+            ulong x14 =  Codec.Decode32(n[ 49..]);              // x14:32/--
+            ulong x15 = (Codec.Decode24(n[ 53..]) << 4);        // x15:28/--
+            ulong x16 =  Codec.Decode32(n[ 56..]);              // x16:32/--
+            ulong x17 = (Codec.Decode24(n[ 60..]) << 4);        // x17:28/--
+            ulong x18 =  Codec.Decode32(n[ 63..]);              // x18:32/--
+            ulong x19 = (Codec.Decode24(n[ 67..]) << 4);        // x19:28/--
+            ulong x20 =  Codec.Decode32(n[ 70..]);              // x20:32/--
+            ulong x21 = (Codec.Decode24(n[ 74..]) << 4);        // x21:28/--
+            ulong x22 =  Codec.Decode32(n[ 77..]);              // x22:32/--
+            ulong x23 = (Codec.Decode24(n[ 81..]) << 4);        // x23:28/--
+            ulong x24 =  Codec.Decode32(n[ 84..]);              // x24:32/--
+            ulong x25 = 0UL;
+
+            // TODO Fix the "xNN:bits/bits" bound annotations in the comments below, which were copied from Reduce912
+
+            x25 += (x24 >> 28); x24 &= M28UL;           // x25:28/--, x24:28/--
+            x09 += x25 * L4_0;                          // x09:54/--
+            x10 += x25 * L4_1;                          // x10:54/53
+            x11 += x25 * L4_2;                          // x11:56/--
+            x12 += x25 * L4_3;                          // x12:57/--
+            x13 += x25 * L4_4;                          // x13:57/55
+            x14 += x25 * L4_5;                          // x14:58/--
+            x15 += x25 * L4_6;                          // x15:58/56
+            x16 += x25 * L4_7;                          // x16:59/--
+
+            x21 += (x20 >> 28); x20 &= M28UL;           // x21:58/--, x20:28/--
+            x22 += (x21 >> 28); x21 &= M28UL;           // x22:57/54, x21:28/--
+            x23 += (x22 >> 28); x22 &= M28UL;           // x23:45/42, x22:28/--
+            x24 += (x23 >> 28); x23 &= M28UL;           // x24:28/18, x23:28/--
+
+            x08 += x24 * L4_0;                          // x08:54/--
+            x09 += x24 * L4_1;                          // x09:55/--
+            x10 += x24 * L4_2;                          // x10:56/46
+            x11 += x24 * L4_3;                          // x11:57/46
+            x12 += x24 * L4_4;                          // x12:57/55
+            x13 += x24 * L4_5;                          // x13:58/--
+            x14 += x24 * L4_6;                          // x14:58/56
+            x15 += x24 * L4_7;                          // x15:59/--
+
+            x07 += x23 * L4_0;                          // x07:54/--
+            x08 += x23 * L4_1;                          // x08:54/53
+            x09 += x23 * L4_2;                          // x09:56/53
+            x10 += x23 * L4_3;                          // x10:57/46
+            x11 += x23 * L4_4;                          // x11:57/55
+            x12 += x23 * L4_5;                          // x12:58/--
+            x13 += x23 * L4_6;                          // x13:58/56
+            x14 += x23 * L4_7;                          // x14:59/--
+
+            x06 += x22 * L4_0;                          // x06:54/--
+            x07 += x22 * L4_1;                          // x07:54/53
+            x08 += x22 * L4_2;                          // x08:56/--
+            x09 += x22 * L4_3;                          // x09:57/53
+            x10 += x22 * L4_4;                          // x10:57/55
+            x11 += x22 * L4_5;                          // x11:58/--
+            x12 += x22 * L4_6;                          // x12:58/56
+            x13 += x22 * L4_7;                          // x13:59/--
+
+            x18 += (x17 >> 28); x17 &= M28UL;           // x18:59/31, x17:28/--
+            x19 += (x18 >> 28); x18 &= M28UL;           // x19:58/54, x18:28/--
+            x20 += (x19 >> 28); x19 &= M28UL;           // x20:30/29, x19:28/--
+            x21 += (x20 >> 28); x20 &= M28UL;           // x21:28/03, x20:28/--
+
+            x05 += x21 * L4_0;                          // x05:54/--
+            x06 += x21 * L4_1;                          // x06:55/--
+            x07 += x21 * L4_2;                          // x07:56/31
+            x08 += x21 * L4_3;                          // x08:57/31
+            x09 += x21 * L4_4;                          // x09:57/56
+            x10 += x21 * L4_5;                          // x10:58/--
+            x11 += x21 * L4_6;                          // x11:58/56
+            x12 += x21 * L4_7;                          // x12:59/--
+
+            x04 += x20 * L4_0;                          // x04:54/--
+            x05 += x20 * L4_1;                          // x05:54/53
+            x06 += x20 * L4_2;                          // x06:56/53
+            x07 += x20 * L4_3;                          // x07:57/31
+            x08 += x20 * L4_4;                          // x08:57/55
+            x09 += x20 * L4_5;                          // x09:58/--
+            x10 += x20 * L4_6;                          // x10:58/56
+            x11 += x20 * L4_7;                          // x11:59/--
+
+            x03 += x19 * L4_0;                          // x03:54/--
+            x04 += x19 * L4_1;                          // x04:54/53
+            x05 += x19 * L4_2;                          // x05:56/--
+            x06 += x19 * L4_3;                          // x06:57/53
+            x07 += x19 * L4_4;                          // x07:57/55
+            x08 += x19 * L4_5;                          // x08:58/--
+            x09 += x19 * L4_6;                          // x09:58/56
+            x10 += x19 * L4_7;                          // x10:59/--
+
+            x15 += (x14 >> 28); x14 &= M28UL;           // x15:59/31, x14:28/--
+            x16 += (x15 >> 28); x15 &= M28UL;           // x16:59/32, x15:28/--
+            x17 += (x16 >> 28); x16 &= M28UL;           // x17:31/29, x16:28/--
+            x18 += (x17 >> 28); x17 &= M28UL;           // x18:28/04, x17:28/--
+
+            x02 += x18 * L4_0;                          // x02:54/--
+            x03 += x18 * L4_1;                          // x03:55/--
+            x04 += x18 * L4_2;                          // x04:56/32
+            x05 += x18 * L4_3;                          // x05:57/32
+            x06 += x18 * L4_4;                          // x06:57/56
+            x07 += x18 * L4_5;                          // x07:58/--
+            x08 += x18 * L4_6;                          // x08:58/56
+            x09 += x18 * L4_7;                          // x09:59/--
+
+            x01 += x17 * L4_0;                          // x01:54/--
+            x02 += x17 * L4_1;                          // x02:54/53
+            x03 += x17 * L4_2;                          // x03:56/53
+            x04 += x17 * L4_3;                          // x04:57/32
+            x05 += x17 * L4_4;                          // x05:57/55
+            x06 += x17 * L4_5;                          // x06:58/--
+            x07 += x17 * L4_6;                          // x07:58/56
+            x08 += x17 * L4_7;                          // x08:59/--
+
+            x16 *= 4;
+            x16 += (x15 >> 26); x15 &= M26UL;
+            x16 += 1;                                   // x16:30/01
+
+            x00 += x16 * L_0;
+            x01 += x16 * L_1;
+            x02 += x16 * L_2;
+            x03 += x16 * L_3;
+            x04 += x16 * L_4;
+            x05 += x16 * L_5;
+            x06 += x16 * L_6;
+            x07 += x16 * L_7;
+
+            x01 += (x00 >> 28); x00 &= M28UL;
+            x02 += (x01 >> 28); x01 &= M28UL;
+            x03 += (x02 >> 28); x02 &= M28UL;
+            x04 += (x03 >> 28); x03 &= M28UL;
+            x05 += (x04 >> 28); x04 &= M28UL;
+            x06 += (x05 >> 28); x05 &= M28UL;
+            x07 += (x06 >> 28); x06 &= M28UL;
+            x08 += (x07 >> 28); x07 &= M28UL;
+            x09 += (x08 >> 28); x08 &= M28UL;
+            x10 += (x09 >> 28); x09 &= M28UL;
+            x11 += (x10 >> 28); x10 &= M28UL;
+            x12 += (x11 >> 28); x11 &= M28UL;
+            x13 += (x12 >> 28); x12 &= M28UL;
+            x14 += (x13 >> 28); x13 &= M28UL;
+            x15 += (x14 >> 28); x14 &= M28UL;
+            x16  = (x15 >> 26); x15 &= M26UL;
+
+            x16 -= 1;
+
+            Debug.Assert(x16 == 0UL || x16 == ulong.MaxValue);
+
+            x00 -= x16 & L_0;
+            x01 -= x16 & L_1;
+            x02 -= x16 & L_2;
+            x03 -= x16 & L_3;
+            x04 -= x16 & L_4;
+            x05 -= x16 & L_5;
+            x06 -= x16 & L_6;
+            x07 -= x16 & L_7;
+
+            x01 += (ulong)((long)x00 >> 28); x00 &= M28UL;
+            x02 += (ulong)((long)x01 >> 28); x01 &= M28UL;
+            x03 += (ulong)((long)x02 >> 28); x02 &= M28UL;
+            x04 += (ulong)((long)x03 >> 28); x03 &= M28UL;
+            x05 += (ulong)((long)x04 >> 28); x04 &= M28UL;
+            x06 += (ulong)((long)x05 >> 28); x05 &= M28UL;
+            x07 += (ulong)((long)x06 >> 28); x06 &= M28UL;
+            x08 += (ulong)((long)x07 >> 28); x07 &= M28UL;
+            x09 += (ulong)((long)x08 >> 28); x08 &= M28UL;
+            x10 += (ulong)((long)x09 >> 28); x09 &= M28UL;
+            x11 += (ulong)((long)x10 >> 28); x10 &= M28UL;
+            x12 += (ulong)((long)x11 >> 28); x11 &= M28UL;
+            x13 += (ulong)((long)x12 >> 28); x12 &= M28UL;
+            x14 += (ulong)((long)x13 >> 28); x13 &= M28UL;
+            x15 += (ulong)((long)x14 >> 28); x14 &= M28UL;
+
+            Debug.Assert(x15 >> 26 == 0UL);
+
+            Codec.Encode56(x00 | (x01 << 28), r);
+            Codec.Encode56(x02 | (x03 << 28), r[7..]);
+            Codec.Encode56(x04 | (x05 << 28), r[14..]);
+            Codec.Encode56(x06 | (x07 << 28), r[21..]);
+            Codec.Encode56(x08 | (x09 << 28), r[28..]);
+            Codec.Encode56(x10 | (x11 << 28), r[35..]);
+            Codec.Encode56(x12 | (x13 << 28), r[42..]);
+            Codec.Encode56(x14 | (x15 << 28), r[49..]);
+            r[ScalarBytes - 1] = 0;
+        }
+#endif
+
+        internal static byte[] Reduce912(byte[] n)
         {
             byte[] r = new byte[ScalarBytes];
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-            Reduce(n, r);
+            Reduce912(n, r);
 #else
             ulong x00 =  Codec.Decode32(n,   0);                // x00:32/--
             ulong x01 = (Codec.Decode24(n,   4) << 4);          // x01:28/--
@@ -416,7 +837,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
         }
 
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-        internal static void Reduce(ReadOnlySpan<byte> n, Span<byte> r)
+        internal static void Reduce912(ReadOnlySpan<byte> n, Span<byte> r)
         {
             ulong x00 =  Codec.Decode32(n[  0..]);              // x00:32/--
             ulong x01 = (Codec.Decode24(n[  4..]) << 4);        // x01:28/--
diff --git a/crypto/src/math/ec/rfc8032/Wnaf.cs b/crypto/src/math/ec/rfc8032/Wnaf.cs
index 88319f405..209934031 100644
--- a/crypto/src/math/ec/rfc8032/Wnaf.cs
+++ b/crypto/src/math/ec/rfc8032/Wnaf.cs
@@ -42,7 +42,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
                 {
                     int word16 = (int)(word >> j);
 
-                    int skip = Integers.NumberOfTrailingZeros((sign ^ word16) | 0x00010000);
+                    int skip = Integers.NumberOfTrailingZeros((sign ^ word16) | (1 << 16));
                     if (skip > 0)
                     {
                         j += skip;