summary refs log tree commit diff
diff options
context:
space:
mode:
authorPeter Dettman <peter.dettman@bouncycastle.org>2022-07-04 16:17:35 +0700
committerPeter Dettman <peter.dettman@bouncycastle.org>2022-07-04 16:17:35 +0700
commit8e29badb3fcfb84fbc271dd1ae7de416c99ca294 (patch)
tree00652960807924a5781857caf9f7e8434f13900a
parentSIKE implementation with compression added (diff)
downloadBouncyCastle.NET-ed25519-8e29badb3fcfb84fbc271dd1ae7de416c99ca294.tar.xz
Ed25519 overhaul
- improved performance
- reduced allocation
- comments and references
-rw-r--r--crypto/src/math/ec/rfc8032/Ed25519.cs514
1 files changed, 277 insertions, 237 deletions
diff --git a/crypto/src/math/ec/rfc8032/Ed25519.cs b/crypto/src/math/ec/rfc8032/Ed25519.cs
index 49f7b23a9..11ac809ed 100644
--- a/crypto/src/math/ec/rfc8032/Ed25519.cs
+++ b/crypto/src/math/ec/rfc8032/Ed25519.cs
@@ -10,6 +10,18 @@ using Org.BouncyCastle.Utilities;
 
 namespace Org.BouncyCastle.Math.EC.Rfc8032
 {
+    /// <summary>
+    /// A low-level implementation of the Ed25519, Ed25519ctx, and Ed25519ph instantiations of the Edwards-Curve Digital
+    /// Signature Algorithm specified in <a href="https://www.rfc-editor.org/rfc/rfc8032">RFC 8032</a>.
+    /// </summary>
+    /// <remarks>
+    /// The implementation strategy is mostly drawn from <a href="https://ia.cr/2012/309">
+    /// Mike Hamburg, "Fast and compact elliptic-curve cryptography"</a>, notably the "signed multi-comb" algorithm (for
+    /// scalar multiplication by a fixed point), the "half Niels coordinates" (for precomputed points), and the
+    /// "extensible coordinates" (for accumulators). Standard
+    /// <a href="http://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html">extended coordinates</a> are used during
+    /// precomputations, needing only a single extra point addition formula.
+    /// </remarks>
     public static class Ed25519
     {
         // -x^2 + y^2 == 1 + 0x52036CEE2B6FFE738CC740797779E89800700A4D4141D8AB75EB4DCA135978A3 * x^2 * y^2
@@ -47,16 +59,18 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
         private static readonly uint[] L = { 0x5CF5D3EDU, 0x5812631AU, 0xA2F79CD6U, 0x14DEF9DEU, 0x00000000U,
             0x00000000U, 0x00000000U, 0x10000000U };
 
-        private const int L0 = unchecked((int)0xFCF5D3ED);  // L0:26/--
-        private const int L1 =                0x012631A6;   // L1:24/22
-        private const int L2 =                0x079CD658;   // L2:27/--
-        private const int L3 = unchecked((int)0xFF9DEA2F);  // L3:23/--
-        private const int L4 =                0x000014DF;   // L4:12/11
+        private const int L0 = -0x030A2C13;     // L0:26/--
+        private const int L1 =  0x012631A6;     // L1:24/22
+        private const int L2 =  0x079CD658;     // L2:27/--
+        private const int L3 = -0x006215D1;     // L3:23/--
+        private const int L4 =  0x000014DF;     // L4:12/11
 
         private static readonly int[] B_x = { 0x0325D51A, 0x018B5823, 0x007B2C95, 0x0304A92D, 0x00D2598E, 0x01D6DC5C,
             0x01388C7F, 0x013FEC0A, 0x029E6B72, 0x0042D26D };
         private static readonly int[] B_y = { 0x02666658, 0x01999999, 0x00666666, 0x03333333, 0x00CCCCCC, 0x02666666,
             0x01999999, 0x00666666, 0x03333333, 0x00CCCCCC, };
+
+        // Note that d == -121665/121666
         private static readonly int[] C_d = { 0x035978A3, 0x02D37284, 0x018AB75E, 0x026A0A0E, 0x0000E014, 0x0379E898,
             0x01D01E5D, 0x01E738CC, 0x03715B7F, 0x00A406D9 };
         private static readonly int[] C_d2 = { 0x02B2F159, 0x01A6E509, 0x01156EBD, 0x00D4141D, 0x0001C029, 0x02F3D130,
@@ -64,12 +78,14 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
         private static readonly int[] C_d4 = { 0x0165E2B2, 0x034DCA13, 0x002ADD7A, 0x01A8283B, 0x00038052, 0x01E7A260,
             0x03407977, 0x019CE331, 0x01C56DFF, 0x00901B67 };
 
+        private const int WnafWidth = 5;
         private const int WnafWidthBase = 7;
 
         // ScalarMultBase is hard-coded for these values of blocks, teeth, spacing so they can't be freely changed
         private const int PrecompBlocks = 8;
         private const int PrecompTeeth = 4;
         private const int PrecompSpacing = 8;
+        //private const int PrecompRange = PrecompBlocks * PrecompTeeth * PrecompSpacing; // range == 256
         private const int PrecompPoints = 1 << (PrecompTeeth - 1);
         private const int PrecompMask = PrecompPoints - 1;
 
@@ -94,7 +110,23 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
 
         private struct PointPrecomp
         {
-            internal int[] ypx_h, ymx_h, xyd;
+            internal int[] ymx_h;       // (y - x)/2
+            internal int[] ypx_h;       // (y + x)/2
+            internal int[] xyd;         // x.y.d
+        }
+
+        private struct PointPrecompZ
+        {
+            internal int[] ymx_h;       // (y - x)/2
+            internal int[] ypx_h;       // (y + x)/2
+            internal int[] xyd;         // x.y.d
+            internal int[] z;
+        }
+
+        // Temp space to avoid allocations in point formulae.
+        private struct PointTemp
+        {
+            internal int[] r0, r1;
         }
 
         private static byte[] CalculateS(byte[] r, byte[] k, byte[] s)
@@ -515,9 +547,23 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
 
         private static void Init(out PointPrecomp r)
         {
+            r.ymx_h = F.Create();
             r.ypx_h = F.Create();
+            r.xyd = F.Create();
+        }
+
+        private static void Init(out PointPrecompZ r)
+        {
             r.ymx_h = F.Create();
+            r.ypx_h = F.Create();
             r.xyd = F.Create();
+            r.z = F.Create();
+        }
+
+        private static void Init(out PointTemp r)
+        {
+            r.r0 = F.Create();
+            r.r1 = F.Create();
         }
 
         private static void InvertDoubleZs(PointExtended[] points)
@@ -564,191 +610,148 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             return F.IsZeroVar(x) && F.AreEqualVar(y, z);
         }
 
-        private static void PointAdd(ref PointExtended p, ref PointAccum r)
+        private static void PointAdd(ref PointExtended p, ref PointExtended q, ref PointExtended r, ref PointTemp t)
         {
-            int[] a = F.Create();
-            int[] b = F.Create();
-            int[] c = F.Create();
-            int[] d = F.Create();
-            int[] e = r.u;
-            int[] f = F.Create();
-            int[] g = F.Create();
-            int[] h = r.v;
+            // p may ref the same point as r (or q), but q may not ref the same point as r.
+            Debug.Assert(q.x != r.x & q.y != r.y && q.z != r.z && q.t != r.t);
 
-            F.Apm(r.y, r.x, b, a);
-            F.Apm(p.y, p.x, d, c);
+            int[] a = r.x;
+            int[] b = r.y;
+            int[] c = t.r0;
+            int[] d = t.r1;
+            int[] e = a;
+            int[] f = c;
+            int[] g = d;
+            int[] h = b;
+
+            F.Apm(p.y, p.x, b, a);
+            F.Apm(q.y, q.x, d, c);
             F.Mul(a, c, a);
             F.Mul(b, d, b);
-            F.Mul(r.u, r.v, c);
-            F.Mul(c, p.t, c);
+            F.Mul(p.t, q.t, c);
             F.Mul(c, C_d2, c);
-            F.Mul(r.z, p.z, d);
-            F.Add(d, d, d);
+            F.Add(p.z, p.z, d);
+            F.Mul(d, q.z, d);
             F.Apm(b, a, h, e);
             F.Apm(d, c, g, f);
-            F.Carry(g);
-            F.Mul(e, f, r.x);
-            F.Mul(g, h, r.y);
+            F.Mul(e, h, r.t);
             F.Mul(f, g, r.z);
+            F.Mul(e, f, r.x);
+            F.Mul(h, g, r.y);
         }
 
-        private static void PointAdd(ref PointExtended p, ref PointExtended r)
+        private static void PointAdd(ref PointPrecomp p, ref PointAccum r, ref PointTemp t)
         {
-            int[] a = F.Create();
-            int[] b = F.Create();
-            int[] c = F.Create();
-            int[] d = F.Create();
-            int[] e = F.Create();
-            int[] f = F.Create();
-            int[] g = F.Create();
-            int[] h = F.Create();
+            int[] a = r.x;
+            int[] b = r.y;
+            int[] c = t.r0;
+            int[] e = r.u;
+            int[] f = a;
+            int[] g = b;
+            int[] h = r.v;
 
-            F.Apm(p.y, p.x, b, a);
-            F.Apm(r.y, r.x, d, c);
-            F.Mul(a, c, a);
-            F.Mul(b, d, b);
-            F.Mul(p.t, r.t, c);
-            F.Mul(c, C_d2, c);
-            F.Mul(p.z, r.z, d);
-            F.Add(d, d, d);
+            F.Apm(r.y, r.x, b, a);
+            F.Mul(a, p.ymx_h, a);
+            F.Mul(b, p.ypx_h, b);
+            F.Mul(r.u, r.v, c);
+            F.Mul(c, p.xyd, c);
             F.Apm(b, a, h, e);
-            F.Apm(d, c, g, f);
-            F.Carry(g);
-            F.Mul(e, f, r.x);
-            F.Mul(g, h, r.y);
+            F.Apm(r.z, c, g, f);
             F.Mul(f, g, r.z);
-            F.Mul(e, h, r.t);
+            F.Mul(f, e, r.x);
+            F.Mul(g, h, r.y);
         }
 
-        private static void PointAddVar(bool negate, ref PointExtended p, ref PointAccum r)
+        private static void PointAdd(ref PointPrecompZ p, ref PointAccum r, ref PointTemp t)
         {
-            int[] a = F.Create();
-            int[] b = F.Create();
-            int[] c = F.Create();
-            int[] d = F.Create();
+            int[] a = r.x;
+            int[] b = r.y;
+            int[] c = t.r0;
+            int[] d = r.z;
             int[] e = r.u;
-            int[] f = F.Create();
-            int[] g = F.Create();
+            int[] f = a;
+            int[] g = b;
             int[] h = r.v;
 
-            int[] nc, nd, nf, ng;
-            if (negate)
-            {
-                nc = d; nd = c; nf = g; ng = f;
-            }
-            else
-            {
-                nc = c; nd = d; nf = f; ng = g;
-            }
-
             F.Apm(r.y, r.x, b, a);
-            F.Apm(p.y, p.x, nd, nc);
-            F.Mul(a, c, a);
-            F.Mul(b, d, b);
+            F.Mul(a, p.ymx_h, a);
+            F.Mul(b, p.ypx_h, b);
             F.Mul(r.u, r.v, c);
-            F.Mul(c, p.t, c);
-            F.Mul(c, C_d2, c);
+            F.Mul(c, p.xyd, c);
             F.Mul(r.z, p.z, d);
-            F.Add(d, d, d);
             F.Apm(b, a, h, e);
-            F.Apm(d, c, ng, nf);
-            F.Carry(ng);
-            F.Mul(e, f, r.x);
-            F.Mul(g, h, r.y);
+            F.Apm(d, c, g, f);
             F.Mul(f, g, r.z);
+            F.Mul(f, e, r.x);
+            F.Mul(g, h, r.y);
         }
 
-        private static void PointAddVar(bool negate, ref PointExtended p, ref PointExtended q, ref PointExtended r)
+        private static void PointAddVar(bool negate, ref PointPrecomp p, ref PointAccum r, ref PointTemp t)
         {
-            int[] a = F.Create();
-            int[] b = F.Create();
-            int[] c = F.Create();
-            int[] d = F.Create();
-            int[] e = F.Create();
-            int[] f = F.Create();
-            int[] g = F.Create();
-            int[] h = F.Create();
+            int[] a = r.x;
+            int[] b = r.y;
+            int[] c = t.r0;
+            int[] e = r.u;
+            int[] f = a;
+            int[] g = b;
+            int[] h = r.v;
 
-            int[] nc, nd, nf, ng;
+            int[] na, nb;
             if (negate)
             {
-                nc = d; nd = c; nf = g; ng = f;
+                na = b; nb = a;
             }
             else
             {
-                nc = c; nd = d; nf = f; ng = g;
+                na = a; nb = b;
             }
-
-            F.Apm(p.y, p.x, b, a);
-            F.Apm(q.y, q.x, nd, nc);
-            F.Mul(a, c, a);
-            F.Mul(b, d, b);
-            F.Mul(p.t, q.t, c);
-            F.Mul(c, C_d2, c);
-            F.Mul(p.z, q.z, d);
-            F.Add(d, d, d);
-            F.Apm(b, a, h, e);
-            F.Apm(d, c, ng, nf);
-            F.Carry(ng);
-            F.Mul(e, f, r.x);
-            F.Mul(g, h, r.y);
-            F.Mul(f, g, r.z);
-            F.Mul(e, h, r.t);
-        }
-
-        private static void PointAddPrecomp(ref PointPrecomp p, ref PointAccum r)
-        {
-            int[] a = F.Create();
-            int[] b = F.Create();
-            int[] c = F.Create();
-            int[] e = r.u;
-            int[] f = F.Create();
-            int[] g = F.Create();
-            int[] h = r.v;
+            int[] nf = na, ng = nb;
 
             F.Apm(r.y, r.x, b, a);
-            F.Mul(a, p.ymx_h, a);
-            F.Mul(b, p.ypx_h, b);
+            F.Mul(na, p.ymx_h, na);
+            F.Mul(nb, p.ypx_h, nb);
             F.Mul(r.u, r.v, c);
             F.Mul(c, p.xyd, c);
             F.Apm(b, a, h, e);
-            F.Apm(r.z, c, g, f);
-            F.Carry(g);
-            F.Mul(e, f, r.x);
-            F.Mul(g, h, r.y);
+            F.Apm(r.z, c, ng, nf);
             F.Mul(f, g, r.z);
+            F.Mul(f, e, r.x);
+            F.Mul(g, h, r.y);
         }
 
-        private static void PointAddPrecompVar(int sign, ref PointPrecomp p, ref PointAccum r)
+        private static void PointAddVar(bool negate, ref PointPrecompZ p, ref PointAccum r, ref PointTemp t)
         {
-            int[] a = F.Create();
-            int[] b = F.Create();
-            int[] c = F.Create();
+            int[] a = r.x;
+            int[] b = r.y;
+            int[] c = t.r0;
+            int[] d = r.z;
             int[] e = r.u;
-            int[] f = F.Create();
-            int[] g = F.Create();
+            int[] f = a;
+            int[] g = b;
             int[] h = r.v;
 
-            F.Apm(r.y, r.x, b, a);
-            if (sign == 0)
+            int[] na, nb;
+            if (negate)
             {
-                F.Mul(a, p.ymx_h, a);
-                F.Mul(b, p.ypx_h, b);
+                na = b; nb = a;
             }
             else
             {
-                F.Mul(a, p.ypx_h, a);
-                F.Mul(b, p.ymx_h, b);
+                na = a; nb = b;
             }
+            int[] nf = na, ng = nb;
+
+            F.Apm(r.y, r.x, b, a);
+            F.Mul(na, p.ymx_h, na);
+            F.Mul(nb, p.ypx_h, nb);
             F.Mul(r.u, r.v, c);
             F.Mul(c, p.xyd, c);
-            F.CNegate(sign, c);
+            F.Mul(r.z, p.z, d);
             F.Apm(b, a, h, e);
-            F.Apm(r.z, c, g, f);
-            F.Carry(g);
-            F.Mul(e, f, r.x);
-            F.Mul(g, h, r.y);
+            F.Apm(d, c, ng, nf);
             F.Mul(f, g, r.z);
+            F.Mul(f, e, r.x);
+            F.Mul(g, h, r.y);
         }
 
         private static void PointCopy(ref PointAccum p, ref PointExtended r)
@@ -763,53 +766,41 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
         {
             F.Copy(p.x, 0, r.x, 0);
             F.Copy(p.y, 0, r.y, 0);
-            PointExtendXY(ref r);
+            F.One(r.z);
+            F.Mul(p.x, p.y, r.t);
         }
 
-        private static void PointCopy(ref PointExtended p, ref PointExtended r)
+        private static void PointCopy(ref PointExtended p, ref PointPrecompZ r)
         {
-            F.Copy(p.x, 0, r.x, 0);
-            F.Copy(p.y, 0, r.y, 0);
-            F.Copy(p.z, 0, r.z, 0);
-            F.Copy(p.t, 0, r.t, 0);
+            // To avoid halving x and y, we double t and z instead.
+            F.Apm(p.y, p.x, r.ypx_h, r.ymx_h);
+            F.Mul(p.t, C_d2, r.xyd);
+            F.Add(p.z, p.z, r.z);
         }
 
         private static void PointDouble(ref PointAccum r)
         {
-            int[] a = F.Create();
-            int[] b = F.Create();
-            int[] c = F.Create();
+            int[] a = r.x;
+            int[] b = r.y;
+            int[] c = r.z;
             int[] e = r.u;
-            int[] f = F.Create();
-            int[] g = F.Create();
+            int[] f = a;
+            int[] g = b;
             int[] h = r.v;
 
+            F.Add(r.x, r.y, e);
             F.Sqr(r.x, a);
             F.Sqr(r.y, b);
             F.Sqr(r.z, c);
             F.Add(c, c, c);
             F.Apm(a, b, h, g);
-            F.Add(r.x, r.y, e);
             F.Sqr(e, e);
             F.Sub(h, e, e);
             F.Add(c, g, f);
-            F.Carry(f);
-            F.Mul(e, f, r.x);
-            F.Mul(g, h, r.y);
+            F.Carry(f); // Probably unnecessary, but keep until better bounds analysis available
             F.Mul(f, g, r.z);
-        }
-
-        private static void PointExtendXY(ref PointAccum p)
-        {
-            F.One(p.z);
-            F.Copy(p.x, 0, p.u, 0);
-            F.Copy(p.y, 0, p.v, 0);
-        }
-
-        private static void PointExtendXY(ref PointExtended p)
-        {
-            F.One(p.z);
-            F.Mul(p.x, p.y, p.t);
+            F.Mul(f, e, r.x);
+            F.Mul(g, h, r.y);
         }
 
         private static void PointLookup(int block, int index, ref PointPrecomp p)
@@ -822,15 +813,15 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             for (int i = 0; i < PrecompPoints; ++i)
             {
                 int cond = ((i ^ index) - 1) >> 31;
-                F.CMov(cond, PrecompBaseComb, off, p.ypx_h, 0);     off += F.Size;
                 F.CMov(cond, PrecompBaseComb, off, p.ymx_h, 0);     off += F.Size;
+                F.CMov(cond, PrecompBaseComb, off, p.ypx_h, 0);     off += F.Size;
                 F.CMov(cond, PrecompBaseComb, off, p.xyd  , 0);     off += F.Size;
             }
         }
 
-        private static void PointLookup(uint[] x, int n, int[] table, ref PointExtended r)
+        private static void PointLookupZ(uint[] x, int n, int[] table, ref PointPrecompZ r)
         {
-            // TODO This method is currently hardcoded to 4-bit windows and 8 precomputed points
+            // TODO This method is currently hard-coded to 4-bit windows and 8 precomputed points
 
             uint w = GetWindow4(x, n);
 
@@ -843,17 +834,34 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             for (int i = 0, off = 0; i < 8; ++i)
             {
                 int cond = ((i ^ abs) - 1) >> 31;
-                F.CMov(cond, table, off, r.x, 0);       off += F.Size;
-                F.CMov(cond, table, off, r.y, 0);       off += F.Size;
-                F.CMov(cond, table, off, r.z, 0);       off += F.Size;
-                F.CMov(cond, table, off, r.t, 0);       off += F.Size;
+                F.CMov(cond, table, off, r.ymx_h, 0);       off += F.Size;
+                F.CMov(cond, table, off, r.ypx_h, 0);       off += F.Size;
+                F.CMov(cond, table, off, r.xyd  , 0);       off += F.Size;
+                F.CMov(cond, table, off, r.z    , 0);       off += F.Size;
             }
 
-            F.CNegate(sign, r.x);
-            F.CNegate(sign, r.t);
+            F.CSwap(sign, r.ymx_h, r.ypx_h);
+            F.CNegate(sign, r.xyd);
+        }
+
+        private static void PointPrecompute(ref PointAffine p, PointExtended[] points, int count, ref PointTemp t)
+        {
+            Debug.Assert(count > 0);
+
+            Init(out points[0]);
+            PointCopy(ref p, ref points[0]);
+
+            PointExtended d; Init(out d);
+            PointAdd(ref points[0], ref points[0], ref d, ref t);
+
+            for (int i = 1; i < count; ++i)
+            {
+                Init(out points[i]);
+                PointAdd(ref points[i - 1], ref d, ref points[i], ref t);
+            }
         }
 
-        private static int[] PointPrecompute(ref PointAffine p, int count)
+        private static int[] PointPrecomputeZ(ref PointAffine p, int count, ref PointTemp t)
         {
             Debug.Assert(count > 0);
 
@@ -861,43 +869,52 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             PointCopy(ref p, ref q);
 
             PointExtended d; Init(out d);
-            PointCopy(ref q, ref d);
-            PointAdd(ref q, ref d);
+            PointAdd(ref q, ref q, ref d, ref t);
 
+            PointPrecompZ r; Init(out r);
             int[] table = F.CreateTable(count * 4);
             int off = 0;
 
             int i = 0;
             for (;;)
             {
-                F.Copy(q.x, 0, table, off);     off += F.Size;
-                F.Copy(q.y, 0, table, off);     off += F.Size;
-                F.Copy(q.z, 0, table, off);     off += F.Size;
-                F.Copy(q.t, 0, table, off);     off += F.Size;
+                PointCopy(ref q, ref r);
+
+                F.Copy(r.ymx_h, 0, table, off);     off += F.Size;
+                F.Copy(r.ypx_h, 0, table, off);     off += F.Size;
+                F.Copy(r.xyd  , 0, table, off);     off += F.Size;
+                F.Copy(r.z    , 0, table, off);     off += F.Size;
 
                 if (++i == count)
                     break;
 
-                PointAdd(ref d, ref q);
+                PointAdd(ref q, ref d, ref q, ref t);
             }
 
             return table;
         }
 
-        private static void PointPrecomputeVar(ref PointAffine p, PointExtended[] points, int count)
+        private static void PointPrecomputeZ(ref PointAffine p, PointPrecompZ[] points, int count, ref PointTemp t)
         {
             Debug.Assert(count > 0);
 
-            Init(out points[0]);
-            PointCopy(ref p, ref points[0]);
+            PointExtended q; Init(out q);
+            PointCopy(ref p, ref q);
 
             PointExtended d; Init(out d);
-            PointAddVar(false, ref points[0], ref points[0], ref d);
+            PointAdd(ref q, ref q, ref d, ref t);
 
-            for (int i = 1; i < count; ++i)
+            int i = 0;
+            for (;;)
             {
-                Init(out points[i]);
-                PointAddVar(false, ref points[i - 1], ref d, ref points[i]);
+                ref PointPrecompZ r = ref points[i];
+                Init(out r);
+                PointCopy(ref q, ref r);
+
+                if (++i == count)
+                    break;
+
+                PointAdd(ref q, ref d, ref q, ref t);
             }
         }
 
@@ -910,14 +927,6 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             F.One(p.v);
         }
 
-        private static void PointSetNeutral(ref PointExtended p)
-        {
-            F.Zero(p.x);
-            F.One(p.y);
-            F.One(p.z);
-            F.Zero(p.t);
-        }
-
         public static void Precompute()
         {
             lock (PrecompLock)
@@ -930,17 +939,20 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
                 int totalPoints = wnafPoints + combPoints;
 
                 PointExtended[] points = new PointExtended[totalPoints];
+                PointTemp t; Init(out t);
 
                 PointAffine b;
                 b.x = B_x;
                 b.y = B_y;
 
-                PointPrecomputeVar(ref b, points, wnafPoints);
+                PointPrecompute(ref b, points, wnafPoints, ref t);
 
                 PointAccum p; Init(out p);
                 F.Copy(B_x, 0, p.x, 0);
                 F.Copy(B_y, 0, p.y, 0);
-                PointExtendXY(ref p);
+                F.One(p.z);
+                F.Copy(B_x, 0, p.u, 0);
+                F.Copy(B_y, 0, p.v, 0);
 
                 int pointsIndex = wnafPoints;
                 PointExtended[] toothPowers = new PointExtended[PrecompTeeth];
@@ -953,37 +965,48 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
                 {
                     ref PointExtended sum = ref points[pointsIndex++];
                     Init(out sum);
-                    PointSetNeutral(ref sum);
 
                     for (int tooth = 0; tooth < PrecompTeeth; ++tooth)
                     {
-                        PointCopy(ref p, ref u);
-                        PointAddVar(true, ref sum, ref u, ref sum);
+                        if (tooth == 0)
+                        {
+                            PointCopy(ref p, ref sum);
+                        }
+                        else
+                        {
+                            PointCopy(ref p, ref u);
+                            PointAdd(ref sum, ref u, ref sum, ref t);
+                        }
+
                         PointDouble(ref p);
                         PointCopy(ref p, ref toothPowers[tooth]);
 
                         if (block + tooth != PrecompBlocks + PrecompTeeth - 2)
                         {
-                            for (int s = 1; s < PrecompSpacing; ++s)
+                            for (int spacing = 1; spacing < PrecompSpacing; ++spacing)
                             {
                                 PointDouble(ref p);
                             }
                         }
                     }
 
+                    F.Negate(sum.x, sum.x);
+                    F.Negate(sum.t, sum.t);
+
                     for (int tooth = 0; tooth < (PrecompTeeth - 1); ++tooth)
                     {
                         int size = 1 << tooth;
                         for (int j = 0; j < size; ++j, ++pointsIndex)
                         {
                             Init(out points[pointsIndex]);
-                            PointAddVar(false, ref points[pointsIndex - size], ref toothPowers[tooth],
-                                ref points[pointsIndex]);
+                            PointAdd(ref points[pointsIndex - size], ref toothPowers[tooth], ref points[pointsIndex],
+                                ref t);
                         }
                     }
                 }
                 Debug.Assert(pointsIndex == totalPoints);
 
+                // Set each z coordinate to 1/(2.z) to avoid calculating halves of x, y in the following code
                 InvertDoubleZs(points);
 
                 PrecompBaseWnaf = new PointPrecomp[wnafPoints];
@@ -993,39 +1016,47 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
                     ref PointPrecomp r = ref PrecompBaseWnaf[i];
                     Init(out r);
 
+                    // Calculate x/2 and y/2 (because the z value holds half the inverse; see above).
                     F.Mul(q.x, q.z, q.x);
                     F.Mul(q.y, q.z, q.y);
 
+                    // y/2 +/- x/2
                     F.Apm(q.y, q.x, r.ypx_h, r.ymx_h);
+
+                    // x/2 * y/2 * (4.d) == x.y.d
                     F.Mul(q.x, q.y, r.xyd);
                     F.Mul(r.xyd, C_d4, r.xyd);
 
-                    F.Normalize(r.ypx_h);
                     F.Normalize(r.ymx_h);
+                    F.Normalize(r.ypx_h);
                     F.Normalize(r.xyd);
                 }
 
                 PrecompBaseComb = F.CreateTable(combPoints * 3);
-                PointPrecomp t; Init(out t);
+                PointPrecomp s; Init(out s);
                 int off = 0;
                 for (int i = wnafPoints; i < totalPoints; ++i)
                 {
                     ref PointExtended q = ref points[i];
 
+                    // Calculate x/2 and y/2 (because the z value holds half the inverse; see above).
                     F.Mul(q.x, q.z, q.x);
                     F.Mul(q.y, q.z, q.y);
 
-                    F.Apm(q.y, q.x, t.ypx_h, t.ymx_h);
-                    F.Mul(q.x, q.y, t.xyd);
-                    F.Mul(t.xyd, C_d4, t.xyd);
+                    // y/2 +/- x/2
+                    F.Apm(q.y, q.x, s.ypx_h, s.ymx_h);
 
-                    F.Normalize(t.ypx_h);
-                    F.Normalize(t.ymx_h);
-                    F.Normalize(t.xyd);
+                    // x/2 * y/2 * (4.d) == x.y.d
+                    F.Mul(q.x, q.y, s.xyd);
+                    F.Mul(s.xyd, C_d4, s.xyd);
 
-                    F.Copy(t.ypx_h, 0, PrecompBaseComb, off);       off += F.Size;
-                    F.Copy(t.ymx_h, 0, PrecompBaseComb, off);       off += F.Size;
-                    F.Copy(t.xyd  , 0, PrecompBaseComb, off);       off += F.Size;
+                    F.Normalize(s.ymx_h);
+                    F.Normalize(s.ypx_h);
+                    F.Normalize(s.xyd);
+
+                    F.Copy(s.ymx_h, 0, PrecompBaseComb, off);       off += F.Size;
+                    F.Copy(s.ypx_h, 0, PrecompBaseComb, off);       off += F.Size;
+                    F.Copy(s.xyd  , 0, PrecompBaseComb, off);       off += F.Size;
                 }
                 Debug.Assert(off == PrecompBaseComb.Length);
             }
@@ -1187,16 +1218,17 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
                 uint c2 = Nat.ShiftDownBit(ScalarUints, n, 1U);             Debug.Assert(c2 == (1U << 31));
             }
 
-            int[] table = PointPrecompute(ref p, 8);
-            PointExtended q; Init(out q);
+            PointPrecompZ q; Init(out q);
+            PointTemp t; Init(out t);
+            int[] table = PointPrecomputeZ(ref p, 8, ref t);
 
             PointSetNeutral(ref r);
 
             int w = 63;
             for (;;)
             {
-                PointLookup(n, w, table, ref q);
-                PointAdd(ref q, ref r);
+                PointLookupZ(n, w, table, ref q);
+                PointAdd(ref q, ref r, ref t);
 
                 if (--w < 0)
                     break;
@@ -1211,7 +1243,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
         private static void ScalarMultBase(byte[] k, ref PointAccum r)
         {
             // Equivalent (but much slower)
-            //PointAffine p; InitAffine(out p);
+            //PointAffine p; Init(out p);
             //F.Copy(B_x, 0, p.x, 0);
             //F.Copy(B_y, 0, p.y, 0);
             //ScalarMult(k, ref p, ref r);
@@ -1226,6 +1258,10 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
                 uint c1 = Nat.CAdd(ScalarUints, ~(int)n[0] & 1, n, L, n);   Debug.Assert(c1 == 0U);
                 uint c2 = Nat.ShiftDownBit(ScalarUints, n, 1U);             Debug.Assert(c2 == (1U << 31));
 
+                /*
+                 * Because we are using 4 teeth and 8 spacing, each limb of n corresponds to one of the 8 blocks.
+                 * Therefore we can efficiently group the bits for each comb position using a (double) shuffle. 
+                 */
                 for (int i = 0; i < ScalarUints; ++i)
                 {
                     n[i] = Interleave.Shuffle2(n[i]);
@@ -1233,8 +1269,10 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
             }
 
             PointPrecomp p; Init(out p);
+            PointTemp t; Init(out t);
 
             PointSetNeutral(ref r);
+            int resultSign = 0;
 
             int cOff = (PrecompSpacing - 1) * PrecompTeeth;
             for (;;)
@@ -1250,10 +1288,11 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
 
                     PointLookup(b, abs, ref p);
 
-                    F.CSwap(sign, p.ypx_h, p.ymx_h);
-                    F.CNegate(sign, p.xyd);
+                    F.CNegate(resultSign ^ sign, r.x);
+                    F.CNegate(resultSign ^ sign, r.u);
+                    resultSign = sign;
 
-                    PointAddPrecomp(ref p, ref r);
+                    PointAdd(ref p, ref r, ref t);
                 }
 
                 if ((cOff -= PrecompTeeth) < 0)
@@ -1261,6 +1300,9 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
 
                 PointDouble(ref r);
             }
+
+            F.CNegate(resultSign, r.x);
+            F.CNegate(resultSign, r.u);
         }
 
         private static void ScalarMultBaseEncoded(byte[] k, byte[] r, int rOff)
@@ -1288,13 +1330,12 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
 
         private static void ScalarMultOrderVar(ref PointAffine p, ref PointAccum r)
         {
-            int width = 5;
+            sbyte[] ws_p = GetWnafVar(L, WnafWidth);
 
-            sbyte[] ws_p = GetWnafVar(L, width);
-
-            int count = 1 << (width - 2);
-            PointExtended[] tp = new PointExtended[count];
-            PointPrecomputeVar(ref p, tp, count);
+            int count = 1 << (WnafWidth - 2);
+            PointPrecompZ[] tp = new PointPrecompZ[count];
+            PointTemp t; Init(out t);
+            PointPrecomputeZ(ref p, tp, count, ref t);
 
             PointSetNeutral(ref r);
 
@@ -1306,7 +1347,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
                     int sign = wp >> 31;
                     int index = (wp ^ sign) >> 1;
 
-                    PointAddVar(sign != 0, ref tp[index], ref r);
+                    PointAddVar(sign != 0, ref tp[index], ref r, ref t);
                 }
 
                 if (--bit < 0)
@@ -1320,14 +1361,13 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
         {
             Precompute();
 
-            int width = 5;
-
             sbyte[] ws_b = GetWnafVar(nb, WnafWidthBase);
-            sbyte[] ws_p = GetWnafVar(np, width);
+            sbyte[] ws_p = GetWnafVar(np, WnafWidth);
 
-            int count = 1 << (width - 2);
-            PointExtended[] tp = new PointExtended[count];
-            PointPrecomputeVar(ref p, tp, count);
+            int count = 1 << (WnafWidth - 2);
+            PointPrecompZ[] tp = new PointPrecompZ[count];
+            PointTemp t; Init(out t);
+            PointPrecomputeZ(ref p, tp, count, ref t);
 
             PointSetNeutral(ref r);
 
@@ -1339,7 +1379,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
                     int sign = wb >> 31;
                     int index = (wb ^ sign) >> 1;
 
-                    PointAddPrecompVar(-sign, ref PrecompBaseWnaf[index], ref r);
+                    PointAddVar(sign != 0, ref PrecompBaseWnaf[index], ref r, ref t);
                 }
 
                 int wp = ws_p[bit];
@@ -1348,7 +1388,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032
                     int sign = wp >> 31;
                     int index = (wp ^ sign) >> 1;
 
-                    PointAddVar(sign != 0, ref tp[index], ref r);
+                    PointAddVar(sign != 0, ref tp[index], ref r, ref t);
                 }
 
                 if (--bit < 0)