diff options
author | Peter Dettman <peter.dettman@bouncycastle.org> | 2022-07-04 16:17:35 +0700 |
---|---|---|
committer | Peter Dettman <peter.dettman@bouncycastle.org> | 2022-07-04 16:17:35 +0700 |
commit | 8e29badb3fcfb84fbc271dd1ae7de416c99ca294 (patch) | |
tree | 00652960807924a5781857caf9f7e8434f13900a | |
parent | SIKE implementation with compression added (diff) | |
download | BouncyCastle.NET-ed25519-8e29badb3fcfb84fbc271dd1ae7de416c99ca294.tar.xz |
Ed25519 overhaul
- improved performance - reduced allocation - comments and references
-rw-r--r-- | crypto/src/math/ec/rfc8032/Ed25519.cs | 514 |
1 files changed, 277 insertions, 237 deletions
diff --git a/crypto/src/math/ec/rfc8032/Ed25519.cs b/crypto/src/math/ec/rfc8032/Ed25519.cs index 49f7b23a9..11ac809ed 100644 --- a/crypto/src/math/ec/rfc8032/Ed25519.cs +++ b/crypto/src/math/ec/rfc8032/Ed25519.cs @@ -10,6 +10,18 @@ using Org.BouncyCastle.Utilities; namespace Org.BouncyCastle.Math.EC.Rfc8032 { + /// <summary> + /// A low-level implementation of the Ed25519, Ed25519ctx, and Ed25519ph instantiations of the Edwards-Curve Digital + /// Signature Algorithm specified in <a href="https://www.rfc-editor.org/rfc/rfc8032">RFC 8032</a>. + /// </summary> + /// <remarks> + /// The implementation strategy is mostly drawn from <a href="https://ia.cr/2012/309"> + /// Mike Hamburg, "Fast and compact elliptic-curve cryptography"</a>, notably the "signed multi-comb" algorithm (for + /// scalar multiplication by a fixed point), the "half Niels coordinates" (for precomputed points), and the + /// "extensible coordinates" (for accumulators). Standard + /// <a href="http://hyperelliptic.org/EFD/g1p/auto-twisted-extended.html">extended coordinates</a> are used during + /// precomputations, needing only a single extra point addition formula. + /// </remarks> public static class Ed25519 { // -x^2 + y^2 == 1 + 0x52036CEE2B6FFE738CC740797779E89800700A4D4141D8AB75EB4DCA135978A3 * x^2 * y^2 @@ -47,16 +59,18 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 private static readonly uint[] L = { 0x5CF5D3EDU, 0x5812631AU, 0xA2F79CD6U, 0x14DEF9DEU, 0x00000000U, 0x00000000U, 0x00000000U, 0x10000000U }; - private const int L0 = unchecked((int)0xFCF5D3ED); // L0:26/-- - private const int L1 = 0x012631A6; // L1:24/22 - private const int L2 = 0x079CD658; // L2:27/-- - private const int L3 = unchecked((int)0xFF9DEA2F); // L3:23/-- - private const int L4 = 0x000014DF; // L4:12/11 + private const int L0 = -0x030A2C13; // L0:26/-- + private const int L1 = 0x012631A6; // L1:24/22 + private const int L2 = 0x079CD658; // L2:27/-- + private const int L3 = -0x006215D1; // L3:23/-- + private const int L4 = 0x000014DF; // L4:12/11 private static readonly int[] B_x = { 0x0325D51A, 0x018B5823, 0x007B2C95, 0x0304A92D, 0x00D2598E, 0x01D6DC5C, 0x01388C7F, 0x013FEC0A, 0x029E6B72, 0x0042D26D }; private static readonly int[] B_y = { 0x02666658, 0x01999999, 0x00666666, 0x03333333, 0x00CCCCCC, 0x02666666, 0x01999999, 0x00666666, 0x03333333, 0x00CCCCCC, }; + + // Note that d == -121665/121666 private static readonly int[] C_d = { 0x035978A3, 0x02D37284, 0x018AB75E, 0x026A0A0E, 0x0000E014, 0x0379E898, 0x01D01E5D, 0x01E738CC, 0x03715B7F, 0x00A406D9 }; private static readonly int[] C_d2 = { 0x02B2F159, 0x01A6E509, 0x01156EBD, 0x00D4141D, 0x0001C029, 0x02F3D130, @@ -64,12 +78,14 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 private static readonly int[] C_d4 = { 0x0165E2B2, 0x034DCA13, 0x002ADD7A, 0x01A8283B, 0x00038052, 0x01E7A260, 0x03407977, 0x019CE331, 0x01C56DFF, 0x00901B67 }; + private const int WnafWidth = 5; private const int WnafWidthBase = 7; // ScalarMultBase is hard-coded for these values of blocks, teeth, spacing so they can't be freely changed private const int PrecompBlocks = 8; private const int PrecompTeeth = 4; private const int PrecompSpacing = 8; + //private const int PrecompRange = PrecompBlocks * PrecompTeeth * PrecompSpacing; // range == 256 private const int PrecompPoints = 1 << (PrecompTeeth - 1); private const int PrecompMask = PrecompPoints - 1; @@ -94,7 +110,23 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 private struct PointPrecomp { - internal int[] ypx_h, ymx_h, xyd; + internal int[] ymx_h; // (y - x)/2 + internal int[] ypx_h; // (y + x)/2 + internal int[] xyd; // x.y.d + } + + private struct PointPrecompZ + { + internal int[] ymx_h; // (y - x)/2 + internal int[] ypx_h; // (y + x)/2 + internal int[] xyd; // x.y.d + internal int[] z; + } + + // Temp space to avoid allocations in point formulae. + private struct PointTemp + { + internal int[] r0, r1; } private static byte[] CalculateS(byte[] r, byte[] k, byte[] s) @@ -515,9 +547,23 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 private static void Init(out PointPrecomp r) { + r.ymx_h = F.Create(); r.ypx_h = F.Create(); + r.xyd = F.Create(); + } + + private static void Init(out PointPrecompZ r) + { r.ymx_h = F.Create(); + r.ypx_h = F.Create(); r.xyd = F.Create(); + r.z = F.Create(); + } + + private static void Init(out PointTemp r) + { + r.r0 = F.Create(); + r.r1 = F.Create(); } private static void InvertDoubleZs(PointExtended[] points) @@ -564,191 +610,148 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 return F.IsZeroVar(x) && F.AreEqualVar(y, z); } - private static void PointAdd(ref PointExtended p, ref PointAccum r) + private static void PointAdd(ref PointExtended p, ref PointExtended q, ref PointExtended r, ref PointTemp t) { - int[] a = F.Create(); - int[] b = F.Create(); - int[] c = F.Create(); - int[] d = F.Create(); - int[] e = r.u; - int[] f = F.Create(); - int[] g = F.Create(); - int[] h = r.v; + // p may ref the same point as r (or q), but q may not ref the same point as r. + Debug.Assert(q.x != r.x & q.y != r.y && q.z != r.z && q.t != r.t); - F.Apm(r.y, r.x, b, a); - F.Apm(p.y, p.x, d, c); + int[] a = r.x; + int[] b = r.y; + int[] c = t.r0; + int[] d = t.r1; + int[] e = a; + int[] f = c; + int[] g = d; + int[] h = b; + + F.Apm(p.y, p.x, b, a); + F.Apm(q.y, q.x, d, c); F.Mul(a, c, a); F.Mul(b, d, b); - F.Mul(r.u, r.v, c); - F.Mul(c, p.t, c); + F.Mul(p.t, q.t, c); F.Mul(c, C_d2, c); - F.Mul(r.z, p.z, d); - F.Add(d, d, d); + F.Add(p.z, p.z, d); + F.Mul(d, q.z, d); F.Apm(b, a, h, e); F.Apm(d, c, g, f); - F.Carry(g); - F.Mul(e, f, r.x); - F.Mul(g, h, r.y); + F.Mul(e, h, r.t); F.Mul(f, g, r.z); + F.Mul(e, f, r.x); + F.Mul(h, g, r.y); } - private static void PointAdd(ref PointExtended p, ref PointExtended r) + private static void PointAdd(ref PointPrecomp p, ref PointAccum r, ref PointTemp t) { - int[] a = F.Create(); - int[] b = F.Create(); - int[] c = F.Create(); - int[] d = F.Create(); - int[] e = F.Create(); - int[] f = F.Create(); - int[] g = F.Create(); - int[] h = F.Create(); + int[] a = r.x; + int[] b = r.y; + int[] c = t.r0; + int[] e = r.u; + int[] f = a; + int[] g = b; + int[] h = r.v; - F.Apm(p.y, p.x, b, a); - F.Apm(r.y, r.x, d, c); - F.Mul(a, c, a); - F.Mul(b, d, b); - F.Mul(p.t, r.t, c); - F.Mul(c, C_d2, c); - F.Mul(p.z, r.z, d); - F.Add(d, d, d); + F.Apm(r.y, r.x, b, a); + F.Mul(a, p.ymx_h, a); + F.Mul(b, p.ypx_h, b); + F.Mul(r.u, r.v, c); + F.Mul(c, p.xyd, c); F.Apm(b, a, h, e); - F.Apm(d, c, g, f); - F.Carry(g); - F.Mul(e, f, r.x); - F.Mul(g, h, r.y); + F.Apm(r.z, c, g, f); F.Mul(f, g, r.z); - F.Mul(e, h, r.t); + F.Mul(f, e, r.x); + F.Mul(g, h, r.y); } - private static void PointAddVar(bool negate, ref PointExtended p, ref PointAccum r) + private static void PointAdd(ref PointPrecompZ p, ref PointAccum r, ref PointTemp t) { - int[] a = F.Create(); - int[] b = F.Create(); - int[] c = F.Create(); - int[] d = F.Create(); + int[] a = r.x; + int[] b = r.y; + int[] c = t.r0; + int[] d = r.z; int[] e = r.u; - int[] f = F.Create(); - int[] g = F.Create(); + int[] f = a; + int[] g = b; int[] h = r.v; - int[] nc, nd, nf, ng; - if (negate) - { - nc = d; nd = c; nf = g; ng = f; - } - else - { - nc = c; nd = d; nf = f; ng = g; - } - F.Apm(r.y, r.x, b, a); - F.Apm(p.y, p.x, nd, nc); - F.Mul(a, c, a); - F.Mul(b, d, b); + F.Mul(a, p.ymx_h, a); + F.Mul(b, p.ypx_h, b); F.Mul(r.u, r.v, c); - F.Mul(c, p.t, c); - F.Mul(c, C_d2, c); + F.Mul(c, p.xyd, c); F.Mul(r.z, p.z, d); - F.Add(d, d, d); F.Apm(b, a, h, e); - F.Apm(d, c, ng, nf); - F.Carry(ng); - F.Mul(e, f, r.x); - F.Mul(g, h, r.y); + F.Apm(d, c, g, f); F.Mul(f, g, r.z); + F.Mul(f, e, r.x); + F.Mul(g, h, r.y); } - private static void PointAddVar(bool negate, ref PointExtended p, ref PointExtended q, ref PointExtended r) + private static void PointAddVar(bool negate, ref PointPrecomp p, ref PointAccum r, ref PointTemp t) { - int[] a = F.Create(); - int[] b = F.Create(); - int[] c = F.Create(); - int[] d = F.Create(); - int[] e = F.Create(); - int[] f = F.Create(); - int[] g = F.Create(); - int[] h = F.Create(); + int[] a = r.x; + int[] b = r.y; + int[] c = t.r0; + int[] e = r.u; + int[] f = a; + int[] g = b; + int[] h = r.v; - int[] nc, nd, nf, ng; + int[] na, nb; if (negate) { - nc = d; nd = c; nf = g; ng = f; + na = b; nb = a; } else { - nc = c; nd = d; nf = f; ng = g; + na = a; nb = b; } - - F.Apm(p.y, p.x, b, a); - F.Apm(q.y, q.x, nd, nc); - F.Mul(a, c, a); - F.Mul(b, d, b); - F.Mul(p.t, q.t, c); - F.Mul(c, C_d2, c); - F.Mul(p.z, q.z, d); - F.Add(d, d, d); - F.Apm(b, a, h, e); - F.Apm(d, c, ng, nf); - F.Carry(ng); - F.Mul(e, f, r.x); - F.Mul(g, h, r.y); - F.Mul(f, g, r.z); - F.Mul(e, h, r.t); - } - - private static void PointAddPrecomp(ref PointPrecomp p, ref PointAccum r) - { - int[] a = F.Create(); - int[] b = F.Create(); - int[] c = F.Create(); - int[] e = r.u; - int[] f = F.Create(); - int[] g = F.Create(); - int[] h = r.v; + int[] nf = na, ng = nb; F.Apm(r.y, r.x, b, a); - F.Mul(a, p.ymx_h, a); - F.Mul(b, p.ypx_h, b); + F.Mul(na, p.ymx_h, na); + F.Mul(nb, p.ypx_h, nb); F.Mul(r.u, r.v, c); F.Mul(c, p.xyd, c); F.Apm(b, a, h, e); - F.Apm(r.z, c, g, f); - F.Carry(g); - F.Mul(e, f, r.x); - F.Mul(g, h, r.y); + F.Apm(r.z, c, ng, nf); F.Mul(f, g, r.z); + F.Mul(f, e, r.x); + F.Mul(g, h, r.y); } - private static void PointAddPrecompVar(int sign, ref PointPrecomp p, ref PointAccum r) + private static void PointAddVar(bool negate, ref PointPrecompZ p, ref PointAccum r, ref PointTemp t) { - int[] a = F.Create(); - int[] b = F.Create(); - int[] c = F.Create(); + int[] a = r.x; + int[] b = r.y; + int[] c = t.r0; + int[] d = r.z; int[] e = r.u; - int[] f = F.Create(); - int[] g = F.Create(); + int[] f = a; + int[] g = b; int[] h = r.v; - F.Apm(r.y, r.x, b, a); - if (sign == 0) + int[] na, nb; + if (negate) { - F.Mul(a, p.ymx_h, a); - F.Mul(b, p.ypx_h, b); + na = b; nb = a; } else { - F.Mul(a, p.ypx_h, a); - F.Mul(b, p.ymx_h, b); + na = a; nb = b; } + int[] nf = na, ng = nb; + + F.Apm(r.y, r.x, b, a); + F.Mul(na, p.ymx_h, na); + F.Mul(nb, p.ypx_h, nb); F.Mul(r.u, r.v, c); F.Mul(c, p.xyd, c); - F.CNegate(sign, c); + F.Mul(r.z, p.z, d); F.Apm(b, a, h, e); - F.Apm(r.z, c, g, f); - F.Carry(g); - F.Mul(e, f, r.x); - F.Mul(g, h, r.y); + F.Apm(d, c, ng, nf); F.Mul(f, g, r.z); + F.Mul(f, e, r.x); + F.Mul(g, h, r.y); } private static void PointCopy(ref PointAccum p, ref PointExtended r) @@ -763,53 +766,41 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 { F.Copy(p.x, 0, r.x, 0); F.Copy(p.y, 0, r.y, 0); - PointExtendXY(ref r); + F.One(r.z); + F.Mul(p.x, p.y, r.t); } - private static void PointCopy(ref PointExtended p, ref PointExtended r) + private static void PointCopy(ref PointExtended p, ref PointPrecompZ r) { - F.Copy(p.x, 0, r.x, 0); - F.Copy(p.y, 0, r.y, 0); - F.Copy(p.z, 0, r.z, 0); - F.Copy(p.t, 0, r.t, 0); + // To avoid halving x and y, we double t and z instead. + F.Apm(p.y, p.x, r.ypx_h, r.ymx_h); + F.Mul(p.t, C_d2, r.xyd); + F.Add(p.z, p.z, r.z); } private static void PointDouble(ref PointAccum r) { - int[] a = F.Create(); - int[] b = F.Create(); - int[] c = F.Create(); + int[] a = r.x; + int[] b = r.y; + int[] c = r.z; int[] e = r.u; - int[] f = F.Create(); - int[] g = F.Create(); + int[] f = a; + int[] g = b; int[] h = r.v; + F.Add(r.x, r.y, e); F.Sqr(r.x, a); F.Sqr(r.y, b); F.Sqr(r.z, c); F.Add(c, c, c); F.Apm(a, b, h, g); - F.Add(r.x, r.y, e); F.Sqr(e, e); F.Sub(h, e, e); F.Add(c, g, f); - F.Carry(f); - F.Mul(e, f, r.x); - F.Mul(g, h, r.y); + F.Carry(f); // Probably unnecessary, but keep until better bounds analysis available F.Mul(f, g, r.z); - } - - private static void PointExtendXY(ref PointAccum p) - { - F.One(p.z); - F.Copy(p.x, 0, p.u, 0); - F.Copy(p.y, 0, p.v, 0); - } - - private static void PointExtendXY(ref PointExtended p) - { - F.One(p.z); - F.Mul(p.x, p.y, p.t); + F.Mul(f, e, r.x); + F.Mul(g, h, r.y); } private static void PointLookup(int block, int index, ref PointPrecomp p) @@ -822,15 +813,15 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 for (int i = 0; i < PrecompPoints; ++i) { int cond = ((i ^ index) - 1) >> 31; - F.CMov(cond, PrecompBaseComb, off, p.ypx_h, 0); off += F.Size; F.CMov(cond, PrecompBaseComb, off, p.ymx_h, 0); off += F.Size; + F.CMov(cond, PrecompBaseComb, off, p.ypx_h, 0); off += F.Size; F.CMov(cond, PrecompBaseComb, off, p.xyd , 0); off += F.Size; } } - private static void PointLookup(uint[] x, int n, int[] table, ref PointExtended r) + private static void PointLookupZ(uint[] x, int n, int[] table, ref PointPrecompZ r) { - // TODO This method is currently hardcoded to 4-bit windows and 8 precomputed points + // TODO This method is currently hard-coded to 4-bit windows and 8 precomputed points uint w = GetWindow4(x, n); @@ -843,17 +834,34 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 for (int i = 0, off = 0; i < 8; ++i) { int cond = ((i ^ abs) - 1) >> 31; - F.CMov(cond, table, off, r.x, 0); off += F.Size; - F.CMov(cond, table, off, r.y, 0); off += F.Size; - F.CMov(cond, table, off, r.z, 0); off += F.Size; - F.CMov(cond, table, off, r.t, 0); off += F.Size; + F.CMov(cond, table, off, r.ymx_h, 0); off += F.Size; + F.CMov(cond, table, off, r.ypx_h, 0); off += F.Size; + F.CMov(cond, table, off, r.xyd , 0); off += F.Size; + F.CMov(cond, table, off, r.z , 0); off += F.Size; } - F.CNegate(sign, r.x); - F.CNegate(sign, r.t); + F.CSwap(sign, r.ymx_h, r.ypx_h); + F.CNegate(sign, r.xyd); + } + + private static void PointPrecompute(ref PointAffine p, PointExtended[] points, int count, ref PointTemp t) + { + Debug.Assert(count > 0); + + Init(out points[0]); + PointCopy(ref p, ref points[0]); + + PointExtended d; Init(out d); + PointAdd(ref points[0], ref points[0], ref d, ref t); + + for (int i = 1; i < count; ++i) + { + Init(out points[i]); + PointAdd(ref points[i - 1], ref d, ref points[i], ref t); + } } - private static int[] PointPrecompute(ref PointAffine p, int count) + private static int[] PointPrecomputeZ(ref PointAffine p, int count, ref PointTemp t) { Debug.Assert(count > 0); @@ -861,43 +869,52 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 PointCopy(ref p, ref q); PointExtended d; Init(out d); - PointCopy(ref q, ref d); - PointAdd(ref q, ref d); + PointAdd(ref q, ref q, ref d, ref t); + PointPrecompZ r; Init(out r); int[] table = F.CreateTable(count * 4); int off = 0; int i = 0; for (;;) { - F.Copy(q.x, 0, table, off); off += F.Size; - F.Copy(q.y, 0, table, off); off += F.Size; - F.Copy(q.z, 0, table, off); off += F.Size; - F.Copy(q.t, 0, table, off); off += F.Size; + PointCopy(ref q, ref r); + + F.Copy(r.ymx_h, 0, table, off); off += F.Size; + F.Copy(r.ypx_h, 0, table, off); off += F.Size; + F.Copy(r.xyd , 0, table, off); off += F.Size; + F.Copy(r.z , 0, table, off); off += F.Size; if (++i == count) break; - PointAdd(ref d, ref q); + PointAdd(ref q, ref d, ref q, ref t); } return table; } - private static void PointPrecomputeVar(ref PointAffine p, PointExtended[] points, int count) + private static void PointPrecomputeZ(ref PointAffine p, PointPrecompZ[] points, int count, ref PointTemp t) { Debug.Assert(count > 0); - Init(out points[0]); - PointCopy(ref p, ref points[0]); + PointExtended q; Init(out q); + PointCopy(ref p, ref q); PointExtended d; Init(out d); - PointAddVar(false, ref points[0], ref points[0], ref d); + PointAdd(ref q, ref q, ref d, ref t); - for (int i = 1; i < count; ++i) + int i = 0; + for (;;) { - Init(out points[i]); - PointAddVar(false, ref points[i - 1], ref d, ref points[i]); + ref PointPrecompZ r = ref points[i]; + Init(out r); + PointCopy(ref q, ref r); + + if (++i == count) + break; + + PointAdd(ref q, ref d, ref q, ref t); } } @@ -910,14 +927,6 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 F.One(p.v); } - private static void PointSetNeutral(ref PointExtended p) - { - F.Zero(p.x); - F.One(p.y); - F.One(p.z); - F.Zero(p.t); - } - public static void Precompute() { lock (PrecompLock) @@ -930,17 +939,20 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 int totalPoints = wnafPoints + combPoints; PointExtended[] points = new PointExtended[totalPoints]; + PointTemp t; Init(out t); PointAffine b; b.x = B_x; b.y = B_y; - PointPrecomputeVar(ref b, points, wnafPoints); + PointPrecompute(ref b, points, wnafPoints, ref t); PointAccum p; Init(out p); F.Copy(B_x, 0, p.x, 0); F.Copy(B_y, 0, p.y, 0); - PointExtendXY(ref p); + F.One(p.z); + F.Copy(B_x, 0, p.u, 0); + F.Copy(B_y, 0, p.v, 0); int pointsIndex = wnafPoints; PointExtended[] toothPowers = new PointExtended[PrecompTeeth]; @@ -953,37 +965,48 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 { ref PointExtended sum = ref points[pointsIndex++]; Init(out sum); - PointSetNeutral(ref sum); for (int tooth = 0; tooth < PrecompTeeth; ++tooth) { - PointCopy(ref p, ref u); - PointAddVar(true, ref sum, ref u, ref sum); + if (tooth == 0) + { + PointCopy(ref p, ref sum); + } + else + { + PointCopy(ref p, ref u); + PointAdd(ref sum, ref u, ref sum, ref t); + } + PointDouble(ref p); PointCopy(ref p, ref toothPowers[tooth]); if (block + tooth != PrecompBlocks + PrecompTeeth - 2) { - for (int s = 1; s < PrecompSpacing; ++s) + for (int spacing = 1; spacing < PrecompSpacing; ++spacing) { PointDouble(ref p); } } } + F.Negate(sum.x, sum.x); + F.Negate(sum.t, sum.t); + for (int tooth = 0; tooth < (PrecompTeeth - 1); ++tooth) { int size = 1 << tooth; for (int j = 0; j < size; ++j, ++pointsIndex) { Init(out points[pointsIndex]); - PointAddVar(false, ref points[pointsIndex - size], ref toothPowers[tooth], - ref points[pointsIndex]); + PointAdd(ref points[pointsIndex - size], ref toothPowers[tooth], ref points[pointsIndex], + ref t); } } } Debug.Assert(pointsIndex == totalPoints); + // Set each z coordinate to 1/(2.z) to avoid calculating halves of x, y in the following code InvertDoubleZs(points); PrecompBaseWnaf = new PointPrecomp[wnafPoints]; @@ -993,39 +1016,47 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 ref PointPrecomp r = ref PrecompBaseWnaf[i]; Init(out r); + // Calculate x/2 and y/2 (because the z value holds half the inverse; see above). F.Mul(q.x, q.z, q.x); F.Mul(q.y, q.z, q.y); + // y/2 +/- x/2 F.Apm(q.y, q.x, r.ypx_h, r.ymx_h); + + // x/2 * y/2 * (4.d) == x.y.d F.Mul(q.x, q.y, r.xyd); F.Mul(r.xyd, C_d4, r.xyd); - F.Normalize(r.ypx_h); F.Normalize(r.ymx_h); + F.Normalize(r.ypx_h); F.Normalize(r.xyd); } PrecompBaseComb = F.CreateTable(combPoints * 3); - PointPrecomp t; Init(out t); + PointPrecomp s; Init(out s); int off = 0; for (int i = wnafPoints; i < totalPoints; ++i) { ref PointExtended q = ref points[i]; + // Calculate x/2 and y/2 (because the z value holds half the inverse; see above). F.Mul(q.x, q.z, q.x); F.Mul(q.y, q.z, q.y); - F.Apm(q.y, q.x, t.ypx_h, t.ymx_h); - F.Mul(q.x, q.y, t.xyd); - F.Mul(t.xyd, C_d4, t.xyd); + // y/2 +/- x/2 + F.Apm(q.y, q.x, s.ypx_h, s.ymx_h); - F.Normalize(t.ypx_h); - F.Normalize(t.ymx_h); - F.Normalize(t.xyd); + // x/2 * y/2 * (4.d) == x.y.d + F.Mul(q.x, q.y, s.xyd); + F.Mul(s.xyd, C_d4, s.xyd); - F.Copy(t.ypx_h, 0, PrecompBaseComb, off); off += F.Size; - F.Copy(t.ymx_h, 0, PrecompBaseComb, off); off += F.Size; - F.Copy(t.xyd , 0, PrecompBaseComb, off); off += F.Size; + F.Normalize(s.ymx_h); + F.Normalize(s.ypx_h); + F.Normalize(s.xyd); + + F.Copy(s.ymx_h, 0, PrecompBaseComb, off); off += F.Size; + F.Copy(s.ypx_h, 0, PrecompBaseComb, off); off += F.Size; + F.Copy(s.xyd , 0, PrecompBaseComb, off); off += F.Size; } Debug.Assert(off == PrecompBaseComb.Length); } @@ -1187,16 +1218,17 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 uint c2 = Nat.ShiftDownBit(ScalarUints, n, 1U); Debug.Assert(c2 == (1U << 31)); } - int[] table = PointPrecompute(ref p, 8); - PointExtended q; Init(out q); + PointPrecompZ q; Init(out q); + PointTemp t; Init(out t); + int[] table = PointPrecomputeZ(ref p, 8, ref t); PointSetNeutral(ref r); int w = 63; for (;;) { - PointLookup(n, w, table, ref q); - PointAdd(ref q, ref r); + PointLookupZ(n, w, table, ref q); + PointAdd(ref q, ref r, ref t); if (--w < 0) break; @@ -1211,7 +1243,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 private static void ScalarMultBase(byte[] k, ref PointAccum r) { // Equivalent (but much slower) - //PointAffine p; InitAffine(out p); + //PointAffine p; Init(out p); //F.Copy(B_x, 0, p.x, 0); //F.Copy(B_y, 0, p.y, 0); //ScalarMult(k, ref p, ref r); @@ -1226,6 +1258,10 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 uint c1 = Nat.CAdd(ScalarUints, ~(int)n[0] & 1, n, L, n); Debug.Assert(c1 == 0U); uint c2 = Nat.ShiftDownBit(ScalarUints, n, 1U); Debug.Assert(c2 == (1U << 31)); + /* + * Because we are using 4 teeth and 8 spacing, each limb of n corresponds to one of the 8 blocks. + * Therefore we can efficiently group the bits for each comb position using a (double) shuffle. + */ for (int i = 0; i < ScalarUints; ++i) { n[i] = Interleave.Shuffle2(n[i]); @@ -1233,8 +1269,10 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 } PointPrecomp p; Init(out p); + PointTemp t; Init(out t); PointSetNeutral(ref r); + int resultSign = 0; int cOff = (PrecompSpacing - 1) * PrecompTeeth; for (;;) @@ -1250,10 +1288,11 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 PointLookup(b, abs, ref p); - F.CSwap(sign, p.ypx_h, p.ymx_h); - F.CNegate(sign, p.xyd); + F.CNegate(resultSign ^ sign, r.x); + F.CNegate(resultSign ^ sign, r.u); + resultSign = sign; - PointAddPrecomp(ref p, ref r); + PointAdd(ref p, ref r, ref t); } if ((cOff -= PrecompTeeth) < 0) @@ -1261,6 +1300,9 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 PointDouble(ref r); } + + F.CNegate(resultSign, r.x); + F.CNegate(resultSign, r.u); } private static void ScalarMultBaseEncoded(byte[] k, byte[] r, int rOff) @@ -1288,13 +1330,12 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 private static void ScalarMultOrderVar(ref PointAffine p, ref PointAccum r) { - int width = 5; + sbyte[] ws_p = GetWnafVar(L, WnafWidth); - sbyte[] ws_p = GetWnafVar(L, width); - - int count = 1 << (width - 2); - PointExtended[] tp = new PointExtended[count]; - PointPrecomputeVar(ref p, tp, count); + int count = 1 << (WnafWidth - 2); + PointPrecompZ[] tp = new PointPrecompZ[count]; + PointTemp t; Init(out t); + PointPrecomputeZ(ref p, tp, count, ref t); PointSetNeutral(ref r); @@ -1306,7 +1347,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 int sign = wp >> 31; int index = (wp ^ sign) >> 1; - PointAddVar(sign != 0, ref tp[index], ref r); + PointAddVar(sign != 0, ref tp[index], ref r, ref t); } if (--bit < 0) @@ -1320,14 +1361,13 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 { Precompute(); - int width = 5; - sbyte[] ws_b = GetWnafVar(nb, WnafWidthBase); - sbyte[] ws_p = GetWnafVar(np, width); + sbyte[] ws_p = GetWnafVar(np, WnafWidth); - int count = 1 << (width - 2); - PointExtended[] tp = new PointExtended[count]; - PointPrecomputeVar(ref p, tp, count); + int count = 1 << (WnafWidth - 2); + PointPrecompZ[] tp = new PointPrecompZ[count]; + PointTemp t; Init(out t); + PointPrecomputeZ(ref p, tp, count, ref t); PointSetNeutral(ref r); @@ -1339,7 +1379,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 int sign = wb >> 31; int index = (wb ^ sign) >> 1; - PointAddPrecompVar(-sign, ref PrecompBaseWnaf[index], ref r); + PointAddVar(sign != 0, ref PrecompBaseWnaf[index], ref r, ref t); } int wp = ws_p[bit]; @@ -1348,7 +1388,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 int sign = wp >> 31; int index = (wp ^ sign) >> 1; - PointAddVar(sign != 0, ref tp[index], ref r); + PointAddVar(sign != 0, ref tp[index], ref r, ref t); } if (--bit < 0) |