diff options
author | Peter Dettman <peter.dettman@bouncycastle.org> | 2022-11-25 19:12:01 +0700 |
---|---|---|
committer | Peter Dettman <peter.dettman@bouncycastle.org> | 2022-11-25 19:12:01 +0700 |
commit | df0e0d95e952954d9c9b8588372b1f194bf329ce (patch) | |
tree | ac34a878cc1c31e9744b0d2aff468ab4d4442d56 | |
parent | Ed25519: cofactored verification (diff) | |
download | BouncyCastle.NET-ed25519-df0e0d95e952954d9c9b8588372b1f194bf329ce.tar.xz |
Ed448: cofactored verification
- Perf. opts.: Pornin's basis reduction
- factor out Scalar448 class
- factor out ScalarUtilities class
-rw-r--r-- | crypto/src/math/ec/rfc7748/X448Field.cs | 24 | ||||
-rw-r--r-- | crypto/src/math/ec/rfc8032/Ed25519.cs | 34 | ||||
-rw-r--r-- | crypto/src/math/ec/rfc8032/Ed448.cs | 1021 | ||||
-rw-r--r-- | crypto/src/math/ec/rfc8032/Scalar25519.cs | 338 | ||||
-rw-r--r-- | crypto/src/math/ec/rfc8032/Scalar448.cs | 819 | ||||
-rw-r--r-- | crypto/src/math/ec/rfc8032/ScalarUtilities.cs | 294 |
6 files changed, 1387 insertions, 1143 deletions
diff --git a/crypto/src/math/ec/rfc7748/X448Field.cs b/crypto/src/math/ec/rfc7748/X448Field.cs index 1df837d3a..7169bd6d8 100644 --- a/crypto/src/math/ec/rfc7748/X448Field.cs +++ b/crypto/src/math/ec/rfc7748/X448Field.cs @@ -241,6 +241,18 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748 } #endif + public static void Decode(byte[] x, uint[] z) + { + Decode56(x, 0, z, 0); + Decode56(x, 7, z, 2); + Decode56(x, 14, z, 4); + Decode56(x, 21, z, 6); + Decode56(x, 28, z, 8); + Decode56(x, 35, z, 10); + Decode56(x, 42, z, 12); + Decode56(x, 49, z, 14); + } + public static void Decode(byte[] x, int xOff, uint[] z) { Decode56(x, xOff, z, 0); @@ -369,6 +381,18 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748 } #endif + public static void Encode(uint[] x, byte[] z) + { + Encode56(x, 0, z, 0); + Encode56(x, 2, z, 7); + Encode56(x, 4, z, 14); + Encode56(x, 6, z, 21); + Encode56(x, 8, z, 28); + Encode56(x, 10, z, 35); + Encode56(x, 12, z, 42); + Encode56(x, 14, z, 49); + } + public static void Encode(uint[] x, byte[] z, int zOff) { Encode56(x, 0, z, zOff); diff --git a/crypto/src/math/ec/rfc8032/Ed25519.cs b/crypto/src/math/ec/rfc8032/Ed25519.cs index 0c95fade3..9f9daf39c 100644 --- a/crypto/src/math/ec/rfc8032/Ed25519.cs +++ b/crypto/src/math/ec/rfc8032/Ed25519.cs @@ -77,14 +77,15 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 private static readonly int[] C_d4 = { 0x0165E2B2, 0x034DCA13, 0x002ADD7A, 0x01A8283B, 0x00038052, 0x01E7A260, 0x03407977, 0x019CE331, 0x01C56DFF, 0x00901B67 }; - private const int WnafWidth = 5; - private const int WnafWidthBase = 7; + //private const int WnafWidth = 5; + private const int WnafWidth128 = 4; + private const int WnafWidthBase = 6; // ScalarMultBase is hard-coded for these values of blocks, teeth, spacing so they can't be freely changed private const int PrecompBlocks = 8; private const int PrecompTeeth = 4; private const int PrecompSpacing = 8; - //private const int PrecompRange = PrecompBlocks * PrecompTeeth * PrecompSpacing; // range == 256 + 
private const int PrecompRange = PrecompBlocks * PrecompTeeth * PrecompSpacing; // range == 256 private const int PrecompPoints = 1 << (PrecompTeeth - 1); private const int PrecompMask = PrecompPoints - 1; @@ -614,8 +615,6 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 Span<uint> v0 = stackalloc uint[4]; Span<uint> v1 = stackalloc uint[4]; - Scalar25519.ReduceBasisVar(nA, v0, v1); - Scalar25519.Multiply128Var(nS, v1, nS); #else byte[] R = Copy(sig, sigOff, PointBytes); byte[] S = Copy(sig, sigOff + PointBytes, ScalarBytes); @@ -658,9 +657,10 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 uint[] v0 = new uint[4]; uint[] v1 = new uint[4]; +#endif + Scalar25519.ReduceBasisVar(nA, v0, v1); Scalar25519.Multiply128Var(nS, v1, nS); -#endif Init(out PointAccum pZ); ScalarMultStraus128Var(nS, v0, ref pA, v1, ref pR, ref pZ); @@ -1302,7 +1302,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 #endif Scalar25519.Decode(k, n); - Scalar25519.ToSignedDigits(n, n); + Scalar25519.ToSignedDigits(256, n, n); Init(out PointPrecompZ q); Init(out PointTemp t); @@ -1327,7 +1327,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - private static void ScalarMultBase(ReadOnlySpan<byte> k, ref PointAccum r) + private static void ScalarMultBase(ReadOnlySpan<byte> k, ref PointAccum r) #else private static void ScalarMultBase(byte[] k, ref PointAccum r) #endif @@ -1347,7 +1347,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 #endif Scalar25519.Decode(k, n); - Scalar25519.ToSignedDigits(n, n); + Scalar25519.ToSignedDigits(PrecompRange, n, n); GroupCombBits(n); Init(out PointPrecomp p); @@ -1452,9 +1452,11 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 #else sbyte[] ws_p = new sbyte[253]; #endif - Scalar25519.GetOrderWnafVar(WnafWidth, ws_p); - int count = 1 << (WnafWidth - 2); + // NOTE: WnafWidth128 because of the special structure of the order + Scalar25519.GetOrderWnafVar(WnafWidth128, ws_p); + + int count = 1 << (WnafWidth128 - 2); 
PointPrecompZ[] tp = new PointPrecompZ[count]; Init(out PointTemp t); PointPrecomputeZ(ref p, tp, count, ref t); @@ -1486,6 +1488,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 #endif { Debug.Assert(nb.Length == ScalarUints); + Debug.Assert(nb[ScalarUints - 1] >> 29 == 0U); Debug.Assert(np.Length == 4); Debug.Assert(nq.Length == 4); @@ -1502,10 +1505,10 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 #endif Wnaf.GetSignedVar(nb, WnafWidthBase, ws_b); - Wnaf.GetSignedVar(np, WnafWidth - 1, ws_p); - Wnaf.GetSignedVar(nq, WnafWidth - 1, ws_q); + Wnaf.GetSignedVar(np, WnafWidth128, ws_p); + Wnaf.GetSignedVar(nq, WnafWidth128, ws_q); - int count = 1 << (WnafWidth - 3); + int count = 1 << (WnafWidth128 - 2); PointPrecompZ[] tp = new PointPrecompZ[count]; PointPrecompZ[] tq = new PointPrecompZ[count]; Init(out PointTemp t); @@ -1514,7 +1517,8 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 PointSetNeutral(ref r); - for (int bit = 127; bit >= 0; --bit) + int bit = 128; + while (--bit >= 0) { int wb = ws_b[bit]; if (wb != 0) diff --git a/crypto/src/math/ec/rfc8032/Ed448.cs b/crypto/src/math/ec/rfc8032/Ed448.cs index cc189615b..7b774896b 100644 --- a/crypto/src/math/ec/rfc8032/Ed448.cs +++ b/crypto/src/math/ec/rfc8032/Ed448.cs @@ -31,9 +31,6 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 Ed448ph = 1, } - private const ulong M26UL = 0x03FFFFFFUL; - private const ulong M28UL = 0x0FFFFFFFUL; - private const int CoordUints = 14; private const int PointBytes = CoordUints * 4 + 1; private const int ScalarUints = 14; @@ -47,28 +44,9 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 // "SigEd448" private static readonly byte[] Dom4Prefix = new byte[]{ 0x53, 0x69, 0x67, 0x45, 0x64, 0x34, 0x34, 0x38 }; - private static readonly uint[] P = { 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, - 0xFFFFFFFEU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU }; - private static readonly uint[] L = { 0xAB5844F3U, 0x2378C292U, 
0x8DC58F55U, 0x216CC272U, 0xAED63690U, 0xC44EDB49U, 0x7CCA23E9U, - 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0x3FFFFFFFU }; - - private const int L_0 = 0x04A7BB0D; // L_0:26/24 - private const int L_1 = 0x0873D6D5; // L_1:27/23 - private const int L_2 = 0x0A70AADC; // L_2:27/26 - private const int L_3 = 0x03D8D723; // L_3:26/-- - private const int L_4 = 0x096FDE93; // L_4:27/25 - private const int L_5 = 0x0B65129C; // L_5:27/26 - private const int L_6 = 0x063BB124; // L_6:27/-- - private const int L_7 = 0x08335DC1; // L_7:27/22 - - private const int L4_0 = 0x029EEC34; // L4_0:25/24 - private const int L4_1 = 0x01CF5B55; // L4_1:25/-- - private const int L4_2 = 0x09C2AB72; // L4_2:27/25 - private const int L4_3 = 0x0F635C8E; // L4_3:28/-- - private const int L4_4 = 0x05BF7A4C; // L4_4:26/25 - private const int L4_5 = 0x0D944A72; // L4_5:28/-- - private const int L4_6 = 0x08EEC492; // L4_6:27/24 - private const int L4_7 = 0x20CD7705; // L4_7:29/24 + private static readonly uint[] P = { 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, + 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFEU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, + 0xFFFFFFFFU }; private static readonly uint[] B_x = { 0x070CC05EU, 0x026A82BCU, 0x00938E26U, 0x080E18B0U, 0x0511433BU, 0x0F72AB66U, 0x0412AE1AU, 0x0A3D3A46U, 0x0A6DE324U, 0x00F1767EU, 0x04657047U, 0x036DA9E1U, 0x05A622BFU, @@ -77,17 +55,18 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 0x01CE67C3U, 0x073AD3FFU, 0x005A0C2DU, 0x07789C1EU, 0x0A398408U, 0x0A73736CU, 0x0C7624BEU, 0x003756C9U, 0x02488762U, 0x016EB6BCU, 0x0693F467U }; - // 2^224 * B - private static readonly uint[] B224_x = { 0x091780C7U, 0x0A7EA989U, 0x0D2476B6U, 0x004E4ECCU, 0x0C494B68U, - 0x00AF9F58U, 0x0DEE64FDU, 0x0E0F269FU, 0x0021BD26U, 0x085A61F6U, 0x0B5D284BU, 0x0C265C35U, 0x03775AFDU, - 0x058755EAU, 0x02ECF2C6U, 0x0617F174U }; - private static readonly uint[] B224_y = { 0x05EC556AU, 0x050109E2U, 0x0FD57E39U, 
0x0235366BU, 0x044B6B2EU, - 0x07B3C976U, 0x0B2B7B9CU, 0x0F7F9E82U, 0x00EC6409U, 0x0B6196ABU, 0x00A20D9EU, 0x088F1D16U, 0x0586F761U, - 0x0e3BE3B4U, 0x0E26395DU, 0x09983C26U }; + // 2^225 * B + private static readonly uint[] B225_x = { 0x06909ee2U, 0x01d7605cU, 0x0995ec8aU, 0x0fc4d970U, 0x0cf2b361U, + 0x02d82e9dU, 0x01225f55U, 0x007f0ef6U, 0x0aee9c55U, 0x0a240c13U, 0x05627b54U, 0x0d449d1eU, 0x03a44575U, + 0x007164a7U, 0x0bd4bd71U, 0x061a15fdU }; + private static readonly uint[] B225_y = { 0x0d3a9fe4U, 0x030696b9U, 0x07e7e326U, 0x068308c7U, 0x0ce0b8c8U, + 0x03ac222bU, 0x0304db8eU, 0x083ee319U, 0x05e5db0bU, 0x0eca503bU, 0x0b1c6539U, 0x078a8dceU, 0x02d256bcU, + 0x04a8b05eU, 0x0bd9fd57U, 0x0a1c3cb8U }; private const int C_d = -39081; - private const int WnafWidth = 5; + //private const int WnafWidth = 6; + private const int WnafWidth225 = 5; private const int WnafWidthBase = 7; // ScalarMultBase supports varying blocks, teeth, spacing so long as their product is in range [449, 479] @@ -100,7 +79,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 private static readonly object PrecompLock = new object(); private static PointAffine[] PrecompBaseWnaf = null; - private static PointAffine[] PrecompBase224Wnaf = null; + private static PointAffine[] PrecompBase225Wnaf = null; private static uint[] PrecompBaseComb = null; private struct PointAffine @@ -115,9 +94,9 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 private static byte[] CalculateS(byte[] r, byte[] k, byte[] s) { - uint[] t = new uint[ScalarUints * 2]; DecodeScalar(r, 0, t); - uint[] u = new uint[ScalarUints]; DecodeScalar(k, 0, u); - uint[] v = new uint[ScalarUints]; DecodeScalar(s, 0, v); + uint[] t = new uint[ScalarUints * 2]; Scalar448.Decode(r, t); + uint[] u = new uint[ScalarUints]; Scalar448.Decode(k, u); + uint[] v = new uint[ScalarUints]; Scalar448.Decode(s, v); Nat.MulAddTo(ScalarUints, u, v, t); @@ -126,7 +105,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 { Codec.Encode32(t[i], result, i * 4); } - return 
ReduceScalar(result); + return Scalar448.Reduce(result); } private static bool CheckContextVar(byte[] ctx) @@ -208,20 +187,20 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 } #endif - - private static bool CheckPointFullVar(byte[] p) +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + private static bool CheckPointFullVar(ReadOnlySpan<byte> p) { if ((p[PointBytes - 1] & 0x7F) != 0x00) return false; - uint y13 = Codec.Decode32(p, 52); + uint y13 = Codec.Decode32(p[52..]); uint t0 = y13; uint t1 = y13 ^ P[13]; for (int i = CoordUints - 2; i > 0; --i) { - uint yi = Codec.Decode32(p, i * 4); + uint yi = Codec.Decode32(p[(i * 4)..]); // Reject non-canonical encodings (i.e. >= P) if (t1 == 0 && yi > P[i]) @@ -231,7 +210,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 t1 |= yi ^ P[i]; } - uint y0 = Codec.Decode32(p, 0); + uint y0 = Codec.Decode32(p); // Reject 0 and 1 if (t0 == 0 && y0 <= 1U) @@ -243,24 +222,40 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 return true; } - -#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - private static bool CheckScalarVar(ReadOnlySpan<byte> s, Span<uint> n) +#else + private static bool CheckPointFullVar(byte[] p) { - if (s[ScalarBytes - 1] != 0x00) + if ((p[PointBytes - 1] & 0x7F) != 0x00) return false; - DecodeScalar(s, n); - return !Nat.Gte(ScalarUints, n, L); - } -#else - private static bool CheckScalarVar(byte[] s, uint[] n) - { - if (s[ScalarBytes - 1] != 0x00) + uint y13 = Codec.Decode32(p, 52); + + uint t0 = y13; + uint t1 = y13 ^ P[13]; + + for (int i = CoordUints - 2; i > 0; --i) + { + uint yi = Codec.Decode32(p, i * 4); + + // Reject non-canonical encodings (i.e. >= P) + if (t1 == 0 && yi > P[i]) + return false; + + t0 |= yi; + t1 |= yi ^ P[i]; + } + + uint y0 = Codec.Decode32(p, 0); + + // Reject 0 and 1 + if (t0 == 0 && y0 <= 1U) return false; - DecodeScalar(s, 0, n); - return !Nat.Gte(ScalarUints, n, L); + // Reject P - 1 and non-canonical encodings (i.e. 
>= P) + if (t1 == 0 && y0 >= (P[0] - 1U)) + return false; + + return true; } #endif @@ -281,16 +276,15 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 return new ShakeDigest(256); } - private static bool DecodePointVar(byte[] p, int pOff, bool negate, ref PointProjective r) +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + private static bool DecodePointVar(ReadOnlySpan<byte> p, bool negate, ref PointProjective r) +#else + private static bool DecodePointVar(byte[] p, bool negate, ref PointProjective r) +#endif { - byte[] py = Copy(p, pOff, PointBytes); - if (!CheckPointFullVar(py)) - return false; - - int x_0 = (py[PointBytes - 1] & 0x80) >> 7; - py[PointBytes - 1] &= 0x7F; + int x_0 = (p[PointBytes - 1] & 0x80) >> 7; - F.Decode(py, 0, r.y); + F.Decode(p, r.y); uint[] u = F.Create(); uint[] v = F.Create(); @@ -317,22 +311,6 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 return true; } - private static void DecodeScalar(byte[] k, int kOff, uint[] n) - { - Debug.Assert(k[kOff + ScalarBytes - 1] == 0x00); - - Codec.Decode32(k, kOff, n, 0, ScalarUints); - } - -#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - private static void DecodeScalar(ReadOnlySpan<byte> k, Span<uint> n) - { - Debug.Assert(k[ScalarBytes - 1] == 0x00); - - Codec.Decode32(k, n[..ScalarUints]); - } -#endif - private static void Dom4(IXof d, byte phflag, byte[] ctx) { int n = Dom4Prefix.Length; @@ -466,7 +444,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 d.BlockUpdate(m, mOff, mLen); d.OutputFinal(h, 0, h.Length); - byte[] r = ReduceScalar(h); + byte[] r = Scalar448.Reduce(h); byte[] R = new byte[PointBytes]; ScalarMultBaseEncoded(r, R, 0); @@ -476,7 +454,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 d.BlockUpdate(m, mOff, mLen); d.OutputFinal(h, 0, h.Length); - byte[] k = ReduceScalar(h); + byte[] k = Scalar448.Reduce(h); byte[] S = CalculateS(r, k, s); Array.Copy(R, 0, sig, sigOff, PointBytes); @@ -529,21 +507,30 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 throw new 
ArgumentException("ctx"); #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - Span<byte> RS = stackalloc byte[PointBytes + ScalarBytes]; - RS.CopyFrom(sig.AsSpan(sigOff, PointBytes + ScalarBytes)); + Span<byte> signature = stackalloc byte[SignatureSize]; + signature.CopyFrom(sig.AsSpan(sigOff, SignatureSize)); + var R = signature[..PointBytes]; + var S = signature[PointBytes..]; - var R = RS[..PointBytes]; - var S = RS[PointBytes..]; + Span<byte> A = stackalloc byte[PublicKeySize]; + A.CopyFrom(pk.AsSpan(pkOff)); if (!CheckPointVar(R)) return false; Span<uint> nS = stackalloc uint[ScalarUints]; - if (!CheckScalarVar(S, nS)) + if (!Scalar448.CheckVar(S, nS)) + return false; + + if (!CheckPointFullVar(A)) + return false; + + Init(out PointProjective pR); + if (!DecodePointVar(R, true, ref pR)) return false; Init(out PointProjective pA); - if (!DecodePointVar(pk, pkOff, true, ref pA)) + if (!DecodePointVar(A, true, ref pA)) return false; IXof d = CreateXof(); @@ -551,34 +538,39 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 Dom4(d, phflag, ctx); d.BlockUpdate(R); - d.BlockUpdate(pk.AsSpan(pkOff, PointBytes)); + d.BlockUpdate(A); d.BlockUpdate(m.AsSpan(mOff, mLen)); d.OutputFinal(h); Span<byte> k = stackalloc byte[ScalarBytes]; - ReduceScalar(h, k); + Scalar448.Reduce(h, k); Span<uint> nA = stackalloc uint[ScalarUints]; - DecodeScalar(k, nA); - - Init(out PointProjective pR); - ScalarMultStrausVar(nS, nA, ref pA, ref pR); + Scalar448.Decode(k, nA); - Span<byte> check = stackalloc byte[PointBytes]; - return 0 != EncodePoint(ref pR, check) && check.SequenceEqual(R); + Span<uint> v0 = stackalloc uint[8]; + Span<uint> v1 = stackalloc uint[8]; #else byte[] R = Copy(sig, sigOff, PointBytes); byte[] S = Copy(sig, sigOff + PointBytes, ScalarBytes); + byte[] A = Copy(pk, pkOff, PublicKeySize); if (!CheckPointVar(R)) return false; uint[] nS = new uint[ScalarUints]; - if (!CheckScalarVar(S, nS)) + if (!Scalar448.CheckVar(S, nS)) + return false; + + if 
(!CheckPointFullVar(A)) + return false; + + Init(out PointProjective pR); + if (!DecodePointVar(R, true, ref pR)) return false; Init(out PointProjective pA); - if (!DecodePointVar(pk, pkOff, true, ref pA)) + if (!DecodePointVar(A, true, ref pA)) return false; IXof d = CreateXof(); @@ -586,21 +578,30 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 Dom4(d, phflag, ctx); d.BlockUpdate(R, 0, PointBytes); - d.BlockUpdate(pk, pkOff, PointBytes); + d.BlockUpdate(A, 0, PointBytes); d.BlockUpdate(m, mOff, mLen); d.OutputFinal(h, 0, h.Length); - byte[] k = ReduceScalar(h); + byte[] k = Scalar448.Reduce(h); uint[] nA = new uint[ScalarUints]; - DecodeScalar(k, 0, nA); + Scalar448.Decode(k, nA); - Init(out PointProjective pR); - ScalarMultStrausVar(nS, nA, ref pA, ref pR); - - byte[] check = new byte[PointBytes]; - return 0 != EncodePoint(ref pR, check, 0) && Arrays.AreEqual(check, R); + uint[] v0 = new uint[8]; + uint[] v1 = new uint[8]; #endif + + Scalar448.ReduceBasisVar(nA, v0, v1); + Scalar448.Multiply225Var(nS, v1, nS); + + Init(out PointProjective pZ); + ScalarMultStraus225Var(nS, v0, ref pA, v1, ref pR, ref pZ); + + F.Normalize(pZ.x); + F.Normalize(pZ.y); + F.Normalize(pZ.z); + + return IsNeutralElementVar(pZ.x, pZ.y, pZ.z); } private static void Init(out PointAffine r) @@ -996,12 +997,12 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 PointPrecomputeVar(ref p, points, 0, wnafPoints); - Init(out PointProjective p224); - F.Copy(B224_x, 0, p224.x, 0); - F.Copy(B224_y, 0, p224.y, 0); - F.One(p224.z); + Init(out PointProjective p225); + F.Copy(B225_x, 0, p225.x, 0); + F.Copy(B225_y, 0, p225.y, 0); + F.One(p225.z); - PointPrecomputeVar(ref p224, points, wnafPoints, wnafPoints); + PointPrecomputeVar(ref p225, points, wnafPoints, wnafPoints); int pointsIndex = wnafPoints * 2; PointProjective[] toothPowers = new PointProjective[PrecompTeeth]; @@ -1065,11 +1066,11 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 F.Mul(q.y, q.z, r.y); F.Normalize(r.y); } - PrecompBase224Wnaf = new 
PointAffine[wnafPoints]; + PrecompBase225Wnaf = new PointAffine[wnafPoints]; for (int i = 0; i < wnafPoints; ++i) { ref PointProjective q = ref points[wnafPoints + i]; - ref PointAffine r = ref PrecompBase224Wnaf[i]; + ref PointAffine r = ref PrecompBase225Wnaf[i]; Init(out r); F.Mul(q.x, q.z, r.x); F.Normalize(r.x); @@ -1112,619 +1113,23 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 } #endif - private static byte[] ReduceScalar(byte[] n) - { - byte[] r = new byte[ScalarBytes]; - #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - ReduceScalar(n, r); + private static void ScalarMult(ReadOnlySpan<byte> k, ref PointProjective p, ref PointProjective r) #else - ulong x00 = Codec.Decode32(n, 0); // x00:32/-- - ulong x01 = (Codec.Decode24(n, 4) << 4); // x01:28/-- - ulong x02 = Codec.Decode32(n, 7); // x02:32/-- - ulong x03 = (Codec.Decode24(n, 11) << 4); // x03:28/-- - ulong x04 = Codec.Decode32(n, 14); // x04:32/-- - ulong x05 = (Codec.Decode24(n, 18) << 4); // x05:28/-- - ulong x06 = Codec.Decode32(n, 21); // x06:32/-- - ulong x07 = (Codec.Decode24(n, 25) << 4); // x07:28/-- - ulong x08 = Codec.Decode32(n, 28); // x08:32/-- - ulong x09 = (Codec.Decode24(n, 32) << 4); // x09:28/-- - ulong x10 = Codec.Decode32(n, 35); // x10:32/-- - ulong x11 = (Codec.Decode24(n, 39) << 4); // x11:28/-- - ulong x12 = Codec.Decode32(n, 42); // x12:32/-- - ulong x13 = (Codec.Decode24(n, 46) << 4); // x13:28/-- - ulong x14 = Codec.Decode32(n, 49); // x14:32/-- - ulong x15 = (Codec.Decode24(n, 53) << 4); // x15:28/-- - ulong x16 = Codec.Decode32(n, 56); // x16:32/-- - ulong x17 = (Codec.Decode24(n, 60) << 4); // x17:28/-- - ulong x18 = Codec.Decode32(n, 63); // x18:32/-- - ulong x19 = (Codec.Decode24(n, 67) << 4); // x19:28/-- - ulong x20 = Codec.Decode32(n, 70); // x20:32/-- - ulong x21 = (Codec.Decode24(n, 74) << 4); // x21:28/-- - ulong x22 = Codec.Decode32(n, 77); // x22:32/-- - ulong x23 = (Codec.Decode24(n, 81) << 4); // x23:28/-- - ulong x24 = Codec.Decode32(n, 84); // 
x24:32/-- - ulong x25 = (Codec.Decode24(n, 88) << 4); // x25:28/-- - ulong x26 = Codec.Decode32(n, 91); // x26:32/-- - ulong x27 = (Codec.Decode24(n, 95) << 4); // x27:28/-- - ulong x28 = Codec.Decode32(n, 98); // x28:32/-- - ulong x29 = (Codec.Decode24(n, 102) << 4); // x29:28/-- - ulong x30 = Codec.Decode32(n, 105); // x30:32/-- - ulong x31 = (Codec.Decode24(n, 109) << 4); // x31:28/-- - ulong x32 = Codec.Decode16(n, 112); // x32:16/-- - - //x32 += (x31 >> 28); x31 &= M28UL; - x16 += x32 * L4_0; // x16:42/-- - x17 += x32 * L4_1; // x17:41/28 - x18 += x32 * L4_2; // x18:43/42 - x19 += x32 * L4_3; // x19:44/28 - x20 += x32 * L4_4; // x20:43/-- - x21 += x32 * L4_5; // x21:44/28 - x22 += x32 * L4_6; // x22:43/41 - x23 += x32 * L4_7; // x23:45/41 - - x31 += (x30 >> 28); x30 &= M28UL; // x31:28/--, x30:28/-- - x15 += x31 * L4_0; // x15:54/-- - x16 += x31 * L4_1; // x16:53/42 - x17 += x31 * L4_2; // x17:55/54 - x18 += x31 * L4_3; // x18:56/44 - x19 += x31 * L4_4; // x19:55/-- - x20 += x31 * L4_5; // x20:56/43 - x21 += x31 * L4_6; // x21:55/53 - x22 += x31 * L4_7; // x22:57/53 - - //x30 += (x29 >> 28); x29 &= M28UL; - x14 += x30 * L4_0; // x14:54/-- - x15 += x30 * L4_1; // x15:54/53 - x16 += x30 * L4_2; // x16:56/-- - x17 += x30 * L4_3; // x17:57/-- - x18 += x30 * L4_4; // x18:56/55 - x19 += x30 * L4_5; // x19:56/55 - x20 += x30 * L4_6; // x20:57/-- - x21 += x30 * L4_7; // x21:57/56 - - x29 += (x28 >> 28); x28 &= M28UL; // x29:28/--, x28:28/-- - x13 += x29 * L4_0; // x13:54/-- - x14 += x29 * L4_1; // x14:54/53 - x15 += x29 * L4_2; // x15:56/-- - x16 += x29 * L4_3; // x16:57/-- - x17 += x29 * L4_4; // x17:57/55 - x18 += x29 * L4_5; // x18:57/55 - x19 += x29 * L4_6; // x19:57/52 - x20 += x29 * L4_7; // x20:58/52 - - //x28 += (x27 >> 28); x27 &= M28UL; - x12 += x28 * L4_0; // x12:54/-- - x13 += x28 * L4_1; // x13:54/53 - x14 += x28 * L4_2; // x14:56/-- - x15 += x28 * L4_3; // x15:57/-- - x16 += x28 * L4_4; // x16:57/55 - x17 += x28 * L4_5; // x17:58/-- - x18 += x28 * L4_6; 
// x18:58/-- - x19 += x28 * L4_7; // x19:58/53 - - x27 += (x26 >> 28); x26 &= M28UL; // x27:28/--, x26:28/-- - x11 += x27 * L4_0; // x11:54/-- - x12 += x27 * L4_1; // x12:54/53 - x13 += x27 * L4_2; // x13:56/-- - x14 += x27 * L4_3; // x14:57/-- - x15 += x27 * L4_4; // x15:57/55 - x16 += x27 * L4_5; // x16:58/-- - x17 += x27 * L4_6; // x17:58/56 - x18 += x27 * L4_7; // x18:59/-- - - //x26 += (x25 >> 28); x25 &= M28UL; - x10 += x26 * L4_0; // x10:54/-- - x11 += x26 * L4_1; // x11:54/53 - x12 += x26 * L4_2; // x12:56/-- - x13 += x26 * L4_3; // x13:57/-- - x14 += x26 * L4_4; // x14:57/55 - x15 += x26 * L4_5; // x15:58/-- - x16 += x26 * L4_6; // x16:58/56 - x17 += x26 * L4_7; // x17:59/-- - - x25 += (x24 >> 28); x24 &= M28UL; // x25:28/--, x24:28/-- - x09 += x25 * L4_0; // x09:54/-- - x10 += x25 * L4_1; // x10:54/53 - x11 += x25 * L4_2; // x11:56/-- - x12 += x25 * L4_3; // x12:57/-- - x13 += x25 * L4_4; // x13:57/55 - x14 += x25 * L4_5; // x14:58/-- - x15 += x25 * L4_6; // x15:58/56 - x16 += x25 * L4_7; // x16:59/-- - - x21 += (x20 >> 28); x20 &= M28UL; // x21:58/--, x20:28/-- - x22 += (x21 >> 28); x21 &= M28UL; // x22:57/54, x21:28/-- - x23 += (x22 >> 28); x22 &= M28UL; // x23:45/42, x22:28/-- - x24 += (x23 >> 28); x23 &= M28UL; // x24:28/18, x23:28/-- - - x08 += x24 * L4_0; // x08:54/-- - x09 += x24 * L4_1; // x09:55/-- - x10 += x24 * L4_2; // x10:56/46 - x11 += x24 * L4_3; // x11:57/46 - x12 += x24 * L4_4; // x12:57/55 - x13 += x24 * L4_5; // x13:58/-- - x14 += x24 * L4_6; // x14:58/56 - x15 += x24 * L4_7; // x15:59/-- - - x07 += x23 * L4_0; // x07:54/-- - x08 += x23 * L4_1; // x08:54/53 - x09 += x23 * L4_2; // x09:56/53 - x10 += x23 * L4_3; // x10:57/46 - x11 += x23 * L4_4; // x11:57/55 - x12 += x23 * L4_5; // x12:58/-- - x13 += x23 * L4_6; // x13:58/56 - x14 += x23 * L4_7; // x14:59/-- - - x06 += x22 * L4_0; // x06:54/-- - x07 += x22 * L4_1; // x07:54/53 - x08 += x22 * L4_2; // x08:56/-- - x09 += x22 * L4_3; // x09:57/53 - x10 += x22 * L4_4; // x10:57/55 - x11 += 
x22 * L4_5; // x11:58/-- - x12 += x22 * L4_6; // x12:58/56 - x13 += x22 * L4_7; // x13:59/-- - - x18 += (x17 >> 28); x17 &= M28UL; // x18:59/31, x17:28/-- - x19 += (x18 >> 28); x18 &= M28UL; // x19:58/54, x18:28/-- - x20 += (x19 >> 28); x19 &= M28UL; // x20:30/29, x19:28/-- - x21 += (x20 >> 28); x20 &= M28UL; // x21:28/03, x20:28/-- - - x05 += x21 * L4_0; // x05:54/-- - x06 += x21 * L4_1; // x06:55/-- - x07 += x21 * L4_2; // x07:56/31 - x08 += x21 * L4_3; // x08:57/31 - x09 += x21 * L4_4; // x09:57/56 - x10 += x21 * L4_5; // x10:58/-- - x11 += x21 * L4_6; // x11:58/56 - x12 += x21 * L4_7; // x12:59/-- - - x04 += x20 * L4_0; // x04:54/-- - x05 += x20 * L4_1; // x05:54/53 - x06 += x20 * L4_2; // x06:56/53 - x07 += x20 * L4_3; // x07:57/31 - x08 += x20 * L4_4; // x08:57/55 - x09 += x20 * L4_5; // x09:58/-- - x10 += x20 * L4_6; // x10:58/56 - x11 += x20 * L4_7; // x11:59/-- - - x03 += x19 * L4_0; // x03:54/-- - x04 += x19 * L4_1; // x04:54/53 - x05 += x19 * L4_2; // x05:56/-- - x06 += x19 * L4_3; // x06:57/53 - x07 += x19 * L4_4; // x07:57/55 - x08 += x19 * L4_5; // x08:58/-- - x09 += x19 * L4_6; // x09:58/56 - x10 += x19 * L4_7; // x10:59/-- - - x15 += (x14 >> 28); x14 &= M28UL; // x15:59/31, x14:28/-- - x16 += (x15 >> 28); x15 &= M28UL; // x16:59/32, x15:28/-- - x17 += (x16 >> 28); x16 &= M28UL; // x17:31/29, x16:28/-- - x18 += (x17 >> 28); x17 &= M28UL; // x18:28/04, x17:28/-- - - x02 += x18 * L4_0; // x02:54/-- - x03 += x18 * L4_1; // x03:55/-- - x04 += x18 * L4_2; // x04:56/32 - x05 += x18 * L4_3; // x05:57/32 - x06 += x18 * L4_4; // x06:57/56 - x07 += x18 * L4_5; // x07:58/-- - x08 += x18 * L4_6; // x08:58/56 - x09 += x18 * L4_7; // x09:59/-- - - x01 += x17 * L4_0; // x01:54/-- - x02 += x17 * L4_1; // x02:54/53 - x03 += x17 * L4_2; // x03:56/53 - x04 += x17 * L4_3; // x04:57/32 - x05 += x17 * L4_4; // x05:57/55 - x06 += x17 * L4_5; // x06:58/-- - x07 += x17 * L4_6; // x07:58/56 - x08 += x17 * L4_7; // x08:59/-- - - x16 *= 4; - x16 += (x15 >> 26); x15 &= M26UL; - 
x16 += 1; // x16:30/01 - - x00 += x16 * L_0; - x01 += x16 * L_1; - x02 += x16 * L_2; - x03 += x16 * L_3; - x04 += x16 * L_4; - x05 += x16 * L_5; - x06 += x16 * L_6; - x07 += x16 * L_7; - - x01 += (x00 >> 28); x00 &= M28UL; - x02 += (x01 >> 28); x01 &= M28UL; - x03 += (x02 >> 28); x02 &= M28UL; - x04 += (x03 >> 28); x03 &= M28UL; - x05 += (x04 >> 28); x04 &= M28UL; - x06 += (x05 >> 28); x05 &= M28UL; - x07 += (x06 >> 28); x06 &= M28UL; - x08 += (x07 >> 28); x07 &= M28UL; - x09 += (x08 >> 28); x08 &= M28UL; - x10 += (x09 >> 28); x09 &= M28UL; - x11 += (x10 >> 28); x10 &= M28UL; - x12 += (x11 >> 28); x11 &= M28UL; - x13 += (x12 >> 28); x12 &= M28UL; - x14 += (x13 >> 28); x13 &= M28UL; - x15 += (x14 >> 28); x14 &= M28UL; - x16 = (x15 >> 26); x15 &= M26UL; - - x16 -= 1; - - Debug.Assert(x16 == 0UL || x16 == ulong.MaxValue); - - x00 -= x16 & L_0; - x01 -= x16 & L_1; - x02 -= x16 & L_2; - x03 -= x16 & L_3; - x04 -= x16 & L_4; - x05 -= x16 & L_5; - x06 -= x16 & L_6; - x07 -= x16 & L_7; - - x01 += (ulong)((long)x00 >> 28); x00 &= M28UL; - x02 += (ulong)((long)x01 >> 28); x01 &= M28UL; - x03 += (ulong)((long)x02 >> 28); x02 &= M28UL; - x04 += (ulong)((long)x03 >> 28); x03 &= M28UL; - x05 += (ulong)((long)x04 >> 28); x04 &= M28UL; - x06 += (ulong)((long)x05 >> 28); x05 &= M28UL; - x07 += (ulong)((long)x06 >> 28); x06 &= M28UL; - x08 += (ulong)((long)x07 >> 28); x07 &= M28UL; - x09 += (ulong)((long)x08 >> 28); x08 &= M28UL; - x10 += (ulong)((long)x09 >> 28); x09 &= M28UL; - x11 += (ulong)((long)x10 >> 28); x10 &= M28UL; - x12 += (ulong)((long)x11 >> 28); x11 &= M28UL; - x13 += (ulong)((long)x12 >> 28); x12 &= M28UL; - x14 += (ulong)((long)x13 >> 28); x13 &= M28UL; - x15 += (ulong)((long)x14 >> 28); x14 &= M28UL; - - Debug.Assert(x15 >> 26 == 0UL); - - Codec.Encode56(x00 | (x01 << 28), r, 0); - Codec.Encode56(x02 | (x03 << 28), r, 7); - Codec.Encode56(x04 | (x05 << 28), r, 14); - Codec.Encode56(x06 | (x07 << 28), r, 21); - Codec.Encode56(x08 | (x09 << 28), r, 28); - 
Codec.Encode56(x10 | (x11 << 28), r, 35); - Codec.Encode56(x12 | (x13 << 28), r, 42); - Codec.Encode56(x14 | (x15 << 28), r, 49); - //r[ScalarBytes - 1] = 0; -#endif - - return r; - } - -#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - private static void ReduceScalar(ReadOnlySpan<byte> n, Span<byte> r) - { - ulong x00 = Codec.Decode32(n[ 0..]); // x00:32/-- - ulong x01 = (Codec.Decode24(n[ 4..]) << 4); // x01:28/-- - ulong x02 = Codec.Decode32(n[ 7..]); // x02:32/-- - ulong x03 = (Codec.Decode24(n[ 11..]) << 4); // x03:28/-- - ulong x04 = Codec.Decode32(n[ 14..]); // x04:32/-- - ulong x05 = (Codec.Decode24(n[ 18..]) << 4); // x05:28/-- - ulong x06 = Codec.Decode32(n[ 21..]); // x06:32/-- - ulong x07 = (Codec.Decode24(n[ 25..]) << 4); // x07:28/-- - ulong x08 = Codec.Decode32(n[ 28..]); // x08:32/-- - ulong x09 = (Codec.Decode24(n[ 32..]) << 4); // x09:28/-- - ulong x10 = Codec.Decode32(n[ 35..]); // x10:32/-- - ulong x11 = (Codec.Decode24(n[ 39..]) << 4); // x11:28/-- - ulong x12 = Codec.Decode32(n[ 42..]); // x12:32/-- - ulong x13 = (Codec.Decode24(n[ 46..]) << 4); // x13:28/-- - ulong x14 = Codec.Decode32(n[ 49..]); // x14:32/-- - ulong x15 = (Codec.Decode24(n[ 53..]) << 4); // x15:28/-- - ulong x16 = Codec.Decode32(n[ 56..]); // x16:32/-- - ulong x17 = (Codec.Decode24(n[ 60..]) << 4); // x17:28/-- - ulong x18 = Codec.Decode32(n[ 63..]); // x18:32/-- - ulong x19 = (Codec.Decode24(n[ 67..]) << 4); // x19:28/-- - ulong x20 = Codec.Decode32(n[ 70..]); // x20:32/-- - ulong x21 = (Codec.Decode24(n[ 74..]) << 4); // x21:28/-- - ulong x22 = Codec.Decode32(n[ 77..]); // x22:32/-- - ulong x23 = (Codec.Decode24(n[ 81..]) << 4); // x23:28/-- - ulong x24 = Codec.Decode32(n[ 84..]); // x24:32/-- - ulong x25 = (Codec.Decode24(n[ 88..]) << 4); // x25:28/-- - ulong x26 = Codec.Decode32(n[ 91..]); // x26:32/-- - ulong x27 = (Codec.Decode24(n[ 95..]) << 4); // x27:28/-- - ulong x28 = Codec.Decode32(n[ 98..]); // x28:32/-- - ulong x29 = (Codec.Decode24(n[102..]) << 4); 
// x29:28/-- - ulong x30 = Codec.Decode32(n[105..]); // x30:32/-- - ulong x31 = (Codec.Decode24(n[109..]) << 4); // x31:28/-- - ulong x32 = Codec.Decode16(n[112..]); // x32:16/-- - - //x32 += (x31 >> 28); x31 &= M28UL; - x16 += x32 * L4_0; // x16:42/-- - x17 += x32 * L4_1; // x17:41/28 - x18 += x32 * L4_2; // x18:43/42 - x19 += x32 * L4_3; // x19:44/28 - x20 += x32 * L4_4; // x20:43/-- - x21 += x32 * L4_5; // x21:44/28 - x22 += x32 * L4_6; // x22:43/41 - x23 += x32 * L4_7; // x23:45/41 - - x31 += (x30 >> 28); x30 &= M28UL; // x31:28/--, x30:28/-- - x15 += x31 * L4_0; // x15:54/-- - x16 += x31 * L4_1; // x16:53/42 - x17 += x31 * L4_2; // x17:55/54 - x18 += x31 * L4_3; // x18:56/44 - x19 += x31 * L4_4; // x19:55/-- - x20 += x31 * L4_5; // x20:56/43 - x21 += x31 * L4_6; // x21:55/53 - x22 += x31 * L4_7; // x22:57/53 - - //x30 += (x29 >> 28); x29 &= M28UL; - x14 += x30 * L4_0; // x14:54/-- - x15 += x30 * L4_1; // x15:54/53 - x16 += x30 * L4_2; // x16:56/-- - x17 += x30 * L4_3; // x17:57/-- - x18 += x30 * L4_4; // x18:56/55 - x19 += x30 * L4_5; // x19:56/55 - x20 += x30 * L4_6; // x20:57/-- - x21 += x30 * L4_7; // x21:57/56 - - x29 += (x28 >> 28); x28 &= M28UL; // x29:28/--, x28:28/-- - x13 += x29 * L4_0; // x13:54/-- - x14 += x29 * L4_1; // x14:54/53 - x15 += x29 * L4_2; // x15:56/-- - x16 += x29 * L4_3; // x16:57/-- - x17 += x29 * L4_4; // x17:57/55 - x18 += x29 * L4_5; // x18:57/55 - x19 += x29 * L4_6; // x19:57/52 - x20 += x29 * L4_7; // x20:58/52 - - //x28 += (x27 >> 28); x27 &= M28UL; - x12 += x28 * L4_0; // x12:54/-- - x13 += x28 * L4_1; // x13:54/53 - x14 += x28 * L4_2; // x14:56/-- - x15 += x28 * L4_3; // x15:57/-- - x16 += x28 * L4_4; // x16:57/55 - x17 += x28 * L4_5; // x17:58/-- - x18 += x28 * L4_6; // x18:58/-- - x19 += x28 * L4_7; // x19:58/53 - - x27 += (x26 >> 28); x26 &= M28UL; // x27:28/--, x26:28/-- - x11 += x27 * L4_0; // x11:54/-- - x12 += x27 * L4_1; // x12:54/53 - x13 += x27 * L4_2; // x13:56/-- - x14 += x27 * L4_3; // x14:57/-- - x15 += x27 * 
L4_4; // x15:57/55 - x16 += x27 * L4_5; // x16:58/-- - x17 += x27 * L4_6; // x17:58/56 - x18 += x27 * L4_7; // x18:59/-- - - //x26 += (x25 >> 28); x25 &= M28UL; - x10 += x26 * L4_0; // x10:54/-- - x11 += x26 * L4_1; // x11:54/53 - x12 += x26 * L4_2; // x12:56/-- - x13 += x26 * L4_3; // x13:57/-- - x14 += x26 * L4_4; // x14:57/55 - x15 += x26 * L4_5; // x15:58/-- - x16 += x26 * L4_6; // x16:58/56 - x17 += x26 * L4_7; // x17:59/-- - - x25 += (x24 >> 28); x24 &= M28UL; // x25:28/--, x24:28/-- - x09 += x25 * L4_0; // x09:54/-- - x10 += x25 * L4_1; // x10:54/53 - x11 += x25 * L4_2; // x11:56/-- - x12 += x25 * L4_3; // x12:57/-- - x13 += x25 * L4_4; // x13:57/55 - x14 += x25 * L4_5; // x14:58/-- - x15 += x25 * L4_6; // x15:58/56 - x16 += x25 * L4_7; // x16:59/-- - - x21 += (x20 >> 28); x20 &= M28UL; // x21:58/--, x20:28/-- - x22 += (x21 >> 28); x21 &= M28UL; // x22:57/54, x21:28/-- - x23 += (x22 >> 28); x22 &= M28UL; // x23:45/42, x22:28/-- - x24 += (x23 >> 28); x23 &= M28UL; // x24:28/18, x23:28/-- - - x08 += x24 * L4_0; // x08:54/-- - x09 += x24 * L4_1; // x09:55/-- - x10 += x24 * L4_2; // x10:56/46 - x11 += x24 * L4_3; // x11:57/46 - x12 += x24 * L4_4; // x12:57/55 - x13 += x24 * L4_5; // x13:58/-- - x14 += x24 * L4_6; // x14:58/56 - x15 += x24 * L4_7; // x15:59/-- - - x07 += x23 * L4_0; // x07:54/-- - x08 += x23 * L4_1; // x08:54/53 - x09 += x23 * L4_2; // x09:56/53 - x10 += x23 * L4_3; // x10:57/46 - x11 += x23 * L4_4; // x11:57/55 - x12 += x23 * L4_5; // x12:58/-- - x13 += x23 * L4_6; // x13:58/56 - x14 += x23 * L4_7; // x14:59/-- - - x06 += x22 * L4_0; // x06:54/-- - x07 += x22 * L4_1; // x07:54/53 - x08 += x22 * L4_2; // x08:56/-- - x09 += x22 * L4_3; // x09:57/53 - x10 += x22 * L4_4; // x10:57/55 - x11 += x22 * L4_5; // x11:58/-- - x12 += x22 * L4_6; // x12:58/56 - x13 += x22 * L4_7; // x13:59/-- - - x18 += (x17 >> 28); x17 &= M28UL; // x18:59/31, x17:28/-- - x19 += (x18 >> 28); x18 &= M28UL; // x19:58/54, x18:28/-- - x20 += (x19 >> 28); x19 &= M28UL; // 
x20:30/29, x19:28/-- - x21 += (x20 >> 28); x20 &= M28UL; // x21:28/03, x20:28/-- - - x05 += x21 * L4_0; // x05:54/-- - x06 += x21 * L4_1; // x06:55/-- - x07 += x21 * L4_2; // x07:56/31 - x08 += x21 * L4_3; // x08:57/31 - x09 += x21 * L4_4; // x09:57/56 - x10 += x21 * L4_5; // x10:58/-- - x11 += x21 * L4_6; // x11:58/56 - x12 += x21 * L4_7; // x12:59/-- - - x04 += x20 * L4_0; // x04:54/-- - x05 += x20 * L4_1; // x05:54/53 - x06 += x20 * L4_2; // x06:56/53 - x07 += x20 * L4_3; // x07:57/31 - x08 += x20 * L4_4; // x08:57/55 - x09 += x20 * L4_5; // x09:58/-- - x10 += x20 * L4_6; // x10:58/56 - x11 += x20 * L4_7; // x11:59/-- - - x03 += x19 * L4_0; // x03:54/-- - x04 += x19 * L4_1; // x04:54/53 - x05 += x19 * L4_2; // x05:56/-- - x06 += x19 * L4_3; // x06:57/53 - x07 += x19 * L4_4; // x07:57/55 - x08 += x19 * L4_5; // x08:58/-- - x09 += x19 * L4_6; // x09:58/56 - x10 += x19 * L4_7; // x10:59/-- - - x15 += (x14 >> 28); x14 &= M28UL; // x15:59/31, x14:28/-- - x16 += (x15 >> 28); x15 &= M28UL; // x16:59/32, x15:28/-- - x17 += (x16 >> 28); x16 &= M28UL; // x17:31/29, x16:28/-- - x18 += (x17 >> 28); x17 &= M28UL; // x18:28/04, x17:28/-- - - x02 += x18 * L4_0; // x02:54/-- - x03 += x18 * L4_1; // x03:55/-- - x04 += x18 * L4_2; // x04:56/32 - x05 += x18 * L4_3; // x05:57/32 - x06 += x18 * L4_4; // x06:57/56 - x07 += x18 * L4_5; // x07:58/-- - x08 += x18 * L4_6; // x08:58/56 - x09 += x18 * L4_7; // x09:59/-- - - x01 += x17 * L4_0; // x01:54/-- - x02 += x17 * L4_1; // x02:54/53 - x03 += x17 * L4_2; // x03:56/53 - x04 += x17 * L4_3; // x04:57/32 - x05 += x17 * L4_4; // x05:57/55 - x06 += x17 * L4_5; // x06:58/-- - x07 += x17 * L4_6; // x07:58/56 - x08 += x17 * L4_7; // x08:59/-- - - x16 *= 4; - x16 += (x15 >> 26); x15 &= M26UL; - x16 += 1; // x16:30/01 - - x00 += x16 * L_0; - x01 += x16 * L_1; - x02 += x16 * L_2; - x03 += x16 * L_3; - x04 += x16 * L_4; - x05 += x16 * L_5; - x06 += x16 * L_6; - x07 += x16 * L_7; - - x01 += (x00 >> 28); x00 &= M28UL; - x02 += (x01 >> 28); x01 &= 
M28UL; - x03 += (x02 >> 28); x02 &= M28UL; - x04 += (x03 >> 28); x03 &= M28UL; - x05 += (x04 >> 28); x04 &= M28UL; - x06 += (x05 >> 28); x05 &= M28UL; - x07 += (x06 >> 28); x06 &= M28UL; - x08 += (x07 >> 28); x07 &= M28UL; - x09 += (x08 >> 28); x08 &= M28UL; - x10 += (x09 >> 28); x09 &= M28UL; - x11 += (x10 >> 28); x10 &= M28UL; - x12 += (x11 >> 28); x11 &= M28UL; - x13 += (x12 >> 28); x12 &= M28UL; - x14 += (x13 >> 28); x13 &= M28UL; - x15 += (x14 >> 28); x14 &= M28UL; - x16 = (x15 >> 26); x15 &= M26UL; - - x16 -= 1; - - Debug.Assert(x16 == 0UL || x16 == ulong.MaxValue); - - x00 -= x16 & L_0; - x01 -= x16 & L_1; - x02 -= x16 & L_2; - x03 -= x16 & L_3; - x04 -= x16 & L_4; - x05 -= x16 & L_5; - x06 -= x16 & L_6; - x07 -= x16 & L_7; - - x01 += (ulong)((long)x00 >> 28); x00 &= M28UL; - x02 += (ulong)((long)x01 >> 28); x01 &= M28UL; - x03 += (ulong)((long)x02 >> 28); x02 &= M28UL; - x04 += (ulong)((long)x03 >> 28); x03 &= M28UL; - x05 += (ulong)((long)x04 >> 28); x04 &= M28UL; - x06 += (ulong)((long)x05 >> 28); x05 &= M28UL; - x07 += (ulong)((long)x06 >> 28); x06 &= M28UL; - x08 += (ulong)((long)x07 >> 28); x07 &= M28UL; - x09 += (ulong)((long)x08 >> 28); x08 &= M28UL; - x10 += (ulong)((long)x09 >> 28); x09 &= M28UL; - x11 += (ulong)((long)x10 >> 28); x10 &= M28UL; - x12 += (ulong)((long)x11 >> 28); x11 &= M28UL; - x13 += (ulong)((long)x12 >> 28); x12 &= M28UL; - x14 += (ulong)((long)x13 >> 28); x13 &= M28UL; - x15 += (ulong)((long)x14 >> 28); x14 &= M28UL; - - Debug.Assert(x15 >> 26 == 0UL); - - Codec.Encode56(x00 | (x01 << 28), r); - Codec.Encode56(x02 | (x03 << 28), r[7..]); - Codec.Encode56(x04 | (x05 << 28), r[14..]); - Codec.Encode56(x06 | (x07 << 28), r[21..]); - Codec.Encode56(x08 | (x09 << 28), r[28..]); - Codec.Encode56(x10 | (x11 << 28), r[35..]); - Codec.Encode56(x12 | (x13 << 28), r[42..]); - Codec.Encode56(x14 | (x15 << 28), r[49..]); - r[ScalarBytes - 1] = 0; - } -#endif - private static void ScalarMult(byte[] k, ref PointProjective p, ref 
PointProjective r) +#endif { #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - ScalarMult(k.AsSpan(), ref p, ref r); + Span<uint> n = stackalloc uint[ScalarUints + 1]; #else - uint[] n = new uint[ScalarUints]; - DecodeScalar(k, 0, n); - - // Recode the scalar into signed-digit form - { - uint c1 = Nat.CAdd(ScalarUints, ~(int)n[0] & 1, n, L, n); - uint c2 = Nat.ShiftDownBit(ScalarUints, n, c1); Debug.Assert(c2 == (1U << 31)); - - // NOTE: Bit 448 is implicitly set after the signed-digit recoding - } - - uint[] table = PointPrecompute(ref p, 8); - Init(out PointProjective q); - - // Replace first 4 doublings (2^4 * P) with 1 addition (P + 15 * P) - PointLookup15(table, ref r); - PointAdd(ref p, ref r); - - int w = 111; - for (;;) - { - PointLookup(n, w, table, ref q); - PointAdd(ref q, ref r); - - if (--w < 0) - break; - - for (int i = 0; i < 4; ++i) - { - PointDouble(ref r); - } - } + uint[] n = new uint[ScalarUints + 1]; #endif - } - -#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - private static void ScalarMult(ReadOnlySpan<byte> k, ref PointProjective p, ref PointProjective r) - { - Span<uint> n = stackalloc uint[ScalarUints]; - DecodeScalar(k, n); - // Recode the scalar into signed-digit form - { - uint c1 = Nat.CAdd(ScalarUints, ~(int)n[0] & 1, n, L, n); - uint c2 = Nat.ShiftDownBit(ScalarUints, n, c1); Debug.Assert(c2 == (1U << 31)); + Scalar448.Decode(k, n); + Scalar448.ToSignedDigits(449, n, n); - // NOTE: Bit 448 is implicitly set after the signed-digit recoding - } + // NOTE: Bit 448 is handled explicitly by an initial addition + Debug.Assert(n[ScalarUints] == 1U); uint[] table = PointPrecompute(ref p, 8); Init(out PointProjective q); @@ -1748,76 +1153,12 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 } } } -#endif - private static void ScalarMultBase(byte[] k, ref PointProjective r) - { #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - ScalarMultBase(k.AsSpan(), ref r); + private static void ScalarMultBase(ReadOnlySpan<byte> 
k, ref PointProjective r) #else - // Equivalent (but much slower) - //Init(out PointProjective p); - //F.Copy(B_x, 0, p.x, 0); - //F.Copy(B_y, 0, p.y, 0); - //F.One(p.z); - //ScalarMult(k, ref p, ref r); - - Precompute(); - - uint[] n = new uint[ScalarUints + 1]; - DecodeScalar(k, 0, n); - - // Recode the scalar into signed-digit form - { - n[ScalarUints] = (1U << (PrecompRange - 448)) - + Nat.CAdd(ScalarUints, ~(int)n[0] & 1, n, L, n); - uint c = Nat.ShiftDownBit(n.Length, n, 0); - Debug.Assert(c == (1U << 31)); - } - - Init(out PointAffine p); - - PointSetNeutral(ref r); - - int cOff = PrecompSpacing - 1; - for (;;) - { - int tPos = cOff; - - for (int b = 0; b < PrecompBlocks; ++b) - { - uint w = 0; - for (int t = 0; t < PrecompTeeth; ++t) - { - uint tBit = n[tPos >> 5] >> (tPos & 0x1F); - w &= ~(1U << t); - w ^= (tBit << t); - tPos += PrecompSpacing; - } - - int sign = (int)(w >> (PrecompTeeth - 1)) & 1; - int abs = ((int)w ^ -sign) & PrecompMask; - - Debug.Assert(sign == 0 || sign == 1); - Debug.Assert(0 <= abs && abs < PrecompPoints); - - PointLookup(b, abs, ref p); - - F.CNegate(sign, p.x); - - PointAdd(ref p, ref r); - } - - if (--cOff < 0) - break; - - PointDouble(ref r); - } + private static void ScalarMultBase(byte[] k, ref PointProjective r) #endif - } - -#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - private static void ScalarMultBase(ReadOnlySpan<byte> k, ref PointProjective r) { // Equivalent (but much slower) //Init(out PointProjective p); @@ -1828,16 +1169,14 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 Precompute(); +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER Span<uint> n = stackalloc uint[ScalarUints + 1]; - DecodeScalar(k, n); +#else + uint[] n = new uint[ScalarUints + 1]; +#endif - // Recode the scalar into signed-digit form - { - n[ScalarUints] = (1U << (PrecompRange - 448)) - + Nat.CAdd(ScalarUints, ~(int)n[0] & 1, n, L, n); - uint c = Nat.ShiftDownBit(n.Length, n, 0); - Debug.Assert(c == (1U << 31)); - } + 
Scalar448.Decode(k, n); + Scalar448.ToSignedDigits(PrecompRange, n, n); Init(out PointAffine p); @@ -1878,7 +1217,6 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 PointDouble(ref r); } } -#endif private static void ScalarMultBaseEncoded(byte[] k, byte[] r, int rOff) { @@ -1945,9 +1283,10 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 #else sbyte[] ws_p = new sbyte[447]; #endif - Wnaf.GetSignedVar(L, WnafWidth, ws_p); + // NOTE: WnafWidth225 because of the special structure of the order + Scalar448.GetOrderWnafVar(WnafWidth225, ws_p); - int count = 1 << (WnafWidth - 2); + int count = 1 << (WnafWidth225 - 2); PointProjective[] tp = new PointProjective[count]; PointPrecomputeVar(ref p, tp, 0, count); @@ -1970,38 +1309,46 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - private static void ScalarMultStrausVar(ReadOnlySpan<uint> nb, ReadOnlySpan<uint> np, ref PointProjective p, - ref PointProjective r) + private static void ScalarMultStraus225Var(ReadOnlySpan<uint> nb, ReadOnlySpan<uint> np, ref PointProjective p, + ReadOnlySpan<uint> nq, ref PointProjective q, ref PointProjective r) #else - private static void ScalarMultStrausVar(uint[] nb, uint[] np, ref PointProjective p, ref PointProjective r) + private static void ScalarMultStraus225Var(uint[] nb, uint[] np, ref PointProjective p, uint[] nq, + ref PointProjective q, ref PointProjective r) #endif { Debug.Assert(nb.Length == ScalarUints); - Debug.Assert(nb[ScalarUints - 1] <= L[ScalarUints - 1]); - - Debug.Assert(np.Length == ScalarUints); - Debug.Assert(np[ScalarUints - 1] <= L[ScalarUints - 1]); + Debug.Assert((int)nb[ScalarUints - 1] >= 0); + Debug.Assert(np.Length == 8); + Debug.Assert((int)np[7] >> 31 == (int)np[7] >> 1); + Debug.Assert(nq.Length == 8); + Debug.Assert((int)nq[7] >> 31 == (int)nq[7] >> 1); Precompute(); #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - Span<sbyte> ws_b = stackalloc sbyte[447]; - Span<sbyte> ws_p = stackalloc 
sbyte[447]; + Span<sbyte> ws_b = stackalloc sbyte[450]; + Span<sbyte> ws_p = stackalloc sbyte[225]; + Span<sbyte> ws_q = stackalloc sbyte[225]; #else - sbyte[] ws_b = new sbyte[447]; - sbyte[] ws_p = new sbyte[447]; + sbyte[] ws_b = new sbyte[450]; + sbyte[] ws_p = new sbyte[225]; + sbyte[] ws_q = new sbyte[225]; #endif Wnaf.GetSignedVar(nb, WnafWidthBase, ws_b); - Wnaf.GetSignedVar(np, WnafWidth, ws_p); + Wnaf.GetSignedVar(np, WnafWidth225, ws_p); + Wnaf.GetSignedVar(nq, WnafWidth225, ws_q); - int count = 1 << (WnafWidth - 2); + int count = 1 << (WnafWidth225 - 2); PointProjective[] tp = new PointProjective[count]; + PointProjective[] tq = new PointProjective[count]; PointPrecomputeVar(ref p, tp, 0, count); + PointPrecomputeVar(ref q, tq, 0, count); PointSetNeutral(ref r); - for (int bit = 446;;) + int bit = 225; + while (--bit >= 0) { int wb = ws_b[bit]; if (wb != 0) @@ -2010,6 +1357,13 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 PointAddVar(wb < 0, ref PrecompBaseWnaf[index], ref r); } + int wb225 = ws_b[225 + bit]; + if (wb225 != 0) + { + int index = (wb225 >> 1) ^ (wb225 >> 31); + PointAddVar(wb225 < 0, ref PrecompBase225Wnaf[index], ref r); + } + int wp = ws_p[bit]; if (wp != 0) { @@ -2017,11 +1371,18 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 PointAddVar(wp < 0, ref tp[index], ref r); } - if (--bit < 0) - break; + int wq = ws_q[bit]; + if (wq != 0) + { + int index = (wq >> 1) ^ (wq >> 31); + PointAddVar(wq < 0, ref tq[index], ref r); + } PointDouble(ref r); } + + // NOTE: Together with the final PointDouble of the loop, this clears the cofactor of 4 + PointDouble(ref r); } public static void Sign(byte[] sk, int skOff, byte[] ctx, byte[] m, int mOff, int mLen, byte[] sig, int sigOff) @@ -2076,24 +1437,44 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 public static bool ValidatePublicKeyFull(byte[] pk, int pkOff) { - Init(out PointProjective p); - if (!DecodePointVar(pk, pkOff, false, ref p)) +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + 
Span<byte> A = stackalloc byte[PublicKeySize]; + A.CopyFrom(pk.AsSpan(pkOff)); +#else + byte[] A = Copy(pk, pkOff, PublicKeySize); +#endif + + if (!CheckPointFullVar(A)) return false; - Init(out PointProjective r); - ScalarMultOrderVar(ref p, ref r); + Init(out PointProjective pA); + if (!DecodePointVar(A, false, ref pA)) + return false; - F.Normalize(r.x); - F.Normalize(r.y); - F.Normalize(r.z); + Init(out PointProjective pR); + ScalarMultOrderVar(ref pA, ref pR); - return IsNeutralElementVar(r.x, r.y, r.z); + F.Normalize(pR.x); + F.Normalize(pR.y); + F.Normalize(pR.z); + + return IsNeutralElementVar(pR.x, pR.y, pR.z); } public static bool ValidatePublicKeyPartial(byte[] pk, int pkOff) { - Init(out PointProjective p); - return DecodePointVar(pk, pkOff, false, ref p); +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + Span<byte> A = stackalloc byte[PublicKeySize]; + A.CopyFrom(pk.AsSpan(pkOff)); +#else + byte[] A = Copy(pk, pkOff, PublicKeySize); +#endif + + if (!CheckPointFullVar(A)) + return false; + + Init(out PointProjective pA); + return DecodePointVar(A, false, ref pA); } public static bool Verify(byte[] sig, int sigOff, byte[] pk, int pkOff, byte[] ctx, byte[] m, int mOff, int mLen) diff --git a/crypto/src/math/ec/rfc8032/Scalar25519.cs b/crypto/src/math/ec/rfc8032/Scalar25519.cs index 738ce63cb..0a443abaa 100644 --- a/crypto/src/math/ec/rfc8032/Scalar25519.cs +++ b/crypto/src/math/ec/rfc8032/Scalar25519.cs @@ -74,7 +74,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 Span<uint> tt = stackalloc uint[16]; Nat.Mul(y128, x, tt); - if ((y128[3] >> 31) != 0) + if ((int)y128[3] < 0) { Nat.AddTo(8, L, tt[4..]); Nat.SubFrom(8, x, tt[4..]); @@ -90,7 +90,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 uint[] tt = new uint[12]; Nat.Mul(y128, 0, 4, x, 0, 8, tt, 0); - if ((y128[3] >> 31) != 0) + if ((int)y128[3] < 0) { Nat256.AddTo(L, 0, tt, 4, 0U); Nat256.SubFrom(x, 0, tt, 4); @@ -100,7 +100,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 Codec.Encode32(tt, 0, 
12, bytes, 0); byte[] r = Reduce(bytes); - Codec.Decode32(r, 0, z, 0, 8); + Decode(r, z); } #endif @@ -400,33 +400,33 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 Span<uint> v1 = stackalloc uint[4]; v1[0] = 1U; int last = 15; - int len_Nv = GetBitLengthPositive(last, Nv); + int len_Nv = ScalarUtilities.GetBitLengthPositive(last, Nv); while (len_Nv > TargetLength) { - int len_p = GetBitLength(last, p); + int len_p = ScalarUtilities.GetBitLength(last, p); int s = len_p - len_Nv; s &= ~(s >> 31); if ((int)p[last] < 0) { - AddShifted_NP(last, s, Nu, Nv, p); - AddShifted_UV(3, s, u0, u1, v0, v1); + ScalarUtilities.AddShifted_NP(last, s, Nu, Nv, p); + ScalarUtilities.AddShifted_UV(last: 3, s, u0, u1, v0, v1); } else { - SubShifted_NP(last, s, Nu, Nv, p); - SubShifted_UV(3, s, u0, u1, v0, v1); + ScalarUtilities.SubShifted_NP(last, s, Nu, Nv, p); + ScalarUtilities.SubShifted_UV(last: 3, s, u0, u1, v0, v1); } - if (LessThan(last, Nu, Nv)) + if (ScalarUtilities.LessThan(last, Nu, Nv)) { - Swap(ref u0, ref v0); - Swap(ref u1, ref v1); - Swap(ref Nu, ref Nv); + ScalarUtilities.Swap(ref u0, ref v0); + ScalarUtilities.Swap(ref u1, ref v1); + ScalarUtilities.Swap(ref Nu, ref Nv); last = len_Nv >> 5; - len_Nv = GetBitLengthPositive(last, Nv); + len_Nv = ScalarUtilities.GetBitLengthPositive(last, Nv); } } @@ -452,33 +452,33 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 uint[] v1 = new uint[4]; v1[0] = 1U; int last = 15; - int len_Nv = GetBitLengthPositive(last, Nv); + int len_Nv = ScalarUtilities.GetBitLengthPositive(last, Nv); while (len_Nv > TargetLength) { - int len_p = GetBitLength(last, p); + int len_p = ScalarUtilities.GetBitLength(last, p); int s = len_p - len_Nv; s &= ~(s >> 31); if ((int)p[last] < 0) { - AddShifted_NP(last, s, Nu, Nv, p); - AddShifted_UV(3, s, u0, u1, v0, v1); + ScalarUtilities.AddShifted_NP(last, s, Nu, Nv, p); + ScalarUtilities.AddShifted_UV(last: 3, s, u0, u1, v0, v1); } else { - SubShifted_NP(last, s, Nu, Nv, p); - SubShifted_UV(3, s, u0, u1, v0, v1); 
+ ScalarUtilities.SubShifted_NP(last, s, Nu, Nv, p); + ScalarUtilities.SubShifted_UV(last: 3, s, u0, u1, v0, v1); } - if (LessThan(last, Nu, Nv)) + if (ScalarUtilities.LessThan(last, Nu, Nv)) { - Swap(ref u0, ref v0); - Swap(ref u1, ref v1); - Swap(ref Nu, ref Nv); + ScalarUtilities.Swap(ref u0, ref v0); + ScalarUtilities.Swap(ref u1, ref v1); + ScalarUtilities.Swap(ref Nu, ref Nv); last = len_Nv >> 5; - len_Nv = GetBitLengthPositive(last, Nv); + len_Nv = ScalarUtilities.GetBitLengthPositive(last, Nv); } } @@ -489,294 +489,16 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 #endif #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - internal static void ToSignedDigits(ReadOnlySpan<uint> x, Span<uint> z) + internal static void ToSignedDigits(int bits, ReadOnlySpan<uint> x, Span<uint> z) #else - internal static void ToSignedDigits(uint[] x, uint[] z) + internal static void ToSignedDigits(int bits, uint[] x, uint[] z) #endif { + Debug.Assert(bits == 256); + Debug.Assert(z.Length >= Size); + uint c1 = Nat.CAdd(Size, ~(int)x[0] & 1, x, L, z); Debug.Assert(c1 == 0U); uint c2 = Nat.ShiftDownBit(Size, z, 1U); Debug.Assert(c2 == (1U << 31)); } - -#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void AddShifted_NP(int last, int s, Span<uint> Nu, ReadOnlySpan<uint> Nv, Span<uint> _p) -#else - private static void AddShifted_NP(int last, int s, uint[] Nu, uint[] Nv, uint[] _p) -#endif - { - int sWords = s >> 5, sBits = s & 31; - - ulong cc__p = 0UL; - ulong cc_Nu = 0UL; - - if (sBits == 0) - { - for (int i = sWords; i <= last; ++i) - { - cc_Nu += Nu[i]; - cc_Nu += _p[i - sWords]; - - cc__p += _p[i]; - cc__p += Nv[i - sWords]; - _p[i] = (uint)cc__p; cc__p >>= 32; - - cc_Nu += _p[i - sWords]; - Nu[i] = (uint)cc_Nu; cc_Nu >>= 32; - } - } - else - { - uint prev_p = 0U; - uint prev_q = 0U; - uint prev_v = 0U; - - for (int i = sWords; i <= last; ++i) - { - uint next_p = _p[i - sWords]; - uint p_s = 
(next_p << sBits) | (prev_p >> -sBits); - prev_p = next_p; - - cc_Nu += Nu[i]; - cc_Nu += p_s; - - uint next_v = Nv[i - sWords]; - uint v_s = (next_v << sBits) | (prev_v >> -sBits); - prev_v = next_v; - - cc__p += _p[i]; - cc__p += v_s; - _p[i] = (uint)cc__p; cc__p >>= 32; - - uint next_q = _p[i - sWords]; - uint q_s = (next_q << sBits) | (prev_q >> -sBits); - prev_q = next_q; - - cc_Nu += q_s; - Nu[i] = (uint)cc_Nu; cc_Nu >>= 32; - } - } - } - -#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void AddShifted_UV(int last, int s, Span<uint> u0, Span<uint> u1, ReadOnlySpan<uint> v0, - ReadOnlySpan<uint> v1) -#else - private static void AddShifted_UV(int last, int s, uint[] u0, uint[] u1, uint[] v0, uint[] v1) -#endif - { - int sWords = s >> 5, sBits = s & 31; - - ulong cc_u0 = 0UL; - ulong cc_u1 = 0UL; - - if (sBits == 0) - { - for (int i = sWords; i <= last; ++i) - { - cc_u0 += u0[i]; - cc_u1 += u1[i]; - cc_u0 += v0[i - sWords]; - cc_u1 += v1[i - sWords]; - u0[i] = (uint)cc_u0; cc_u0 >>= 32; - u1[i] = (uint)cc_u1; cc_u1 >>= 32; - } - } - else - { - uint prev_v0 = 0U; - uint prev_v1 = 0U; - - for (int i = sWords; i <= last; ++i) - { - uint next_v0 = v0[i - sWords]; - uint next_v1 = v1[i - sWords]; - uint v0_s = (next_v0 << sBits) | (prev_v0 >> -sBits); - uint v1_s = (next_v1 << sBits) | (prev_v1 >> -sBits); - prev_v0 = next_v0; - prev_v1 = next_v1; - - cc_u0 += u0[i]; - cc_u1 += u1[i]; - cc_u0 += v0_s; - cc_u1 += v1_s; - u0[i] = (uint)cc_u0; cc_u0 >>= 32; - u1[i] = (uint)cc_u1; cc_u1 >>= 32; - } - } - } - -#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int GetBitLength(int last, ReadOnlySpan<uint> x) -#else - private static int GetBitLength(int last, uint[] x) -#endif - { - int i = last; - uint sign = (uint)((int)x[i] >> 31); - while (i > 0 && x[i] == sign) - { - --i; - } - return i * 32 + 32 - 
Integers.NumberOfLeadingZeros((int)(x[i] ^ sign)); - } - -#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int GetBitLengthPositive(int last, ReadOnlySpan<uint> x) -#else - private static int GetBitLengthPositive(int last, uint[] x) -#endif - { - int i = last; - while (i > 0 && x[i] == 0) - { - --i; - } - return i * 32 + 32 - Integers.NumberOfLeadingZeros((int)x[i]); - } - -#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static bool LessThan(int last, ReadOnlySpan<uint> x, ReadOnlySpan<uint> y) -#else - private static bool LessThan(int last, uint[] x, uint[] y) -#endif - { - int i = last; - if ((int)x[i] < (int)y[i]) - return true; - if ((int)x[i] > (int)y[i]) - return false; - while (--i >= 0) - { - if (x[i] < y[i]) - return true; - if (x[i] > y[i]) - return false; - } - return false; - } - -#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void SubShifted_NP(int last, int s, Span<uint> Nu, ReadOnlySpan<uint> Nv, Span<uint> _p) -#else - private static void SubShifted_NP(int last, int s, uint[] Nu, uint[] Nv, uint[] _p) -#endif - { - int sWords = s >> 5, sBits = s & 31; - - long cc__p = 0L; - long cc_Nu = 0L; - - if (sBits == 0) - { - for (int i = sWords; i <= last; ++i) - { - cc_Nu += Nu[i]; - cc_Nu -= _p[i - sWords]; - - cc__p += _p[i]; - cc__p -= Nv[i - sWords]; - _p[i] = (uint)cc__p; cc__p >>= 32; - - cc_Nu -= _p[i - sWords]; - Nu[i] = (uint)cc_Nu; cc_Nu >>= 32; - } - } - else - { - uint prev_p = 0U; - uint prev_q = 0U; - uint prev_v = 0U; - - for (int i = sWords; i <= last; ++i) - { - uint next_p = _p[i - sWords]; - uint p_s = (next_p << sBits) | (prev_p >> -sBits); - prev_p = next_p; - - cc_Nu += Nu[i]; - cc_Nu -= p_s; - - uint next_v = Nv[i - sWords]; - uint v_s = (next_v << sBits) | (prev_v >> -sBits); - prev_v = next_v; - - cc__p 
+= _p[i]; - cc__p -= v_s; - _p[i] = (uint)cc__p; cc__p >>= 32; - - uint next_q = _p[i - sWords]; - uint q_s = (next_q << sBits) | (prev_q >> -sBits); - prev_q = next_q; - - cc_Nu -= q_s; - Nu[i] = (uint)cc_Nu; cc_Nu >>= 32; - } - } - } - -#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void SubShifted_UV(int last, int s, Span<uint> u0, Span<uint> u1, ReadOnlySpan<uint> v0, - ReadOnlySpan<uint> v1) -#else - private static void SubShifted_UV(int last, int s, uint[] u0, uint[] u1, uint[] v0, uint[] v1) -#endif - { - int sWords = s >> 5, sBits = s & 31; - - long cc_u0 = 0L; - long cc_u1 = 0L; - - if (sBits == 0) - { - for (int i = sWords; i <= last; ++i) - { - cc_u0 += u0[i]; - cc_u1 += u1[i]; - cc_u0 -= v0[i - sWords]; - cc_u1 -= v1[i - sWords]; - u0[i] = (uint)cc_u0; cc_u0 >>= 32; - u1[i] = (uint)cc_u1; cc_u1 >>= 32; - } - } - else - { - uint prev_v0 = 0U; - uint prev_v1 = 0U; - - for (int i = sWords; i <= last; ++i) - { - uint next_v0 = v0[i - sWords]; - uint next_v1 = v1[i - sWords]; - uint v0_s = (next_v0 << sBits) | (prev_v0 >> -sBits); - uint v1_s = (next_v1 << sBits) | (prev_v1 >> -sBits); - prev_v0 = next_v0; - prev_v1 = next_v1; - - cc_u0 += u0[i]; - cc_u1 += u1[i]; - cc_u0 -= v0_s; - cc_u1 -= v1_s; - u0[i] = (uint)cc_u0; cc_u0 >>= 32; - u1[i] = (uint)cc_u1; cc_u1 >>= 32; - } - } - } - -#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void Swap(ref Span<uint> x, ref Span<uint> y) -#else - private static void Swap(ref uint[] x, ref uint[] y) -#endif - { - var t = x; x = y; y = t; - } } } diff --git a/crypto/src/math/ec/rfc8032/Scalar448.cs b/crypto/src/math/ec/rfc8032/Scalar448.cs new file mode 100644 index 000000000..e17f48a99 --- /dev/null +++ b/crypto/src/math/ec/rfc8032/Scalar448.cs @@ -0,0 +1,819 @@ +using System; +using System.Diagnostics; +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER 
+using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +#endif + +using Org.BouncyCastle.Crypto.Utilities; +using Org.BouncyCastle.Math.Raw; +using Org.BouncyCastle.Utilities; + +namespace Org.BouncyCastle.Math.EC.Rfc8032 +{ + internal static class Scalar448 + { + internal const int Size = 14; + + internal const int ScalarBytes = Size * 4 + 1; + + private const ulong M26UL = 0x03FFFFFFUL; + private const ulong M28UL = 0x0FFFFFFFUL; + + private const int TargetLength = 447; + + private static readonly uint[] L = { 0xAB5844F3U, 0x2378C292U, 0x8DC58F55U, 0x216CC272U, 0xAED63690U, + 0xC44EDB49U, 0x7CCA23E9U, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, + 0x3FFFFFFFU }; + private static readonly uint[] LSq = { 0x1BA1FEA9U, 0xC1ADFBB8U, 0x49E0A8B2U, 0xB91BF537U, 0xE764D815U, + 0x4525492BU, 0xA2B8716DU, 0x4AE17CF6U, 0xBA3C47C4U, 0xF1A9CC14U, 0x7E4D070AU, 0x92052BCBU, 0x9F823B72U, + 0xC3402A93U, 0x55AC2279U, 0x91BC6149U, 0x46E2C7AAU, 0x10B66139U, 0xD76B1B48U, 0xE2276DA4U, 0xBE6511F4U, + 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0x0FFFFFFFU }; + + private const int L_0 = 0x04A7BB0D; // L_0:26/24 + private const int L_1 = 0x0873D6D5; // L_1:27/23 + private const int L_2 = 0x0A70AADC; // L_2:27/26 + private const int L_3 = 0x03D8D723; // L_3:26/-- + private const int L_4 = 0x096FDE93; // L_4:27/25 + private const int L_5 = 0x0B65129C; // L_5:27/26 + private const int L_6 = 0x063BB124; // L_6:27/-- + private const int L_7 = 0x08335DC1; // L_7:27/22 + + private const int L4_0 = 0x029EEC34; // L4_0:25/24 + private const int L4_1 = 0x01CF5B55; // L4_1:25/-- + private const int L4_2 = 0x09C2AB72; // L4_2:27/25 + private const int L4_3 = 0x0F635C8E; // L4_3:28/-- + private const int L4_4 = 0x05BF7A4C; // L4_4:26/25 + private const int L4_5 = 0x0D944A72; // L4_5:28/-- + private const int L4_6 = 0x08EEC492; // L4_6:27/24 + private const int L4_7 = 0x20CD7705; // L4_7:29/24 + +#if 
NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + internal static bool CheckVar(ReadOnlySpan<byte> s, Span<uint> n) + { + if (s[ScalarBytes - 1] != 0x00) + return false; + + Decode(s, n); + return !Nat.Gte(Size, n, L); + } +#else + internal static bool CheckVar(byte[] s, uint[] n) + { + if (s[ScalarBytes - 1] != 0x00) + return false; + + Decode(s, n); + return !Nat.Gte(Size, n, L); + } +#endif + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + internal static void Decode(ReadOnlySpan<byte> k, Span<uint> n) + { + Debug.Assert(k[ScalarBytes - 1] == 0x00); + + Codec.Decode32(k, n[..Size]); + } +#else + internal static void Decode(byte[] k, uint[] n) + { + Debug.Assert(k[ScalarBytes - 1] == 0x00); + + Codec.Decode32(k, 0, n, 0, Size); + } +#endif + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + internal static void GetOrderWnafVar(int width, Span<sbyte> ws) +#else + internal static void GetOrderWnafVar(int width, sbyte[] ws) +#endif + { + Wnaf.GetSignedVar(L, width, ws); + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + internal static void Multiply225Var(ReadOnlySpan<uint> x, ReadOnlySpan<uint> y225, Span<uint> z) + { + Span<uint> tt = stackalloc uint[29]; + Nat.Mul(y225, x, tt); + + if ((int)y225[7] < 0) + { + Nat.AddTo(14, L, tt[8..]); + Nat.SubFrom(14, x, tt[8..]); + } + + Span<byte> r = MemoryMarshal.AsBytes(tt); + Reduce(r, r); + tt[..14].CopyTo(z); + } +#else + internal static void Multiply225Var(uint[] x, uint[] y225, uint[] z) + { + uint[] tt = new uint[22]; + Nat.Mul(y225, 0, 8, x, 0, 14, tt, 0); + + if ((int)y225[7] < 0) + { + Nat.AddTo(14, L, 0, tt, 8); + Nat.SubFrom(14, x, 0, tt, 8); + } + + byte[] bytes = new byte[114]; + Codec.Encode32(tt, 0, 22, bytes, 0); + + byte[] r = Reduce(bytes); + Decode(r, z); + } +#endif + + internal static byte[] Reduce(byte[] n) + { + byte[] r = new byte[ScalarBytes]; + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + Reduce(n, r); +#else + ulong x00 = 
Codec.Decode32(n, 0); // x00:32/-- + ulong x01 = (Codec.Decode24(n, 4) << 4); // x01:28/-- + ulong x02 = Codec.Decode32(n, 7); // x02:32/-- + ulong x03 = (Codec.Decode24(n, 11) << 4); // x03:28/-- + ulong x04 = Codec.Decode32(n, 14); // x04:32/-- + ulong x05 = (Codec.Decode24(n, 18) << 4); // x05:28/-- + ulong x06 = Codec.Decode32(n, 21); // x06:32/-- + ulong x07 = (Codec.Decode24(n, 25) << 4); // x07:28/-- + ulong x08 = Codec.Decode32(n, 28); // x08:32/-- + ulong x09 = (Codec.Decode24(n, 32) << 4); // x09:28/-- + ulong x10 = Codec.Decode32(n, 35); // x10:32/-- + ulong x11 = (Codec.Decode24(n, 39) << 4); // x11:28/-- + ulong x12 = Codec.Decode32(n, 42); // x12:32/-- + ulong x13 = (Codec.Decode24(n, 46) << 4); // x13:28/-- + ulong x14 = Codec.Decode32(n, 49); // x14:32/-- + ulong x15 = (Codec.Decode24(n, 53) << 4); // x15:28/-- + ulong x16 = Codec.Decode32(n, 56); // x16:32/-- + ulong x17 = (Codec.Decode24(n, 60) << 4); // x17:28/-- + ulong x18 = Codec.Decode32(n, 63); // x18:32/-- + ulong x19 = (Codec.Decode24(n, 67) << 4); // x19:28/-- + ulong x20 = Codec.Decode32(n, 70); // x20:32/-- + ulong x21 = (Codec.Decode24(n, 74) << 4); // x21:28/-- + ulong x22 = Codec.Decode32(n, 77); // x22:32/-- + ulong x23 = (Codec.Decode24(n, 81) << 4); // x23:28/-- + ulong x24 = Codec.Decode32(n, 84); // x24:32/-- + ulong x25 = (Codec.Decode24(n, 88) << 4); // x25:28/-- + ulong x26 = Codec.Decode32(n, 91); // x26:32/-- + ulong x27 = (Codec.Decode24(n, 95) << 4); // x27:28/-- + ulong x28 = Codec.Decode32(n, 98); // x28:32/-- + ulong x29 = (Codec.Decode24(n, 102) << 4); // x29:28/-- + ulong x30 = Codec.Decode32(n, 105); // x30:32/-- + ulong x31 = (Codec.Decode24(n, 109) << 4); // x31:28/-- + ulong x32 = Codec.Decode16(n, 112); // x32:16/-- + + //x32 += (x31 >> 28); x31 &= M28UL; + x16 += x32 * L4_0; // x16:42/-- + x17 += x32 * L4_1; // x17:41/28 + x18 += x32 * L4_2; // x18:43/42 + x19 += x32 * L4_3; // x19:44/28 + x20 += x32 * L4_4; // x20:43/-- + x21 += x32 * L4_5; // x21:44/28 + x22 
+= x32 * L4_6; // x22:43/41 + x23 += x32 * L4_7; // x23:45/41 + + x31 += (x30 >> 28); x30 &= M28UL; // x31:28/--, x30:28/-- + x15 += x31 * L4_0; // x15:54/-- + x16 += x31 * L4_1; // x16:53/42 + x17 += x31 * L4_2; // x17:55/54 + x18 += x31 * L4_3; // x18:56/44 + x19 += x31 * L4_4; // x19:55/-- + x20 += x31 * L4_5; // x20:56/43 + x21 += x31 * L4_6; // x21:55/53 + x22 += x31 * L4_7; // x22:57/53 + + //x30 += (x29 >> 28); x29 &= M28UL; + x14 += x30 * L4_0; // x14:54/-- + x15 += x30 * L4_1; // x15:54/53 + x16 += x30 * L4_2; // x16:56/-- + x17 += x30 * L4_3; // x17:57/-- + x18 += x30 * L4_4; // x18:56/55 + x19 += x30 * L4_5; // x19:56/55 + x20 += x30 * L4_6; // x20:57/-- + x21 += x30 * L4_7; // x21:57/56 + + x29 += (x28 >> 28); x28 &= M28UL; // x29:28/--, x28:28/-- + x13 += x29 * L4_0; // x13:54/-- + x14 += x29 * L4_1; // x14:54/53 + x15 += x29 * L4_2; // x15:56/-- + x16 += x29 * L4_3; // x16:57/-- + x17 += x29 * L4_4; // x17:57/55 + x18 += x29 * L4_5; // x18:57/55 + x19 += x29 * L4_6; // x19:57/52 + x20 += x29 * L4_7; // x20:58/52 + + //x28 += (x27 >> 28); x27 &= M28UL; + x12 += x28 * L4_0; // x12:54/-- + x13 += x28 * L4_1; // x13:54/53 + x14 += x28 * L4_2; // x14:56/-- + x15 += x28 * L4_3; // x15:57/-- + x16 += x28 * L4_4; // x16:57/55 + x17 += x28 * L4_5; // x17:58/-- + x18 += x28 * L4_6; // x18:58/-- + x19 += x28 * L4_7; // x19:58/53 + + x27 += (x26 >> 28); x26 &= M28UL; // x27:28/--, x26:28/-- + x11 += x27 * L4_0; // x11:54/-- + x12 += x27 * L4_1; // x12:54/53 + x13 += x27 * L4_2; // x13:56/-- + x14 += x27 * L4_3; // x14:57/-- + x15 += x27 * L4_4; // x15:57/55 + x16 += x27 * L4_5; // x16:58/-- + x17 += x27 * L4_6; // x17:58/56 + x18 += x27 * L4_7; // x18:59/-- + + //x26 += (x25 >> 28); x25 &= M28UL; + x10 += x26 * L4_0; // x10:54/-- + x11 += x26 * L4_1; // x11:54/53 + x12 += x26 * L4_2; // x12:56/-- + x13 += x26 * L4_3; // x13:57/-- + x14 += x26 * L4_4; // x14:57/55 + x15 += x26 * L4_5; // x15:58/-- + x16 += x26 * L4_6; // x16:58/56 + x17 += x26 * L4_7; // x17:59/-- 
+ + x25 += (x24 >> 28); x24 &= M28UL; // x25:28/--, x24:28/-- + x09 += x25 * L4_0; // x09:54/-- + x10 += x25 * L4_1; // x10:54/53 + x11 += x25 * L4_2; // x11:56/-- + x12 += x25 * L4_3; // x12:57/-- + x13 += x25 * L4_4; // x13:57/55 + x14 += x25 * L4_5; // x14:58/-- + x15 += x25 * L4_6; // x15:58/56 + x16 += x25 * L4_7; // x16:59/-- + + x21 += (x20 >> 28); x20 &= M28UL; // x21:58/--, x20:28/-- + x22 += (x21 >> 28); x21 &= M28UL; // x22:57/54, x21:28/-- + x23 += (x22 >> 28); x22 &= M28UL; // x23:45/42, x22:28/-- + x24 += (x23 >> 28); x23 &= M28UL; // x24:28/18, x23:28/-- + + x08 += x24 * L4_0; // x08:54/-- + x09 += x24 * L4_1; // x09:55/-- + x10 += x24 * L4_2; // x10:56/46 + x11 += x24 * L4_3; // x11:57/46 + x12 += x24 * L4_4; // x12:57/55 + x13 += x24 * L4_5; // x13:58/-- + x14 += x24 * L4_6; // x14:58/56 + x15 += x24 * L4_7; // x15:59/-- + + x07 += x23 * L4_0; // x07:54/-- + x08 += x23 * L4_1; // x08:54/53 + x09 += x23 * L4_2; // x09:56/53 + x10 += x23 * L4_3; // x10:57/46 + x11 += x23 * L4_4; // x11:57/55 + x12 += x23 * L4_5; // x12:58/-- + x13 += x23 * L4_6; // x13:58/56 + x14 += x23 * L4_7; // x14:59/-- + + x06 += x22 * L4_0; // x06:54/-- + x07 += x22 * L4_1; // x07:54/53 + x08 += x22 * L4_2; // x08:56/-- + x09 += x22 * L4_3; // x09:57/53 + x10 += x22 * L4_4; // x10:57/55 + x11 += x22 * L4_5; // x11:58/-- + x12 += x22 * L4_6; // x12:58/56 + x13 += x22 * L4_7; // x13:59/-- + + x18 += (x17 >> 28); x17 &= M28UL; // x18:59/31, x17:28/-- + x19 += (x18 >> 28); x18 &= M28UL; // x19:58/54, x18:28/-- + x20 += (x19 >> 28); x19 &= M28UL; // x20:30/29, x19:28/-- + x21 += (x20 >> 28); x20 &= M28UL; // x21:28/03, x20:28/-- + + x05 += x21 * L4_0; // x05:54/-- + x06 += x21 * L4_1; // x06:55/-- + x07 += x21 * L4_2; // x07:56/31 + x08 += x21 * L4_3; // x08:57/31 + x09 += x21 * L4_4; // x09:57/56 + x10 += x21 * L4_5; // x10:58/-- + x11 += x21 * L4_6; // x11:58/56 + x12 += x21 * L4_7; // x12:59/-- + + x04 += x20 * L4_0; // x04:54/-- + x05 += x20 * L4_1; // x05:54/53 + x06 += x20 * 
L4_2; // x06:56/53 + x07 += x20 * L4_3; // x07:57/31 + x08 += x20 * L4_4; // x08:57/55 + x09 += x20 * L4_5; // x09:58/-- + x10 += x20 * L4_6; // x10:58/56 + x11 += x20 * L4_7; // x11:59/-- + + x03 += x19 * L4_0; // x03:54/-- + x04 += x19 * L4_1; // x04:54/53 + x05 += x19 * L4_2; // x05:56/-- + x06 += x19 * L4_3; // x06:57/53 + x07 += x19 * L4_4; // x07:57/55 + x08 += x19 * L4_5; // x08:58/-- + x09 += x19 * L4_6; // x09:58/56 + x10 += x19 * L4_7; // x10:59/-- + + x15 += (x14 >> 28); x14 &= M28UL; // x15:59/31, x14:28/-- + x16 += (x15 >> 28); x15 &= M28UL; // x16:59/32, x15:28/-- + x17 += (x16 >> 28); x16 &= M28UL; // x17:31/29, x16:28/-- + x18 += (x17 >> 28); x17 &= M28UL; // x18:28/04, x17:28/-- + + x02 += x18 * L4_0; // x02:54/-- + x03 += x18 * L4_1; // x03:55/-- + x04 += x18 * L4_2; // x04:56/32 + x05 += x18 * L4_3; // x05:57/32 + x06 += x18 * L4_4; // x06:57/56 + x07 += x18 * L4_5; // x07:58/-- + x08 += x18 * L4_6; // x08:58/56 + x09 += x18 * L4_7; // x09:59/-- + + x01 += x17 * L4_0; // x01:54/-- + x02 += x17 * L4_1; // x02:54/53 + x03 += x17 * L4_2; // x03:56/53 + x04 += x17 * L4_3; // x04:57/32 + x05 += x17 * L4_4; // x05:57/55 + x06 += x17 * L4_5; // x06:58/-- + x07 += x17 * L4_6; // x07:58/56 + x08 += x17 * L4_7; // x08:59/-- + + x16 *= 4; + x16 += (x15 >> 26); x15 &= M26UL; + x16 += 1; // x16:30/01 + + x00 += x16 * L_0; + x01 += x16 * L_1; + x02 += x16 * L_2; + x03 += x16 * L_3; + x04 += x16 * L_4; + x05 += x16 * L_5; + x06 += x16 * L_6; + x07 += x16 * L_7; + + x01 += (x00 >> 28); x00 &= M28UL; + x02 += (x01 >> 28); x01 &= M28UL; + x03 += (x02 >> 28); x02 &= M28UL; + x04 += (x03 >> 28); x03 &= M28UL; + x05 += (x04 >> 28); x04 &= M28UL; + x06 += (x05 >> 28); x05 &= M28UL; + x07 += (x06 >> 28); x06 &= M28UL; + x08 += (x07 >> 28); x07 &= M28UL; + x09 += (x08 >> 28); x08 &= M28UL; + x10 += (x09 >> 28); x09 &= M28UL; + x11 += (x10 >> 28); x10 &= M28UL; + x12 += (x11 >> 28); x11 &= M28UL; + x13 += (x12 >> 28); x12 &= M28UL; + x14 += (x13 >> 28); x13 &= M28UL; + 
x15 += (x14 >> 28); x14 &= M28UL; + x16 = (x15 >> 26); x15 &= M26UL; + + x16 -= 1; + + Debug.Assert(x16 == 0UL || x16 == ulong.MaxValue); + + x00 -= x16 & L_0; + x01 -= x16 & L_1; + x02 -= x16 & L_2; + x03 -= x16 & L_3; + x04 -= x16 & L_4; + x05 -= x16 & L_5; + x06 -= x16 & L_6; + x07 -= x16 & L_7; + + x01 += (ulong)((long)x00 >> 28); x00 &= M28UL; + x02 += (ulong)((long)x01 >> 28); x01 &= M28UL; + x03 += (ulong)((long)x02 >> 28); x02 &= M28UL; + x04 += (ulong)((long)x03 >> 28); x03 &= M28UL; + x05 += (ulong)((long)x04 >> 28); x04 &= M28UL; + x06 += (ulong)((long)x05 >> 28); x05 &= M28UL; + x07 += (ulong)((long)x06 >> 28); x06 &= M28UL; + x08 += (ulong)((long)x07 >> 28); x07 &= M28UL; + x09 += (ulong)((long)x08 >> 28); x08 &= M28UL; + x10 += (ulong)((long)x09 >> 28); x09 &= M28UL; + x11 += (ulong)((long)x10 >> 28); x10 &= M28UL; + x12 += (ulong)((long)x11 >> 28); x11 &= M28UL; + x13 += (ulong)((long)x12 >> 28); x12 &= M28UL; + x14 += (ulong)((long)x13 >> 28); x13 &= M28UL; + x15 += (ulong)((long)x14 >> 28); x14 &= M28UL; + + Debug.Assert(x15 >> 26 == 0UL); + + Codec.Encode56(x00 | (x01 << 28), r, 0); + Codec.Encode56(x02 | (x03 << 28), r, 7); + Codec.Encode56(x04 | (x05 << 28), r, 14); + Codec.Encode56(x06 | (x07 << 28), r, 21); + Codec.Encode56(x08 | (x09 << 28), r, 28); + Codec.Encode56(x10 | (x11 << 28), r, 35); + Codec.Encode56(x12 | (x13 << 28), r, 42); + Codec.Encode56(x14 | (x15 << 28), r, 49); + //r[ScalarBytes - 1] = 0; +#endif + + return r; + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + internal static void Reduce(ReadOnlySpan<byte> n, Span<byte> r) + { + ulong x00 = Codec.Decode32(n[ 0..]); // x00:32/-- + ulong x01 = (Codec.Decode24(n[ 4..]) << 4); // x01:28/-- + ulong x02 = Codec.Decode32(n[ 7..]); // x02:32/-- + ulong x03 = (Codec.Decode24(n[ 11..]) << 4); // x03:28/-- + ulong x04 = Codec.Decode32(n[ 14..]); // x04:32/-- + ulong x05 = (Codec.Decode24(n[ 18..]) << 4); // x05:28/-- + ulong x06 = Codec.Decode32(n[ 21..]); // x06:32/-- 
+ ulong x07 = (Codec.Decode24(n[ 25..]) << 4); // x07:28/-- + ulong x08 = Codec.Decode32(n[ 28..]); // x08:32/-- + ulong x09 = (Codec.Decode24(n[ 32..]) << 4); // x09:28/-- + ulong x10 = Codec.Decode32(n[ 35..]); // x10:32/-- + ulong x11 = (Codec.Decode24(n[ 39..]) << 4); // x11:28/-- + ulong x12 = Codec.Decode32(n[ 42..]); // x12:32/-- + ulong x13 = (Codec.Decode24(n[ 46..]) << 4); // x13:28/-- + ulong x14 = Codec.Decode32(n[ 49..]); // x14:32/-- + ulong x15 = (Codec.Decode24(n[ 53..]) << 4); // x15:28/-- + ulong x16 = Codec.Decode32(n[ 56..]); // x16:32/-- + ulong x17 = (Codec.Decode24(n[ 60..]) << 4); // x17:28/-- + ulong x18 = Codec.Decode32(n[ 63..]); // x18:32/-- + ulong x19 = (Codec.Decode24(n[ 67..]) << 4); // x19:28/-- + ulong x20 = Codec.Decode32(n[ 70..]); // x20:32/-- + ulong x21 = (Codec.Decode24(n[ 74..]) << 4); // x21:28/-- + ulong x22 = Codec.Decode32(n[ 77..]); // x22:32/-- + ulong x23 = (Codec.Decode24(n[ 81..]) << 4); // x23:28/-- + ulong x24 = Codec.Decode32(n[ 84..]); // x24:32/-- + ulong x25 = (Codec.Decode24(n[ 88..]) << 4); // x25:28/-- + ulong x26 = Codec.Decode32(n[ 91..]); // x26:32/-- + ulong x27 = (Codec.Decode24(n[ 95..]) << 4); // x27:28/-- + ulong x28 = Codec.Decode32(n[ 98..]); // x28:32/-- + ulong x29 = (Codec.Decode24(n[102..]) << 4); // x29:28/-- + ulong x30 = Codec.Decode32(n[105..]); // x30:32/-- + ulong x31 = (Codec.Decode24(n[109..]) << 4); // x31:28/-- + ulong x32 = Codec.Decode16(n[112..]); // x32:16/-- + + //x32 += (x31 >> 28); x31 &= M28UL; + x16 += x32 * L4_0; // x16:42/-- + x17 += x32 * L4_1; // x17:41/28 + x18 += x32 * L4_2; // x18:43/42 + x19 += x32 * L4_3; // x19:44/28 + x20 += x32 * L4_4; // x20:43/-- + x21 += x32 * L4_5; // x21:44/28 + x22 += x32 * L4_6; // x22:43/41 + x23 += x32 * L4_7; // x23:45/41 + + x31 += (x30 >> 28); x30 &= M28UL; // x31:28/--, x30:28/-- + x15 += x31 * L4_0; // x15:54/-- + x16 += x31 * L4_1; // x16:53/42 + x17 += x31 * L4_2; // x17:55/54 + x18 += x31 * L4_3; // x18:56/44 + x19 += x31 * L4_4; 
// x19:55/-- + x20 += x31 * L4_5; // x20:56/43 + x21 += x31 * L4_6; // x21:55/53 + x22 += x31 * L4_7; // x22:57/53 + + //x30 += (x29 >> 28); x29 &= M28UL; + x14 += x30 * L4_0; // x14:54/-- + x15 += x30 * L4_1; // x15:54/53 + x16 += x30 * L4_2; // x16:56/-- + x17 += x30 * L4_3; // x17:57/-- + x18 += x30 * L4_4; // x18:56/55 + x19 += x30 * L4_5; // x19:56/55 + x20 += x30 * L4_6; // x20:57/-- + x21 += x30 * L4_7; // x21:57/56 + + x29 += (x28 >> 28); x28 &= M28UL; // x29:28/--, x28:28/-- + x13 += x29 * L4_0; // x13:54/-- + x14 += x29 * L4_1; // x14:54/53 + x15 += x29 * L4_2; // x15:56/-- + x16 += x29 * L4_3; // x16:57/-- + x17 += x29 * L4_4; // x17:57/55 + x18 += x29 * L4_5; // x18:57/55 + x19 += x29 * L4_6; // x19:57/52 + x20 += x29 * L4_7; // x20:58/52 + + //x28 += (x27 >> 28); x27 &= M28UL; + x12 += x28 * L4_0; // x12:54/-- + x13 += x28 * L4_1; // x13:54/53 + x14 += x28 * L4_2; // x14:56/-- + x15 += x28 * L4_3; // x15:57/-- + x16 += x28 * L4_4; // x16:57/55 + x17 += x28 * L4_5; // x17:58/-- + x18 += x28 * L4_6; // x18:58/-- + x19 += x28 * L4_7; // x19:58/53 + + x27 += (x26 >> 28); x26 &= M28UL; // x27:28/--, x26:28/-- + x11 += x27 * L4_0; // x11:54/-- + x12 += x27 * L4_1; // x12:54/53 + x13 += x27 * L4_2; // x13:56/-- + x14 += x27 * L4_3; // x14:57/-- + x15 += x27 * L4_4; // x15:57/55 + x16 += x27 * L4_5; // x16:58/-- + x17 += x27 * L4_6; // x17:58/56 + x18 += x27 * L4_7; // x18:59/-- + + //x26 += (x25 >> 28); x25 &= M28UL; + x10 += x26 * L4_0; // x10:54/-- + x11 += x26 * L4_1; // x11:54/53 + x12 += x26 * L4_2; // x12:56/-- + x13 += x26 * L4_3; // x13:57/-- + x14 += x26 * L4_4; // x14:57/55 + x15 += x26 * L4_5; // x15:58/-- + x16 += x26 * L4_6; // x16:58/56 + x17 += x26 * L4_7; // x17:59/-- + + x25 += (x24 >> 28); x24 &= M28UL; // x25:28/--, x24:28/-- + x09 += x25 * L4_0; // x09:54/-- + x10 += x25 * L4_1; // x10:54/53 + x11 += x25 * L4_2; // x11:56/-- + x12 += x25 * L4_3; // x12:57/-- + x13 += x25 * L4_4; // x13:57/55 + x14 += x25 * L4_5; // x14:58/-- + x15 += x25 * 
L4_6; // x15:58/56 + x16 += x25 * L4_7; // x16:59/-- + + x21 += (x20 >> 28); x20 &= M28UL; // x21:58/--, x20:28/-- + x22 += (x21 >> 28); x21 &= M28UL; // x22:57/54, x21:28/-- + x23 += (x22 >> 28); x22 &= M28UL; // x23:45/42, x22:28/-- + x24 += (x23 >> 28); x23 &= M28UL; // x24:28/18, x23:28/-- + + x08 += x24 * L4_0; // x08:54/-- + x09 += x24 * L4_1; // x09:55/-- + x10 += x24 * L4_2; // x10:56/46 + x11 += x24 * L4_3; // x11:57/46 + x12 += x24 * L4_4; // x12:57/55 + x13 += x24 * L4_5; // x13:58/-- + x14 += x24 * L4_6; // x14:58/56 + x15 += x24 * L4_7; // x15:59/-- + + x07 += x23 * L4_0; // x07:54/-- + x08 += x23 * L4_1; // x08:54/53 + x09 += x23 * L4_2; // x09:56/53 + x10 += x23 * L4_3; // x10:57/46 + x11 += x23 * L4_4; // x11:57/55 + x12 += x23 * L4_5; // x12:58/-- + x13 += x23 * L4_6; // x13:58/56 + x14 += x23 * L4_7; // x14:59/-- + + x06 += x22 * L4_0; // x06:54/-- + x07 += x22 * L4_1; // x07:54/53 + x08 += x22 * L4_2; // x08:56/-- + x09 += x22 * L4_3; // x09:57/53 + x10 += x22 * L4_4; // x10:57/55 + x11 += x22 * L4_5; // x11:58/-- + x12 += x22 * L4_6; // x12:58/56 + x13 += x22 * L4_7; // x13:59/-- + + x18 += (x17 >> 28); x17 &= M28UL; // x18:59/31, x17:28/-- + x19 += (x18 >> 28); x18 &= M28UL; // x19:58/54, x18:28/-- + x20 += (x19 >> 28); x19 &= M28UL; // x20:30/29, x19:28/-- + x21 += (x20 >> 28); x20 &= M28UL; // x21:28/03, x20:28/-- + + x05 += x21 * L4_0; // x05:54/-- + x06 += x21 * L4_1; // x06:55/-- + x07 += x21 * L4_2; // x07:56/31 + x08 += x21 * L4_3; // x08:57/31 + x09 += x21 * L4_4; // x09:57/56 + x10 += x21 * L4_5; // x10:58/-- + x11 += x21 * L4_6; // x11:58/56 + x12 += x21 * L4_7; // x12:59/-- + + x04 += x20 * L4_0; // x04:54/-- + x05 += x20 * L4_1; // x05:54/53 + x06 += x20 * L4_2; // x06:56/53 + x07 += x20 * L4_3; // x07:57/31 + x08 += x20 * L4_4; // x08:57/55 + x09 += x20 * L4_5; // x09:58/-- + x10 += x20 * L4_6; // x10:58/56 + x11 += x20 * L4_7; // x11:59/-- + + x03 += x19 * L4_0; // x03:54/-- + x04 += x19 * L4_1; // x04:54/53 + x05 += x19 * L4_2; 
// x05:56/-- + x06 += x19 * L4_3; // x06:57/53 + x07 += x19 * L4_4; // x07:57/55 + x08 += x19 * L4_5; // x08:58/-- + x09 += x19 * L4_6; // x09:58/56 + x10 += x19 * L4_7; // x10:59/-- + + x15 += (x14 >> 28); x14 &= M28UL; // x15:59/31, x14:28/-- + x16 += (x15 >> 28); x15 &= M28UL; // x16:59/32, x15:28/-- + x17 += (x16 >> 28); x16 &= M28UL; // x17:31/29, x16:28/-- + x18 += (x17 >> 28); x17 &= M28UL; // x18:28/04, x17:28/-- + + x02 += x18 * L4_0; // x02:54/-- + x03 += x18 * L4_1; // x03:55/-- + x04 += x18 * L4_2; // x04:56/32 + x05 += x18 * L4_3; // x05:57/32 + x06 += x18 * L4_4; // x06:57/56 + x07 += x18 * L4_5; // x07:58/-- + x08 += x18 * L4_6; // x08:58/56 + x09 += x18 * L4_7; // x09:59/-- + + x01 += x17 * L4_0; // x01:54/-- + x02 += x17 * L4_1; // x02:54/53 + x03 += x17 * L4_2; // x03:56/53 + x04 += x17 * L4_3; // x04:57/32 + x05 += x17 * L4_4; // x05:57/55 + x06 += x17 * L4_5; // x06:58/-- + x07 += x17 * L4_6; // x07:58/56 + x08 += x17 * L4_7; // x08:59/-- + + x16 *= 4; + x16 += (x15 >> 26); x15 &= M26UL; + x16 += 1; // x16:30/01 + + x00 += x16 * L_0; + x01 += x16 * L_1; + x02 += x16 * L_2; + x03 += x16 * L_3; + x04 += x16 * L_4; + x05 += x16 * L_5; + x06 += x16 * L_6; + x07 += x16 * L_7; + + x01 += (x00 >> 28); x00 &= M28UL; + x02 += (x01 >> 28); x01 &= M28UL; + x03 += (x02 >> 28); x02 &= M28UL; + x04 += (x03 >> 28); x03 &= M28UL; + x05 += (x04 >> 28); x04 &= M28UL; + x06 += (x05 >> 28); x05 &= M28UL; + x07 += (x06 >> 28); x06 &= M28UL; + x08 += (x07 >> 28); x07 &= M28UL; + x09 += (x08 >> 28); x08 &= M28UL; + x10 += (x09 >> 28); x09 &= M28UL; + x11 += (x10 >> 28); x10 &= M28UL; + x12 += (x11 >> 28); x11 &= M28UL; + x13 += (x12 >> 28); x12 &= M28UL; + x14 += (x13 >> 28); x13 &= M28UL; + x15 += (x14 >> 28); x14 &= M28UL; + x16 = (x15 >> 26); x15 &= M26UL; + + x16 -= 1; + + Debug.Assert(x16 == 0UL || x16 == ulong.MaxValue); + + x00 -= x16 & L_0; + x01 -= x16 & L_1; + x02 -= x16 & L_2; + x03 -= x16 & L_3; + x04 -= x16 & L_4; + x05 -= x16 & L_5; + x06 -= x16 & L_6; + 
x07 -= x16 & L_7; + + x01 += (ulong)((long)x00 >> 28); x00 &= M28UL; + x02 += (ulong)((long)x01 >> 28); x01 &= M28UL; + x03 += (ulong)((long)x02 >> 28); x02 &= M28UL; + x04 += (ulong)((long)x03 >> 28); x03 &= M28UL; + x05 += (ulong)((long)x04 >> 28); x04 &= M28UL; + x06 += (ulong)((long)x05 >> 28); x05 &= M28UL; + x07 += (ulong)((long)x06 >> 28); x06 &= M28UL; + x08 += (ulong)((long)x07 >> 28); x07 &= M28UL; + x09 += (ulong)((long)x08 >> 28); x08 &= M28UL; + x10 += (ulong)((long)x09 >> 28); x09 &= M28UL; + x11 += (ulong)((long)x10 >> 28); x10 &= M28UL; + x12 += (ulong)((long)x11 >> 28); x11 &= M28UL; + x13 += (ulong)((long)x12 >> 28); x12 &= M28UL; + x14 += (ulong)((long)x13 >> 28); x13 &= M28UL; + x15 += (ulong)((long)x14 >> 28); x14 &= M28UL; + + Debug.Assert(x15 >> 26 == 0UL); + + Codec.Encode56(x00 | (x01 << 28), r); + Codec.Encode56(x02 | (x03 << 28), r[7..]); + Codec.Encode56(x04 | (x05 << 28), r[14..]); + Codec.Encode56(x06 | (x07 << 28), r[21..]); + Codec.Encode56(x08 | (x09 << 28), r[28..]); + Codec.Encode56(x10 | (x11 << 28), r[35..]); + Codec.Encode56(x12 | (x13 << 28), r[42..]); + Codec.Encode56(x14 | (x15 << 28), r[49..]); + r[ScalarBytes - 1] = 0; + } +#endif + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + internal static void ReduceBasisVar(ReadOnlySpan<uint> k, Span<uint> z0, Span<uint> z1) + { + /* + * Split scalar k into two half-size scalars z0 and z1, such that z1 * k == z0 mod L. + * + * See https://ia.cr/2020/454 (Pornin). 
+ */ + + Span<uint> Nu = stackalloc uint[28]; LSq.CopyTo(Nu); + Span<uint> Nv = stackalloc uint[28]; Nat.Square(14, k, Nv); Nat.AddWordTo(28, 1U, Nv); + Span<uint> p = stackalloc uint[28]; Nat.Mul(14, L, k, p); + Span<uint> u0 = stackalloc uint[8]; u0.CopyFrom(L); + Span<uint> u1 = stackalloc uint[8]; + Span<uint> v0 = stackalloc uint[8]; v0.CopyFrom(k); + Span<uint> v1 = stackalloc uint[8]; v1[0] = 1U; + + int last = 27; + int len_Nv = ScalarUtilities.GetBitLengthPositive(last, Nv); + + while (len_Nv > TargetLength) + { + int len_p = ScalarUtilities.GetBitLength(last, p); + int s = len_p - len_Nv; + s &= ~(s >> 31); + + if ((int)p[last] < 0) + { + ScalarUtilities.AddShifted_NP(last, s, Nu, Nv, p); + ScalarUtilities.AddShifted_UV(last: 7, s, u0, u1, v0, v1); + } + else + { + ScalarUtilities.SubShifted_NP(last, s, Nu, Nv, p); + ScalarUtilities.SubShifted_UV(last: 7, s, u0, u1, v0, v1); + } + + if (ScalarUtilities.LessThan(last, Nu, Nv)) + { + ScalarUtilities.Swap(ref u0, ref v0); + ScalarUtilities.Swap(ref u1, ref v1); + ScalarUtilities.Swap(ref Nu, ref Nv); + + last = len_Nv >> 5; + len_Nv = ScalarUtilities.GetBitLengthPositive(last, Nv); + } + } + + Debug.Assert((int)v0[7] >> 31 == (int)v0[7] >> 1); + Debug.Assert((int)v1[7] >> 31 == (int)v1[7] >> 1); + + // v1 * k == v0 mod L + v0.CopyTo(z0); + v1.CopyTo(z1); + } +#else + internal static void ReduceBasisVar(uint[] k, uint[] z0, uint[] z1) + { + /* + * Split scalar k into two half-size scalars z0 and z1, such that z1 * k == z0 mod L. + * + * See https://ia.cr/2020/454 (Pornin). 
+ */ + + uint[] Nu = new uint[28]; Array.Copy(LSq, Nu, 28); + uint[] Nv = new uint[28]; Nat.Square(14, k, Nv); Nat.AddWordTo(28, 1U, Nv); + uint[] p = new uint[28]; Nat.Mul(14, L, k, p); + uint[] u0 = new uint[8]; Array.Copy(L, u0, 8); + uint[] u1 = new uint[8]; + uint[] v0 = new uint[8]; Array.Copy(k, v0, 8); + uint[] v1 = new uint[8]; v1[0] = 1U; + + int last = 27; + int len_Nv = ScalarUtilities.GetBitLengthPositive(last, Nv); + + while (len_Nv > TargetLength) + { + int len_p = ScalarUtilities.GetBitLength(last, p); + int s = len_p - len_Nv; + s &= ~(s >> 31); + + if ((int)p[last] < 0) + { + ScalarUtilities.AddShifted_NP(last, s, Nu, Nv, p); + ScalarUtilities.AddShifted_UV(last: 7, s, u0, u1, v0, v1); + } + else + { + ScalarUtilities.SubShifted_NP(last, s, Nu, Nv, p); + ScalarUtilities.SubShifted_UV(last: 7, s, u0, u1, v0, v1); + } + + if (ScalarUtilities.LessThan(last, Nu, Nv)) + { + ScalarUtilities.Swap(ref u0, ref v0); + ScalarUtilities.Swap(ref u1, ref v1); + ScalarUtilities.Swap(ref Nu, ref Nv); + + last = len_Nv >> 5; + len_Nv = ScalarUtilities.GetBitLengthPositive(last, Nv); + } + } + + Debug.Assert((int)v0[7] >> 31 == (int)v0[7] >> 1); + Debug.Assert((int)v1[7] >> 31 == (int)v1[7] >> 1); + + // v1 * k == v0 mod L + Array.Copy(v0, z0, 8); + Array.Copy(v1, z1, 8); + } +#endif + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + internal static void ToSignedDigits(int bits, ReadOnlySpan<uint> x, Span<uint> z) +#else + internal static void ToSignedDigits(int bits, uint[] x, uint[] z) +#endif + { + Debug.Assert(448 < bits && bits < 480); + Debug.Assert(z.Length > Size); + + z[Size] = (1U << (bits - 448)) + + Nat.CAdd(Size, ~(int)x[0] & 1, x, L, z); + uint c = Nat.ShiftDownBit(Size + 1, z, 0); + Debug.Assert(c == (1U << 31)); + } + } +} diff --git a/crypto/src/math/ec/rfc8032/ScalarUtilities.cs b/crypto/src/math/ec/rfc8032/ScalarUtilities.cs new file mode 100644 index 000000000..3407c65c7 --- /dev/null +++ 
// File: crypto/src/math/ec/rfc8032/ScalarUtilities.cs
using System;
#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#endif

using Org.BouncyCastle.Utilities;

namespace Org.BouncyCastle.Math.EC.Rfc8032
{
    /// <summary>
    /// Word-level helpers used by the <c>ReduceBasisVar</c> routines of the scalar classes.
    /// </summary>
    /// <remarks>
    /// All multi-word values are arrays of 32-bit words with the least significant word at index 0.
    /// The <c>last</c> argument is the index of the most significant word that takes part in an
    /// operation; words above it are neither read nor written.
    /// </remarks>
    internal static class ScalarUtilities
    {
        /// <summary>
        /// Combined update of <paramref name="Nu"/> and <paramref name="_p"/> by shifted additions.
        /// </summary>
        /// <remarks>
        /// With p' = p + (Nv &lt;&lt; s), computes Nu += (p &lt;&lt; s) + (p' &lt;&lt; s) and then stores
        /// p = p'. Arithmetic is on words 0..last, i.e. modulo 2^(32 * (last + 1)).
        /// </remarks>
        /// <param name="last">Index of the most significant word to process.</param>
        /// <param name="s">Left shift in bits; expected to be non-negative.</param>
        /// <param name="Nu">Updated in place.</param>
        /// <param name="Nv">Read only.</param>
        /// <param name="_p">Updated in place.</param>
#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static void AddShifted_NP(int last, int s, Span<uint> Nu, ReadOnlySpan<uint> Nv, Span<uint> _p)
#else
        internal static void AddShifted_NP(int last, int s, uint[] Nu, uint[] Nv, uint[] _p)
#endif
        {
            int sWords = s >> 5, sBits = s & 31;

            if (sWords > 0)
            {
                // A fused single pass is not safe here: the "old" word _p[i - sWords] may already
                // have been overwritten by an earlier iteration (whenever i - sWords >= sWords).
                // Three separate passes never read a word of p in the wrong state.
                AddShiftedTo(last, sWords, sBits, Nu, _p);  // Nu += p  << s   (p still original)
                AddShiftedTo(last, sWords, sBits, _p, Nv);  // p  += Nv << s
                AddShiftedTo(last, sWords, sBits, Nu, _p);  // Nu += p' << s   (p now updated)
                return;
            }

            // s < 32: source and destination word indices coincide, so word i of p can be read
            // before it is replaced and re-used afterwards within the same iteration.
            ulong cc__p = 0UL;
            ulong cc_Nu = 0UL;
            uint prev_p = 0U;
            uint prev_q = 0U;
            uint prev_v = 0U;

            for (int i = 0; i <= last; ++i)
            {
                uint p_i = _p[i];
                uint p_s = ShiftIn(p_i, prev_p, sBits);
                prev_p = p_i;

                cc_Nu += Nu[i];
                cc_Nu += p_s;

                uint v_i = Nv[i];
                uint v_s = ShiftIn(v_i, prev_v, sBits);
                prev_v = v_i;

                cc__p += p_i;
                cc__p += v_s;
                uint q_i = (uint)cc__p; cc__p >>= 32;
                _p[i] = q_i;

                uint q_s = ShiftIn(q_i, prev_q, sBits);
                prev_q = q_i;

                cc_Nu += q_s;
                Nu[i] = (uint)cc_Nu; cc_Nu >>= 32;
            }
        }

        /// <summary>
        /// Computes u0 += (v0 &lt;&lt; s) and u1 += (v1 &lt;&lt; s) on words 0..last, both in one pass.
        /// </summary>
        /// <param name="last">Index of the most significant word to process.</param>
        /// <param name="s">Left shift in bits; expected to be non-negative.</param>
        /// <param name="u0">Updated in place.</param>
        /// <param name="u1">Updated in place.</param>
        /// <param name="v0">Read only.</param>
        /// <param name="v1">Read only.</param>
#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static void AddShifted_UV(int last, int s, Span<uint> u0, Span<uint> u1, ReadOnlySpan<uint> v0,
            ReadOnlySpan<uint> v1)
#else
        internal static void AddShifted_UV(int last, int s, uint[] u0, uint[] u1, uint[] v0, uint[] v1)
#endif
        {
            int sWords = s >> 5, sBits = s & 31;

            ulong cc_u0 = 0UL;
            ulong cc_u1 = 0UL;

            if (sBits == 0)
            {
                // Whole-word shift: no bits cross word boundaries.
                for (int i = sWords; i <= last; ++i)
                {
                    cc_u0 += u0[i];
                    cc_u1 += u1[i];
                    cc_u0 += v0[i - sWords];
                    cc_u1 += v1[i - sWords];
                    u0[i] = (uint)cc_u0; cc_u0 >>= 32;
                    u1[i] = (uint)cc_u1; cc_u1 >>= 32;
                }
            }
            else
            {
                uint prev_v0 = 0U;
                uint prev_v1 = 0U;

                for (int i = sWords; i <= last; ++i)
                {
                    uint next_v0 = v0[i - sWords];
                    uint next_v1 = v1[i - sWords];
                    // A shift count of -sBits on a uint is taken modulo 32, i.e. 32 - sBits.
                    uint v0_s = (next_v0 << sBits) | (prev_v0 >> -sBits);
                    uint v1_s = (next_v1 << sBits) | (prev_v1 >> -sBits);
                    prev_v0 = next_v0;
                    prev_v1 = next_v1;

                    cc_u0 += u0[i];
                    cc_u1 += u1[i];
                    cc_u0 += v0_s;
                    cc_u1 += v1_s;
                    u0[i] = (uint)cc_u0; cc_u0 >>= 32;
                    u1[i] = (uint)cc_u1; cc_u1 >>= 32;
                }
            }
        }

        /// <summary>
        /// Bit length of the signed value in words 0..last of <paramref name="x"/>.
        /// </summary>
        /// <remarks>
        /// The sign is taken from the top bit of x[last]. Leading words equal to the sign extension
        /// are skipped, then the position of the highest bit differing from the sign is reported.
        /// </remarks>
#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static int GetBitLength(int last, ReadOnlySpan<uint> x)
#else
        internal static int GetBitLength(int last, uint[] x)
#endif
        {
            int i = last;
            // 0x00000000 for a clear top bit, 0xFFFFFFFF for a set one.
            uint sign = (uint)((int)x[i] >> 31);
            while (i > 0 && x[i] == sign)
            {
                --i;
            }
            return i * 32 + 32 - Integers.NumberOfLeadingZeros((int)(x[i] ^ sign));
        }

        /// <summary>
        /// Bit length of the value in words 0..last of <paramref name="x"/>, treating it as
        /// non-negative (no sign handling is applied).
        /// </summary>
#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static int GetBitLengthPositive(int last, ReadOnlySpan<uint> x)
#else
        internal static int GetBitLengthPositive(int last, uint[] x)
#endif
        {
            int i = last;
            while (i > 0 && x[i] == 0)
            {
                --i;
            }
            return i * 32 + 32 - Integers.NumberOfLeadingZeros((int)x[i]);
        }

        /// <summary>
        /// Returns true when x &lt; y, comparing words 0..last as signed values: the top word is
        /// compared as a signed integer, all lower words as unsigned.
        /// </summary>
#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static bool LessThan(int last, ReadOnlySpan<uint> x, ReadOnlySpan<uint> y)
#else
        internal static bool LessThan(int last, uint[] x, uint[] y)
#endif
        {
            int i = last;
            if ((int)x[i] < (int)y[i])
                return true;
            if ((int)x[i] > (int)y[i])
                return false;
            while (--i >= 0)
            {
                if (x[i] < y[i])
                    return true;
                if (x[i] > y[i])
                    return false;
            }
            // Equal values are not "less than".
            return false;
        }

        /// <summary>
        /// Combined update of <paramref name="Nu"/> and <paramref name="_p"/> by shifted subtractions.
        /// </summary>
        /// <remarks>
        /// With p' = p - (Nv &lt;&lt; s), computes Nu -= (p &lt;&lt; s) + (p' &lt;&lt; s) and then stores
        /// p = p'. Arithmetic is on words 0..last, i.e. modulo 2^(32 * (last + 1)).
        /// </remarks>
        /// <param name="last">Index of the most significant word to process.</param>
        /// <param name="s">Left shift in bits; expected to be non-negative.</param>
        /// <param name="Nu">Updated in place.</param>
        /// <param name="Nv">Read only.</param>
        /// <param name="_p">Updated in place.</param>
#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static void SubShifted_NP(int last, int s, Span<uint> Nu, ReadOnlySpan<uint> Nv, Span<uint> _p)
#else
        internal static void SubShifted_NP(int last, int s, uint[] Nu, uint[] Nv, uint[] _p)
#endif
        {
            int sWords = s >> 5, sBits = s & 31;

            if (sWords > 0)
            {
                // See AddShifted_NP: for s >= 32 a fused pass would read words of p that were
                // already overwritten, so the three updates are applied one after another.
                SubShiftedFrom(last, sWords, sBits, Nu, _p);  // Nu -= p  << s   (p still original)
                SubShiftedFrom(last, sWords, sBits, _p, Nv);  // p  -= Nv << s
                SubShiftedFrom(last, sWords, sBits, Nu, _p);  // Nu -= p' << s   (p now updated)
                return;
            }

            // Signed accumulators: after ">>= 32" they hold the borrow (0 or negative).
            long cc__p = 0L;
            long cc_Nu = 0L;
            uint prev_p = 0U;
            uint prev_q = 0U;
            uint prev_v = 0U;

            for (int i = 0; i <= last; ++i)
            {
                uint p_i = _p[i];
                uint p_s = ShiftIn(p_i, prev_p, sBits);
                prev_p = p_i;

                cc_Nu += Nu[i];
                cc_Nu -= p_s;

                uint v_i = Nv[i];
                uint v_s = ShiftIn(v_i, prev_v, sBits);
                prev_v = v_i;

                cc__p += p_i;
                cc__p -= v_s;
                uint q_i = (uint)cc__p; cc__p >>= 32;
                _p[i] = q_i;

                uint q_s = ShiftIn(q_i, prev_q, sBits);
                prev_q = q_i;

                cc_Nu -= q_s;
                Nu[i] = (uint)cc_Nu; cc_Nu >>= 32;
            }
        }

        /// <summary>
        /// Computes u0 -= (v0 &lt;&lt; s) and u1 -= (v1 &lt;&lt; s) on words 0..last, both in one pass.
        /// </summary>
        /// <param name="last">Index of the most significant word to process.</param>
        /// <param name="s">Left shift in bits; expected to be non-negative.</param>
        /// <param name="u0">Updated in place.</param>
        /// <param name="u1">Updated in place.</param>
        /// <param name="v0">Read only.</param>
        /// <param name="v1">Read only.</param>
#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static void SubShifted_UV(int last, int s, Span<uint> u0, Span<uint> u1, ReadOnlySpan<uint> v0,
            ReadOnlySpan<uint> v1)
#else
        internal static void SubShifted_UV(int last, int s, uint[] u0, uint[] u1, uint[] v0, uint[] v1)
#endif
        {
            int sWords = s >> 5, sBits = s & 31;

            long cc_u0 = 0L;
            long cc_u1 = 0L;

            if (sBits == 0)
            {
                // Whole-word shift: no bits cross word boundaries.
                for (int i = sWords; i <= last; ++i)
                {
                    cc_u0 += u0[i];
                    cc_u1 += u1[i];
                    cc_u0 -= v0[i - sWords];
                    cc_u1 -= v1[i - sWords];
                    u0[i] = (uint)cc_u0; cc_u0 >>= 32;
                    u1[i] = (uint)cc_u1; cc_u1 >>= 32;
                }
            }
            else
            {
                uint prev_v0 = 0U;
                uint prev_v1 = 0U;

                for (int i = sWords; i <= last; ++i)
                {
                    uint next_v0 = v0[i - sWords];
                    uint next_v1 = v1[i - sWords];
                    // A shift count of -sBits on a uint is taken modulo 32, i.e. 32 - sBits.
                    uint v0_s = (next_v0 << sBits) | (prev_v0 >> -sBits);
                    uint v1_s = (next_v1 << sBits) | (prev_v1 >> -sBits);
                    prev_v0 = next_v0;
                    prev_v1 = next_v1;

                    cc_u0 += u0[i];
                    cc_u1 += u1[i];
                    cc_u0 -= v0_s;
                    cc_u1 -= v1_s;
                    u0[i] = (uint)cc_u0; cc_u0 >>= 32;
                    u1[i] = (uint)cc_u1; cc_u1 >>= 32;
                }
            }
        }

        /// <summary>Exchanges the two references; the underlying word storage is not copied.</summary>
#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal static void Swap(ref Span<uint> x, ref Span<uint> y)
#else
        internal static void Swap(ref uint[] x, ref uint[] y)
#endif
        {
            var t = x; x = y; y = t;
        }

        /// <summary>
        /// z += (x &lt;&lt; (32 * sWords + sBits)) on words sWords..last; <paramref name="z"/> and
        /// <paramref name="x"/> must be distinct storage.
        /// </summary>
#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static void AddShiftedTo(int last, int sWords, int sBits, Span<uint> z, ReadOnlySpan<uint> x)
#else
        private static void AddShiftedTo(int last, int sWords, int sBits, uint[] z, uint[] x)
#endif
        {
            ulong cc = 0UL;
            uint prev = 0U;

            for (int i = sWords; i <= last; ++i)
            {
                uint next = x[i - sWords];
                uint x_s = ShiftIn(next, prev, sBits);
                prev = next;

                cc += z[i];
                cc += x_s;
                z[i] = (uint)cc; cc >>= 32;
            }
        }

        /// <summary>
        /// z -= (x &lt;&lt; (32 * sWords + sBits)) on words sWords..last; <paramref name="z"/> and
        /// <paramref name="x"/> must be distinct storage.
        /// </summary>
#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private static void SubShiftedFrom(int last, int sWords, int sBits, Span<uint> z, ReadOnlySpan<uint> x)
#else
        private static void SubShiftedFrom(int last, int sWords, int sBits, uint[] z, uint[] x)
#endif
        {
            long cc = 0L;
            uint prev = 0U;

            for (int i = sWords; i <= last; ++i)
            {
                uint next = x[i - sWords];
                uint x_s = ShiftIn(next, prev, sBits);
                prev = next;

                cc += z[i];
                cc -= x_s;
                z[i] = (uint)cc; cc >>= 32;
            }
        }

        /// <summary>
        /// One word of a left-shifted multi-word value: <paramref name="next"/> shifted up by
        /// <paramref name="sBits"/> (0..31), with the top bits of the word below it shifted in.
        /// </summary>
#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
#endif
        private static uint ShiftIn(uint next, uint prev, int sBits)
        {
            // The right shift is split in two so that sBits == 0 contributes nothing from prev
            // (a single shift by 32 would be reduced modulo 32 to a shift by 0).
            return (next << sBits) | ((prev >> 1) >> (31 - sBits));
        }
    }
}