diff options
Diffstat (limited to 'crypto/src/math')
39 files changed, 1367 insertions, 599 deletions
diff --git a/crypto/src/math/BigInteger.cs b/crypto/src/math/BigInteger.cs index 7da886c4f..42b5b5089 100644 --- a/crypto/src/math/BigInteger.cs +++ b/crypto/src/math/BigInteger.cs @@ -139,6 +139,8 @@ namespace Org.BouncyCastle.Math public static readonly BigInteger Two; public static readonly BigInteger Three; public static readonly BigInteger Four; + public static readonly BigInteger Five; + public static readonly BigInteger Six; public static readonly BigInteger Ten; #if !NETCOREAPP3_0_OR_GREATER @@ -181,27 +183,34 @@ namespace Org.BouncyCastle.Math static BigInteger() { Zero = new BigInteger(0, ZeroMagnitude, false); - Zero.nBits = 0; Zero.nBitLength = 0; + Zero.nBits = 0; + Zero.nBitLength = 0; SMALL_CONSTANTS[0] = Zero; for (uint i = 1; i < SMALL_CONSTANTS.Length; ++i) { - SMALL_CONSTANTS[i] = CreateUValueOf(i); + var sc = CreateUValueOf(i); + sc.nBits = Integers.PopCount(i); + sc.nBitLength = BitLen(i); + + SMALL_CONSTANTS[i] = sc; } One = SMALL_CONSTANTS[1]; Two = SMALL_CONSTANTS[2]; Three = SMALL_CONSTANTS[3]; Four = SMALL_CONSTANTS[4]; + Five = SMALL_CONSTANTS[5]; + Six = SMALL_CONSTANTS[6]; Ten = SMALL_CONSTANTS[10]; - radix2 = ValueOf(2); + radix2 = Two; radix2E = radix2.Pow(chunk2); radix8 = ValueOf(8); radix8E = radix8.Pow(chunk8); - radix10 = ValueOf(10); + radix10 = Ten; radix10E = radix10.Pow(chunk10); radix16 = ValueOf(16); @@ -1171,7 +1180,7 @@ namespace Org.BouncyCastle.Math ? 1 : sign == 0 ? 0 - : sign * CompareNoLeadingZeroes(0, magnitude, 0, other.magnitude); + : sign * CompareNoLeadingZeros(0, magnitude, 0, other.magnitude); } /** @@ -1190,10 +1199,10 @@ namespace Org.BouncyCastle.Math yIndx++; } - return CompareNoLeadingZeroes(xIndx, x, yIndx, y); + return CompareNoLeadingZeros(xIndx, x, yIndx, y); } - private static int CompareNoLeadingZeroes(int xIndx, uint[] x, int yIndx, uint[] y) + private static int CompareNoLeadingZeros(int xIndx, uint[] x, int yIndx, uint[] y) { int diff = (x.Length - y.Length) - (xIndx - yIndx); @@ -1234,7 +1243,7 @@ namespace Org.BouncyCastle.Math Debug.Assert(yStart < y.Length); - int xyCmp = CompareNoLeadingZeroes(xStart, x, yStart, y); + int xyCmp = CompareNoLeadingZeros(xStart, x, yStart, y); uint[] count; if (xyCmp > 0) @@ -1271,7 +1280,7 @@ namespace Org.BouncyCastle.Math for (;;) { if (cBitLength < xBitLength - || CompareNoLeadingZeroes(xStart, x, cStart, c) >= 0) + || CompareNoLeadingZeros(xStart, x, cStart, c) >= 0) { Subtract(xStart, x, cStart, c); AddMagnitudes(count, iCount); @@ -1289,7 +1298,7 @@ namespace Org.BouncyCastle.Math if (xBitLength < yBitLength) return count; - xyCmp = CompareNoLeadingZeroes(xStart, x, yStart, y); + xyCmp = CompareNoLeadingZeros(xStart, x, yStart, y); if (xyCmp <= 0) break; @@ -1623,6 +1632,8 @@ namespace Org.BouncyCastle.Math BigInteger montRadix = One.ShiftLeft(32 * n.magnitude.Length).Remainder(n); BigInteger minusMontRadix = n.Subtract(montRadix); + uint[] yAccum = new uint[n.magnitude.Length + 1]; + do { BigInteger a; @@ -1633,7 +1644,7 @@ namespace Org.BouncyCastle.Math while (a.sign == 0 || a.CompareTo(n) >= 0 || a.IsEqualMagnitude(montRadix) || a.IsEqualMagnitude(minusMontRadix)); - BigInteger y = ModPowMonty(a, r, n, false); + BigInteger y = ModPowMonty(yAccum, a, r, n, false); if (!y.Equals(montRadix)) { @@ -1643,7 +1654,7 @@ namespace Org.BouncyCastle.Math if (++j == s) return false; - y = ModPowMonty(y, Two, n, false); + y = ModSquareMonty(yAccum, y, n); if (y.Equals(montRadix)) return false; @@ -1725,12 +1736,12 @@ namespace Org.BouncyCastle.Math // for (;;) // { // // While F is even, do F=F/u, C=C*u, k=k+1. -// int zeroes = F.GetLowestSetBit(); -// if (zeroes > 0) +// int zeros = F.GetLowestSetBit(); +// if (zeros > 0) // { -// F = F.ShiftRight(zeroes); -// C = C.ShiftLeft(zeroes); -// k += zeroes; +// F = F.ShiftRight(zeros); +// C = C.ShiftLeft(zeros); +// k += zeros; // } // // // If F = 1, then return B,k. @@ -1891,7 +1902,8 @@ namespace Org.BouncyCastle.Math } else { - result = ModPowMonty(result, e, m, true); + uint[] yAccum = new uint[m.magnitude.Length + 1]; + result = ModPowMonty(yAccum, result, e, m, true); } } @@ -1925,17 +1937,17 @@ namespace Org.BouncyCastle.Math oddPowers[i] = ReduceBarrett(oddPowers[i - 1].Multiply(b2), m, mr, yu); } - int[] windowList = GetWindowList(e.magnitude, extraBits); + uint[] windowList = GetWindowList(e.magnitude, extraBits); Debug.Assert(windowList.Length > 0); - int window = windowList[0]; - int mult = window & 0xFF, lastZeroes = window >> 8; + uint window = windowList[0]; + uint mult = window & 0xFFU, lastZeros = window >> 8; BigInteger y; if (mult == 1) { y = b2; - --lastZeroes; + --lastZeros; } else { @@ -1943,11 +1955,11 @@ namespace Org.BouncyCastle.Math } int windowPos = 1; - while ((window = windowList[windowPos++]) != -1) + while ((window = windowList[windowPos++]) != uint.MaxValue) { mult = window & 0xFF; - int bits = lastZeroes + BitLen((byte)mult); + int bits = (int)lastZeros + BitLen((byte)mult); for (int j = 0; j < bits; ++j) { y = ReduceBarrett(y.Square(), m, mr, yu); @@ -1955,10 +1967,10 @@ namespace Org.BouncyCastle.Math y = ReduceBarrett(y.Multiply(oddPowers[mult >> 1]), m, mr, yu); - lastZeroes = window >> 8; + lastZeros = window >> 8; } - for (int i = 0; i < lastZeroes; ++i) + for (int i = 0; i < lastZeros; ++i) { y = ReduceBarrett(y.Square(), m, mr, yu); } @@ -1999,7 +2011,7 @@ namespace Org.BouncyCastle.Math return x; } - private static BigInteger ModPowMonty(BigInteger b, BigInteger e, BigInteger m, bool convert) + private static BigInteger ModPowMonty(uint[] yAccum, BigInteger b, BigInteger e, BigInteger m, bool convert) { int n = m.magnitude.Length; int powR = 32 * n; @@ -2012,7 +2024,7 @@ namespace Org.BouncyCastle.Math b = b.ShiftLeft(powR).Remainder(m); } - uint[] yAccum = new uint[n + 1]; + Debug.Assert(yAccum.Length == n + 1); uint[] zVal = b.magnitude; Debug.Assert(zVal.Length <= n); @@ -2050,17 +2062,17 @@ namespace Org.BouncyCastle.Math MultiplyMonty(yAccum, oddPowers[i], zSquared, m.magnitude, mDash, smallMontyModulus); } - int[] windowList = GetWindowList(e.magnitude, extraBits); + uint[] windowList = GetWindowList(e.magnitude, extraBits); Debug.Assert(windowList.Length > 1); - int window = windowList[0]; - int mult = window & 0xFF, lastZeroes = window >> 8; + uint window = windowList[0]; + uint mult = window & 0xFF, lastZeros = window >> 8; uint[] yVal; if (mult == 1) { yVal = zSquared; - --lastZeroes; + --lastZeros; } else { @@ -2068,11 +2080,11 @@ namespace Org.BouncyCastle.Math } int windowPos = 1; - while ((window = windowList[windowPos++]) != -1) + while ((window = windowList[windowPos++]) != uint.MaxValue) { mult = window & 0xFF; - int bits = lastZeroes + BitLen((byte)mult); + int bits = (int)lastZeros + BitLen((byte)mult); for (int j = 0; j < bits; ++j) { SquareMonty(yAccum, yVal, m.magnitude, mDash, smallMontyModulus); @@ -2080,10 +2092,10 @@ namespace Org.BouncyCastle.Math MultiplyMonty(yAccum, yVal, oddPowers[mult >> 1], m.magnitude, mDash, smallMontyModulus); - lastZeroes = window >> 8; + lastZeros = window >> 8; } - for (int i = 0; i < lastZeroes; ++i) + for (int i = 0; i < lastZeros; ++i) { SquareMonty(yAccum, yVal, m.magnitude, mDash, smallMontyModulus); } @@ -2101,22 +2113,49 @@ namespace Org.BouncyCastle.Math return new BigInteger(1, yVal, true); } - private static int[] GetWindowList(uint[] mag, int extraBits) + private static BigInteger ModSquareMonty(uint[] yAccum, BigInteger b, BigInteger m) + { + int n = m.magnitude.Length; + int powR = 32 * n; + bool smallMontyModulus = m.BitLength + 2 <= powR; + uint mDash = m.GetMQuote(); + + Debug.Assert(yAccum.Length == n + 1); + + uint[] zVal = b.magnitude; + Debug.Assert(zVal.Length <= n); + + uint[] yVal = new uint[n]; + zVal.CopyTo(yVal, n - zVal.Length); + + SquareMonty(yAccum, yVal, m.magnitude, mDash, smallMontyModulus); + + if (smallMontyModulus && CompareTo(0, yVal, 0, m.magnitude) >= 0) + { + Subtract(0, yVal, 0, m.magnitude); + } + + return new BigInteger(1, yVal, true); + } + + private static uint[] GetWindowList(uint[] mag, int extraBits) { - int v = (int)mag[0]; - Debug.Assert(v != 0); + uint v = mag[0]; + Debug.Assert(v != 0U); - int leadingBits = BitLen((uint)v); + int leadingBits = BitLen(v); + int totalBits = ((mag.Length - 1) << 5) + leadingBits; - int resultSize = (((mag.Length - 1) << 5) + leadingBits) / (1 + extraBits) + 2; - int[] result = new int[resultSize]; + int resultSize = (totalBits + extraBits) / (1 + extraBits) + 1; + uint[] result = new uint[resultSize]; int resultPos = 0; int bitPos = 33 - leadingBits; v <<= bitPos; - int mult = 1, multLimit = 1 << extraBits; - int zeroes = 0; + uint mult = 1U; + uint multLimit = 1U << extraBits; + uint zeros = 0U; int i = 0; for (;;) @@ -2125,17 +2164,17 @@ namespace Org.BouncyCastle.Math { if (mult < multLimit) { - mult = (mult << 1) | (int)((uint)v >> 31); + mult = (mult << 1) | (v >> 31); } - else if (v < 0) + else if ((int)v < 0) { - result[resultPos++] = CreateWindowEntry(mult, zeroes); - mult = 1; - zeroes = 0; + result[resultPos++] = CreateWindowEntry(mult, zeros); + mult = 1U; + zeros = 0U; } else { - ++zeroes; + ++zeros; } v <<= 1; @@ -2143,35 +2182,35 @@ namespace Org.BouncyCastle.Math if (++i == mag.Length) { - result[resultPos++] = CreateWindowEntry(mult, zeroes); + result[resultPos++] = CreateWindowEntry(mult, zeros); break; } - v = (int)mag[i]; + v = mag[i]; bitPos = 0; } - result[resultPos] = -1; + result[resultPos] = uint.MaxValue; // Sentinel value return result; } - private static int CreateWindowEntry(int mult, int zeroes) + private static uint CreateWindowEntry(uint mult, uint zeros) { Debug.Assert(mult > 0); #if NETCOREAPP3_0_OR_GREATER int tz = BitOperations.TrailingZeroCount(mult); mult >>= tz; - zeroes += tz; + zeros += (uint)tz; #else - while ((mult & 1) == 0) + while ((mult & 1U) == 0U) { mult >>= 1; - ++zeroes; + ++zeros; } #endif - return mult | (zeroes << 8); + return mult | (zeros << 8); } /** @@ -2682,7 +2721,7 @@ namespace Org.BouncyCastle.Math Debug.Assert(yStart < y.Length); - int xyCmp = CompareNoLeadingZeroes(xStart, x, yStart, y); + int xyCmp = CompareNoLeadingZeros(xStart, x, yStart, y); if (xyCmp > 0) { @@ -2709,7 +2748,7 @@ namespace Org.BouncyCastle.Math for (;;) { if (cBitLength < xBitLength - || CompareNoLeadingZeroes(xStart, x, cStart, c) >= 0) + || CompareNoLeadingZeros(xStart, x, cStart, c) >= 0) { Subtract(xStart, x, cStart, c); @@ -2726,7 +2765,7 @@ namespace Org.BouncyCastle.Math if (xBitLength < yBitLength) return x; - xyCmp = CompareNoLeadingZeroes(xStart, x, yStart, y); + xyCmp = CompareNoLeadingZeros(xStart, x, yStart, y); if (xyCmp <= 0) break; @@ -2799,7 +2838,7 @@ namespace Org.BouncyCastle.Math } } - if (CompareNoLeadingZeroes(0, magnitude, 0, n.magnitude) < 0) + if (CompareNoLeadingZeros(0, magnitude, 0, n.magnitude) < 0) return this; uint[] result; @@ -3094,7 +3133,7 @@ namespace Org.BouncyCastle.Math if (this.sign != n.sign) return Add(n.Negate()); - int compare = CompareNoLeadingZeroes(0, magnitude, 0, n.magnitude); + int compare = CompareNoLeadingZeros(0, magnitude, 0, n.magnitude); if (compare == 0) return Zero; @@ -3607,47 +3646,55 @@ namespace Org.BouncyCastle.Math sb.Append(s); } + private static BigInteger CreateUValueOf(uint value) + { + if (value == 0) + return Zero; + + return new BigInteger(1, new uint[]{ value }, false); + } + private static BigInteger CreateUValueOf(ulong value) { uint msw = (uint)(value >> 32); uint lsw = (uint)value; - if (msw != 0) - return new BigInteger(1, new uint[]{ msw, lsw }, false); - - if (lsw != 0) - { - BigInteger n = new BigInteger(1, new uint[]{ lsw }, false); - // Check for a power of two - if ((lsw & -lsw) == lsw) - { - n.nBits = 1; - } - return n; - } + if (msw == 0) + return CreateUValueOf(lsw); - return Zero; + return new BigInteger(1, new uint[]{ msw, lsw }, false); } - private static BigInteger CreateValueOf(long value) + public static BigInteger ValueOf(int value) { - if (value < 0) + if (value >= 0) { - if (value == long.MinValue) - return CreateValueOf(~value).Not(); + if (value < SMALL_CONSTANTS.Length) + return SMALL_CONSTANTS[value]; - return CreateValueOf(-value).Negate(); + return CreateUValueOf((uint)value); } - return CreateUValueOf((ulong)value); + if (value == int.MinValue) + return CreateUValueOf((uint)~value).Not(); + + return ValueOf(-value).Negate(); } public static BigInteger ValueOf(long value) { - if (value >= 0 && value < SMALL_CONSTANTS.Length) - return SMALL_CONSTANTS[value]; + if (value >= 0L) + { + if (value < SMALL_CONSTANTS.Length) + return SMALL_CONSTANTS[value]; + + return CreateUValueOf((ulong)value); + } + + if (value == long.MinValue) + return CreateUValueOf((ulong)~value).Not(); - return CreateValueOf(value); + return ValueOf(-value).Negate(); } public int GetLowestSetBit() diff --git a/crypto/src/math/ec/ECCurve.cs b/crypto/src/math/ec/ECCurve.cs index 624495051..245ca1941 100644 --- a/crypto/src/math/ec/ECCurve.cs +++ b/crypto/src/math/ec/ECCurve.cs @@ -1,7 +1,7 @@ using System; +using System.Collections.Concurrent; using System.Collections.Generic; -using Org.BouncyCastle.Math.EC.Abc; using Org.BouncyCastle.Math.EC.Endo; using Org.BouncyCastle.Math.EC.Multiplier; using Org.BouncyCastle.Math.Field; @@ -675,40 +675,26 @@ namespace Org.BouncyCastle.Math.EC public abstract class AbstractFpCurve : ECCurve { - private static readonly HashSet<BigInteger> KnownQs = new HashSet<BigInteger>(); + private static readonly ConcurrentDictionary<BigInteger, bool> KnownPrimes = + new ConcurrentDictionary<BigInteger, bool>(); protected AbstractFpCurve(BigInteger q) - : this(q, false) + : this(q, isInternal: false) { } internal AbstractFpCurve(BigInteger q, bool isInternal) : base(FiniteFields.GetPrimeField(q)) { - if (!isInternal) + if (isInternal) { - bool unknownQ; - lock (KnownQs) unknownQ = !KnownQs.Contains(q); - - if (unknownQ) - { - int maxBitLength = ImplGetInteger("Org.BouncyCastle.EC.Fp_MaxSize", 1042); // 2 * 521 - int certainty = ImplGetInteger("Org.BouncyCastle.EC.Fp_Certainty", 100); - - int qBitLength = q.BitLength; - if (maxBitLength < qBitLength) - throw new ArgumentException("Fp q value out of range"); - - if (Primes.HasAnySmallFactors(q) || - !Primes.IsMRProbablePrime(q, SecureRandom.ArbitraryRandom, - ImplGetNumberOfIterations(qBitLength, certainty))) - { - throw new ArgumentException("Fp q value not prime"); - } - } + KnownPrimes.AddOrUpdate(q, true, (key, value) => true); + } + else if (!KnownPrimes.ContainsKey(q)) + { + ImplCheckQ(q); + KnownPrimes.TryAdd(q, false); } - - lock (KnownQs) KnownQs.Add(q); } public override bool IsValidFieldElement(BigInteger x) @@ -761,16 +747,24 @@ namespace Org.BouncyCastle.Math.EC return CreateRawPoint(x, y); } + private static void ImplCheckQ(BigInteger q) + { + int maxBitLength = ImplGetInteger("Org.BouncyCastle.EC.Fp_MaxSize", 1042); // 2 * 521 + if (q.BitLength > maxBitLength) + throw new ArgumentException("Fp q value out of range"); + + if (!ImplIsPrime(q)) + throw new ArgumentException("Fp q value not prime"); + } + private static int ImplGetInteger(string envVariable, int defaultValue) { - string v = Platform.GetEnvironmentVariable(envVariable); - if (v == null) - return defaultValue; + string property = Platform.GetEnvironmentVariable(envVariable); - return int.Parse(v); + return int.TryParse(property, out int value) ? value : defaultValue; } - private static int ImplGetNumberOfIterations(int bits, int certainty) + private static int ImplGetIterations(int bits, int certainty) { /* * NOTE: We enforce a minimum 'certainty' of 100 for bits >= 1024 (else 80). Where the @@ -802,6 +796,17 @@ namespace Org.BouncyCastle.Math.EC } } + private static bool ImplIsPrime(BigInteger q) + { + if (Primes.HasAnySmallFactors(q)) + return false; + + int certainty = ImplGetInteger("Org.BouncyCastle.EC.Fp_Certainty", 100); + int iterations = ImplGetIterations(q.BitLength, certainty); + + return Primes.IsMRProbablePrime(q, SecureRandom.ArbitraryRandom, iterations); + } + private static BigInteger ImplRandomFieldElement(SecureRandom r, BigInteger p) { BigInteger x; @@ -843,15 +848,15 @@ namespace Org.BouncyCastle.Math.EC } public FpCurve(BigInteger q, BigInteger a, BigInteger b, BigInteger order, BigInteger cofactor) - : this(q, a, b, order, cofactor, false) + : this(q, a, b, order, cofactor, isInternal: false) { } - internal FpCurve(BigInteger q, BigInteger a, BigInteger b, BigInteger order, BigInteger cofactor, bool isInternal) + internal FpCurve(BigInteger q, BigInteger a, BigInteger b, BigInteger order, BigInteger cofactor, + bool isInternal) : base(q, isInternal) { this.m_q = q; - this.m_r = FpFieldElement.CalculateResidue(q); this.m_infinity = new FpPoint(this, null, null); @@ -864,7 +869,7 @@ namespace Org.BouncyCastle.Math.EC internal FpCurve(BigInteger q, BigInteger r, ECFieldElement a, ECFieldElement b, BigInteger order, BigInteger cofactor) - : base(q, true) + : base(q, isInternal: true) { this.m_q = q; this.m_r = r; diff --git a/crypto/src/math/ec/abc/Tnaf.cs b/crypto/src/math/ec/abc/Tnaf.cs index 88a4eeb96..d8e9b6ae0 100644 --- a/crypto/src/math/ec/abc/Tnaf.cs +++ b/crypto/src/math/ec/abc/Tnaf.cs @@ -500,12 +500,12 @@ namespace Org.BouncyCastle.Math.EC.Abc { if (mu == 1) { - return BigInteger.ValueOf(6); + return BigInteger.Six; } else { // mu == -1 - return BigInteger.ValueOf(10); + return BigInteger.Ten; } } else diff --git a/crypto/src/math/ec/custom/gm/SM2P256V1Curve.cs b/crypto/src/math/ec/custom/gm/SM2P256V1Curve.cs index 3147ccf98..6456120a6 100644 --- a/crypto/src/math/ec/custom/gm/SM2P256V1Curve.cs +++ b/crypto/src/math/ec/custom/gm/SM2P256V1Curve.cs @@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.GM protected readonly SM2P256V1Point m_infinity; public SM2P256V1Curve() - : base(q, true) + : base(q, isInternal: true) { this.m_infinity = new SM2P256V1Point(this, null, null); diff --git a/crypto/src/math/ec/custom/sec/SecP128R1Curve.cs b/crypto/src/math/ec/custom/sec/SecP128R1Curve.cs index 5fa18d470..b96fa75d0 100644 --- a/crypto/src/math/ec/custom/sec/SecP128R1Curve.cs +++ b/crypto/src/math/ec/custom/sec/SecP128R1Curve.cs @@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec protected readonly SecP128R1Point m_infinity; public SecP128R1Curve() - : base(q, true) + : base(q, isInternal: true) { this.m_infinity = new SecP128R1Point(this, null, null); diff --git a/crypto/src/math/ec/custom/sec/SecP160K1Curve.cs b/crypto/src/math/ec/custom/sec/SecP160K1Curve.cs index b757659d2..471f7f992 100644 --- a/crypto/src/math/ec/custom/sec/SecP160K1Curve.cs +++ b/crypto/src/math/ec/custom/sec/SecP160K1Curve.cs @@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec protected readonly SecP160K1Point m_infinity; public SecP160K1Curve() - : base(q, true) + : base(q, isInternal: true) { this.m_infinity = new SecP160K1Point(this, null, null); diff --git a/crypto/src/math/ec/custom/sec/SecP160R1Curve.cs b/crypto/src/math/ec/custom/sec/SecP160R1Curve.cs index 3b7e1aa06..491b10fd3 100644 --- a/crypto/src/math/ec/custom/sec/SecP160R1Curve.cs +++ b/crypto/src/math/ec/custom/sec/SecP160R1Curve.cs @@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec protected readonly SecP160R1Point m_infinity; public SecP160R1Curve() - : base(q, true) + : base(q, isInternal: true) { this.m_infinity = new SecP160R1Point(this, null, null); diff --git a/crypto/src/math/ec/custom/sec/SecP160R2Curve.cs b/crypto/src/math/ec/custom/sec/SecP160R2Curve.cs index 0f226ad19..97d8b6d00 100644 --- a/crypto/src/math/ec/custom/sec/SecP160R2Curve.cs +++ b/crypto/src/math/ec/custom/sec/SecP160R2Curve.cs @@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec protected readonly SecP160R2Point m_infinity; public SecP160R2Curve() - : base(q, true) + : base(q, isInternal: true) { this.m_infinity = new SecP160R2Point(this, null, null); diff --git a/crypto/src/math/ec/custom/sec/SecP192K1Curve.cs b/crypto/src/math/ec/custom/sec/SecP192K1Curve.cs index b9ff71ac8..b4a884e83 100644 --- a/crypto/src/math/ec/custom/sec/SecP192K1Curve.cs +++ b/crypto/src/math/ec/custom/sec/SecP192K1Curve.cs @@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec protected readonly SecP192K1Point m_infinity; public SecP192K1Curve() - : base(q, true) + : base(q, isInternal: true) { this.m_infinity = new SecP192K1Point(this, null, null); diff --git a/crypto/src/math/ec/custom/sec/SecP192R1Curve.cs b/crypto/src/math/ec/custom/sec/SecP192R1Curve.cs index 77524b362..accb5a786 100644 --- a/crypto/src/math/ec/custom/sec/SecP192R1Curve.cs +++ b/crypto/src/math/ec/custom/sec/SecP192R1Curve.cs @@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec protected readonly SecP192R1Point m_infinity; public SecP192R1Curve() - : base(q, true) + : base(q, isInternal: true) { this.m_infinity = new SecP192R1Point(this, null, null); diff --git a/crypto/src/math/ec/custom/sec/SecP224K1Curve.cs b/crypto/src/math/ec/custom/sec/SecP224K1Curve.cs index 04be47202..7f828bc87 100644 --- a/crypto/src/math/ec/custom/sec/SecP224K1Curve.cs +++ b/crypto/src/math/ec/custom/sec/SecP224K1Curve.cs @@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec protected readonly SecP224K1Point m_infinity; public SecP224K1Curve() - : base(q, true) + : base(q, isInternal: true) { this.m_infinity = new SecP224K1Point(this, null, null); diff --git a/crypto/src/math/ec/custom/sec/SecP224R1Curve.cs b/crypto/src/math/ec/custom/sec/SecP224R1Curve.cs index 8cd2b7272..ca2b876af 100644 --- a/crypto/src/math/ec/custom/sec/SecP224R1Curve.cs +++ b/crypto/src/math/ec/custom/sec/SecP224R1Curve.cs @@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec protected readonly SecP224R1Point m_infinity; public SecP224R1Curve() - : base(q, true) + : base(q, isInternal: true) { this.m_infinity = new SecP224R1Point(this, null, null); diff --git a/crypto/src/math/ec/custom/sec/SecP256K1Curve.cs b/crypto/src/math/ec/custom/sec/SecP256K1Curve.cs index 804b65d60..391ac7b17 100644 --- a/crypto/src/math/ec/custom/sec/SecP256K1Curve.cs +++ b/crypto/src/math/ec/custom/sec/SecP256K1Curve.cs @@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec protected readonly SecP256K1Point m_infinity; public SecP256K1Curve() - : base(q, true) + : base(q, isInternal: true) { this.m_infinity = new SecP256K1Point(this, null, null); diff --git a/crypto/src/math/ec/custom/sec/SecP256R1Curve.cs b/crypto/src/math/ec/custom/sec/SecP256R1Curve.cs index dd2b964c6..a9d1a4a2b 100644 --- a/crypto/src/math/ec/custom/sec/SecP256R1Curve.cs +++ b/crypto/src/math/ec/custom/sec/SecP256R1Curve.cs @@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec protected readonly SecP256R1Point m_infinity; public SecP256R1Curve() - : base(q, true) + : base(q, isInternal: true) { this.m_infinity = new SecP256R1Point(this, null, null); diff --git a/crypto/src/math/ec/custom/sec/SecP384R1Curve.cs b/crypto/src/math/ec/custom/sec/SecP384R1Curve.cs index f54dd44c2..4704bb16f 100644 --- a/crypto/src/math/ec/custom/sec/SecP384R1Curve.cs +++ b/crypto/src/math/ec/custom/sec/SecP384R1Curve.cs @@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec protected readonly SecP384R1Point m_infinity; public SecP384R1Curve() - : base(q, true) + : base(q, isInternal: true) { this.m_infinity = new SecP384R1Point(this, null, null); diff --git a/crypto/src/math/ec/custom/sec/SecP521R1Curve.cs b/crypto/src/math/ec/custom/sec/SecP521R1Curve.cs index a5f4cf957..136af8a1a 100644 --- a/crypto/src/math/ec/custom/sec/SecP521R1Curve.cs +++ b/crypto/src/math/ec/custom/sec/SecP521R1Curve.cs @@ -18,7 +18,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec protected readonly SecP521R1Point m_infinity; public SecP521R1Curve() - : base(q, true) + : base(q, isInternal: true) { this.m_infinity = new SecP521R1Point(this, null, null); diff --git a/crypto/src/math/ec/custom/sec/SecT113Field.cs b/crypto/src/math/ec/custom/sec/SecT113Field.cs index 596d8070b..2477b9c78 100644 --- a/crypto/src/math/ec/custom/sec/SecT113Field.cs +++ b/crypto/src/math/ec/custom/sec/SecT113Field.cs @@ -287,7 +287,8 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec private static void ImplMultiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz) { #if NETCOREAPP3_0_OR_GREATER - if (Pclmulqdq.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<ulong>>() == 16) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled && + Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian) { var X01 = Vector128.Create(x[0], x[1]); var Y01 = Vector128.Create(y[0], y[1]); @@ -424,7 +425,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec #endif { #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.X64.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled) { zz[3] = Bmi2.X64.ParallelBitDeposit(x[1] >> 32, 0x5555555555555555UL); zz[2] = Bmi2.X64.ParallelBitDeposit(x[1] , 0x5555555555555555UL); diff --git a/crypto/src/math/ec/custom/sec/SecT131Field.cs b/crypto/src/math/ec/custom/sec/SecT131Field.cs index 743fa6a5d..49d504afb 100644 --- a/crypto/src/math/ec/custom/sec/SecT131Field.cs +++ b/crypto/src/math/ec/custom/sec/SecT131Field.cs @@ -324,7 +324,8 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec private static void ImplMultiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz) { #if NETCOREAPP3_0_OR_GREATER - if (Pclmulqdq.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<ulong>>() == 16) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled && + Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian) { var X01 = Vector128.Create(x[0], x[1]); var X2_ = Vector128.CreateScalar(x[2]); @@ -620,7 +621,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec zz[4] = Interleave.Expand8to16((byte)x[2]); #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.X64.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled) { zz[3] = Bmi2.X64.ParallelBitDeposit(x[1] >> 32, 0x5555555555555555UL); zz[2] = Bmi2.X64.ParallelBitDeposit(x[1] , 0x5555555555555555UL); diff --git a/crypto/src/math/ec/custom/sec/SecT163Field.cs b/crypto/src/math/ec/custom/sec/SecT163Field.cs index d0f09cd8b..e4fda48ac 100644 --- a/crypto/src/math/ec/custom/sec/SecT163Field.cs +++ b/crypto/src/math/ec/custom/sec/SecT163Field.cs @@ -335,7 +335,8 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec private static void ImplMultiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz) { #if NETCOREAPP3_0_OR_GREATER - if (Pclmulqdq.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<ulong>>() == 16) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled && + Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian) { var X01 = Vector128.Create(x[0], x[1]); var X2_ = Vector128.CreateScalar(x[2]); @@ -623,7 +624,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec #endif { #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.X64.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled) { zz[5] = Bmi2.X64.ParallelBitDeposit(x[2] >> 32, 0x5555555555555555UL); zz[4] = Bmi2.X64.ParallelBitDeposit(x[2] , 0x5555555555555555UL); diff --git a/crypto/src/math/ec/custom/sec/SecT193Field.cs b/crypto/src/math/ec/custom/sec/SecT193Field.cs index b610bf554..7eebc29c6 100644 --- a/crypto/src/math/ec/custom/sec/SecT193Field.cs +++ b/crypto/src/math/ec/custom/sec/SecT193Field.cs @@ -360,7 +360,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec private static void ImplMultiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz) { #if NETCOREAPP3_0_OR_GREATER - if (Pclmulqdq.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled) { var X01 = Vector128.Create(x[0], x[1]); var X2_ = Vector128.CreateScalar(x[2]); @@ -545,7 +545,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec zz[6] = x[3] & M01; #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.X64.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled) { zz[5] = Bmi2.X64.ParallelBitDeposit(x[2] >> 32, 0x5555555555555555UL); zz[4] = Bmi2.X64.ParallelBitDeposit(x[2] , 0x5555555555555555UL); diff --git a/crypto/src/math/ec/custom/sec/SecT233Field.cs b/crypto/src/math/ec/custom/sec/SecT233Field.cs index 00bbc0635..bf7b33139 100644 --- a/crypto/src/math/ec/custom/sec/SecT233Field.cs +++ b/crypto/src/math/ec/custom/sec/SecT233Field.cs @@ -378,7 +378,8 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec private static void ImplMultiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz) { #if NETCOREAPP3_0_OR_GREATER - if (Pclmulqdq.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<ulong>>() == 16) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled && + Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian) { var X01 = Vector128.Create(x[0], x[1]); var X23 = Vector128.Create(x[2], x[3]); @@ -576,7 +577,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec #endif { #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.X64.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled) { ulong x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3]; zz[7] = Bmi2.X64.ParallelBitDeposit(x3 >> 32, 0x5555555555555555UL); diff --git a/crypto/src/math/ec/custom/sec/SecT239Field.cs b/crypto/src/math/ec/custom/sec/SecT239Field.cs index b90867b76..a6f3c9e4d 100644 --- a/crypto/src/math/ec/custom/sec/SecT239Field.cs +++ b/crypto/src/math/ec/custom/sec/SecT239Field.cs @@ -387,7 +387,8 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec private static void ImplMultiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz) { #if NETCOREAPP3_0_OR_GREATER - if (Pclmulqdq.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<ulong>>() == 16) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled && + Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian) { var X01 = Vector128.Create(x[0], x[1]); var X23 = Vector128.Create(x[2], x[3]); @@ -587,7 +588,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec #endif { #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.X64.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled) { ulong x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3]; zz[7] = Bmi2.X64.ParallelBitDeposit(x3 >> 32, 0x5555555555555555UL); diff --git a/crypto/src/math/ec/custom/sec/SecT283Field.cs b/crypto/src/math/ec/custom/sec/SecT283Field.cs index 498a72c81..92f8ea385 100644 --- a/crypto/src/math/ec/custom/sec/SecT283Field.cs +++ b/crypto/src/math/ec/custom/sec/SecT283Field.cs @@ -386,7 +386,8 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec private static void ImplMultiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz) { #if NETCOREAPP3_0_OR_GREATER - if (Pclmulqdq.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<ulong>>() == 16) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled && + Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian) { var X01 = Vector128.Create(x[0], x[1]); var X23 = Vector128.Create(x[2], x[3]); @@ -746,7 +747,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec zz[8] = Interleave.Expand32to64((uint)x[4]); #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.X64.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled) { zz[7] = Bmi2.X64.ParallelBitDeposit(x[3] >> 32, 0x5555555555555555UL); zz[6] = Bmi2.X64.ParallelBitDeposit(x[3] , 0x5555555555555555UL); diff --git a/crypto/src/math/ec/custom/sec/SecT409Field.cs b/crypto/src/math/ec/custom/sec/SecT409Field.cs index 6a5afb0dc..a8a39a575 100644 --- a/crypto/src/math/ec/custom/sec/SecT409Field.cs +++ b/crypto/src/math/ec/custom/sec/SecT409Field.cs @@ -551,7 +551,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec Debug.Assert(y >> 59 == 0); #if NETCOREAPP3_0_OR_GREATER - if (Pclmulqdq.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled) { var X = Vector128.CreateScalar(x); var Y = Vector128.CreateScalar(y); @@ -607,7 +607,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec zz[12] = Interleave.Expand32to64((uint)x[6]); #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.X64.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled) { zz[11] = Bmi2.X64.ParallelBitDeposit(x[5] >> 32, 0x5555555555555555UL); zz[10] = Bmi2.X64.ParallelBitDeposit(x[5] , 0x5555555555555555UL); diff --git a/crypto/src/math/ec/custom/sec/SecT571Field.cs b/crypto/src/math/ec/custom/sec/SecT571Field.cs index e970027a5..47f157dc3 100644 --- a/crypto/src/math/ec/custom/sec/SecT571Field.cs +++ b/crypto/src/math/ec/custom/sec/SecT571Field.cs @@ -654,7 +654,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec #endif { #if NETCOREAPP3_0_OR_GREATER - if (Pclmulqdq.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Pclmulqdq.IsEnabled) { var X = Vector128.CreateScalar(x); var Y = Vector128.CreateScalar(y); @@ -711,7 +711,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec #endif { #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.X64.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled) { zz[17] = Bmi2.X64.ParallelBitDeposit(x[8] >> 32, 0x5555555555555555UL); zz[16] = Bmi2.X64.ParallelBitDeposit(x[8] , 0x5555555555555555UL); diff --git a/crypto/src/math/ec/rfc7748/X25519.cs b/crypto/src/math/ec/rfc7748/X25519.cs index ffddd4376..0b2be4af0 100644 --- a/crypto/src/math/ec/rfc7748/X25519.cs +++ b/crypto/src/math/ec/rfc7748/X25519.cs @@ -269,6 +269,12 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER ScalarMultBase(k.AsSpan(kOff), r.AsSpan(rOff)); #else + // Equivalent (but much slower) + //byte[] u = new byte[PointSize]; + //u[0] = 9; + + //ScalarMult(k, kOff, u, 0, r, rOff); + int[] y = F.Create(); int[] z = F.Create(); @@ -287,6 +293,12 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER public static void ScalarMultBase(ReadOnlySpan<byte> k, Span<byte> r) { + // Equivalent (but much slower) + //Span<byte> u = stackalloc byte[PointSize]; + //u[0] = 9; + + //ScalarMult(k, u, r); + int[] y = F.Create(); int[] z = F.Create(); diff --git a/crypto/src/math/ec/rfc7748/X25519Field.cs b/crypto/src/math/ec/rfc7748/X25519Field.cs index 079e673a8..ffe468f73 100644 --- a/crypto/src/math/ec/rfc7748/X25519Field.cs +++ b/crypto/src/math/ec/rfc7748/X25519Field.cs @@ -24,10 +24,10 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748 private const int M25 = 0x01FFFFFF; private const int M26 = 0x03FFFFFF; - private static readonly uint[] P32 = new uint[]{ 0xFFFFFFEDU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, - 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0x7FFFFFFFU }; - private static readonly int[] RootNegOne = { 0x020EA0B0, 0x0386C9D2, 0x00478C4E, 0x0035697F, 0x005E8630, - 0x01FBD7A7, 0x0340264F, 0x01F0B2B4, 0x00027E0E, 0x00570649 }; + private static readonly uint[] P32 = { 0xFFFFFFEDU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, + 0xFFFFFFFFU, 0xFFFFFFFFU, 0x7FFFFFFFU }; + private static readonly int[] RootNegOne = { -0x01F15F50, -0x0079362D, 0x00478C4F, 0x0035697F, 0x005E8630, + 0x01FBD7A7, -0x00BFD9B1, -0x000F4D4B, 0x00027E0F, 0x00570649 }; #if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -35,7 +35,8 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748 public static void Add(int[] x, int[] y, int[] z) { #if NETCOREAPP3_0_OR_GREATER - if (Avx2.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector256<int>>() == 32) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled && + Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian) { var X = MemoryMarshal.AsBytes(x.AsSpan(0, 8)); var Y = MemoryMarshal.AsBytes(y.AsSpan(0, 8)); @@ -54,7 +55,8 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748 return; } - if (Sse2.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<int>>() == 16) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled && + Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian) { var X = MemoryMarshal.AsBytes(x.AsSpan(0, 8)); var Y = MemoryMarshal.AsBytes(y.AsSpan(0, 8)); @@ -105,7 +107,8 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748 public static void Apm(int[] x, int[] y, int[] zp, int[] zm) { #if NETCOREAPP3_0_OR_GREATER - if (Avx2.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector256<int>>() == 32) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled && + Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian) { var X = MemoryMarshal.AsBytes(x.AsSpan(0, 8)); var Y = MemoryMarshal.AsBytes(y.AsSpan(0, 8)); @@ -132,7 +135,8 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748 return; } - if (Sse2.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<int>>() == 16) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled && + Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian) { var X = MemoryMarshal.AsBytes(x.AsSpan(0, 8)); var Y = MemoryMarshal.AsBytes(y.AsSpan(0, 8)); @@ -833,7 +837,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748 public static void Normalize(int[] z) { - int x = (z[9] >> 23) & 1; + int x = (z[9] >> (24 - 1)) & 1; Reduce(z, x); Reduce(z, -x); Debug.Assert(z[9] >> 24 == 0); @@ -842,7 +846,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER public static void Normalize(Span<int> z) { - int x = (z[9] >> 23) & 1; + int x = (z[9] >> (24 - 1)) & 1; Reduce(z, x); Reduce(z, -x); Debug.Assert(z[9] >> 24 == 0); @@ -1101,7 +1105,8 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748 public static void Sub(int[] x, int[] y, int[] z) { #if NETCOREAPP3_0_OR_GREATER - if (Avx2.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector256<int>>() == 32) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled && + Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian) { var X = MemoryMarshal.AsBytes(x.AsSpan(0, 8)); var Y = MemoryMarshal.AsBytes(y.AsSpan(0, 8)); @@ -1120,7 +1125,8 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748 return; } - if (Sse2.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<int>>() == 16) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled && + Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian) { var X = MemoryMarshal.AsBytes(x.AsSpan(0, 8)); var Y = MemoryMarshal.AsBytes(y.AsSpan(0, 8)); diff --git a/crypto/src/math/ec/rfc7748/X448.cs b/crypto/src/math/ec/rfc7748/X448.cs index 7e078c5c6..0019f53fc 100644 --- a/crypto/src/math/ec/rfc7748/X448.cs +++ b/crypto/src/math/ec/rfc7748/X448.cs @@ -282,6 +282,12 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER ScalarMultBase(k.AsSpan(kOff), r.AsSpan(rOff)); #else + // Equivalent (but much slower) + //byte[] u = new byte[PointSize]; + //u[0] = 5; + + //ScalarMult(k, kOff, u, 0, r, rOff); + uint[] x = F.Create(); uint[] y = F.Create(); @@ -299,6 +305,12 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER public static void ScalarMultBase(ReadOnlySpan<byte> k, Span<byte> r) { + // Equivalent (but much slower) + //Span<byte> u = stackalloc byte[PointSize]; + //u[0] = 5; + + //ScalarMult(k, u, r); + uint[] x = F.Create(); uint[] y = F.Create(); diff --git a/crypto/src/math/ec/rfc7748/X448Field.cs b/crypto/src/math/ec/rfc7748/X448Field.cs index f3fe71114..1b9fbb839 100644 --- a/crypto/src/math/ec/rfc7748/X448Field.cs +++ b/crypto/src/math/ec/rfc7748/X448Field.cs @@ -24,9 +24,9 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748 private const uint M28 = 0x0FFFFFFFU; - private static readonly uint[] P32 = new uint[]{ 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, - 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFEU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, - 0xFFFFFFFFU, 0xFFFFFFFFU }; + private static readonly uint[] P32 = { 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, + 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFEU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, + 0xFFFFFFFFU }; #if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -1458,7 +1458,8 @@ namespace Org.BouncyCastle.Math.EC.Rfc7748 public static void Sub(ReadOnlySpan<uint> x, ReadOnlySpan<uint> y, Span<uint> z) { #if NETCOREAPP3_0_OR_GREATER - if (Avx2.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector256<uint>>() == 32) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled && + Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPackedLittleEndian) { var ControlCarry = Vector256.Create(7U, 0U, 1U, 2U, 3U, 4U, 5U, 6U); var Mask28 = Vector256.Create(M28); diff --git a/crypto/src/math/ec/rfc8032/Ed25519.cs b/crypto/src/math/ec/rfc8032/Ed25519.cs index fd2d5fe93..7318a8a7e 100644 --- a/crypto/src/math/ec/rfc8032/Ed25519.cs +++ b/crypto/src/math/ec/rfc8032/Ed25519.cs @@ -55,9 +55,9 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 public static readonly int SignatureSize = PointBytes + ScalarBytes; // "SigEd25519 no Ed25519 collisions" - private static readonly byte[] Dom2Prefix = new byte[]{ 0x53, 0x69, 0x67, 0x45, 0x64, 0x32, 0x35, 0x35, 0x31, - 0x39, 0x20, 0x6e, 0x6f, 0x20, 0x45, 0x64, 0x32, 0x35, 0x35, 0x31, 0x39, 0x20, 0x63, 0x6f, 0x6c, 0x6c, 0x69, - 0x73, 0x69, 0x6f, 0x6e, 0x73 }; + private static readonly byte[] Dom2Prefix = { 0x53, 0x69, 0x67, 0x45, 0x64, 0x32, 0x35, 0x35, 0x31, 0x39, 0x20, + 0x6e, 0x6f, 0x20, 0x45, 0x64, 0x32, 0x35, 0x35, 0x31, 0x39, 0x20, 0x63, 0x6f, 0x6c, 0x6c, 0x69, 0x73, 0x69, + 0x6f, 0x6e, 0x73 }; private static readonly uint[] P = { 0xFFFFFFEDU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0x7FFFFFFFU }; @@ -149,7 +149,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 byte[] result = new byte[ScalarBytes * 2]; Codec.Encode32(t, 0, t.Length, result, 0); - return Scalar25519.Reduce(result); + return Scalar25519.Reduce512(result); } private static bool CheckContextVar(byte[] ctx, byte phflag) @@ -167,13 +167,14 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 F.Sqr(p.x, u); F.Sqr(p.y, v); F.Mul(u, v, t); - F.Sub(v, u, v); + F.Sub(u, v, u); F.Mul(t, C_d, t); F.AddOne(t); - F.Sub(t, v, t); + F.Add(t, u, t); F.Normalize(t); + F.Normalize(v); - return F.IsZero(t); + return F.IsZero(t) & ~F.IsZero(v); } private static int CheckPoint(PointAccum p) @@ -187,15 +188,17 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 F.Sqr(p.y, v); F.Sqr(p.z, w); F.Mul(u, v, t); - F.Sub(v, u, v); - F.Mul(v, w, v); + F.Sub(u, v, u); + F.Mul(u, w, u); F.Sqr(w, w); F.Mul(t, C_d, t); F.Add(t, w, t); - F.Sub(t, v, t); + F.Add(t, u, t); F.Normalize(t); + F.Normalize(v); + F.Normalize(w); - return F.IsZero(t); + return F.IsZero(t) & ~F.IsZero(v) & ~F.IsZero(w); } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER @@ -592,7 +595,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 d.BlockUpdate(m, mOff, mLen); d.DoFinal(h, 0); - byte[] r = Scalar25519.Reduce(h); + byte[] r = Scalar25519.Reduce512(h); byte[] R = new byte[PointBytes]; ScalarMultBaseEncoded(r, R, 0); @@ -605,7 +608,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 d.BlockUpdate(m, mOff, mLen); d.DoFinal(h, 0); - byte[] k = Scalar25519.Reduce(h); + byte[] k = Scalar25519.Reduce512(h); byte[] S = CalculateS(r, k, s); Array.Copy(R, 0, sig, sigOff, PointBytes); @@ -697,7 +700,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 d.DoFinal(h); Span<byte> k = stackalloc byte[ScalarBytes]; - Scalar25519.Reduce(h, k); + Scalar25519.Reduce512(h, k); Span<uint> nA = stackalloc uint[ScalarUints]; Scalar25519.Decode(k, nA); @@ -739,7 +742,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 d.BlockUpdate(m, mOff, mLen); d.DoFinal(h, 0); - byte[] k = Scalar25519.Reduce(h); + byte[] k = Scalar25519.Reduce512(h); uint[] nA = new uint[ScalarUints]; Scalar25519.Decode(k, nA); @@ -799,7 +802,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 d.DoFinal(h); Span<byte> k = stackalloc byte[ScalarBytes]; - Scalar25519.Reduce(h, k); + Scalar25519.Reduce512(h, k); Span<uint> nA = stackalloc uint[ScalarUints]; Scalar25519.Decode(k, nA); @@ -840,7 +843,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 d.BlockUpdate(m, mOff, mLen); d.DoFinal(h, 0); - byte[] k = Scalar25519.Reduce(h); + byte[] k = Scalar25519.Reduce512(h); uint[] nA = new uint[ScalarUints]; Scalar25519.Decode(k, nA); @@ -950,7 +953,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 F.Normalize(p.y); F.Normalize(p.z); - return F.IsZeroVar(p.x) && F.AreEqualVar(p.y, p.z); + return F.IsZeroVar(p.x) && !F.IsZeroVar(p.y) && F.AreEqualVar(p.y, p.z); } private static void PointAdd(ref PointExtended p, ref PointExtended q, ref PointExtended r, ref PointTemp t) @@ -1496,7 +1499,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 #endif Scalar25519.Decode(k, n); - Scalar25519.ToSignedDigits(256, n, n); + Scalar25519.ToSignedDigits(256, n); Init(out PointPrecompZ q); Init(out PointTemp t); @@ -1541,7 +1544,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 #endif Scalar25519.Decode(k, n); - Scalar25519.ToSignedDigits(PrecompRange, n, n); + Scalar25519.ToSignedDigits(PrecompRange, n); GroupCombBits(n); Init(out PointPrecomp p); @@ -1714,6 +1717,12 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 int bit = 128; while (--bit >= 0) { + if (((int)ws_b[bit] | (int)ws_b[128 + bit] | (int)ws_p[bit] | (int)ws_q[bit]) != 0) + break; + } + + for (; bit >= 0; --bit) + { int wb = ws_b[bit]; if (wb != 0) { diff --git a/crypto/src/math/ec/rfc8032/Ed448.cs b/crypto/src/math/ec/rfc8032/Ed448.cs index 08b64ddf2..aff9b5460 100644 --- a/crypto/src/math/ec/rfc8032/Ed448.cs +++ b/crypto/src/math/ec/rfc8032/Ed448.cs @@ -52,7 +52,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 public static readonly int SignatureSize = PointBytes + ScalarBytes; // "SigEd448" - private static readonly byte[] Dom4Prefix = new byte[]{ 0x53, 0x69, 0x67, 0x45, 0x64, 0x34, 0x34, 0x38 }; + private static readonly byte[] Dom4Prefix = { 0x53, 0x69, 0x67, 0x45, 0x64, 0x34, 0x34, 0x38 }; private static readonly uint[] P = { 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFEU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, 0xFFFFFFFFU, @@ -73,7 +73,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 0x03AC222BU, 0x0304DB8EU, 0x083EE319U, 0x05E5DB0BU, 0x0ECA503BU, 0x0B1C6539U, 0x078A8DCEU, 0x02D256BCU, 0x04A8B05EU, 0x0BD9FD57U, 0x0A1C3CB8U }; - private const int C_d = -39081; + private const uint C_d = 39081U; //private const int WnafWidth = 6; private const int WnafWidth225 = 5; @@ -118,7 +118,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 byte[] result = new byte[ScalarBytes * 2]; Codec.Encode32(t, 0, t.Length, result, 0); - return Scalar448.Reduce(result); + return Scalar448.Reduce912(result); } private static bool CheckContextVar(byte[] ctx) @@ -136,12 +136,13 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 F.Sqr(p.y, v); F.Mul(u, v, t); F.Add(u, v, u); - F.Mul(t, -C_d, t); + F.Mul(t, C_d, t); F.SubOne(t); F.Add(t, u, t); F.Normalize(t); + F.Normalize(v); - return F.IsZero(t); + return F.IsZero(t) & ~F.IsZero(v); } private static int CheckPoint(PointProjective p) @@ -158,12 +159,14 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 F.Add(u, v, u); F.Mul(u, w, u); F.Sqr(w, w); - F.Mul(t, -C_d, t); + F.Mul(t, C_d, t); F.Sub(t, w, t); F.Add(t, u, t); F.Normalize(t); + F.Normalize(v); + F.Normalize(w); - return F.IsZero(t); + return F.IsZero(t) & ~F.IsZero(v) & ~F.IsZero(w); } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER @@ -310,7 +313,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 uint[] v = F.Create(); F.Sqr(r.y, u); - F.Mul(u, (uint)-C_d, v); + F.Mul(u, C_d, v); F.Negate(u, u); F.AddOne(u); F.AddOne(v); @@ -545,7 +548,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 d.BlockUpdate(m, mOff, mLen); d.OutputFinal(h, 0, h.Length); - byte[] r = Scalar448.Reduce(h); + byte[] r = Scalar448.Reduce912(h); byte[] R = new byte[PointBytes]; ScalarMultBaseEncoded(r, R, 0); @@ -555,7 +558,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 d.BlockUpdate(m, mOff, mLen); d.OutputFinal(h, 0, h.Length); - byte[] k = Scalar448.Reduce(h); + byte[] k = Scalar448.Reduce912(h); byte[] S = CalculateS(r, k, s); Array.Copy(R, 0, sig, sigOff, PointBytes); @@ -644,7 +647,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 d.OutputFinal(h); Span<byte> k = stackalloc byte[ScalarBytes]; - Scalar448.Reduce(h, k); + Scalar448.Reduce912(h, k); Span<uint> nA = stackalloc uint[ScalarUints]; Scalar448.Decode(k, nA); @@ -683,7 +686,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 d.BlockUpdate(m, mOff, mLen); d.OutputFinal(h, 0, h.Length); - byte[] k = Scalar448.Reduce(h); + byte[] k = Scalar448.Reduce912(h); uint[] nA = new uint[ScalarUints]; Scalar448.Decode(k, nA); @@ -740,7 +743,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 d.OutputFinal(h); Span<byte> k = stackalloc byte[ScalarBytes]; - Scalar448.Reduce(h, k); + Scalar448.Reduce912(h, k); Span<uint> nA = stackalloc uint[ScalarUints]; Scalar448.Decode(k, nA); @@ -778,7 +781,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 d.BlockUpdate(m, mOff, mLen); d.OutputFinal(h, 0, h.Length); - byte[] k = Scalar448.Reduce(h); + byte[] k = Scalar448.Reduce912(h); uint[] nA = new uint[ScalarUints]; Scalar448.Decode(k, nA); @@ -868,7 +871,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 F.Normalize(p.y); F.Normalize(p.z); - return F.IsZeroVar(p.x) && F.AreEqualVar(p.y, p.z); + return F.IsZeroVar(p.x) && !F.IsZeroVar(p.y) && F.AreEqualVar(p.y, p.z); } private static void PointAdd(ref PointAffine p, ref PointProjective r, ref PointTemp t) @@ -885,7 +888,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 F.Mul(p.x, r.x, c); F.Mul(p.y, r.y, d); F.Mul(c, d, e); - F.Mul(e, -C_d, e); + F.Mul(e, C_d, e); //F.Apm(b, e, f, g); F.Add(b, e, f); F.Sub(b, e, g); @@ -920,7 +923,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 F.Mul(p.x, r.x, c); F.Mul(p.y, r.y, d); F.Mul(c, d, e); - F.Mul(e, -C_d, e); + F.Mul(e, C_d, e); //F.Apm(b, e, f, g); F.Add(b, e, f); F.Sub(b, e, g); @@ -965,7 +968,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 F.Mul(p.x, r.x, c); F.Mul(p.y, r.y, d); F.Mul(c, d, e); - F.Mul(e, -C_d, e); + F.Mul(e, C_d, e); //F.Apm(b, e, nf, ng); F.Add(b, e, nf); F.Sub(b, e, ng); @@ -1011,7 +1014,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 F.Mul(p.x, r.x, c); F.Mul(p.y, r.y, d); F.Mul(c, d, e); - F.Mul(e, -C_d, e); + F.Mul(e, C_d, e); //F.Apm(b, e, nf, ng); F.Add(b, e, nf); F.Sub(b, e, ng); @@ -1149,7 +1152,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 PointCopy(ref p, ref q); Init(out PointProjective d); - PointCopy(ref q, ref d); + PointCopy(ref p, ref d); PointDouble(ref d, ref t); uint[] table = F.CreateTable(count * 3); @@ -1581,6 +1584,12 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 int bit = 225; while (--bit >= 0) { + if (((int)ws_b[bit] | (int)ws_b[225 + bit] | (int)ws_p[bit] | (int)ws_q[bit]) != 0) + break; + } + + for (; bit >= 0; --bit) + { int wb = ws_b[bit]; if (wb != 0) { diff --git a/crypto/src/math/ec/rfc8032/Scalar25519.cs b/crypto/src/math/ec/rfc8032/Scalar25519.cs index 00dcd49a1..67eee6155 100644 --- a/crypto/src/math/ec/rfc8032/Scalar25519.cs +++ b/crypto/src/math/ec/rfc8032/Scalar25519.cs @@ -14,9 +14,9 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 { internal const int Size = 8; - private const long M08L = 0x000000FFL; + private const int ScalarBytes = Size * 4; + private const long M28L = 0x0FFFFFFFL; - private const long M32L = 0xFFFFFFFFL; private const int TargetLength = 254; @@ -70,7 +70,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER internal static void Multiply128Var(ReadOnlySpan<uint> x, ReadOnlySpan<uint> y128, Span<uint> z) { - Span<uint> tt = stackalloc uint[16]; + Span<uint> tt = stackalloc uint[12]; Nat256.Mul128(x, y128, tt); if ((int)y128[3] < 0) @@ -79,9 +79,20 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 Nat256.SubFrom(x, tt[4..], 0); } - Span<byte> r = MemoryMarshal.AsBytes(tt); - Reduce(r, r); - tt[..Size].CopyTo(z); + if (BitConverter.IsLittleEndian) + { + Span<byte> r = MemoryMarshal.AsBytes(tt); + Reduce384(r, r); + tt[..Size].CopyTo(z); + } + else + { + Span<byte> r = stackalloc byte[48]; + Codec.Encode32(tt, r); + + Reduce384(r, r); + Decode(r, z); + } } #else internal static void Multiply128Var(uint[] x, uint[] y128, uint[] z) @@ -95,40 +106,242 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 Nat256.SubFrom(x, 0, tt, 4, 0); } - byte[] bytes = new byte[64]; + byte[] bytes = new byte[48]; Codec.Encode32(tt, 0, 12, bytes, 0); - byte[] r = Reduce(bytes); + byte[] r = Reduce384(bytes); Decode(r, z); } #endif - internal static byte[] Reduce(byte[] n) + internal static byte[] Reduce384(byte[] n) + { + byte[] r = new byte[ScalarBytes]; + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + Reduce384(n, r); +#else + long x00 = Codec.Decode32(n, 0); // x00:32/-- + long x01 = (Codec.Decode24(n, 4) << 4); // x01:28/-- + long x02 = Codec.Decode32(n, 7); // x02:32/-- + long x03 = (Codec.Decode24(n, 11) << 4); // x03:28/-- + long x04 = Codec.Decode32(n, 14); // x04:32/-- + long x05 = (Codec.Decode24(n, 18) << 4); // x05:28/-- + long x06 = Codec.Decode32(n, 21); // x06:32/-- + long x07 = (Codec.Decode24(n, 25) << 4); // x07:28/-- + long x08 = Codec.Decode32(n, 28); // x08:32/-- + long x09 = (Codec.Decode24(n, 32) << 4); // x09:28/-- + long x10 = Codec.Decode32(n, 35); // x10:32/-- + long x11 = (Codec.Decode24(n, 39) << 4); // x11:28/-- + long x12 = Codec.Decode32(n, 42); // x12:32/-- + long x13 = (Codec.Decode16(n, 46) << 4); // x13:20/-- + long t; + + // TODO Fix bounds calculations which were copied from Reduce512 + + x13 += (x12 >> 28); x12 &= M28L; // x13:28/22, x12:28/-- + x04 -= x13 * L0; // x04:54/49 + x05 -= x13 * L1; // x05:54/53 + x06 -= x13 * L2; // x06:56/-- + x07 -= x13 * L3; // x07:56/52 + x08 -= x13 * L4; // x08:56/52 + + x12 += (x11 >> 28); x11 &= M28L; // x12:28/24, x11:28/-- + x03 -= x12 * L0; // x03:54/49 + x04 -= x12 * L1; // x04:54/51 + x05 -= x12 * L2; // x05:56/-- + x06 -= x12 * L3; // x06:56/52 + x07 -= x12 * L4; // x07:56/53 + + x11 += (x10 >> 28); x10 &= M28L; // x11:29/--, x10:28/-- + x02 -= x11 * L0; // x02:55/32 + x03 -= x11 * L1; // x03:55/-- + x04 -= x11 * L2; // x04:56/55 + x05 -= x11 * L3; // x05:56/52 + x06 -= x11 * L4; // x06:56/53 + + x10 += (x09 >> 28); x09 &= M28L; // x10:29/--, x09:28/-- + x01 -= x10 * L0; // x01:55/28 + x02 -= x10 * L1; // x02:55/54 + x03 -= x10 * L2; // x03:56/55 + x04 -= x10 * L3; // x04:57/-- + x05 -= x10 * L4; // x05:56/53 + + x08 += (x07 >> 28); x07 &= M28L; // x08:56/53, x07:28/-- + x09 += (x08 >> 28); x08 &= M28L; // x09:29/25, x08:28/-- + + t = (x08 >> 27) & 1L; + x09 += t; // x09:29/26 + + x00 -= x09 * L0; // x00:55/53 + x01 -= x09 * L1; // x01:55/54 + x02 -= x09 * L2; // x02:57/-- + x03 -= x09 * L3; // x03:57/-- + x04 -= x09 * L4; // x04:57/42 + + x01 += (x00 >> 28); x00 &= M28L; + x02 += (x01 >> 28); x01 &= M28L; + x03 += (x02 >> 28); x02 &= M28L; + x04 += (x03 >> 28); x03 &= M28L; + x05 += (x04 >> 28); x04 &= M28L; + x06 += (x05 >> 28); x05 &= M28L; + x07 += (x06 >> 28); x06 &= M28L; + x08 += (x07 >> 28); x07 &= M28L; + x09 = (x08 >> 28); x08 &= M28L; + + x09 -= t; + + Debug.Assert(x09 == 0L || x09 == -1L); + + x00 += x09 & L0; + x01 += x09 & L1; + x02 += x09 & L2; + x03 += x09 & L3; + x04 += x09 & L4; + + x01 += (x00 >> 28); x00 &= M28L; + x02 += (x01 >> 28); x01 &= M28L; + x03 += (x02 >> 28); x02 &= M28L; + x04 += (x03 >> 28); x03 &= M28L; + x05 += (x04 >> 28); x04 &= M28L; + x06 += (x05 >> 28); x05 &= M28L; + x07 += (x06 >> 28); x06 &= M28L; + x08 += (x07 >> 28); x07 &= M28L; + + Codec.Encode56((ulong)(x00 | (x01 << 28)), r, 0); + Codec.Encode56((ulong)(x02 | (x03 << 28)), r, 7); + Codec.Encode56((ulong)(x04 | (x05 << 28)), r, 14); + Codec.Encode56((ulong)(x06 | (x07 << 28)), r, 21); + Codec.Encode32((uint)x08, r, 28); +#endif + + return r; + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + internal static void Reduce384(ReadOnlySpan<byte> n, Span<byte> r) + { + long x00 = Codec.Decode32(n[ 0..]); // x00:32/-- + long x01 = (Codec.Decode24(n[ 4..]) << 4); // x01:28/-- + long x02 = Codec.Decode32(n[ 7..]); // x02:32/-- + long x03 = (Codec.Decode24(n[11..]) << 4); // x03:28/-- + long x04 = Codec.Decode32(n[14..]); // x04:32/-- + long x05 = (Codec.Decode24(n[18..]) << 4); // x05:28/-- + long x06 = Codec.Decode32(n[21..]); // x06:32/-- + long x07 = (Codec.Decode24(n[25..]) << 4); // x07:28/-- + long x08 = Codec.Decode32(n[28..]); // x08:32/-- + long x09 = (Codec.Decode24(n[32..]) << 4); // x09:28/-- + long x10 = Codec.Decode32(n[35..]); // x10:32/-- + long x11 = (Codec.Decode24(n[39..]) << 4); // x11:28/-- + long x12 = Codec.Decode32(n[42..]); // x12:32/-- + long x13 = (Codec.Decode16(n[46..]) << 4); // x13:20/-- + long t; + + // TODO Fix bounds calculations which were copied from Reduce512 + + x13 += (x12 >> 28); x12 &= M28L; // x13:28/22, x12:28/-- + x04 -= x13 * L0; // x04:54/49 + x05 -= x13 * L1; // x05:54/53 + x06 -= x13 * L2; // x06:56/-- + x07 -= x13 * L3; // x07:56/52 + x08 -= x13 * L4; // x08:56/52 + + x12 += (x11 >> 28); x11 &= M28L; // x12:28/24, x11:28/-- + x03 -= x12 * L0; // x03:54/49 + x04 -= x12 * L1; // x04:54/51 + x05 -= x12 * L2; // x05:56/-- + x06 -= x12 * L3; // x06:56/52 + x07 -= x12 * L4; // x07:56/53 + + x11 += (x10 >> 28); x10 &= M28L; // x11:29/--, x10:28/-- + x02 -= x11 * L0; // x02:55/32 + x03 -= x11 * L1; // x03:55/-- + x04 -= x11 * L2; // x04:56/55 + x05 -= x11 * L3; // x05:56/52 + x06 -= x11 * L4; // x06:56/53 + + x10 += (x09 >> 28); x09 &= M28L; // x10:29/--, x09:28/-- + x01 -= x10 * L0; // x01:55/28 + x02 -= x10 * L1; // x02:55/54 + x03 -= x10 * L2; // x03:56/55 + x04 -= x10 * L3; // x04:57/-- + x05 -= x10 * L4; // x05:56/53 + + x08 += (x07 >> 28); x07 &= M28L; // x08:56/53, x07:28/-- + x09 += (x08 >> 28); x08 &= M28L; // x09:29/25, x08:28/-- + + t = (x08 >> 27) & 1L; + x09 += t; // x09:29/26 + + x00 -= x09 * L0; // x00:55/53 + x01 -= x09 * L1; // x01:55/54 + x02 -= x09 * L2; // x02:57/-- + x03 -= x09 * L3; // x03:57/-- + x04 -= x09 * L4; // x04:57/42 + + x01 += (x00 >> 28); x00 &= M28L; + x02 += (x01 >> 28); x01 &= M28L; + x03 += (x02 >> 28); x02 &= M28L; + x04 += (x03 >> 28); x03 &= M28L; + x05 += (x04 >> 28); x04 &= M28L; + x06 += (x05 >> 28); x05 &= M28L; + x07 += (x06 >> 28); x06 &= M28L; + x08 += (x07 >> 28); x07 &= M28L; + x09 = (x08 >> 28); x08 &= M28L; + + x09 -= t; + + Debug.Assert(x09 == 0L || x09 == -1L); + + x00 += x09 & L0; + x01 += x09 & L1; + x02 += x09 & L2; + x03 += x09 & L3; + x04 += x09 & L4; + + x01 += (x00 >> 28); x00 &= M28L; + x02 += (x01 >> 28); x01 &= M28L; + x03 += (x02 >> 28); x02 &= M28L; + x04 += (x03 >> 28); x03 &= M28L; + x05 += (x04 >> 28); x04 &= M28L; + x06 += (x05 >> 28); x05 &= M28L; + x07 += (x06 >> 28); x06 &= M28L; + x08 += (x07 >> 28); x07 &= M28L; + + Codec.Encode56((ulong)(x00 | (x01 << 28)), r); + Codec.Encode56((ulong)(x02 | (x03 << 28)), r[7..]); + Codec.Encode56((ulong)(x04 | (x05 << 28)), r[14..]); + Codec.Encode56((ulong)(x06 | (x07 << 28)), r[21..]); + Codec.Encode32((uint)x08, r[28..]); + } +#endif + + internal static byte[] Reduce512(byte[] n) { - byte[] r = new byte[64]; + byte[] r = new byte[ScalarBytes]; #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - Reduce(n, r); + Reduce512(n, r); #else - long x00 = Codec.Decode32(n, 0) & M32L; // x00:32/-- - long x01 = (Codec.Decode24(n, 4) << 4) & M32L; // x01:28/-- - long x02 = Codec.Decode32(n, 7) & M32L; // x02:32/-- - long x03 = (Codec.Decode24(n, 11) << 4) & M32L; // x03:28/-- - long x04 = Codec.Decode32(n, 14) & M32L; // x04:32/-- - long x05 = (Codec.Decode24(n, 18) << 4) & M32L; // x05:28/-- - long x06 = Codec.Decode32(n, 21) & M32L; // x06:32/-- - long x07 = (Codec.Decode24(n, 25) << 4) & M32L; // x07:28/-- - long x08 = Codec.Decode32(n, 28) & M32L; // x08:32/-- - long x09 = (Codec.Decode24(n, 32) << 4) & M32L; // x09:28/-- - long x10 = Codec.Decode32(n, 35) & M32L; // x10:32/-- - long x11 = (Codec.Decode24(n, 39) << 4) & M32L; // x11:28/-- - long x12 = Codec.Decode32(n, 42) & M32L; // x12:32/-- - long x13 = (Codec.Decode24(n, 46) << 4) & M32L; // x13:28/-- - long x14 = Codec.Decode32(n, 49) & M32L; // x14:32/-- - long x15 = (Codec.Decode24(n, 53) << 4) & M32L; // x15:28/-- - long x16 = Codec.Decode32(n, 56) & M32L; // x16:32/-- - long x17 = (Codec.Decode24(n, 60) << 4) & M32L; // x17:28/-- - long x18 = n[63] & M08L; // x18:08/-- + long x00 = Codec.Decode32(n, 0); // x00:32/-- + long x01 = (Codec.Decode24(n, 4) << 4); // x01:28/-- + long x02 = Codec.Decode32(n, 7); // x02:32/-- + long x03 = (Codec.Decode24(n, 11) << 4); // x03:28/-- + long x04 = Codec.Decode32(n, 14); // x04:32/-- + long x05 = (Codec.Decode24(n, 18) << 4); // x05:28/-- + long x06 = Codec.Decode32(n, 21); // x06:32/-- + long x07 = (Codec.Decode24(n, 25) << 4); // x07:28/-- + long x08 = Codec.Decode32(n, 28); // x08:32/-- + long x09 = (Codec.Decode24(n, 32) << 4); // x09:28/-- + long x10 = Codec.Decode32(n, 35); // x10:32/-- + long x11 = (Codec.Decode24(n, 39) << 4); // x11:28/-- + long x12 = Codec.Decode32(n, 42); // x12:32/-- + long x13 = (Codec.Decode24(n, 46) << 4); // x13:28/-- + long x14 = Codec.Decode32(n, 49); // x14:32/-- + long x15 = (Codec.Decode24(n, 53) << 4); // x15:28/-- + long x16 = Codec.Decode32(n, 56); // x16:32/-- + long x17 = (Codec.Decode24(n, 60) << 4); // x17:28/-- + long x18 = n[63]; // x18:08/-- long t; //x18 += (x17 >> 28); x17 &= M28L; @@ -246,27 +459,27 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - internal static void Reduce(ReadOnlySpan<byte> n, Span<byte> r) + internal static void Reduce512(ReadOnlySpan<byte> n, Span<byte> r) { - long x00 = Codec.Decode32(n[ 0..]) & M32L; // x00:32/-- - long x01 = (Codec.Decode24(n[ 4..]) << 4) & M32L; // x01:28/-- - long x02 = Codec.Decode32(n[ 7..]) & M32L; // x02:32/-- - long x03 = (Codec.Decode24(n[11..]) << 4) & M32L; // x03:28/-- - long x04 = Codec.Decode32(n[14..]) & M32L; // x04:32/-- - long x05 = (Codec.Decode24(n[18..]) << 4) & M32L; // x05:28/-- - long x06 = Codec.Decode32(n[21..]) & M32L; // x06:32/-- - long x07 = (Codec.Decode24(n[25..]) << 4) & M32L; // x07:28/-- - long x08 = Codec.Decode32(n[28..]) & M32L; // x08:32/-- - long x09 = (Codec.Decode24(n[32..]) << 4) & M32L; // x09:28/-- - long x10 = Codec.Decode32(n[35..]) & M32L; // x10:32/-- - long x11 = (Codec.Decode24(n[39..]) << 4) & M32L; // x11:28/-- - long x12 = Codec.Decode32(n[42..]) & M32L; // x12:32/-- - long x13 = (Codec.Decode24(n[46..]) << 4) & M32L; // x13:28/-- - long x14 = Codec.Decode32(n[49..]) & M32L; // x14:32/-- - long x15 = (Codec.Decode24(n[53..]) << 4) & M32L; // x15:28/-- - long x16 = Codec.Decode32(n[56..]) & M32L; // x16:32/-- - long x17 = (Codec.Decode24(n[60..]) << 4) & M32L; // x17:28/-- - long x18 = n[63] & M08L; // x18:08/-- + long x00 = Codec.Decode32(n[ 0..]); // x00:32/-- + long x01 = (Codec.Decode24(n[ 4..]) << 4); // x01:28/-- + long x02 = Codec.Decode32(n[ 7..]); // x02:32/-- + long x03 = (Codec.Decode24(n[11..]) << 4); // x03:28/-- + long x04 = Codec.Decode32(n[14..]); // x04:32/-- + long x05 = (Codec.Decode24(n[18..]) << 4); // x05:28/-- + long x06 = Codec.Decode32(n[21..]); // x06:32/-- + long x07 = (Codec.Decode24(n[25..]) << 4); // x07:28/-- + long x08 = Codec.Decode32(n[28..]); // x08:32/-- + long x09 = (Codec.Decode24(n[32..]) << 4); // x09:28/-- + long x10 = Codec.Decode32(n[35..]); // x10:32/-- + long x11 = (Codec.Decode24(n[39..]) << 4); // x11:28/-- + long x12 = Codec.Decode32(n[42..]); // x12:32/-- + long x13 = (Codec.Decode24(n[46..]) << 4); // x13:28/-- + long x14 = Codec.Decode32(n[49..]); // x14:32/-- + long x15 = (Codec.Decode24(n[53..]) << 4); // x15:28/-- + long x16 = Codec.Decode32(n[56..]); // x16:32/-- + long x17 = (Codec.Decode24(n[60..]) << 4); // x17:28/-- + long x18 = n[63]; // x18:08/-- long t; //x18 += (x17 >> 28); x17 &= M28L; @@ -488,15 +701,15 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 #endif #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - internal static void ToSignedDigits(int bits, ReadOnlySpan<uint> x, Span<uint> z) + internal static void ToSignedDigits(int bits, Span<uint> z) #else - internal static void ToSignedDigits(int bits, uint[] x, uint[] z) + internal static void ToSignedDigits(int bits, uint[] z) #endif { Debug.Assert(bits == 256); Debug.Assert(z.Length >= Size); - uint c1 = Nat.CAdd(Size, ~(int)x[0] & 1, x, L, z); Debug.Assert(c1 == 0U); + uint c1 = Nat.CAddTo(Size, ~(int)z[0] & 1, L, z); Debug.Assert(c1 == 0U); uint c2 = Nat.ShiftDownBit(Size, z, 1U); Debug.Assert(c2 == (1U << 31)); } } diff --git a/crypto/src/math/ec/rfc8032/Scalar448.cs b/crypto/src/math/ec/rfc8032/Scalar448.cs index 4afe1d2d6..124b91250 100644 --- a/crypto/src/math/ec/rfc8032/Scalar448.cs +++ b/crypto/src/math/ec/rfc8032/Scalar448.cs @@ -97,7 +97,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 { Debug.Assert((int)y225[7] >> 31 == (int)y225[7]); - Span<uint> tt = stackalloc uint[29]; + Span<uint> tt = stackalloc uint[22]; Nat.Mul(y225, x, tt); if ((int)y225[7] < 0) @@ -106,9 +106,20 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 Nat.SubFrom(Size, x, tt[8..]); } - Span<byte> r = MemoryMarshal.AsBytes(tt); - Reduce(r, r); - tt[..Size].CopyTo(z); + if (BitConverter.IsLittleEndian) + { + Span<byte> r = MemoryMarshal.AsBytes(tt); + Reduce704(r, r); + tt[..Size].CopyTo(z); + } + else + { + Span<byte> r = stackalloc byte[88]; + Codec.Encode32(tt, r); + + Reduce704(r, r); + Decode(r, z); + } } #else internal static void Multiply225Var(uint[] x, uint[] y225, uint[] z) @@ -124,20 +135,430 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 Nat.SubFrom(Size, x, 0, tt, 8); } - byte[] bytes = new byte[114]; + byte[] bytes = new byte[88]; Codec.Encode32(tt, 0, 22, bytes, 0); - byte[] r = Reduce(bytes); + byte[] r = Reduce704(bytes); Decode(r, z); } #endif - internal static byte[] Reduce(byte[] n) + internal static byte[] Reduce704(byte[] n) + { + byte[] r = new byte[ScalarBytes]; + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + Reduce704(n, r); +#else + ulong x00 = Codec.Decode32(n, 0); // x00:32/-- + ulong x01 = (Codec.Decode24(n, 4) << 4); // x01:28/-- + ulong x02 = Codec.Decode32(n, 7); // x02:32/-- + ulong x03 = (Codec.Decode24(n, 11) << 4); // x03:28/-- + ulong x04 = Codec.Decode32(n, 14); // x04:32/-- + ulong x05 = (Codec.Decode24(n, 18) << 4); // x05:28/-- + ulong x06 = Codec.Decode32(n, 21); // x06:32/-- + ulong x07 = (Codec.Decode24(n, 25) << 4); // x07:28/-- + ulong x08 = Codec.Decode32(n, 28); // x08:32/-- + ulong x09 = (Codec.Decode24(n, 32) << 4); // x09:28/-- + ulong x10 = Codec.Decode32(n, 35); // x10:32/-- + ulong x11 = (Codec.Decode24(n, 39) << 4); // x11:28/-- + ulong x12 = Codec.Decode32(n, 42); // x12:32/-- + ulong x13 = (Codec.Decode24(n, 46) << 4); // x13:28/-- + ulong x14 = Codec.Decode32(n, 49); // x14:32/-- + ulong x15 = (Codec.Decode24(n, 53) << 4); // x15:28/-- + ulong x16 = Codec.Decode32(n, 56); // x16:32/-- + ulong x17 = (Codec.Decode24(n, 60) << 4); // x17:28/-- + ulong x18 = Codec.Decode32(n, 63); // x18:32/-- + ulong x19 = (Codec.Decode24(n, 67) << 4); // x19:28/-- + ulong x20 = Codec.Decode32(n, 70); // x20:32/-- + ulong x21 = (Codec.Decode24(n, 74) << 4); // x21:28/-- + ulong x22 = Codec.Decode32(n, 77); // x22:32/-- + ulong x23 = (Codec.Decode24(n, 81) << 4); // x23:28/-- + ulong x24 = Codec.Decode32(n, 84); // x24:32/-- + ulong x25 = 0UL; + + // TODO Fix bounds calculations which were copied from Reduce912 + + x25 += (x24 >> 28); x24 &= M28UL; // x25:28/--, x24:28/-- + x09 += x25 * L4_0; // x09:54/-- + x10 += x25 * L4_1; // x10:54/53 + x11 += x25 * L4_2; // x11:56/-- + x12 += x25 * L4_3; // x12:57/-- + x13 += x25 * L4_4; // x13:57/55 + x14 += x25 * L4_5; // x14:58/-- + x15 += x25 * L4_6; // x15:58/56 + x16 += x25 * L4_7; // x16:59/-- + + x21 += (x20 >> 28); x20 &= M28UL; // x21:58/--, x20:28/-- + x22 += (x21 >> 28); x21 &= M28UL; // x22:57/54, x21:28/-- + x23 += (x22 >> 28); x22 &= M28UL; // x23:45/42, x22:28/-- + x24 += (x23 >> 28); x23 &= M28UL; // x24:28/18, x23:28/-- + + x08 += x24 * L4_0; // x08:54/-- + x09 += x24 * L4_1; // x09:55/-- + x10 += x24 * L4_2; // x10:56/46 + x11 += x24 * L4_3; // x11:57/46 + x12 += x24 * L4_4; // x12:57/55 + x13 += x24 * L4_5; // x13:58/-- + x14 += x24 * L4_6; // x14:58/56 + x15 += x24 * L4_7; // x15:59/-- + + x07 += x23 * L4_0; // x07:54/-- + x08 += x23 * L4_1; // x08:54/53 + x09 += x23 * L4_2; // x09:56/53 + x10 += x23 * L4_3; // x10:57/46 + x11 += x23 * L4_4; // x11:57/55 + x12 += x23 * L4_5; // x12:58/-- + x13 += x23 * L4_6; // x13:58/56 + x14 += x23 * L4_7; // x14:59/-- + + x06 += x22 * L4_0; // x06:54/-- + x07 += x22 * L4_1; // x07:54/53 + x08 += x22 * L4_2; // x08:56/-- + x09 += x22 * L4_3; // x09:57/53 + x10 += x22 * L4_4; // x10:57/55 + x11 += x22 * L4_5; // x11:58/-- + x12 += x22 * L4_6; // x12:58/56 + x13 += x22 * L4_7; // x13:59/-- + + x18 += (x17 >> 28); x17 &= M28UL; // x18:59/31, x17:28/-- + x19 += (x18 >> 28); x18 &= M28UL; // x19:58/54, x18:28/-- + x20 += (x19 >> 28); x19 &= M28UL; // x20:30/29, x19:28/-- + x21 += (x20 >> 28); x20 &= M28UL; // x21:28/03, x20:28/-- + + x05 += x21 * L4_0; // x05:54/-- + x06 += x21 * L4_1; // x06:55/-- + x07 += x21 * L4_2; // x07:56/31 + x08 += x21 * L4_3; // x08:57/31 + x09 += x21 * L4_4; // x09:57/56 + x10 += x21 * L4_5; // x10:58/-- + x11 += x21 * L4_6; // x11:58/56 + x12 += x21 * L4_7; // x12:59/-- + + x04 += x20 * L4_0; // x04:54/-- + x05 += x20 * L4_1; // x05:54/53 + x06 += x20 * L4_2; // x06:56/53 + x07 += x20 * L4_3; // x07:57/31 + x08 += x20 * L4_4; // x08:57/55 + x09 += x20 * L4_5; // x09:58/-- + x10 += x20 * L4_6; // x10:58/56 + x11 += x20 * L4_7; // x11:59/-- + + x03 += x19 * L4_0; // x03:54/-- + x04 += x19 * L4_1; // x04:54/53 + x05 += x19 * L4_2; // x05:56/-- + x06 += x19 * L4_3; // x06:57/53 + x07 += x19 * L4_4; // x07:57/55 + x08 += x19 * L4_5; // x08:58/-- + x09 += x19 * L4_6; // x09:58/56 + x10 += x19 * L4_7; // x10:59/-- + + x15 += (x14 >> 28); x14 &= M28UL; // x15:59/31, x14:28/-- + x16 += (x15 >> 28); x15 &= M28UL; // x16:59/32, x15:28/-- + x17 += (x16 >> 28); x16 &= M28UL; // x17:31/29, x16:28/-- + x18 += (x17 >> 28); x17 &= M28UL; // x18:28/04, x17:28/-- + + x02 += x18 * L4_0; // x02:54/-- + x03 += x18 * L4_1; // x03:55/-- + x04 += x18 * L4_2; // x04:56/32 + x05 += x18 * L4_3; // x05:57/32 + x06 += x18 * L4_4; // x06:57/56 + x07 += x18 * L4_5; // x07:58/-- + x08 += x18 * L4_6; // x08:58/56 + x09 += x18 * L4_7; // x09:59/-- + + x01 += x17 * L4_0; // x01:54/-- + x02 += x17 * L4_1; // x02:54/53 + x03 += x17 * L4_2; // x03:56/53 + x04 += x17 * L4_3; // x04:57/32 + x05 += x17 * L4_4; // x05:57/55 + x06 += x17 * L4_5; // x06:58/-- + x07 += x17 * L4_6; // x07:58/56 + x08 += x17 * L4_7; // x08:59/-- + + x16 *= 4; + x16 += (x15 >> 26); x15 &= M26UL; + x16 += 1; // x16:30/01 + + x00 += x16 * L_0; + x01 += x16 * L_1; + x02 += x16 * L_2; + x03 += x16 * L_3; + x04 += x16 * L_4; + x05 += x16 * L_5; + x06 += x16 * L_6; + x07 += x16 * L_7; + + x01 += (x00 >> 28); x00 &= M28UL; + x02 += (x01 >> 28); x01 &= M28UL; + x03 += (x02 >> 28); x02 &= M28UL; + x04 += (x03 >> 28); x03 &= M28UL; + x05 += (x04 >> 28); x04 &= M28UL; + x06 += (x05 >> 28); x05 &= M28UL; + x07 += (x06 >> 28); x06 &= M28UL; + x08 += (x07 >> 28); x07 &= M28UL; + x09 += (x08 >> 28); x08 &= M28UL; + x10 += (x09 >> 28); x09 &= M28UL; + x11 += (x10 >> 28); x10 &= M28UL; + x12 += (x11 >> 28); x11 &= M28UL; + x13 += (x12 >> 28); x12 &= M28UL; + x14 += (x13 >> 28); x13 &= M28UL; + x15 += (x14 >> 28); x14 &= M28UL; + x16 = (x15 >> 26); x15 &= M26UL; + + x16 -= 1; + + Debug.Assert(x16 == 0UL || x16 == ulong.MaxValue); + + x00 -= x16 & L_0; + x01 -= x16 & L_1; + x02 -= x16 & L_2; + x03 -= x16 & L_3; + x04 -= x16 & L_4; + x05 -= x16 & L_5; + x06 -= x16 & L_6; + x07 -= x16 & L_7; + + x01 += (ulong)((long)x00 >> 28); x00 &= M28UL; + x02 += (ulong)((long)x01 >> 28); x01 &= M28UL; + x03 += (ulong)((long)x02 >> 28); x02 &= M28UL; + x04 += (ulong)((long)x03 >> 28); x03 &= M28UL; + x05 += (ulong)((long)x04 >> 28); x04 &= M28UL; + x06 += (ulong)((long)x05 >> 28); x05 &= M28UL; + x07 += (ulong)((long)x06 >> 28); x06 &= M28UL; + x08 += (ulong)((long)x07 >> 28); x07 &= M28UL; + x09 += (ulong)((long)x08 >> 28); x08 &= M28UL; + x10 += (ulong)((long)x09 >> 28); x09 &= M28UL; + x11 += (ulong)((long)x10 >> 28); x10 &= M28UL; + x12 += (ulong)((long)x11 >> 28); x11 &= M28UL; + x13 += (ulong)((long)x12 >> 28); x12 &= M28UL; + x14 += (ulong)((long)x13 >> 28); x13 &= M28UL; + x15 += (ulong)((long)x14 >> 28); x14 &= M28UL; + + Debug.Assert(x15 >> 26 == 0UL); + + Codec.Encode56(x00 | (x01 << 28), r, 0); + Codec.Encode56(x02 | (x03 << 28), r, 7); + Codec.Encode56(x04 | (x05 << 28), r, 14); + Codec.Encode56(x06 | (x07 << 28), r, 21); + Codec.Encode56(x08 | (x09 << 28), r, 28); + Codec.Encode56(x10 | (x11 << 28), r, 35); + Codec.Encode56(x12 | (x13 << 28), r, 42); + Codec.Encode56(x14 | (x15 << 28), r, 49); + //r[ScalarBytes - 1] = 0; +#endif + + return r; + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + internal static void Reduce704(ReadOnlySpan<byte> n, Span<byte> r) + { + ulong x00 = Codec.Decode32(n[ 0..]); // x00:32/-- + ulong x01 = (Codec.Decode24(n[ 4..]) << 4); // x01:28/-- + ulong x02 = Codec.Decode32(n[ 7..]); // x02:32/-- + ulong x03 = (Codec.Decode24(n[ 11..]) << 4); // x03:28/-- + ulong x04 = Codec.Decode32(n[ 14..]); // x04:32/-- + ulong x05 = (Codec.Decode24(n[ 18..]) << 4); // x05:28/-- + ulong x06 = Codec.Decode32(n[ 21..]); // x06:32/-- + ulong x07 = (Codec.Decode24(n[ 25..]) << 4); // x07:28/-- + ulong x08 = Codec.Decode32(n[ 28..]); // x08:32/-- + ulong x09 = (Codec.Decode24(n[ 32..]) << 4); // x09:28/-- + ulong x10 = Codec.Decode32(n[ 35..]); // x10:32/-- + ulong x11 = (Codec.Decode24(n[ 39..]) << 4); // x11:28/-- + ulong x12 = Codec.Decode32(n[ 42..]); // x12:32/-- + ulong x13 = (Codec.Decode24(n[ 46..]) << 4); // x13:28/-- + ulong x14 = Codec.Decode32(n[ 49..]); // x14:32/-- + ulong x15 = (Codec.Decode24(n[ 53..]) << 4); // x15:28/-- + ulong x16 = Codec.Decode32(n[ 56..]); // x16:32/-- + ulong x17 = (Codec.Decode24(n[ 60..]) << 4); // x17:28/-- + ulong x18 = Codec.Decode32(n[ 63..]); // x18:32/-- + ulong x19 = (Codec.Decode24(n[ 67..]) << 4); // x19:28/-- + ulong x20 = Codec.Decode32(n[ 70..]); // x20:32/-- + ulong x21 = (Codec.Decode24(n[ 74..]) << 4); // x21:28/-- + ulong x22 = Codec.Decode32(n[ 77..]); // x22:32/-- + ulong x23 = (Codec.Decode24(n[ 81..]) << 4); // x23:28/-- + ulong x24 = Codec.Decode32(n[ 84..]); // x24:32/-- + ulong x25 = 0UL; + + // TODO Fix bounds calculations which were copied from Reduce912 + + x25 += (x24 >> 28); x24 &= M28UL; // x25:28/--, x24:28/-- + x09 += x25 * L4_0; // x09:54/-- + x10 += x25 * L4_1; // x10:54/53 + x11 += x25 * L4_2; // x11:56/-- + x12 += x25 * L4_3; // x12:57/-- + x13 += x25 * L4_4; // x13:57/55 + x14 += x25 * L4_5; // x14:58/-- + x15 += x25 * L4_6; // x15:58/56 + x16 += x25 * L4_7; // x16:59/-- + + x21 += (x20 >> 28); x20 &= M28UL; // x21:58/--, x20:28/-- + x22 += (x21 >> 28); x21 &= M28UL; // x22:57/54, x21:28/-- + x23 += (x22 >> 28); x22 &= M28UL; // x23:45/42, x22:28/-- + x24 += (x23 >> 28); x23 &= M28UL; // x24:28/18, x23:28/-- + + x08 += x24 * L4_0; // x08:54/-- + x09 += x24 * L4_1; // x09:55/-- + x10 += x24 * L4_2; // x10:56/46 + x11 += x24 * L4_3; // x11:57/46 + x12 += x24 * L4_4; // x12:57/55 + x13 += x24 * L4_5; // x13:58/-- + x14 += x24 * L4_6; // x14:58/56 + x15 += x24 * L4_7; // x15:59/-- + + x07 += x23 * L4_0; // x07:54/-- + x08 += x23 * L4_1; // x08:54/53 + x09 += x23 * L4_2; // x09:56/53 + x10 += x23 * L4_3; // x10:57/46 + x11 += x23 * L4_4; // x11:57/55 + x12 += x23 * L4_5; // x12:58/-- + x13 += x23 * L4_6; // x13:58/56 + x14 += x23 * L4_7; // x14:59/-- + + x06 += x22 * L4_0; // x06:54/-- + x07 += x22 * L4_1; // x07:54/53 + x08 += x22 * L4_2; // x08:56/-- + x09 += x22 * L4_3; // x09:57/53 + x10 += x22 * L4_4; // x10:57/55 + x11 += x22 * L4_5; // x11:58/-- + x12 += x22 * L4_6; // x12:58/56 + x13 += x22 * L4_7; // x13:59/-- + + x18 += (x17 >> 28); x17 &= M28UL; // x18:59/31, x17:28/-- + x19 += (x18 >> 28); x18 &= M28UL; // x19:58/54, x18:28/-- + x20 += (x19 >> 28); x19 &= M28UL; // x20:30/29, x19:28/-- + x21 += (x20 >> 28); x20 &= M28UL; // x21:28/03, x20:28/-- + + x05 += x21 * L4_0; // x05:54/-- + x06 += x21 * L4_1; // x06:55/-- + x07 += x21 * L4_2; // x07:56/31 + x08 += x21 * L4_3; // x08:57/31 + x09 += x21 * L4_4; // x09:57/56 + x10 += x21 * L4_5; // x10:58/-- + x11 += x21 * L4_6; // x11:58/56 + x12 += x21 * L4_7; // x12:59/-- + + x04 += x20 * L4_0; // x04:54/-- + x05 += x20 * L4_1; // x05:54/53 + x06 += x20 * L4_2; // x06:56/53 + x07 += x20 * L4_3; // x07:57/31 + x08 += x20 * L4_4; // x08:57/55 + x09 += x20 * L4_5; // x09:58/-- + x10 += x20 * L4_6; // x10:58/56 + x11 += x20 * L4_7; // x11:59/-- + + x03 += x19 * L4_0; // x03:54/-- + x04 += x19 * L4_1; // x04:54/53 + x05 += x19 * L4_2; // x05:56/-- + x06 += x19 * L4_3; // x06:57/53 + x07 += x19 * L4_4; // x07:57/55 + x08 += x19 * L4_5; // x08:58/-- + x09 += x19 * L4_6; // x09:58/56 + x10 += x19 * L4_7; // x10:59/-- + + x15 += (x14 >> 28); x14 &= M28UL; // x15:59/31, x14:28/-- + x16 += (x15 >> 28); x15 &= M28UL; // x16:59/32, x15:28/-- + x17 += (x16 >> 28); x16 &= M28UL; // x17:31/29, x16:28/-- + x18 += (x17 >> 28); x17 &= M28UL; // x18:28/04, x17:28/-- + + x02 += x18 * L4_0; // x02:54/-- + x03 += x18 * L4_1; // x03:55/-- + x04 += x18 * L4_2; // x04:56/32 + x05 += x18 * L4_3; // x05:57/32 + x06 += x18 * L4_4; // x06:57/56 + x07 += x18 * L4_5; // x07:58/-- + x08 += x18 * L4_6; // x08:58/56 + x09 += x18 * L4_7; // x09:59/-- + + x01 += x17 * L4_0; // x01:54/-- + x02 += x17 * L4_1; // x02:54/53 + x03 += x17 * L4_2; // x03:56/53 + x04 += x17 * L4_3; // x04:57/32 + x05 += x17 * L4_4; // x05:57/55 + x06 += x17 * L4_5; // x06:58/-- + x07 += x17 * L4_6; // x07:58/56 + x08 += x17 * L4_7; // x08:59/-- + + x16 *= 4; + x16 += (x15 >> 26); x15 &= M26UL; + x16 += 1; // x16:30/01 + + x00 += x16 * L_0; + x01 += x16 * L_1; + x02 += x16 * L_2; + x03 += x16 * L_3; + x04 += x16 * L_4; + x05 += x16 * L_5; + x06 += x16 * L_6; + x07 += x16 * L_7; + + x01 += (x00 >> 28); x00 &= M28UL; + x02 += (x01 >> 28); x01 &= M28UL; + x03 += (x02 >> 28); x02 &= M28UL; + x04 += (x03 >> 28); x03 &= M28UL; + x05 += (x04 >> 28); x04 &= M28UL; + x06 += (x05 >> 28); x05 &= M28UL; + x07 += (x06 >> 28); x06 &= M28UL; + x08 += (x07 >> 28); x07 &= M28UL; + x09 += (x08 >> 28); x08 &= M28UL; + x10 += (x09 >> 28); x09 &= M28UL; + x11 += (x10 >> 28); x10 &= M28UL; + x12 += (x11 >> 28); x11 &= M28UL; + x13 += (x12 >> 28); x12 &= M28UL; + x14 += (x13 >> 28); x13 &= M28UL; + x15 += (x14 >> 28); x14 &= M28UL; + x16 = (x15 >> 26); x15 &= M26UL; + + x16 -= 1; + + Debug.Assert(x16 == 0UL || x16 == ulong.MaxValue); + + x00 -= x16 & L_0; + x01 -= x16 & L_1; + x02 -= x16 & L_2; + x03 -= x16 & L_3; + x04 -= x16 & L_4; + x05 -= x16 & L_5; + x06 -= x16 & L_6; + x07 -= x16 & L_7; + + x01 += (ulong)((long)x00 >> 28); x00 &= M28UL; + x02 += (ulong)((long)x01 >> 28); x01 &= M28UL; + x03 += (ulong)((long)x02 >> 28); x02 &= M28UL; + x04 += (ulong)((long)x03 >> 28); x03 &= M28UL; + x05 += (ulong)((long)x04 >> 28); x04 &= M28UL; + x06 += (ulong)((long)x05 >> 28); x05 &= M28UL; + x07 += (ulong)((long)x06 >> 28); x06 &= M28UL; + x08 += (ulong)((long)x07 >> 28); x07 &= M28UL; + x09 += (ulong)((long)x08 >> 28); x08 &= M28UL; + x10 += (ulong)((long)x09 >> 28); x09 &= M28UL; + x11 += (ulong)((long)x10 >> 28); x10 &= M28UL; + x12 += (ulong)((long)x11 >> 28); x11 &= M28UL; + x13 += (ulong)((long)x12 >> 28); x12 &= M28UL; + x14 += (ulong)((long)x13 >> 28); x13 &= M28UL; + x15 += (ulong)((long)x14 >> 28); x14 &= M28UL; + + Debug.Assert(x15 >> 26 == 0UL); + + Codec.Encode56(x00 | (x01 << 28), r); + Codec.Encode56(x02 | (x03 << 28), r[7..]); + Codec.Encode56(x04 | (x05 << 28), r[14..]); + Codec.Encode56(x06 | (x07 << 28), r[21..]); + Codec.Encode56(x08 | (x09 << 28), r[28..]); + Codec.Encode56(x10 | (x11 << 28), r[35..]); + Codec.Encode56(x12 | (x13 << 28), r[42..]); + Codec.Encode56(x14 | (x15 << 28), r[49..]); + r[ScalarBytes - 1] = 0; + } +#endif + + internal static byte[] Reduce912(byte[] n) { byte[] r = new byte[ScalarBytes]; #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - Reduce(n, r); + Reduce912(n, r); #else ulong x00 = Codec.Decode32(n, 0); // x00:32/-- ulong x01 = (Codec.Decode24(n, 4) << 4); // x01:28/-- @@ -416,7 +837,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - internal static void Reduce(ReadOnlySpan<byte> n, Span<byte> r) + internal static void Reduce912(ReadOnlySpan<byte> n, Span<byte> r) { ulong x00 = Codec.Decode32(n[ 0..]); // x00:32/-- ulong x01 = (Codec.Decode24(n[ 4..]) << 4); // x01:28/-- diff --git a/crypto/src/math/ec/rfc8032/Wnaf.cs b/crypto/src/math/ec/rfc8032/Wnaf.cs index 88319f405..209934031 100644 --- a/crypto/src/math/ec/rfc8032/Wnaf.cs +++ b/crypto/src/math/ec/rfc8032/Wnaf.cs @@ -42,7 +42,7 @@ namespace Org.BouncyCastle.Math.EC.Rfc8032 { int word16 = (int)(word >> j); - int skip = Integers.NumberOfTrailingZeros((sign ^ word16) | 0x00010000); + int skip = Integers.NumberOfTrailingZeros((sign ^ word16) | (1 << 16)); if (skip > 0) { j += skip; diff --git a/crypto/src/math/raw/Interleave.cs b/crypto/src/math/raw/Interleave.cs index 8082ce57c..e71f8e394 100644 --- a/crypto/src/math/raw/Interleave.cs +++ b/crypto/src/math/raw/Interleave.cs @@ -17,7 +17,7 @@ namespace Org.BouncyCastle.Math.Raw uint t = x; #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.IsEnabled) { return Bmi2.ParallelBitDeposit(t, 0x55555555U); } @@ -33,7 +33,7 @@ namespace Org.BouncyCastle.Math.Raw uint t = x; #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.IsEnabled) { return Bmi2.ParallelBitDeposit(t, 0x55555555U); } @@ -48,7 +48,7 @@ namespace Org.BouncyCastle.Math.Raw internal static ulong Expand32to64(uint x) { #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.IsEnabled) { return (ulong)Bmi2.ParallelBitDeposit(x >> 16, 0x55555555U) << 32 | Bmi2.ParallelBitDeposit(x , 0x55555555U); @@ -67,7 +67,7 @@ namespace Org.BouncyCastle.Math.Raw internal static void Expand64To128(ulong x, ulong[] z, int zOff) { #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.X64.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled) { z[zOff ] = Bmi2.X64.ParallelBitDeposit(x , 0x5555555555555555UL); z[zOff + 1] = Bmi2.X64.ParallelBitDeposit(x >> 32, 0x5555555555555555UL); @@ -90,7 +90,7 @@ namespace Org.BouncyCastle.Math.Raw internal static void Expand64To128(ulong x, Span<ulong> z) { #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.X64.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled) { z[0] = Bmi2.X64.ParallelBitDeposit(x , 0x5555555555555555UL); z[1] = Bmi2.X64.ParallelBitDeposit(x >> 32, 0x5555555555555555UL); @@ -136,7 +136,7 @@ namespace Org.BouncyCastle.Math.Raw internal static ulong Expand64To128Rev(ulong x, out ulong low) { #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.X64.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled) { low = Bmi2.X64.ParallelBitDeposit(x >> 32, 0xAAAAAAAAAAAAAAAAUL); return Bmi2.X64.ParallelBitDeposit(x , 0xAAAAAAAAAAAAAAAAUL); @@ -157,7 +157,7 @@ namespace Org.BouncyCastle.Math.Raw internal static uint Shuffle(uint x) { #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.IsEnabled) { return Bmi2.ParallelBitDeposit(x >> 16, 0xAAAAAAAAU) | Bmi2.ParallelBitDeposit(x , 0x55555555U); @@ -175,7 +175,7 @@ namespace Org.BouncyCastle.Math.Raw internal static ulong Shuffle(ulong x) { #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.X64.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled) { return Bmi2.X64.ParallelBitDeposit(x >> 32, 0xAAAAAAAAAAAAAAAAUL) | Bmi2.X64.ParallelBitDeposit(x , 0x5555555555555555UL); @@ -194,7 +194,7 @@ namespace Org.BouncyCastle.Math.Raw internal static uint Shuffle2(uint x) { #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.IsEnabled) { return Bmi2.ParallelBitDeposit(x >> 24, 0x88888888U) | Bmi2.ParallelBitDeposit(x >> 16, 0x44444444U) @@ -219,7 +219,7 @@ namespace Org.BouncyCastle.Math.Raw internal static ulong Shuffle2(ulong x) { #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.X64.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled) { return Bmi2.X64.ParallelBitDeposit(x >> 48, 0x8888888888888888UL) | Bmi2.X64.ParallelBitDeposit(x >> 32, 0x4444444444444444UL) @@ -242,7 +242,7 @@ namespace Org.BouncyCastle.Math.Raw internal static uint Unshuffle(uint x) { #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.IsEnabled) { return Bmi2.ParallelBitExtract(x, 0xAAAAAAAAU) << 16 | Bmi2.ParallelBitExtract(x, 0x55555555U); @@ -260,7 +260,7 @@ namespace Org.BouncyCastle.Math.Raw internal static ulong Unshuffle(ulong x) { #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.X64.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled) { return Bmi2.X64.ParallelBitExtract(x, 0xAAAAAAAAAAAAAAAAUL) << 32 | Bmi2.X64.ParallelBitExtract(x, 0x5555555555555555UL); @@ -279,7 +279,7 @@ namespace Org.BouncyCastle.Math.Raw internal static ulong Unshuffle(ulong x, out ulong even) { #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.X64.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled) { even = Bmi2.X64.ParallelBitExtract(x, 0x5555555555555555UL); return Bmi2.X64.ParallelBitExtract(x, 0xAAAAAAAAAAAAAAAAUL); @@ -294,7 +294,7 @@ namespace Org.BouncyCastle.Math.Raw internal static ulong Unshuffle(ulong x0, ulong x1, out ulong even) { #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.X64.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled) { even = Bmi2.X64.ParallelBitExtract(x0, 0x5555555555555555UL) | Bmi2.X64.ParallelBitExtract(x1, 0x5555555555555555UL) << 32; @@ -312,7 +312,7 @@ namespace Org.BouncyCastle.Math.Raw internal static uint Unshuffle2(uint x) { #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.IsEnabled) { return Bmi2.ParallelBitExtract(x, 0x88888888U) << 24 | Bmi2.ParallelBitExtract(x, 0x44444444U) << 16 @@ -337,7 +337,7 @@ namespace Org.BouncyCastle.Math.Raw internal static ulong Unshuffle2(ulong x) { #if NETCOREAPP3_0_OR_GREATER - if (Bmi2.X64.IsSupported) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Bmi2.X64.IsEnabled) { return Bmi2.X64.ParallelBitExtract(x, 0x8888888888888888UL) << 48 | Bmi2.X64.ParallelBitExtract(x, 0x4444444444444444UL) << 32 diff --git a/crypto/src/math/raw/Mod.cs b/crypto/src/math/raw/Mod.cs index f1ca2ebf0..9059e479c 100644 --- a/crypto/src/math/raw/Mod.cs +++ b/crypto/src/math/raw/Mod.cs @@ -7,16 +7,21 @@ using Org.BouncyCastle.Utilities; namespace Org.BouncyCastle.Math.Raw { - /* - * Modular inversion as implemented in this class is based on the paper "Fast constant-time gcd - * computation and modular inversion" by Daniel J. Bernstein and Bo-Yin Yang. - */ - + /// <summary> + /// Modular inversion as implemented in this class is based on the paper "Fast constant-time gcd computation and + /// modular inversion" by Daniel J. Bernstein and Bo-Yin Yang. + /// </summary> + /// <remarks> + /// In some cases (when it is faster) we use the "half delta" variant of safegcd based on + /// <a href="https://github.com/sipa/safegcd-bounds">hddivsteps</a>. + /// </remarks> internal static class Mod { private const int M30 = 0x3FFFFFFF; private const ulong M32UL = 0xFFFFFFFFUL; + private static readonly int MaxStackAlloc = Platform.Is64BitProcess ? 4096 : 1024; + #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER public static void CheckedModOddInverse(ReadOnlySpan<uint> m, ReadOnlySpan<uint> x, Span<uint> z) #else @@ -66,11 +71,12 @@ namespace Org.BouncyCastle.Math.Raw return x; } - public static uint ModOddInverse(uint[] m, uint[] x, uint[] z) - { #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - return ModOddInverse(m.AsSpan(), x.AsSpan(), z.AsSpan()); + public static uint ModOddInverse(ReadOnlySpan<uint> m, ReadOnlySpan<uint> x, Span<uint> z) #else + public static uint ModOddInverse(uint[] m, uint[] x, uint[] z) +#endif + { int len32 = m.Length; Debug.Assert(len32 > 0); Debug.Assert((m[0] & 1) != 0); @@ -79,25 +85,45 @@ namespace Org.BouncyCastle.Math.Raw int bits = (len32 << 5) - Integers.NumberOfLeadingZeros((int)m[len32 - 1]); int len30 = (bits + 29) / 30; +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + int allocSize = len30 * 5; + Span<int> alloc = (allocSize * Integers.NumBytes <= MaxStackAlloc) + ? stackalloc int[allocSize] + : new int[allocSize]; + + Span<int> t = stackalloc int[4]; + Span<int> D = alloc[..len30]; alloc = alloc[len30..]; + Span<int> E = alloc[..len30]; alloc = alloc[len30..]; + Span<int> F = alloc[..len30]; alloc = alloc[len30..]; + Span<int> G = alloc[..len30]; alloc = alloc[len30..]; + Span<int> M = alloc[..len30]; +#else int[] t = new int[4]; int[] D = new int[len30]; int[] E = new int[len30]; int[] F = new int[len30]; int[] G = new int[len30]; int[] M = new int[len30]; +#endif E[0] = 1; - Encode30(bits, x, 0, G, 0); - Encode30(bits, m, 0, M, 0); + Encode30(bits, x, G); + Encode30(bits, m, M); + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + M.CopyTo(F); +#else Array.Copy(M, 0, F, 0, len30); +#endif - int delta = 0; + // We use the "half delta" variant here, with theta == delta - 1/2 + int theta = 0; int m0Inv32 = (int)Inverse32((uint)M[0]); - int maxDivsteps = GetMaximumDivsteps(bits); + int maxDivsteps = GetMaximumHDDivsteps(bits); for (int divSteps = 0; divSteps < maxDivsteps; divSteps += 30) { - delta = Divsteps30(delta, F[0], G[0], t); + theta = HDDivsteps30(theta, F[0], G[0], t); UpdateDE30(len30, D, E, t, m0Inv32, M); UpdateFG30(len30, F, G, t); } @@ -107,15 +133,17 @@ namespace Org.BouncyCastle.Math.Raw CNormalize30(len30, signF, D, M); - Decode30(bits, D, 0, z, 0); + Decode30(bits, D, z); Debug.Assert(0 != Nat.LessThan(m.Length, z, m)); - return (uint)(EqualTo(len30, F, 1) & EqualToZero(len30, G)); -#endif + return (uint)(EqualTo(len30, F, 1) & EqualTo(len30, G, 0)); } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - public static uint ModOddInverse(ReadOnlySpan<uint> m, ReadOnlySpan<uint> x, Span<uint> z) + public static bool ModOddInverseVar(ReadOnlySpan<uint> m, ReadOnlySpan<uint> x, Span<uint> z) +#else + public static bool ModOddInverseVar(uint[] m, uint[] x, uint[] z) +#endif { int len32 = m.Length; Debug.Assert(len32 > 0); @@ -125,9 +153,14 @@ namespace Org.BouncyCastle.Math.Raw int bits = (len32 << 5) - Integers.NumberOfLeadingZeros((int)m[len32 - 1]); int len30 = (bits + 29) / 30; - Span<int> alloc = len30 <= 50 - ? stackalloc int[len30 * 5] - : new int[len30 * 5]; + int clz = bits - Nat.GetBitLength(len32, x); + Debug.Assert(clz >= 0); + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + int allocSize = len30 * 5; + Span<int> alloc = (allocSize * Integers.NumBytes <= MaxStackAlloc) + ? stackalloc int[allocSize] + : new int[allocSize]; Span<int> t = stackalloc int[4]; Span<int> D = alloc[..len30]; alloc = alloc[len30..]; @@ -135,68 +168,34 @@ namespace Org.BouncyCastle.Math.Raw Span<int> F = alloc[..len30]; alloc = alloc[len30..]; Span<int> G = alloc[..len30]; alloc = alloc[len30..]; Span<int> M = alloc[..len30]; - - E[0] = 1; - Encode30(bits, x, G); - Encode30(bits, m, M); - M.CopyTo(F); - - int delta = 0; - int m0Inv32 = (int)Inverse32((uint)M[0]); - int maxDivsteps = GetMaximumDivsteps(bits); - - for (int divSteps = 0; divSteps < maxDivsteps; divSteps += 30) - { - delta = Divsteps30(delta, F[0], G[0], t); - UpdateDE30(len30, D, E, t, m0Inv32, M); - UpdateFG30(len30, F, G, t); - } - - int signF = F[len30 - 1] >> 31; - CNegate30(len30, signF, F); - - CNormalize30(len30, signF, D, M); - - Decode30(bits, D, z); - Debug.Assert(0 != Nat.LessThan(m.Length, z, m)); - - return (uint)(EqualTo(len30, F, 1) & EqualToZero(len30, G)); - } -#endif - - public static bool ModOddInverseVar(uint[] m, uint[] x, uint[] z) - { -#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - return ModOddInverseVar(m.AsSpan(), x.AsSpan(), z.AsSpan()); #else - int len32 = m.Length; - Debug.Assert(len32 > 0); - Debug.Assert((m[0] & 1) != 0); - Debug.Assert(m[len32 - 1] != 0); - - int bits = (len32 << 5) - Integers.NumberOfLeadingZeros((int)m[len32 - 1]); - int len30 = (bits + 29) / 30; - int[] t = new int[4]; int[] D = new int[len30]; int[] E = new int[len30]; int[] F = new int[len30]; int[] G = new int[len30]; int[] M = new int[len30]; +#endif E[0] = 1; - Encode30(bits, x, 0, G, 0); - Encode30(bits, m, 0, M, 0); + Encode30(bits, x, G); + Encode30(bits, m, M); + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + M.CopyTo(F); +#else Array.Copy(M, 0, F, 0, len30); +#endif - int clzG = Integers.NumberOfLeadingZeros(G[len30 - 1] | 1) - (len30 * 30 + 2 - bits); - int eta = -1 - clzG; + // We use the original safegcd here, with eta == 1 - delta + // For shorter x, configure as if low zeros of x had been shifted away by divsteps + int eta = -clz; int lenDE = len30, lenFG = len30; int m0Inv32 = (int)Inverse32((uint)M[0]); int maxDivsteps = GetMaximumDivsteps(bits); - int divsteps = 0; - while (!EqualToZeroVar_Unlikely(lenFG, G)) + int divsteps = clz; + while (!EqualToVar(lenFG, G, 0)) { if (divsteps >= maxDivsteps) return false; @@ -206,20 +205,7 @@ namespace Org.BouncyCastle.Math.Raw eta = Divsteps30Var(eta, F[0], G[0], t); UpdateDE30(lenDE, D, E, t, m0Inv32, M); UpdateFG30(lenFG, F, G, t); - - int fn = F[lenFG - 1]; - int gn = G[lenFG - 1]; - - int cond = (lenFG - 2) >> 31; - cond |= fn ^ (fn >> 31); - cond |= gn ^ (gn >> 31); - - if (cond == 0) - { - F[lenFG - 2] |= fn << 30; - G[lenFG - 2] |= gn << 30; - --lenFG; - } + lenFG = TrimFG30Var(lenFG, F, G); } int signF = F[lenFG - 1] >> 31; @@ -241,7 +227,7 @@ namespace Org.BouncyCastle.Math.Raw } Debug.Assert(0 == signF); - if (!EqualToOneVar_Expected(lenFG, F)) + if (!EqualToVar(lenFG, F, 1)) return false; if (signD < 0) @@ -250,15 +236,73 @@ namespace Org.BouncyCastle.Math.Raw } Debug.Assert(0 == signD); - Decode30(bits, D, 0, z, 0); + Decode30(bits, D, z); Debug.Assert(!Nat.Gte(m.Length, z, m)); return true; + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + public static uint ModOddIsCoprime(ReadOnlySpan<uint> m, ReadOnlySpan<uint> x) +#else + public static uint ModOddIsCoprime(uint[] m, uint[] x) +#endif + { + int len32 = m.Length; + Debug.Assert(len32 > 0); + Debug.Assert((m[0] & 1) != 0); + Debug.Assert(m[len32 - 1] != 0); + + int bits = (len32 << 5) - Integers.NumberOfLeadingZeros((int)m[len32 - 1]); + int len30 = (bits + 29) / 30; + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + int allocSize = len30 * 3; + Span<int> alloc = (allocSize * Integers.NumBytes <= MaxStackAlloc) + ? stackalloc int[allocSize] + : new int[allocSize]; + + Span<int> t = stackalloc int[4]; + Span<int> F = alloc[..len30]; alloc = alloc[len30..]; + Span<int> G = alloc[..len30]; alloc = alloc[len30..]; + Span<int> M = alloc[..len30]; +#else + int[] t = new int[4]; + int[] F = new int[len30]; + int[] G = new int[len30]; + int[] M = new int[len30]; #endif + + Encode30(bits, x, G); + Encode30(bits, m, M); + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + M.CopyTo(F); +#else + Array.Copy(M, 0, F, 0, len30); +#endif + + // We use the "half delta" variant here, with theta == delta - 1/2 + int theta = 0; + int maxDivsteps = GetMaximumHDDivsteps(bits); + + for (int divSteps = 0; divSteps < maxDivsteps; divSteps += 30) + { + theta = HDDivsteps30(theta, F[0], G[0], t); + UpdateFG30(len30, F, G, t); + } + + int signF = F[len30 - 1] >> 31; + CNegate30(len30, signF, F); + + return (uint)(EqualTo(len30, F, 1) & EqualTo(len30, G, 0)); } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - public static bool ModOddInverseVar(ReadOnlySpan<uint> m, ReadOnlySpan<uint> x, Span<uint> z) + public static bool ModOddIsCoprimeVar(ReadOnlySpan<uint> m, ReadOnlySpan<uint> x) +#else + public static bool ModOddIsCoprimeVar(uint[] m, uint[] x) +#endif { int len32 = m.Length; Debug.Assert(len32 > 0); @@ -268,30 +312,43 @@ namespace Org.BouncyCastle.Math.Raw int bits = (len32 << 5) - Integers.NumberOfLeadingZeros((int)m[len32 - 1]); int len30 = (bits + 29) / 30; - Span<int> alloc = len30 <= 50 - ? stackalloc int[len30 * 5] - : new int[len30 * 5]; + int clz = bits - Nat.GetBitLength(len32, x); + Debug.Assert(clz >= 0); + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + int allocSize = len30 * 3; + Span<int> alloc = (allocSize * Integers.NumBytes <= MaxStackAlloc) + ? stackalloc int[allocSize] + : new int[allocSize]; Span<int> t = stackalloc int[4]; - Span<int> D = alloc[..len30]; alloc = alloc[len30..]; - Span<int> E = alloc[..len30]; alloc = alloc[len30..]; Span<int> F = alloc[..len30]; alloc = alloc[len30..]; Span<int> G = alloc[..len30]; alloc = alloc[len30..]; Span<int> M = alloc[..len30]; +#else + int[] t = new int[4]; + int[] F = new int[len30]; + int[] G = new int[len30]; + int[] M = new int[len30]; +#endif - E[0] = 1; Encode30(bits, x, G); Encode30(bits, m, M); + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER M.CopyTo(F); +#else + Array.Copy(M, 0, F, 0, len30); +#endif - int clzG = Integers.NumberOfLeadingZeros(G[len30 - 1] | 1) - (len30 * 30 + 2 - bits); - int eta = -1 - clzG; - int lenDE = len30, lenFG = len30; - int m0Inv32 = (int)Inverse32((uint)M[0]); + // We use the original safegcd here, with eta == 1 - delta + // For shorter x, configure as if low zeros of x had been shifted away by divsteps + int eta = -clz; + int lenFG = len30; int maxDivsteps = GetMaximumDivsteps(bits); - int divsteps = 0; - while (!EqualToZeroVar_Unlikely(lenFG, G)) + int divsteps = clz; + while (!EqualToVar(lenFG, G, 0)) { if (divsteps >= maxDivsteps) return false; @@ -299,58 +356,19 @@ namespace Org.BouncyCastle.Math.Raw divsteps += 30; eta = Divsteps30Var(eta, F[0], G[0], t); - UpdateDE30(lenDE, D, E, t, m0Inv32, M); UpdateFG30(lenFG, F, G, t); - - int fn = F[lenFG - 1]; - int gn = G[lenFG - 1]; - - int cond = (lenFG - 2) >> 31; - cond |= fn ^ (fn >> 31); - cond |= gn ^ (gn >> 31); - - if (cond == 0) - { - F[lenFG - 2] |= fn << 30; - G[lenFG - 2] |= gn << 30; - --lenFG; - } + lenFG = TrimFG30Var(lenFG, F, G); } int signF = F[lenFG - 1] >> 31; - - /* - * D is in the range (-2.M, M). First, conditionally add M if D is negative, to bring it - * into the range (-M, M). Then normalize by conditionally negating (according to signF) - * and/or then adding M, to bring it into the range [0, M). - */ - int signD = D[lenDE - 1] >> 31; - if (signD < 0) - { - signD = Add30(lenDE, D, M); - } if (signF < 0) { - signD = Negate30(lenDE, D); signF = Negate30(lenFG, F); } Debug.Assert(0 == signF); - if (!EqualToOneVar_Expected(lenFG, F)) - return false; - - if (signD < 0) - { - signD = Add30(lenDE, D, M); - } - Debug.Assert(0 == signD); - - Decode30(bits, D, z); - Debug.Assert(!Nat.Gte(m.Length, z, m)); - - return true; + return EqualToVar(lenFG, F, 1); } -#endif public static uint[] Random(SecureRandom random, uint[] p) { @@ -392,9 +410,10 @@ namespace Org.BouncyCastle.Math.Raw m |= m >> 8; m |= m >> 16; - Span<byte> bytes = len <= 256 - ? stackalloc byte[len << 2] - : new byte[len << 2]; + int allocSize = len * Integers.NumBytes; + Span<byte> bytes = allocSize <= MaxStackAlloc + ? stackalloc byte[allocSize] + : new byte[allocSize]; do { @@ -496,34 +515,16 @@ namespace Org.BouncyCastle.Math.Raw #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER private static void Decode30(int bits, ReadOnlySpan<int> x, Span<uint> z) - { - Debug.Assert(bits > 0); - - int avail = 0; - ulong data = 0L; - - int xOff = 0, zOff = 0; - while (bits > 0) - { - while (avail < System.Math.Min(32, bits)) - { - data |= (ulong)x[xOff++] << avail; - avail += 30; - } - - z[zOff++] = (uint)data; data >>= 32; - avail -= 32; - bits -= 32; - } - } #else - private static void Decode30(int bits, int[] x, int xOff, uint[] z, int zOff) + private static void Decode30(int bits, int[] x, uint[] z) +#endif { Debug.Assert(bits > 0); int avail = 0; - ulong data = 0L; + ulong data = 0UL; + int xOff = 0, zOff = 0; while (bits > 0) { while (avail < System.Math.Min(32, bits)) @@ -537,53 +538,6 @@ namespace Org.BouncyCastle.Math.Raw bits -= 32; } } -#endif - -#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - private static int Divsteps30(int delta, int f0, int g0, Span<int> t) -#else - private static int Divsteps30(int delta, int f0, int g0, int[] t) -#endif - { - int u = 1 << 30, v = 0, q = 0, r = 1 << 30; - int f = f0, g = g0; - - for (int i = 0; i < 30; ++i) - { - Debug.Assert((f & 1) == 1); - Debug.Assert(((u >> (30 - i)) * f0 + (v >> (30 - i)) * g0) == f << i); - Debug.Assert(((q >> (30 - i)) * f0 + (r >> (30 - i)) * g0) == g << i); - - int c1 = delta >> 31; - int c2 = -(g & 1); - - int x = f ^ c1; - int y = u ^ c1; - int z = v ^ c1; - - g -= x & c2; - q -= y & c2; - r -= z & c2; - - c2 &= ~c1; - delta = (delta ^ c2) - (c2 - 1); - - f += g & c2; - u += q & c2; - v += r & c2; - - g >>= 1; - q >>= 1; - r >>= 1; - } - - t[0] = u; - t[1] = v; - t[2] = q; - t[3] = r; - - return delta; - } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER private static int Divsteps30Var(int eta, int f0, int g0, Span<int> t) @@ -595,7 +549,7 @@ namespace Org.BouncyCastle.Math.Raw int f = f0, g = g0, m, w, x, y, z; int i = 30, limit, zeros; - for (; ; ) + for (;;) { // Use a sentinel bit to count zeros only up to i. zeros = Integers.NumberOfTrailingZeros(g | (-1 << i)); @@ -614,15 +568,15 @@ namespace Org.BouncyCastle.Math.Raw Debug.Assert((u * f0 + v * g0) == f << (30 - i)); Debug.Assert((q * f0 + r * g0) == g << (30 - i)); - if (eta < 0) + if (eta <= 0) { - eta = -eta; + eta = 2 - eta; x = f; f = g; g = -x; y = u; u = q; q = -y; z = v; v = r; r = -z; // Handle up to 6 divsteps at once, subject to eta and i. - limit = (eta + 1) > i ? i : (eta + 1); + limit = eta > i ? i : eta; m = (int)((uint.MaxValue >> (32 - limit)) & 63U); w = (f * g * (f * f - 2)) & m; @@ -630,11 +584,11 @@ namespace Org.BouncyCastle.Math.Raw else { // Handle up to 4 divsteps at once, subject to eta and i. - limit = (eta + 1) > i ? i : (eta + 1); + limit = eta > i ? i : eta; m = (int)((uint.MaxValue >> (32 - limit)) & 15U); w = f + (((f + 1) & 4) << 1); - w = (-w * g) & m; + w = (w * -g) & m; } g += f * w; @@ -654,34 +608,16 @@ namespace Org.BouncyCastle.Math.Raw #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER private static void Encode30(int bits, ReadOnlySpan<uint> x, Span<int> z) - { - Debug.Assert(bits > 0); - - int avail = 0; - ulong data = 0UL; - - int xOff = 0, zOff = 0; - while (bits > 0) - { - if (avail < System.Math.Min(30, bits)) - { - data |= (x[xOff++] & M32UL) << avail; - avail += 32; - } - - z[zOff++] = (int)data & M30; data >>= 30; - avail -= 30; - bits -= 30; - } - } #else - private static void Encode30(int bits, uint[] x, int xOff, int[] z, int zOff) + private static void Encode30(int bits, uint[] x, int[] z) +#endif { Debug.Assert(bits > 0); int avail = 0; ulong data = 0UL; + int xOff = 0, zOff = 0; while (bits > 0) { if (avail < System.Math.Min(30, bits)) @@ -695,7 +631,6 @@ namespace Org.BouncyCastle.Math.Raw bits -= 30; } } -#endif #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER private static int EqualTo(int len, ReadOnlySpan<int> x, int y) @@ -713,12 +648,15 @@ namespace Org.BouncyCastle.Math.Raw } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - private static bool EqualToOneVar_Expected(int len, ReadOnlySpan<int> x) + private static bool EqualToVar(int len, ReadOnlySpan<int> x, int y) #else - private static bool EqualToOneVar_Expected(int len, int[] x) + private static bool EqualToVar(int len, int[] x, int y) #endif { - int d = x[0] ^ 1; + int d = x[0] ^ y; + if (d != 0) + return false; + for (int i = 1; i < len; ++i) { d |= x[i]; @@ -726,41 +664,62 @@ namespace Org.BouncyCastle.Math.Raw return d == 0; } -#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - private static int EqualToZero(int len, ReadOnlySpan<int> x) -#else - private static int EqualToZero(int len, int[] x) -#endif + private static int GetMaximumDivsteps(int bits) { - int d = 0; - for (int i = 0; i < len; ++i) - { - d |= x[i]; - } - d = (int)((uint)d >> 1) | (d & 1); - return (d - 1) >> 31; + //return (49 * bits + (bits < 46 ? 80 : 47)) / 17; + return (int)((188898L * bits + (bits < 46 ? 308405 : 181188)) >> 16); + } + + private static int GetMaximumHDDivsteps(int bits) + { + //return (int)((45907L * bits + 30179) / 19929); + return (int)((150964L * bits + 99243) >> 16); } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - private static bool EqualToZeroVar_Unlikely(int len, ReadOnlySpan<int> x) + private static int HDDivsteps30(int theta, int f0, int g0, Span<int> t) #else - private static bool EqualToZeroVar_Unlikely(int len, int[] x) + private static int HDDivsteps30(int theta, int f0, int g0, int[] t) #endif { - int d = x[0]; - if (d != 0) - return false; + int u = 1 << 30, v = 0, q = 0, r = 1 << 30; + int f = f0, g = g0; - for (int i = 1; i < len; ++i) + for (int i = 0; i < 30; ++i) { - d |= x[i]; + Debug.Assert((f & 1) == 1); + Debug.Assert(((u >> (30 - i)) * f0 + (v >> (30 - i)) * g0) == f << i); + Debug.Assert(((q >> (30 - i)) * f0 + (r >> (30 - i)) * g0) == g << i); + + int c1 = theta >> 31; + int c2 = -(g & 1); + + int x = f ^ c1; + int y = u ^ c1; + int z = v ^ c1; + + g -= x & c2; + q -= y & c2; + r -= z & c2; + + int c3 = c2 & ~c1; + theta = (theta ^ c3) + 1; + + f += g & c3; + u += q & c3; + v += r & c3; + + g >>= 1; + q >>= 1; + r >>= 1; } - return d == 0; - } - private static int GetMaximumDivsteps(int bits) - { - return (49 * bits + (bits < 46 ? 80 : 47)) / 17; + t[0] = u; + t[1] = v; + t[2] = q; + t[3] = r; + + return theta; } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER @@ -784,6 +743,33 @@ namespace Org.BouncyCastle.Math.Raw } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + private static int TrimFG30Var(int len30, Span<int> F, Span<int> G) +#else + private static int TrimFG30Var(int len30, int[] F, int[] G) +#endif + { + Debug.Assert(len30 > 0); + Debug.Assert(F.Length >= len30); + Debug.Assert(G.Length >= len30); + + int fn = F[len30 - 1]; + int gn = G[len30 - 1]; + + int cond = (len30 - 2) >> 31; + cond |= fn ^ (fn >> 31); + cond |= gn ^ (gn >> 31); + + if (cond == 0) + { + F[len30 - 2] |= fn << 30; + G[len30 - 2] |= gn << 30; + --len30; + } + + return len30; + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER private static void UpdateDE30(int len30, Span<int> D, Span<int> E, ReadOnlySpan<int> t, int m0Inv32, ReadOnlySpan<int> M) #else diff --git a/crypto/src/math/raw/Nat.cs b/crypto/src/math/raw/Nat.cs index d748e04c5..b524750d8 100644 --- a/crypto/src/math/raw/Nat.cs +++ b/crypto/src/math/raw/Nat.cs @@ -6,6 +6,7 @@ using System.Runtime.InteropServices; #endif using Org.BouncyCastle.Crypto.Utilities; +using Org.BouncyCastle.Utilities; namespace Org.BouncyCastle.Math.Raw { @@ -400,6 +401,36 @@ namespace Org.BouncyCastle.Math.Raw } #endif + public static uint CAddTo(int len, int mask, uint[] x, uint[] z) + { + uint MASK = (uint)-(mask & 1); + + ulong c = 0; + for (int i = 0; i < len; ++i) + { + c += (ulong)z[i] + (x[i] & MASK); + z[i] = (uint)c; + c >>= 32; + } + return (uint)c; + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + public static uint CAddTo(int len, int mask, ReadOnlySpan<uint> x, Span<uint> z) + { + uint MASK = (uint)-(mask & 1); + + ulong c = 0; + for (int i = 0; i < len; ++i) + { + c += (ulong)z[i] + (x[i] & MASK); + z[i] = (uint)c; + c >>= 32; + } + return (uint)c; + } +#endif + public static void CMov(int len, int mask, uint[] x, int xOff, uint[] z, int zOff) { uint MASK = (uint)-(mask & 1); @@ -712,7 +743,11 @@ namespace Org.BouncyCastle.Math.Raw } #endif +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + public static uint EqualTo(int len, ReadOnlySpan<uint> x, uint y) +#else public static uint EqualTo(int len, uint[] x, uint y) +#endif { uint d = x[0] ^ y; for (int i = 1; i < len; ++i) @@ -735,19 +770,10 @@ namespace Org.BouncyCastle.Math.Raw } #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - public static uint EqualTo(int len, ReadOnlySpan<uint> x, uint y) - { - uint d = x[0] ^ y; - for (int i = 1; i < len; ++i) - { - d |= x[i]; - } - d = (d >> 1) | (d & 1); - return (uint)(((int)d - 1) >> 31); - } -#endif - + public static uint EqualTo(int len, ReadOnlySpan<uint> x, ReadOnlySpan<uint> y) +#else public static uint EqualTo(int len, uint[] x, uint[] y) +#endif { uint d = 0; for (int i = 0; i < len; ++i) @@ -769,20 +795,12 @@ namespace Org.BouncyCastle.Math.Raw return (uint)(((int)d - 1) >> 31); } -#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - public static uint EqualTo(int len, ReadOnlySpan<uint> x, ReadOnlySpan<uint> y) - { - uint d = 0; - for (int i = 0; i < len; ++i) - { - d |= x[i] ^ y[i]; - } - d = (d >> 1) | (d & 1); - return (uint)(((int)d - 1) >> 31); - } -#endif +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + public static uint EqualToZero(int len, ReadOnlySpan<uint> x) +#else public static uint EqualToZero(int len, uint[] x) +#endif { uint d = 0; for (int i = 0; i < len; ++i) @@ -804,19 +822,6 @@ namespace Org.BouncyCastle.Math.Raw return (uint)(((int)d - 1) >> 31); } -#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - public static uint EqualToZero(int len, ReadOnlySpan<uint> x) - { - uint d = 0; - for (int i = 0; i < len; ++i) - { - d |= x[i]; - } - d = (d >> 1) | (d & 1); - return (uint)(((int)d - 1) >> 31); - } -#endif - public static uint[] FromBigInteger(int bits, BigInteger x) { if (x.SignValue < 0 || x.BitLength > bits) @@ -924,6 +929,32 @@ namespace Org.BouncyCastle.Math.Raw } #endif +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + public static int GetBitLength(int len, ReadOnlySpan<uint> x) +#else + public static int GetBitLength(int len, uint[] x) +#endif + { + for (int i = len - 1; i >= 0; --i) + { + uint x_i = x[i]; + if (x_i != 0) + return i * 32 + 32 - Integers.NumberOfLeadingZeros((int)x_i); + } + return 0; + } + + public static int GetBitLength(int len, uint[] x, int xOff) + { + for (int i = len - 1; i >= 0; --i) + { + uint x_i = x[xOff + i]; + if (x_i != 0) + return i * 32 + 32 - Integers.NumberOfLeadingZeros((int)x_i); + } + return 0; + } + public static int GetLengthForBits(int bits) { if (bits < 1) diff --git a/crypto/src/math/raw/Nat256.cs b/crypto/src/math/raw/Nat256.cs index 59039d3fa..49adf04af 100644 --- a/crypto/src/math/raw/Nat256.cs +++ b/crypto/src/math/raw/Nat256.cs @@ -1865,7 +1865,8 @@ namespace Org.BouncyCastle.Math.Raw public static void Xor(ReadOnlySpan<uint> x, ReadOnlySpan<uint> y, Span<uint> z) { #if NETCOREAPP3_0_OR_GREATER - if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled && + Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked) { var X = MemoryMarshal.AsBytes(x[..8]); var Y = MemoryMarshal.AsBytes(y[..8]); @@ -1880,7 +1881,8 @@ namespace Org.BouncyCastle.Math.Raw return; } - if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled && + Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked) { var X = MemoryMarshal.AsBytes(x[..8]); var Y = MemoryMarshal.AsBytes(y[..8]); diff --git a/crypto/src/math/raw/Nat512.cs b/crypto/src/math/raw/Nat512.cs index 56fa9a2c9..71b53214c 100644 --- a/crypto/src/math/raw/Nat512.cs +++ b/crypto/src/math/raw/Nat512.cs @@ -67,7 +67,8 @@ namespace Org.BouncyCastle.Math.Raw public static void Xor(ReadOnlySpan<uint> x, ReadOnlySpan<uint> y, Span<uint> z) { #if NETCOREAPP3_0_OR_GREATER - if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled && + Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked) { var X = MemoryMarshal.AsBytes(x[..16]); var Y = MemoryMarshal.AsBytes(y[..16]); @@ -87,7 +88,8 @@ namespace Org.BouncyCastle.Math.Raw return; } - if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled && + Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked) { var X = MemoryMarshal.AsBytes(x[..16]); var Y = MemoryMarshal.AsBytes(y[..16]); @@ -145,7 +147,8 @@ namespace Org.BouncyCastle.Math.Raw public static void XorTo(ReadOnlySpan<uint> x, Span<uint> z) { #if NETCOREAPP3_0_OR_GREATER - if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled && + Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked) { var X = MemoryMarshal.AsBytes(x[..16]); var Z = MemoryMarshal.AsBytes(z[..16]); @@ -164,7 +167,8 @@ namespace Org.BouncyCastle.Math.Raw return; } - if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled && + Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked) { var X = MemoryMarshal.AsBytes(x[..16]); var Z = MemoryMarshal.AsBytes(z[..16]); @@ -221,7 +225,8 @@ namespace Org.BouncyCastle.Math.Raw public static void Xor64(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z) { #if NETCOREAPP3_0_OR_GREATER - if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled && + Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked) { var X = MemoryMarshal.AsBytes(x[..8]); var Y = MemoryMarshal.AsBytes(y[..8]); @@ -241,7 +246,8 @@ namespace Org.BouncyCastle.Math.Raw return; } - if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled && + Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked) { var X = MemoryMarshal.AsBytes(x[..8]); var Y = MemoryMarshal.AsBytes(y[..8]); @@ -299,7 +305,8 @@ namespace Org.BouncyCastle.Math.Raw public static void XorTo64(ReadOnlySpan<ulong> x, Span<ulong> z) { #if NETCOREAPP3_0_OR_GREATER - if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Avx2.IsEnabled && + Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked) { var X = MemoryMarshal.AsBytes(x[..8]); var Z = MemoryMarshal.AsBytes(z[..8]); @@ -318,7 +325,8 @@ namespace Org.BouncyCastle.Math.Raw return; } - if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16) + if (Org.BouncyCastle.Runtime.Intrinsics.X86.Sse2.IsEnabled && + Org.BouncyCastle.Runtime.Intrinsics.Vector.IsPacked) { var X = MemoryMarshal.AsBytes(x[..8]); var Z = MemoryMarshal.AsBytes(z[..8]); |