From 2b54bff482b5c96960a0bd8c5bb9638f0706e328 Mon Sep 17 00:00:00 2001 From: Peter Dettman Date: Thu, 10 Nov 2022 14:09:13 +0700 Subject: BIKE perf. opts. - Repeated squaring via cached permutations --- crypto/src/pqc/crypto/bike/BikeRing.cs | 118 +++++++++++++++++++++++++++++++-- 1 file changed, 111 insertions(+), 7 deletions(-) diff --git a/crypto/src/pqc/crypto/bike/BikeRing.cs b/crypto/src/pqc/crypto/bike/BikeRing.cs index a519595af..e66fd9c7e 100644 --- a/crypto/src/pqc/crypto/bike/BikeRing.cs +++ b/crypto/src/pqc/crypto/bike/BikeRing.cs @@ -1,4 +1,5 @@ using System; +using System.Collections.Generic; using System.Diagnostics; #if NETCOREAPP3_0_OR_GREATER using System.Runtime.Intrinsics; @@ -17,15 +18,26 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike private readonly int m_bits; private readonly int m_size; private readonly int m_sizeExt; + private readonly int m_permutationCutoff; + private readonly Dictionary m_permutations = new Dictionary(); internal BikeRing(int r) { - if ((r & 0x80000001) != 1) + if ((r & 0xFFFF0001) != 1) throw new ArgumentException(); m_bits = r; m_size = (r + 63) >> 6; m_sizeExt = m_size * 2; + m_permutationCutoff = r >> 5; + + foreach (int n in EnumerateSquarePowersInv(r)) + { + if (n > m_permutationCutoff && !m_permutations.ContainsKey(n)) + { + m_permutations[n] = GenerateSquarePowerPermutation(r, n); + } + } } internal void Add(ulong[] x, ulong[] y, ulong[] z) @@ -172,15 +184,20 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike internal void SquareN(ulong[] x, int n, ulong[] z) { - /* - * TODO In these polynomial rings, 'squareN' for some 'n' is equivalent to a fixed permutation of the - * coefficients. For 'squareN' with 'n' above some cutoff value, this permutation could be precomputed - * and then applied in place of explicit squaring for that 'n'. This is particularly relevant to the - * calls generated by 'inv'. - */ Debug.Assert(n > 0); + /* + * In these polynomial rings, 'SquareN' for some 'n' is equivalent to a fixed permutation of the + * coefficients. For 'SquareN' with 'n' above some cutoff value, this permutation is precomputed and then + * applied in place of explicit squaring for that 'n'. This is particularly relevant to calls during 'Inv'. + */ + if (n > m_permutationCutoff) + { + ImplPermute(x, n, z); + return; + } + ulong[] tt = CreateExt(); ImplSquare(x, tt); Reduce(tt, z); @@ -245,6 +262,93 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike } } + private void ImplPermute(ulong[] x, int n, ulong[] z) + { + var permutation = m_permutations[n]; + + int i = 0, limit64 = m_bits - 64; + while (i < limit64) + { + ulong z_i = 0UL; + + for (int j = 0; j < 64; j += 8) + { + int k = i + j; + int p0 = permutation[k + 0]; + int p1 = permutation[k + 1]; + int p2 = permutation[k + 2]; + int p3 = permutation[k + 3]; + int p4 = permutation[k + 4]; + int p5 = permutation[k + 5]; + int p6 = permutation[k + 6]; + int p7 = permutation[k + 7]; + + z_i |= ((x[p0 >> 6] >> (p0 & 63)) & 1) << (j + 0); + z_i |= ((x[p1 >> 6] >> (p1 & 63)) & 1) << (j + 1); + z_i |= ((x[p2 >> 6] >> (p2 & 63)) & 1) << (j + 2); + z_i |= ((x[p3 >> 6] >> (p3 & 63)) & 1) << (j + 3); + z_i |= ((x[p4 >> 6] >> (p4 & 63)) & 1) << (j + 4); + z_i |= ((x[p5 >> 6] >> (p5 & 63)) & 1) << (j + 5); + z_i |= ((x[p6 >> 6] >> (p6 & 63)) & 1) << (j + 6); + z_i |= ((x[p7 >> 6] >> (p7 & 63)) & 1) << (j + 7); + } + + z[i >> 6] = z_i; + + i += 64; + } + Debug.Assert(i < m_bits); + { + ulong z_i = 0UL; + + for (int j = i; j < m_bits; ++j) + { + int p = permutation[j]; + + z_i |= ((x[p >> 6] >> (p & 63)) & 1) << (j & 63); + } + + z[i >> 6] = z_i; + } + } + + private static IEnumerable EnumerateSquarePowersInv(int r) + { + int rSub2 = r - 2; + int bits = 32 - Integers.NumberOfLeadingZeros(rSub2); + + for (int i = 1; i < bits; ++i) + { + yield return 1 << (i - 1); + + if ((rSub2 & (1 << i)) != 0) + { + int n = rSub2 & ((1 << i) - 1); + yield return n; + } + } + } + + private static ushort[] GenerateSquarePowerPermutation(int r, int n) + { + int p = 1; + for (int i = 0; i < n; ++i) + { + int m = -(p & 1); + p += r & m; + p >>= 1; + } + + var permutation = new ushort[r]; + permutation[0] = 0; + permutation[1] = (ushort)p; + for (int i = 2; i < r; ++i) + { + permutation[i] = (ushort)(((uint)i * (uint)p) % (uint)r); + } + return permutation; + } + private static void ImplMulwAcc(ulong[] u, ulong x, ulong y, ulong[] z, int zOff) { #if NETCOREAPP3_0_OR_GREATER -- cgit 1.4.1