summary refs log tree commit diff
diff options
context:
space:
mode:
author Peter Dettman <peter.dettman@bouncycastle.org> 2022-11-11 01:33:06 +0700
committer Peter Dettman <peter.dettman@bouncycastle.org> 2022-11-11 01:33:06 +0700
commit 1967f89f379101ab6ed110b7206164e694da2b28 (patch)
tree 6bf32d416e0c35e752eb94b34f66a92c3ed1b92b
parent BIKE perf. opts. (diff)
download BouncyCastle.NET-ed25519-1967f89f379101ab6ed110b7206164e694da2b28.tar.xz
BIKE perf. opts.
-rw-r--r-- crypto/src/math/raw/Nat.cs 63
-rw-r--r-- crypto/src/pqc/crypto/bike/BikeEngine.cs 10
-rw-r--r-- crypto/src/pqc/crypto/bike/BikeRing.cs 74
3 files changed, 126 insertions, 21 deletions
diff --git a/crypto/src/math/raw/Nat.cs b/crypto/src/math/raw/Nat.cs
index 3bc983430..b3b670954 100644
--- a/crypto/src/math/raw/Nat.cs
+++ b/crypto/src/math/raw/Nat.cs
@@ -1,5 +1,8 @@
 using System;
 using System.Diagnostics;
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+using System.Numerics;
+#endif
 
 using Org.BouncyCastle.Crypto.Utilities;
 
@@ -2737,6 +2740,66 @@ namespace Org.BouncyCastle.Math.Raw
         }
 #endif
 
+        public static void Xor64(int len, ulong[] x, ulong y, ulong[] z)
+        {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Xor64(len, x.AsSpan(0, len), y, z.AsSpan(0, len));
+#else
+            for (int i = 0; i < len; ++i)
+            {
+                z[i] = x[i] ^ y;
+            }
+#endif
+        }
+
+        public static void Xor64(int len, ulong[] x, int xOff, ulong y, ulong[] z, int zOff)
+        {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Xor64(len, x.AsSpan(xOff, len), y, z.AsSpan(zOff, len));
+#else
+            for (int i = 0; i < len; ++i)
+            {
+                z[zOff + i] = x[xOff + i] ^ y;
+            }
+#endif
+        }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void Xor64(int len, ReadOnlySpan<ulong> x, ulong y, Span<ulong> z)
+        {
+            int i = 0;
+            if (Vector.IsHardwareAccelerated)
+            {
+                var vy = new Vector<ulong>(y);
+
+                int limit = len - Vector<ulong>.Count;
+                while (i <= limit)
+                {
+                    var vx = new Vector<ulong>(x[i..]);
+                    (vx ^ vy).CopyTo(z[i..]);
+                    i += Vector<ulong>.Count;
+                }
+            }
+            else
+            {
+                int limit = len - 4;
+                while (i <= limit)
+                {
+                    z[i + 0] = x[i + 0] ^ y;
+                    z[i + 1] = x[i + 1] ^ y;
+                    z[i + 2] = x[i + 2] ^ y;
+                    z[i + 3] = x[i + 3] ^ y;
+                    i += 4;
+                }
+            }
+            while (i < len)
+            {
+                z[i] = x[i] ^ y;
+                ++i;
+            }
+        }
+#endif
+
         public static void Xor64(int len, ulong[] x, ulong[] y, ulong[] z)
         {
 #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
diff --git a/crypto/src/pqc/crypto/bike/BikeEngine.cs b/crypto/src/pqc/crypto/bike/BikeEngine.cs
index d523e71ab..4684caad6 100644
--- a/crypto/src/pqc/crypto/bike/BikeEngine.cs
+++ b/crypto/src/pqc/crypto/bike/BikeEngine.cs
@@ -489,7 +489,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
             int count = 0;
 
             int i = 0, limit = hw - 4;
-            while (i < limit)
+            while (i <= limit)
             {
                 int sPos0 = hCompactCol[i + 0] + j - r;
                 int sPos1 = hCompactCol[i + 1] + j - r;
@@ -529,7 +529,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
                 if (Vector.IsHardwareAccelerated)
                 {
                     int jLimit = neg - Vector<byte>.Count;
-                    while (j < jLimit)
+                    while (j <= jLimit)
                     {
                         var vc = new Vector<byte>(ctrs, j);
                         var vs = new Vector<byte>(s, col + j);
@@ -541,7 +541,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
 #endif
                 {
                     int jLimit = neg - 4;
-                    while (j < jLimit)
+                    while (j <= jLimit)
                     {
                         ctrs[j + 0] += s[col + j + 0];
                         ctrs[j + 1] += s[col + j + 1];
@@ -561,7 +561,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
                 if (Vector.IsHardwareAccelerated)
                 {
                     int kLimit = r - Vector<byte>.Count;
-                    while (k < kLimit)
+                    while (k <= kLimit)
                     {
                         var vc = new Vector<byte>(ctrs, k);
                         var vs = new Vector<byte>(s, k - neg);
@@ -573,7 +573,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
 #endif
                 {
                     int kLimit = r - 4;
-                    while (k < kLimit)
+                    while (k <= kLimit)
                     {
                         ctrs[k + 0] += s[k + 0 - neg];
                         ctrs[k + 1] += s[k + 1 - neg];
diff --git a/crypto/src/pqc/crypto/bike/BikeRing.cs b/crypto/src/pqc/crypto/bike/BikeRing.cs
index e66fd9c7e..9babe280e 100644
--- a/crypto/src/pqc/crypto/bike/BikeRing.cs
+++ b/crypto/src/pqc/crypto/bike/BikeRing.cs
@@ -211,6 +211,63 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
 
         private void ImplMultiplyAcc(ulong[] x, ulong[] y, ulong[] zz)
         {
+#if NETCOREAPP3_0_OR_GREATER
+            if (Pclmulqdq.IsSupported)
+            {
+                int i = 0, limit = Size - 2;
+                while (i <= limit)
+                {
+                    var X01 = Vector128.Create(x[i], x[i + 1]);
+
+                    int j = 0;
+                    while (j <= limit)
+                    {
+                        var Y01 = Vector128.Create(y[j], y[j + 1]);
+
+                        var Z01 = Pclmulqdq.CarrylessMultiply(X01, Y01, 0x00);
+                        var Z12 = Sse2.Xor(Pclmulqdq.CarrylessMultiply(X01, Y01, 0x01),
+                                           Pclmulqdq.CarrylessMultiply(X01, Y01, 0x10));
+                        var Z23 = Pclmulqdq.CarrylessMultiply(X01, Y01, 0x11);
+
+                        zz[i + j + 0] ^= Z01.GetElement(0);
+                        zz[i + j + 1] ^= Z01.GetElement(1) ^ Z12.GetElement(0);
+                        zz[i + j + 2] ^= Z23.GetElement(0) ^ Z12.GetElement(1);
+                        zz[i + j + 3] ^= Z23.GetElement(1);
+
+                        j += 2;
+                    }
+
+                    i += 2;
+                }
+                if (i < Size)
+                {
+                    var Xi = Vector128.CreateScalar(x[i]);
+                    var Yi = Vector128.CreateScalar(y[i]);
+
+                    for (int j = 0; j < i; ++j)
+                    {
+                        var Xj = Vector128.CreateScalar(x[j]);
+                        var Yj = Vector128.CreateScalar(y[j]);
+
+                        var Z = Sse2.Xor(Pclmulqdq.CarrylessMultiply(Xi, Yj, 0x00),
+                                         Pclmulqdq.CarrylessMultiply(Yi, Xj, 0x00));
+
+                        zz[i + j + 0] ^= Z.GetElement(0);
+                        zz[i + j + 1] ^= Z.GetElement(1);
+                    }
+
+                    {
+                        var Z = Pclmulqdq.CarrylessMultiply(Xi, Yi, 0x00);
+
+                        zz[i + i + 0] ^= Z.GetElement(0);
+                        zz[i + i + 1] ^= Z.GetElement(1);
+
+                    }
+                }
+                return;
+            }
+#endif
+
             ulong[] u = new ulong[16];
 
             // Schoolbook
@@ -241,10 +298,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
             }
 
             ulong w = v0 ^ v1;
-            for (int i = 0; i < Size; ++i)
-            {
-                zz[Size + i] = zz[i] ^ w;
-            }
+            Nat.Xor64(Size, zz, 0, w, zz, Size);
 
             int last = Size - 1;
             for (int zPos = 1; zPos < (last * 2); ++zPos)
@@ -351,18 +405,6 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
 
         private static void ImplMulwAcc(ulong[] u, ulong x, ulong y, ulong[] z, int zOff)
         {
-#if NETCOREAPP3_0_OR_GREATER
-            if (Pclmulqdq.IsSupported)
-            {
-                var X = Vector128.CreateScalar(x);
-                var Y = Vector128.CreateScalar(y);
-                var Z = Pclmulqdq.CarrylessMultiply(X, Y, 0x00);
-                z[zOff    ] ^= Z.GetElement(0);
-                z[zOff + 1] ^= Z.GetElement(1);
-                return;
-            }
-#endif
-
             //u[0] = 0;
             u[1] = y;
             for (int i = 2; i < 16; i += 2)