summary refs log tree commit diff
path: root/crypto/src/pqc
diff options
context:
space:
mode:
author Peter Dettman <peter.dettman@bouncycastle.org> 2022-11-11 01:33:06 +0700
committer Peter Dettman <peter.dettman@bouncycastle.org> 2022-11-11 01:33:06 +0700
commit 1967f89f379101ab6ed110b7206164e694da2b28 (patch)
tree 6bf32d416e0c35e752eb94b34f66a92c3ed1b92b /crypto/src/pqc
parentBIKE perf. opts. (diff)
download BouncyCastle.NET-ed25519-1967f89f379101ab6ed110b7206164e694da2b28.tar.xz
BIKE perf. opts.
Diffstat (limited to 'crypto/src/pqc')
-rw-r--r-- crypto/src/pqc/crypto/bike/BikeEngine.cs 10
-rw-r--r-- crypto/src/pqc/crypto/bike/BikeRing.cs 74
2 files changed, 63 insertions, 21 deletions
diff --git a/crypto/src/pqc/crypto/bike/BikeEngine.cs b/crypto/src/pqc/crypto/bike/BikeEngine.cs
index d523e71ab..4684caad6 100644
--- a/crypto/src/pqc/crypto/bike/BikeEngine.cs
+++ b/crypto/src/pqc/crypto/bike/BikeEngine.cs
@@ -489,7 +489,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
             int count = 0;
 
             int i = 0, limit = hw - 4;
-            while (i < limit)
+            while (i <= limit)
             {
                 int sPos0 = hCompactCol[i + 0] + j - r;
                 int sPos1 = hCompactCol[i + 1] + j - r;
@@ -529,7 +529,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
                 if (Vector.IsHardwareAccelerated)
                 {
                     int jLimit = neg - Vector<byte>.Count;
-                    while (j < jLimit)
+                    while (j <= jLimit)
                     {
                         var vc = new Vector<byte>(ctrs, j);
                         var vs = new Vector<byte>(s, col + j);
@@ -541,7 +541,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
 #endif
                 {
                     int jLimit = neg - 4;
-                    while (j < jLimit)
+                    while (j <= jLimit)
                     {
                         ctrs[j + 0] += s[col + j + 0];
                         ctrs[j + 1] += s[col + j + 1];
@@ -561,7 +561,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
                 if (Vector.IsHardwareAccelerated)
                 {
                     int kLimit = r - Vector<byte>.Count;
-                    while (k < kLimit)
+                    while (k <= kLimit)
                     {
                         var vc = new Vector<byte>(ctrs, k);
                         var vs = new Vector<byte>(s, k - neg);
@@ -573,7 +573,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
 #endif
                 {
                     int kLimit = r - 4;
-                    while (k < kLimit)
+                    while (k <= kLimit)
                     {
                         ctrs[k + 0] += s[k + 0 - neg];
                         ctrs[k + 1] += s[k + 1 - neg];
diff --git a/crypto/src/pqc/crypto/bike/BikeRing.cs b/crypto/src/pqc/crypto/bike/BikeRing.cs
index e66fd9c7e..9babe280e 100644
--- a/crypto/src/pqc/crypto/bike/BikeRing.cs
+++ b/crypto/src/pqc/crypto/bike/BikeRing.cs
@@ -211,6 +211,63 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
 
         private void ImplMultiplyAcc(ulong[] x, ulong[] y, ulong[] zz)
         {
+#if NETCOREAPP3_0_OR_GREATER
+            if (Pclmulqdq.IsSupported)
+            {
+                int i = 0, limit = Size - 2;
+                while (i <= limit)
+                {
+                    var X01 = Vector128.Create(x[i], x[i + 1]);
+
+                    int j = 0;
+                    while (j <= limit)
+                    {
+                        var Y01 = Vector128.Create(y[j], y[j + 1]);
+
+                        var Z01 = Pclmulqdq.CarrylessMultiply(X01, Y01, 0x00);
+                        var Z12 = Sse2.Xor(Pclmulqdq.CarrylessMultiply(X01, Y01, 0x01),
+                                           Pclmulqdq.CarrylessMultiply(X01, Y01, 0x10));
+                        var Z23 = Pclmulqdq.CarrylessMultiply(X01, Y01, 0x11);
+
+                        zz[i + j + 0] ^= Z01.GetElement(0);
+                        zz[i + j + 1] ^= Z01.GetElement(1) ^ Z12.GetElement(0);
+                        zz[i + j + 2] ^= Z23.GetElement(0) ^ Z12.GetElement(1);
+                        zz[i + j + 3] ^= Z23.GetElement(1);
+
+                        j += 2;
+                    }
+
+                    i += 2;
+                }
+                if (i < Size)
+                {
+                    var Xi = Vector128.CreateScalar(x[i]);
+                    var Yi = Vector128.CreateScalar(y[i]);
+
+                    for (int j = 0; j < i; ++j)
+                    {
+                        var Xj = Vector128.CreateScalar(x[j]);
+                        var Yj = Vector128.CreateScalar(y[j]);
+
+                        var Z = Sse2.Xor(Pclmulqdq.CarrylessMultiply(Xi, Yj, 0x00),
+                                         Pclmulqdq.CarrylessMultiply(Yi, Xj, 0x00));
+
+                        zz[i + j + 0] ^= Z.GetElement(0);
+                        zz[i + j + 1] ^= Z.GetElement(1);
+                    }
+
+                    {
+                        var Z = Pclmulqdq.CarrylessMultiply(Xi, Yi, 0x00);
+
+                        zz[i + i + 0] ^= Z.GetElement(0);
+                        zz[i + i + 1] ^= Z.GetElement(1);
+
+                    }
+                }
+                return;
+            }
+#endif
+
             ulong[] u = new ulong[16];
 
             // Schoolbook
@@ -241,10 +298,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
             }
 
             ulong w = v0 ^ v1;
-            for (int i = 0; i < Size; ++i)
-            {
-                zz[Size + i] = zz[i] ^ w;
-            }
+            Nat.Xor64(Size, zz, 0, w, zz, Size);
 
             int last = Size - 1;
             for (int zPos = 1; zPos < (last * 2); ++zPos)
@@ -351,18 +405,6 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
 
         private static void ImplMulwAcc(ulong[] u, ulong x, ulong y, ulong[] z, int zOff)
         {
-#if NETCOREAPP3_0_OR_GREATER
-            if (Pclmulqdq.IsSupported)
-            {
-                var X = Vector128.CreateScalar(x);
-                var Y = Vector128.CreateScalar(y);
-                var Z = Pclmulqdq.CarrylessMultiply(X, Y, 0x00);
-                z[zOff    ] ^= Z.GetElement(0);
-                z[zOff + 1] ^= Z.GetElement(1);
-                return;
-            }
-#endif
-
             //u[0] = 0;
             u[1] = y;
             for (int i = 2; i < 16; i += 2)