diff --git a/crypto/src/math/raw/Nat.cs b/crypto/src/math/raw/Nat.cs
index 3bc983430..b3b670954 100644
--- a/crypto/src/math/raw/Nat.cs
+++ b/crypto/src/math/raw/Nat.cs
@@ -1,5 +1,8 @@
using System;
using System.Diagnostics;
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+using System.Numerics;
+#endif
using Org.BouncyCastle.Crypto.Utilities;
@@ -2737,6 +2740,66 @@ namespace Org.BouncyCastle.Math.Raw
}
#endif
+ public static void Xor64(int len, ulong[] x, ulong y, ulong[] z) // writes z[i] = x[i] ^ y for every i in [0, len)
+ {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+ Xor64(len, x.AsSpan(0, len), y, z.AsSpan(0, len)); // delegate to the span-based overload over the first len elements of x and z
+#else
+ for (int i = 0; i < len; ++i) // plain loop for targets where the span-based overload is not compiled
+ {
+ z[i] = x[i] ^ y; // same scalar y is XORed into every element
+ }
+#endif
+ }
+
+ public static void Xor64(int len, ulong[] x, int xOff, ulong y, ulong[] z, int zOff) // writes z[zOff + i] = x[xOff + i] ^ y for every i in [0, len)
+ {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+ Xor64(len, x.AsSpan(xOff, len), y, z.AsSpan(zOff, len)); // slice both arrays at their offsets, then delegate to the span-based overload
+#else
+ for (int i = 0; i < len; ++i) // plain loop for targets where the span-based overload is not compiled
+ {
+ z[zOff + i] = x[xOff + i] ^ y; // same scalar y is XORed into every element
+ }
+#endif
+ }
+
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+ public static void Xor64(int len, ReadOnlySpan<ulong> x, ulong y, Span<ulong> z) // writes z[i] = x[i] ^ y for every i in [0, len)
+ {
+ int i = 0; // shared cursor: advanced by one of the bulk loops below, then finished by the tail loop
+ if (Vector.IsHardwareAccelerated) // bulk path: Vector<ulong>.Count elements per iteration
+ {
+ var vy = new Vector<ulong>(y); // vector with every element set to y
+
+ int limit = len - Vector<ulong>.Count; // highest start index at which a whole vector still fits inside len
+ while (i <= limit) // inclusive bound: a vector starting at i == limit ends exactly at len
+ {
+ var vx = new Vector<ulong>(x[i..]); // load the next Vector<ulong>.Count elements of x starting at i
+ (vx ^ vy).CopyTo(z[i..]); // store the XORed elements to z starting at i
+ i += Vector<ulong>.Count;
+ }
+ }
+ else // bulk path without vector acceleration: manually unrolled, 4 elements per iteration
+ {
+ int limit = len - 4; // highest start index at which a whole group of 4 still fits inside len
+ while (i <= limit) // inclusive bound: a group starting at i == limit ends exactly at len
+ {
+ z[i + 0] = x[i + 0] ^ y;
+ z[i + 1] = x[i + 1] ^ y;
+ z[i + 2] = x[i + 2] ^ y;
+ z[i + 3] = x[i + 3] ^ y;
+ i += 4;
+ }
+ }
+ while (i < len) // tail: elements left over after whichever bulk loop ran
+ {
+ z[i] = x[i] ^ y;
+ ++i;
+ }
+ }
+#endif
+
public static void Xor64(int len, ulong[] x, ulong[] y, ulong[] z)
{
#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
diff --git a/crypto/src/pqc/crypto/bike/BikeEngine.cs b/crypto/src/pqc/crypto/bike/BikeEngine.cs
index d523e71ab..4684caad6 100644
--- a/crypto/src/pqc/crypto/bike/BikeEngine.cs
+++ b/crypto/src/pqc/crypto/bike/BikeEngine.cs
@@ -489,7 +489,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
int count = 0;
int i = 0, limit = hw - 4;
- while (i < limit)
+ while (i <= limit)
{
int sPos0 = hCompactCol[i + 0] + j - r;
int sPos1 = hCompactCol[i + 1] + j - r;
@@ -529,7 +529,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
if (Vector.IsHardwareAccelerated)
{
int jLimit = neg - Vector<byte>.Count;
- while (j < jLimit)
+ while (j <= jLimit)
{
var vc = new Vector<byte>(ctrs, j);
var vs = new Vector<byte>(s, col + j);
@@ -541,7 +541,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
#endif
{
int jLimit = neg - 4;
- while (j < jLimit)
+ while (j <= jLimit)
{
ctrs[j + 0] += s[col + j + 0];
ctrs[j + 1] += s[col + j + 1];
@@ -561,7 +561,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
if (Vector.IsHardwareAccelerated)
{
int kLimit = r - Vector<byte>.Count;
- while (k < kLimit)
+ while (k <= kLimit)
{
var vc = new Vector<byte>(ctrs, k);
var vs = new Vector<byte>(s, k - neg);
@@ -573,7 +573,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
#endif
{
int kLimit = r - 4;
- while (k < kLimit)
+ while (k <= kLimit)
{
ctrs[k + 0] += s[k + 0 - neg];
ctrs[k + 1] += s[k + 1 - neg];
diff --git a/crypto/src/pqc/crypto/bike/BikeRing.cs b/crypto/src/pqc/crypto/bike/BikeRing.cs
index e66fd9c7e..9babe280e 100644
--- a/crypto/src/pqc/crypto/bike/BikeRing.cs
+++ b/crypto/src/pqc/crypto/bike/BikeRing.cs
@@ -211,6 +211,63 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
private void ImplMultiplyAcc(ulong[] x, ulong[] y, ulong[] zz)
{
+#if NETCOREAPP3_0_OR_GREATER
+ if (Pclmulqdq.IsSupported)
+ {
+ int i = 0, limit = Size - 2;
+ while (i <= limit)
+ {
+ var X01 = Vector128.Create(x[i], x[i + 1]);
+
+ int j = 0;
+ while (j <= limit)
+ {
+ var Y01 = Vector128.Create(y[j], y[j + 1]);
+
+ var Z01 = Pclmulqdq.CarrylessMultiply(X01, Y01, 0x00);
+ var Z12 = Sse2.Xor(Pclmulqdq.CarrylessMultiply(X01, Y01, 0x01),
+ Pclmulqdq.CarrylessMultiply(X01, Y01, 0x10));
+ var Z23 = Pclmulqdq.CarrylessMultiply(X01, Y01, 0x11);
+
+ zz[i + j + 0] ^= Z01.GetElement(0);
+ zz[i + j + 1] ^= Z01.GetElement(1) ^ Z12.GetElement(0);
+ zz[i + j + 2] ^= Z23.GetElement(0) ^ Z12.GetElement(1);
+ zz[i + j + 3] ^= Z23.GetElement(1);
+
+ j += 2;
+ }
+
+ i += 2;
+ }
+ if (i < Size)
+ {
+ var Xi = Vector128.CreateScalar(x[i]);
+ var Yi = Vector128.CreateScalar(y[i]);
+
+ for (int j = 0; j < i; ++j)
+ {
+ var Xj = Vector128.CreateScalar(x[j]);
+ var Yj = Vector128.CreateScalar(y[j]);
+
+ var Z = Sse2.Xor(Pclmulqdq.CarrylessMultiply(Xi, Yj, 0x00),
+ Pclmulqdq.CarrylessMultiply(Yi, Xj, 0x00));
+
+ zz[i + j + 0] ^= Z.GetElement(0);
+ zz[i + j + 1] ^= Z.GetElement(1);
+ }
+
+ {
+ var Z = Pclmulqdq.CarrylessMultiply(Xi, Yi, 0x00);
+
+ zz[i + i + 0] ^= Z.GetElement(0);
+ zz[i + i + 1] ^= Z.GetElement(1);
+
+ }
+ }
+ return;
+ }
+#endif
+
ulong[] u = new ulong[16];
// Schoolbook
@@ -241,10 +298,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
}
ulong w = v0 ^ v1;
- for (int i = 0; i < Size; ++i)
- {
- zz[Size + i] = zz[i] ^ w;
- }
+ Nat.Xor64(Size, zz, 0, w, zz, Size);
int last = Size - 1;
for (int zPos = 1; zPos < (last * 2); ++zPos)
@@ -351,18 +405,6 @@ namespace Org.BouncyCastle.Pqc.Crypto.Bike
private static void ImplMulwAcc(ulong[] u, ulong x, ulong y, ulong[] z, int zOff)
{
-#if NETCOREAPP3_0_OR_GREATER
- if (Pclmulqdq.IsSupported)
- {
- var X = Vector128.CreateScalar(x);
- var Y = Vector128.CreateScalar(y);
- var Z = Pclmulqdq.CarrylessMultiply(X, Y, 0x00);
- z[zOff ] ^= Z.GetElement(0);
- z[zOff + 1] ^= Z.GetElement(1);
- return;
- }
-#endif
-
//u[0] = 0;
u[1] = y;
for (int i = 2; i < 16; i += 2)
|