summary refs log tree commit diff
diff options
context:
space:
mode:
author: Peter Dettman <peter.dettman@bouncycastle.org>, 2022-10-22 00:49:49 +0700
committer: Peter Dettman <peter.dettman@bouncycastle.org>, 2022-10-22 00:49:49 +0700
commit: 677feb1f2d6f7bf941a7423ae3ebd9c9d66a90ca (patch)
tree: 9047bf420e4f6d5b8c8a3a3b927fcbf39bd55688
parent: Add Xor methods to Nat classes (diff)
download: BouncyCastle.NET-ed25519-677feb1f2d6f7bf941a7423ae3ebd9c9d66a90ca.tar.xz
Salsa20 perf. opts.
-rw-r--r-- crypto/src/crypto/engines/Salsa20Engine.cs 153
1 file changed, 117 insertions, 36 deletions
diff --git a/crypto/src/crypto/engines/Salsa20Engine.cs b/crypto/src/crypto/engines/Salsa20Engine.cs
index c3e44f645..fa5477ed5 100644
--- a/crypto/src/crypto/engines/Salsa20Engine.cs
+++ b/crypto/src/crypto/engines/Salsa20Engine.cs
@@ -1,5 +1,12 @@
 using System;
-using System.Text;
+#if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER
+using System.Runtime.CompilerServices;
+#endif
+#if NETCOREAPP3_0_OR_GREATER
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+#endif
 
 using Org.BouncyCastle.Crypto.Parameters;
 using Org.BouncyCastle.Crypto.Utilities;
@@ -254,7 +261,71 @@ namespace Org.BouncyCastle.Crypto.Engines
 			if (rounds % 2 != 0)
 				throw new ArgumentException("Number of rounds must be even");
 
-            uint x00 = input[ 0];
+#if NETCOREAPP3_0_OR_GREATER
+            if (Sse41.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<short>>() == 16)
+			{
+				Vector128<uint> b0, b1, b2, b3;
+				{
+                    var I = MemoryMarshal.Cast<uint, byte>(input.AsSpan(0, 16));
+					var t0 = MemoryMarshal.Read<Vector128<short>>(I[0x00..0x10]);
+                    var t1 = MemoryMarshal.Read<Vector128<short>>(I[0x10..0x20]);
+                    var t2 = MemoryMarshal.Read<Vector128<short>>(I[0x20..0x30]);
+                    var t3 = MemoryMarshal.Read<Vector128<short>>(I[0x30..0x40]);
+
+                    var u0 = Sse41.Blend(t0, t2, 0xF0);
+					var u1 = Sse41.Blend(t1, t3, 0xC3);
+					var u2 = Sse41.Blend(t0, t2, 0x0F);
+					var u3 = Sse41.Blend(t1, t3, 0x3C);
+
+					b0 = Sse41.Blend(u0, u1, 0xCC).AsUInt32();
+					b1 = Sse41.Blend(u0, u1, 0x33).AsUInt32();
+					b2 = Sse41.Blend(u2, u3, 0xCC).AsUInt32();
+					b3 = Sse41.Blend(u2, u3, 0x33).AsUInt32();
+				}
+
+                var c0 = b0;
+                var c1 = b1;
+                var c2 = b2;
+                var c3 = b3;
+
+                for (int i = rounds; i > 0; i -= 2)
+				{
+                    QuarterRound_Sse2(ref c0, ref c3, ref c2, ref c1);
+                    QuarterRound_Sse2(ref c0, ref c1, ref c2, ref c3);
+                }
+
+                b0 = Sse2.Add(b0, c0);
+                b1 = Sse2.Add(b1, c1);
+                b2 = Sse2.Add(b2, c2);
+                b3 = Sse2.Add(b3, c3);
+
+                {
+					var t0 = b0.AsUInt16();
+                    var t1 = b1.AsUInt16();
+                    var t2 = b2.AsUInt16();
+                    var t3 = b3.AsUInt16();
+
+					var u0 = Sse41.Blend(t0, t1, 0xCC);
+					var u1 = Sse41.Blend(t0, t1, 0x33);
+					var u2 = Sse41.Blend(t2, t3, 0xCC);
+					var u3 = Sse41.Blend(t2, t3, 0x33);
+
+					var v0 = Sse41.Blend(u0, u2, 0xF0);
+                    var v1 = Sse41.Blend(u1, u3, 0xC3);
+                    var v2 = Sse41.Blend(u0, u2, 0x0F);
+                    var v3 = Sse41.Blend(u1, u3, 0x3C);
+
+                    var X = MemoryMarshal.Cast<uint, byte>(x.AsSpan(0, 16));
+                    MemoryMarshal.Write(X[0x00..0x10], ref v0);
+                    MemoryMarshal.Write(X[0x10..0x20], ref v1);
+                    MemoryMarshal.Write(X[0x20..0x30], ref v2);
+                    MemoryMarshal.Write(X[0x30..0x40], ref v3);
+                }
+                return;
+			}
+#endif
+
+			uint x00 = input[ 0];
 			uint x01 = input[ 1];
 			uint x02 = input[ 2];
 			uint x03 = input[ 3];
@@ -273,39 +344,15 @@ namespace Org.BouncyCastle.Crypto.Engines
 
 			for (int i = rounds; i > 0; i -= 2)
 			{
-				x04 ^= Integers.RotateLeft((x00+x12), 7);
-				x08 ^= Integers.RotateLeft((x04+x00), 9);
-				x12 ^= Integers.RotateLeft((x08+x04),13);
-				x00 ^= Integers.RotateLeft((x12+x08),18);
-				x09 ^= Integers.RotateLeft((x05+x01), 7);
-				x13 ^= Integers.RotateLeft((x09+x05), 9);
-				x01 ^= Integers.RotateLeft((x13+x09),13);
-				x05 ^= Integers.RotateLeft((x01+x13),18);
-				x14 ^= Integers.RotateLeft((x10+x06), 7);
-				x02 ^= Integers.RotateLeft((x14+x10), 9);
-				x06 ^= Integers.RotateLeft((x02+x14),13);
-				x10 ^= Integers.RotateLeft((x06+x02),18);
-				x03 ^= Integers.RotateLeft((x15+x11), 7);
-				x07 ^= Integers.RotateLeft((x03+x15), 9);
-				x11 ^= Integers.RotateLeft((x07+x03),13);
-				x15 ^= Integers.RotateLeft((x11+x07),18);
-
-				x01 ^= Integers.RotateLeft((x00+x03), 7);
-				x02 ^= Integers.RotateLeft((x01+x00), 9);
-				x03 ^= Integers.RotateLeft((x02+x01),13);
-				x00 ^= Integers.RotateLeft((x03+x02),18);
-				x06 ^= Integers.RotateLeft((x05+x04), 7);
-				x07 ^= Integers.RotateLeft((x06+x05), 9);
-				x04 ^= Integers.RotateLeft((x07+x06),13);
-				x05 ^= Integers.RotateLeft((x04+x07),18);
-				x11 ^= Integers.RotateLeft((x10+x09), 7);
-				x08 ^= Integers.RotateLeft((x11+x10), 9);
-				x09 ^= Integers.RotateLeft((x08+x11),13);
-				x10 ^= Integers.RotateLeft((x09+x08),18);
-				x12 ^= Integers.RotateLeft((x15+x14), 7);
-				x13 ^= Integers.RotateLeft((x12+x15), 9);
-				x14 ^= Integers.RotateLeft((x13+x12),13);
-				x15 ^= Integers.RotateLeft((x14+x13),18);
+				QuarterRound(ref x00, ref x04, ref x08, ref x12);
+                QuarterRound(ref x05, ref x09, ref x13, ref x01);
+                QuarterRound(ref x10, ref x14, ref x02, ref x06);
+                QuarterRound(ref x15, ref x03, ref x07, ref x11);
+
+                QuarterRound(ref x00, ref x01, ref x02, ref x03);
+                QuarterRound(ref x05, ref x06, ref x07, ref x04);
+                QuarterRound(ref x10, ref x11, ref x08, ref x09);
+                QuarterRound(ref x15, ref x12, ref x13, ref x14);
 			}
 
 			x[ 0] = x00 + input[ 0];
@@ -364,5 +411,39 @@ namespace Org.BouncyCastle.Crypto.Engines
 
 			return false;
 		}
-	}
+
+#if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+#endif
+        private static void QuarterRound(ref uint a, ref uint b, ref uint c, ref uint d)
+		{
+            b ^= Integers.RotateLeft(a + d,  7);
+            c ^= Integers.RotateLeft(b + a,  9);
+            d ^= Integers.RotateLeft(c + b, 13);
+            a ^= Integers.RotateLeft(d + c, 18);
+        }
+
+#if NETCOREAPP3_0_OR_GREATER
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static void QuarterRound_Sse2(ref Vector128<uint> a, ref Vector128<uint> b, ref Vector128<uint> c,
+			ref Vector128<uint> d)
+        {
+			b = Sse2.Xor(b, Rotate_Sse2(Sse2.Add(a, d), 7));
+			c = Sse2.Xor(c, Rotate_Sse2(Sse2.Add(b, a), 9));
+			d = Sse2.Xor(d, Rotate_Sse2(Sse2.Add(c, b), 13));
+			a = Sse2.Xor(a, Rotate_Sse2(Sse2.Add(d, c), 18));
+
+            b = Sse2.Shuffle(b, 0x93);
+			c = Sse2.Shuffle(c, 0x4E);
+			d = Sse2.Shuffle(d, 0x39);
+		}
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static Vector128<uint> Rotate_Sse2(Vector128<uint> x, byte sl)
+        {
+			byte sr = (byte)(32 - sl);
+            return Sse2.Xor(Sse2.ShiftLeftLogical(x, sl), Sse2.ShiftRightLogical(x, sr));
+        }
+#endif
+    }
 }