From ddc7183746413247f7310645f0e101f3115fa69d Mon Sep 17 00:00:00 2001 From: Peter Dettman Date: Tue, 11 Oct 2022 13:02:41 +0700 Subject: Further speedups in Haraka256_X86 --- crypto/src/crypto/digests/Haraka256_X86.cs | 62 +++++++++++++++++------------- 1 file changed, 36 insertions(+), 26 deletions(-) diff --git a/crypto/src/crypto/digests/Haraka256_X86.cs b/crypto/src/crypto/digests/Haraka256_X86.cs index aa9b4555f..f3613eb6e 100644 --- a/crypto/src/crypto/digests/Haraka256_X86.cs +++ b/crypto/src/crypto/digests/Haraka256_X86.cs @@ -42,16 +42,13 @@ namespace Org.BouncyCastle.Crypto.Digests if (!IsSupported) throw new PlatformNotSupportedException(nameof(Haraka256_X86)); - var m1 = Load128(input[ ..16]); - var m2 = Load128(input[16..32]); - - var s1 = m1; - var s2 = m2; + var s1 = Load128(input[ ..16]); + var s2 = Load128(input[16..32]); - ImplPermute(ref s1, ref s2, DefaultRoundConstants); + ImplRounds(ref s1, ref s2, DefaultRoundConstants.AsSpan(0, 20)); - s1 = Sse2.Xor(s1, m1); - s2 = Sse2.Xor(s2, m2); + s1 = Sse2.Xor(s1, Load128(input[ ..16])); + s2 = Sse2.Xor(s2, Load128(input[16..32])); Store128(ref s1, output[ ..16]); Store128(ref s2, output[16..32]); @@ -65,34 +62,47 @@ namespace Org.BouncyCastle.Crypto.Digests var s1 = Load128(input[ ..16]); var s2 = Load128(input[16..32]); - ImplPermute(ref s1, ref s2, DefaultRoundConstants); + ImplRounds(ref s1, ref s2, DefaultRoundConstants.AsSpan(0, 20)); Store128(ref s1, output[ ..16]); Store128(ref s2, output[16..32]); } - [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] - private static void ImplPermute(ref Vector128 s1, ref Vector128 s2, - Span> rc) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void ImplRounds(ref Vector128 s1, ref Vector128 s2, Span> rc) { - int k = 0; - for (int round = 0; round < 5; ++round) - { - s1 = Aes.Encrypt(s1, rc[k++]); - s2 = Aes.Encrypt(s2, rc[k++]); + ImplRound(ref s1, ref s2, rc[ .. 4]); + ImplRound(ref s1, ref s2, rc[ 4.. 8]); + ImplRound(ref s1, ref s2, rc[ 8..12]); + ImplRound(ref s1, ref s2, rc[12..16]); + ImplRound(ref s1, ref s2, rc[16..20]); + } - s1 = Aes.Encrypt(s1, rc[k++]); - s2 = Aes.Encrypt(s2, rc[k++]); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void ImplRound(ref Vector128 s1, ref Vector128 s2, Span> rc) + { + ImplAes(ref s1, ref s2, rc[ ..2]); + ImplAes(ref s1, ref s2, rc[2..4]); + ImplMix(ref s1, ref s2); + } - var t1 = s1.AsUInt32(); - var t2 = s2.AsUInt32(); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void ImplAes(ref Vector128 s1, ref Vector128 s2, Span> rc) + { + s1 = Aes.Encrypt(s1, rc[0]); + s2 = Aes.Encrypt(s2, rc[1]); + } - s1 = Sse2.UnpackLow(t1, t2).AsByte(); - s2 = Sse2.UnpackHigh(t1, t2).AsByte(); - } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void ImplMix(ref Vector128 s1, ref Vector128 s2) + { + Vector128 t1 = s1.AsUInt32(); + Vector128 t2 = s2.AsUInt32(); + s1 = Sse2.UnpackLow(t1, t2).AsByte(); + s2 = Sse2.UnpackHigh(t1, t2).AsByte(); } - [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static Vector128 Load128(ReadOnlySpan t) { #if NET7_0_OR_GREATER @@ -106,7 +116,7 @@ namespace Org.BouncyCastle.Crypto.Digests #endif } - [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + [MethodImpl(MethodImplOptions.AggressiveInlining)] private static void Store128(ref Vector128 s, Span t) { #if NET7_0_OR_GREATER -- cgit 1.4.1