From 44261cb1487ee653ccf28afb87a085c0a8975d36 Mon Sep 17 00:00:00 2001 From: Peter Dettman Date: Tue, 1 Nov 2022 21:53:57 +0700 Subject: SCrypt perf. opts. --- crypto/src/crypto/engines/Salsa20Engine.cs | 101 +++++++++++++++++++++------ crypto/src/crypto/generators/SCrypt.cs | 107 ++++++++++++++++++++++++----- 2 files changed, 168 insertions(+), 40 deletions(-) diff --git a/crypto/src/crypto/engines/Salsa20Engine.cs b/crypto/src/crypto/engines/Salsa20Engine.cs index 1ccf68902..7c2c1e1f9 100644 --- a/crypto/src/crypto/engines/Salsa20Engine.cs +++ b/crypto/src/crypto/engines/Salsa20Engine.cs @@ -252,11 +252,12 @@ namespace Org.BouncyCastle.Crypto.Engines Pack.UInt32_To_LE(x, output, 0); } - internal static void SalsaCore(int rounds, uint[] input, uint[] x) +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + internal static void SalsaCore(int rounds, ReadOnlySpan input, Span output) { - if (input.Length != 16) + if (input.Length < 16) throw new ArgumentException(); - if (x.Length != 16) + if (output.Length < 16) throw new ArgumentException(); if (rounds % 2 != 0) throw new ArgumentException("Number of rounds must be even"); @@ -266,7 +267,7 @@ namespace Org.BouncyCastle.Crypto.Engines { Vector128 b0, b1, b2, b3; { - var I = MemoryMarshal.AsBytes(input.AsSpan(0, 16)); + var I = MemoryMarshal.AsBytes(input[..16]); var t0 = MemoryMarshal.Read>(I[0x00..0x10]); var t1 = MemoryMarshal.Read>(I[0x10..0x20]); var t2 = MemoryMarshal.Read>(I[0x20..0x30]); @@ -315,7 +316,7 @@ namespace Org.BouncyCastle.Crypto.Engines var v2 = Sse41.Blend(u0, u2, 0x0F); var v3 = Sse41.Blend(u1, u3, 0x3C); - var X = MemoryMarshal.AsBytes(x.AsSpan(0, 16)); + var X = MemoryMarshal.AsBytes(output[..16]); MemoryMarshal.Write(X[0x00..0x10], ref v0); MemoryMarshal.Write(X[0x10..0x20], ref v1); MemoryMarshal.Write(X[0x20..0x30], ref v2); @@ -355,23 +356,81 @@ namespace Org.BouncyCastle.Crypto.Engines QuarterRound(ref x15, ref x12, ref x13, ref x14); } - x[ 0] = x00 + input[ 0]; - x[ 1] = x01 + input[ 1]; - x[ 2] = x02 + input[ 2]; - x[ 3] = x03 + input[ 3]; - x[ 4] = x04 + input[ 4]; - x[ 5] = x05 + input[ 5]; - x[ 6] = x06 + input[ 6]; - x[ 7] = x07 + input[ 7]; - x[ 8] = x08 + input[ 8]; - x[ 9] = x09 + input[ 9]; - x[10] = x10 + input[10]; - x[11] = x11 + input[11]; - x[12] = x12 + input[12]; - x[13] = x13 + input[13]; - x[14] = x14 + input[14]; - x[15] = x15 + input[15]; + output[ 0] = x00 + input[ 0]; + output[ 1] = x01 + input[ 1]; + output[ 2] = x02 + input[ 2]; + output[ 3] = x03 + input[ 3]; + output[ 4] = x04 + input[ 4]; + output[ 5] = x05 + input[ 5]; + output[ 6] = x06 + input[ 6]; + output[ 7] = x07 + input[ 7]; + output[ 8] = x08 + input[ 8]; + output[ 9] = x09 + input[ 9]; + output[10] = x10 + input[10]; + output[11] = x11 + input[11]; + output[12] = x12 + input[12]; + output[13] = x13 + input[13]; + output[14] = x14 + input[14]; + output[15] = x15 + input[15]; } +#else + internal static void SalsaCore(int rounds, uint[] input, uint[] output) + { + if (input.Length < 16) + throw new ArgumentException(); + if (output.Length < 16) + throw new ArgumentException(); + if (rounds % 2 != 0) + throw new ArgumentException("Number of rounds must be even"); + + uint x00 = input[ 0]; + uint x01 = input[ 1]; + uint x02 = input[ 2]; + uint x03 = input[ 3]; + uint x04 = input[ 4]; + uint x05 = input[ 5]; + uint x06 = input[ 6]; + uint x07 = input[ 7]; + uint x08 = input[ 8]; + uint x09 = input[ 9]; + uint x10 = input[10]; + uint x11 = input[11]; + uint x12 = input[12]; + uint x13 = input[13]; + uint x14 = input[14]; + uint x15 = input[15]; + + for (int i = rounds; i > 0; i -= 2) + { + QuarterRound(ref x00, ref x04, ref x08, ref x12); + QuarterRound(ref x05, ref x09, ref x13, ref x01); + QuarterRound(ref x10, ref x14, ref x02, ref x06); + QuarterRound(ref x15, ref x03, ref x07, ref x11); + + QuarterRound(ref x00, ref x01, ref x02, ref x03); + QuarterRound(ref x05, ref x06, ref x07, ref x04); + QuarterRound(ref x10, ref x11, ref x08, ref x09); + QuarterRound(ref x15, ref x12, ref x13, ref x14); + } + + output[ 0] = x00 + input[ 0]; + output[ 1] = x01 + input[ 1]; + output[ 2] = x02 + input[ 2]; + output[ 3] = x03 + input[ 3]; + output[ 4] = x04 + input[ 4]; + output[ 5] = x05 + input[ 5]; + output[ 6] = x06 + input[ 6]; + output[ 7] = x07 + input[ 7]; + output[ 8] = x08 + input[ 8]; + output[ 9] = x09 + input[ 9]; + output[10] = x10 + input[10]; + output[11] = x11 + input[11]; + output[12] = x12 + input[12]; + output[13] = x13 + input[13]; + output[14] = x14 + input[14]; + output[15] = x15 + input[15]; + } +#endif internal void ResetLimitCounter() { diff --git a/crypto/src/crypto/generators/SCrypt.cs b/crypto/src/crypto/generators/SCrypt.cs index 1a8d4a003..fef842be2 100644 --- a/crypto/src/crypto/generators/SCrypt.cs +++ b/crypto/src/crypto/generators/SCrypt.cs @@ -102,7 +102,76 @@ namespace Org.BouncyCastle.Crypto.Generators return key.GetKey(); } - private static void SMix(uint[] B, int BOff, int N, int d, int r) +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + private static void SMix(uint[] B, int BOff, int N, int d, int r) + { + int powN = Integers.NumberOfTrailingZeros(N); + int blocksPerChunk = N >> d; + int chunkCount = 1 << d, chunkMask = blocksPerChunk - 1, chunkPow = powN - d; + + int BCount = r * 32; + + uint[] blockY = new uint[BCount]; + + uint[][] VV = new uint[chunkCount][]; + + try + { + var X = B.AsSpan(BOff, BCount); + + for (int c = 0; c < chunkCount; ++c) + { + uint[] V = new uint[blocksPerChunk * BCount]; + VV[c] = V; + + Nat.Copy(BCount, X, V); + int off = 0; + for (int i = 1; i < blocksPerChunk; ++i) + { + BlockMix(V.AsSpan(off, BCount), V.AsSpan(off + BCount)); + off += BCount; + } + BlockMix(V.AsSpan()[^BCount..], X); + } + + uint mask = (uint)N - 1; + for (int i = 0; i < N; ++i) + { + int j = (int)(X[BCount - 16] & mask); + uint[] V = VV[j >> chunkPow]; + int VOff = (j & chunkMask) * BCount; + Nat.Xor(BCount, V.AsSpan(VOff), X, blockY); + BlockMix(blockY, X); + } + } + finally + { + ClearAll(VV); + Clear(blockY); + } + } + + private static void BlockMix(Span B, Span Y) + { + int BCount = B.Length; + int half = BCount >> 1; + var y1 = B[^16..]; + + for (int pos = 0; pos < BCount; pos += 32) + { + var b0 = B[pos..]; + var y0 = Y[(pos >> 1)..]; + Nat512.Xor(y1, b0, y0); + Salsa20Engine.SalsaCore(8, y0, y0); + + var b1 = b0[16..]; + y1 = y0[half..]; + Nat512.Xor(y0, b1, y1); + Salsa20Engine.SalsaCore(8, y1, y1); + } + } +#else + private static void SMix(uint[] B, int BOff, int N, int d, int r) { int powN = Integers.NumberOfTrailingZeros(N); int blocksPerChunk = N >> d; @@ -111,7 +180,6 @@ namespace Org.BouncyCastle.Crypto.Generators int BCount = r * 32; uint[] blockX1 = new uint[16]; - uint[] blockX2 = new uint[16]; uint[] blockY = new uint[BCount]; uint[] X = new uint[BCount]; @@ -131,10 +199,10 @@ namespace Org.BouncyCastle.Crypto.Generators { Array.Copy(X, 0, V, off, BCount); off += BCount; - BlockMix(X, blockX1, blockX2, blockY, r); + BlockMix(X, blockX1, blockY, r); Array.Copy(blockY, 0, V, off, BCount); off += BCount; - BlockMix(blockY, blockX1, blockX2, X, r); + BlockMix(blockY, blockX1, X, r); } } @@ -146,7 +214,7 @@ namespace Org.BouncyCastle.Crypto.Generators int VOff = (j & chunkMask) * BCount; Nat.Xor(BCount, V, VOff, X, 0, blockY, 0); - BlockMix(blockY, blockX1, blockX2, X, r); + BlockMix(blockY, blockX1, X, r); } Array.Copy(X, 0, B, BOff, BCount); @@ -154,29 +222,30 @@ namespace Org.BouncyCastle.Crypto.Generators finally { ClearAll(VV); - ClearAll(X, blockX1, blockX2, blockY); + ClearAll(X, blockX1, blockY); } } - private static void BlockMix(uint[] B, uint[] X1, uint[] X2, uint[] Y, int r) + private static void BlockMix(uint[] B, uint[] X1, uint[] Y, int r) { - Array.Copy(B, B.Length - 16, X1, 0, 16); + Array.Copy(B, B.Length - 16, X1, 0, 16); - int BOff = 0, YOff = 0, halfLen = B.Length >> 1; + int BOff = 0, YOff = 0, halfLen = B.Length >> 1; - for (int i = 2 * r; i > 0; --i) - { - Nat512.Xor(X1, 0, B, BOff, X2, 0); + for (int i = 2 * r; i > 0; --i) + { + Nat512.XorTo(B, BOff, X1, 0); - Salsa20Engine.SalsaCore(8, X2, X1); - Array.Copy(X1, 0, Y, YOff, 16); + Salsa20Engine.SalsaCore(8, X1, X1); + Array.Copy(X1, 0, Y, YOff, 16); - YOff = halfLen + BOff - YOff; - BOff += 16; - } - } + YOff = halfLen + BOff - YOff; + BOff += 16; + } + } +#endif - private static void Clear(Array array) + private static void Clear(Array array) { if (array != null) { -- cgit 1.4.1