diff options
author | Peter Dettman <peter.dettman@bouncycastle.org> | 2023-04-26 23:15:00 +0700 |
---|---|---|
committer | Peter Dettman <peter.dettman@bouncycastle.org> | 2023-04-26 23:15:00 +0700 |
commit | 7b84d0d55ad0c1867f164c88c84dad5daeb61e46 (patch) | |
tree | 2921cac9649c840b766d740027bedc7c328b5a91 /crypto/src | |
parent | Refactor RecipientIdentifier (diff) | |
download | BouncyCastle.NET-ed25519-7b84d0d55ad0c1867f164c88c84dad5daeb61e46.tar.xz |
Re-add reworked SparkleEngine
Diffstat (limited to 'crypto/src')
-rw-r--r-- | crypto/src/crypto/digests/SparkleDigest.cs | 418 | ||||
-rw-r--r-- | crypto/src/crypto/engines/SparkleEngine.cs | 1394 |
2 files changed, 1420 insertions, 392 deletions
diff --git a/crypto/src/crypto/digests/SparkleDigest.cs b/crypto/src/crypto/digests/SparkleDigest.cs index 3a87d9e80..df51c9935 100644 --- a/crypto/src/crypto/digests/SparkleDigest.cs +++ b/crypto/src/crypto/digests/SparkleDigest.cs @@ -1,15 +1,9 @@ using System; -using System.Diagnostics; #if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER using System.Runtime.CompilerServices; #endif -#if NETCOREAPP3_0_OR_GREATER -using System.Buffers.Binary; -using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.X86; -#endif +using Org.BouncyCastle.Crypto.Engines; using Org.BouncyCastle.Crypto.Utilities; using Org.BouncyCastle.Utilities; @@ -188,19 +182,19 @@ namespace Org.BouncyCastle.Crypto.Digests // addition of last msg block (incl. padding) ProcessBlock(m_buf, 0, SPARKLE_STEPS_BIG); + Pack.UInt32_To_LE(state, 0, RATE_UINTS, output, outOff); + if (STATE_UINTS == 16) { - OutputBlock16(output, outOff); - SparkleOpt16(state, SPARKLE_STEPS_SLIM); - OutputBlock16(output, outOff + 16); - SparkleOpt16(state, SPARKLE_STEPS_SLIM); - OutputBlock16(output, outOff + 32); + SparkleEngine.SparkleOpt16(state, SPARKLE_STEPS_SLIM); + Pack.UInt32_To_LE(state, 0, RATE_UINTS, output, outOff + 16); + SparkleEngine.SparkleOpt16(state, SPARKLE_STEPS_SLIM); + Pack.UInt32_To_LE(state, 0, RATE_UINTS, output, outOff + 32); } else { - OutputBlock12(output, outOff); - SparkleOpt12(state, SPARKLE_STEPS_SLIM); - OutputBlock12(output, outOff + 16); + SparkleEngine.SparkleOpt12(state, SPARKLE_STEPS_SLIM); + Pack.UInt32_To_LE(state, 0, RATE_UINTS, output, outOff + 16); } Reset(); @@ -231,19 +225,19 @@ namespace Org.BouncyCastle.Crypto.Digests // addition of last msg block (incl. 
padding) ProcessBlock(m_buf, SPARKLE_STEPS_BIG); + Pack.UInt32_To_LE(state[..RATE_UINTS], output); + if (STATE_UINTS == 16) { - OutputBlock16(output); - SparkleOpt16(state, SPARKLE_STEPS_SLIM); - OutputBlock16(output[16..]); - SparkleOpt16(state, SPARKLE_STEPS_SLIM); - OutputBlock16(output[32..]); + SparkleEngine.SparkleOpt16(state, SPARKLE_STEPS_SLIM); + Pack.UInt32_To_LE(state[..RATE_UINTS], output[16..]); + SparkleEngine.SparkleOpt16(state, SPARKLE_STEPS_SLIM); + Pack.UInt32_To_LE(state[..RATE_UINTS], output[32..]); } else { - OutputBlock12(output); - SparkleOpt12(state, SPARKLE_STEPS_SLIM); - OutputBlock12(output[16..]); + SparkleEngine.SparkleOpt12(state, SPARKLE_STEPS_SLIM); + Pack.UInt32_To_LE(state[..RATE_UINTS], output[16..]); } Reset(); @@ -258,34 +252,6 @@ namespace Org.BouncyCastle.Crypto.Digests m_bufPos = 0; } -#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER - private void OutputBlock12(Span<byte> output) - { - Pack.UInt32_To_LE(state[..RATE_UINTS], output); - } - - private void OutputBlock16(Span<byte> output) - { - Pack.UInt32_To_LE(state[0], output); - Pack.UInt32_To_LE(state[4], output[4..]); - Pack.UInt32_To_LE(state[1], output[8..]); - Pack.UInt32_To_LE(state[5], output[12..]); - } -#else - private void OutputBlock12(byte[] output, int outOff) - { - Pack.UInt32_To_LE(state, 0, RATE_UINTS, output, outOff); - } - - private void OutputBlock16(byte[] output, int outOff) - { - Pack.UInt32_To_LE(state[0], output, outOff); - Pack.UInt32_To_LE(state[4], output, outOff + 4); - Pack.UInt32_To_LE(state[1], output, outOff + 8); - Pack.UInt32_To_LE(state[5], output, outOff + 12); - } -#endif - #if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER [MethodImpl(MethodImplOptions.AggressiveInlining)] #endif @@ -308,278 +274,22 @@ namespace Org.BouncyCastle.Crypto.Digests // addition of a buffer block to the state uint tx = ELL(t0 ^ t2); uint ty = ELL(t1 ^ t3); + state[0] ^= t0 ^ ty; + state[1] ^= t1 ^ tx; + state[2] ^= t2 ^ ty; + state[3] ^= t3 ^ 
tx; + state[4] ^= ty; + state[5] ^= tx; if (STATE_UINTS == 16) { - state[0] ^= t0 ^ ty; - state[1] ^= t2 ^ ty; - state[2] ^= ty; - state[3] ^= ty; - state[4] ^= t1 ^ tx; - state[5] ^= t3 ^ tx; - state[6] ^= tx; + state[6] ^= ty; state[7] ^= tx; - SparkleOpt16(state, steps); + SparkleEngine.SparkleOpt16(state, steps); } else { - state[0] ^= t0 ^ ty; - state[1] ^= t1 ^ tx; - state[2] ^= t2 ^ ty; - state[3] ^= t3 ^ tx; - state[4] ^= ty; - state[5] ^= tx; - SparkleOpt12(state, steps); - } - } - - private static void SparkleOpt12(uint[] state, int steps) - { - uint s00 = state[ 0]; - uint s01 = state[ 1]; - uint s02 = state[ 2]; - uint s03 = state[ 3]; - uint s04 = state[ 4]; - uint s05 = state[ 5]; - uint s06 = state[ 6]; - uint s07 = state[ 7]; - uint s08 = state[ 8]; - uint s09 = state[ 9]; - uint s10 = state[10]; - uint s11 = state[11]; - - for (int i = 0; i < steps; ++i) - { - // Add round constant - - s01 ^= RCON[i & 7]; - s03 ^= (uint)i; - - // ARXBOX layer - - ArxBoxRound(RCON[0], ref s00, ref s01); - ArxBoxRound(RCON[1], ref s02, ref s03); - ArxBoxRound(RCON[2], ref s04, ref s05); - ArxBoxRound(RCON[3], ref s06, ref s07); - ArxBoxRound(RCON[4], ref s08, ref s09); - ArxBoxRound(RCON[5], ref s10, ref s11); - - // Linear layer - - uint t024 = ELL(s00 ^ s02 ^ s04); - uint t135 = ELL(s01 ^ s03 ^ s05); - - uint u00 = s00 ^ s06; - uint u01 = s01 ^ s07; - uint u02 = s02 ^ s08; - uint u03 = s03 ^ s09; - uint u04 = s04 ^ s10; - uint u05 = s05 ^ s11; - - s06 = s00; - s07 = s01; - s08 = s02; - s09 = s03; - s10 = s04; - s11 = s05; - - s00 = u02 ^ t135; - s01 = u03 ^ t024; - s02 = u04 ^ t135; - s03 = u05 ^ t024; - s04 = u00 ^ t135; - s05 = u01 ^ t024; + SparkleEngine.SparkleOpt12(state, steps); } - - state[ 0] = s00; - state[ 1] = s01; - state[ 2] = s02; - state[ 3] = s03; - state[ 4] = s04; - state[ 5] = s05; - state[ 6] = s06; - state[ 7] = s07; - state[ 8] = s08; - state[ 9] = s09; - state[10] = s10; - state[11] = s11; - } - - private static void SparkleOpt16(uint[] state, int 
steps) - { - Debug.Assert((steps & 1) == 0); - -#if NETCOREAPP3_0_OR_GREATER - if (Sse2.IsSupported) - { - var s0246 = Load128(state.AsSpan(0)); - var s1357 = Load128(state.AsSpan(4)); - var s8ACE = Load128(state.AsSpan(8)); - var s9BDF = Load128(state.AsSpan(12)); - - var RC03 = Load128(RCON.AsSpan(0)); - var RC47 = Load128(RCON.AsSpan(4)); - - for (int step = 0; step < steps; ++step) - { - // Add round constant - - s1357 = Sse2.Xor(s1357, Vector128.Create(RCON[step & 7], (uint)step, 0U, 0U)); - - // ARXBOX layer - - ArxBoxRound(RC03, ref s0246, ref s1357); - ArxBoxRound(RC47, ref s8ACE, ref s9BDF); - - // Linear layer - - var t0246 = ELL(HorizontalXor(s0246)); - var t1357 = ELL(HorizontalXor(s1357)); - - var u0246 = Sse2.Xor(s0246, s8ACE); - var u1357 = Sse2.Xor(s1357, s9BDF); - - s8ACE = s0246; - s9BDF = s1357; - - s0246 = Sse2.Xor(t1357, Sse2.Shuffle(u0246, 0x39)); - s1357 = Sse2.Xor(t0246, Sse2.Shuffle(u1357, 0x39)); - } - - Store128(s0246, state.AsSpan(0)); - Store128(s1357, state.AsSpan(4)); - Store128(s8ACE, state.AsSpan(8)); - Store128(s9BDF, state.AsSpan(12)); - } - else -#endif - { - uint s00 = state[ 0]; - uint s02 = state[ 1]; - uint s04 = state[ 2]; - uint s06 = state[ 3]; - uint s01 = state[ 4]; - uint s03 = state[ 5]; - uint s05 = state[ 6]; - uint s07 = state[ 7]; - uint s08 = state[ 8]; - uint s10 = state[ 9]; - uint s12 = state[10]; - uint s14 = state[11]; - uint s09 = state[12]; - uint s11 = state[13]; - uint s13 = state[14]; - uint s15 = state[15]; - - int step = 0; - while (step < steps) - { - // STEP 1 - - // Add round constant - - s01 ^= RCON[step & 7]; - s03 ^= (uint)(step++); - - // ARXBOX layer - - ArxBoxRound(RCON[0], ref s00, ref s01); - ArxBoxRound(RCON[1], ref s02, ref s03); - ArxBoxRound(RCON[2], ref s04, ref s05); - ArxBoxRound(RCON[3], ref s06, ref s07); - ArxBoxRound(RCON[4], ref s08, ref s09); - ArxBoxRound(RCON[5], ref s10, ref s11); - ArxBoxRound(RCON[6], ref s12, ref s13); - ArxBoxRound(RCON[7], ref s14, ref s15); - - // Linear layer - 
- uint t0246 = ELL(s00 ^ s02 ^ s04 ^ s06); - uint t1357 = ELL(s01 ^ s03 ^ s05 ^ s07); - - uint u08 = s08; - uint u09 = s09; - - s08 = s02 ^ s10 ^ t1357; - s09 = s03 ^ s11 ^ t0246; - s10 = s04 ^ s12 ^ t1357; - s11 = s05 ^ s13 ^ t0246; - s12 = s06 ^ s14 ^ t1357; - s13 = s07 ^ s15 ^ t0246; - s14 = s00 ^ u08 ^ t1357; - s15 = s01 ^ u09 ^ t0246; - - // STEP 2 - - // Add round constant - - s09 ^= RCON[step & 7]; - s11 ^= (uint)(step++); - - // ARXBOX layer - - ArxBoxRound(RCON[0], ref s08, ref s09); - ArxBoxRound(RCON[1], ref s10, ref s11); - ArxBoxRound(RCON[2], ref s12, ref s13); - ArxBoxRound(RCON[3], ref s14, ref s15); - ArxBoxRound(RCON[4], ref s00, ref s01); - ArxBoxRound(RCON[5], ref s02, ref s03); - ArxBoxRound(RCON[6], ref s04, ref s05); - ArxBoxRound(RCON[7], ref s06, ref s07); - - // Linear layer - - uint t8ACE = ELL(s08 ^ s10 ^ s12 ^ s14); - uint t9BDF = ELL(s09 ^ s11 ^ s13 ^ s15); - - uint u00 = s00; - uint u01 = s01; - - s00 = s02 ^ s10 ^ t9BDF; - s01 = s03 ^ s11 ^ t8ACE; - s02 = s04 ^ s12 ^ t9BDF; - s03 = s05 ^ s13 ^ t8ACE; - s04 = s06 ^ s14 ^ t9BDF; - s05 = s07 ^ s15 ^ t8ACE; - s06 = u00 ^ s08 ^ t9BDF; - s07 = u01 ^ s09 ^ t8ACE; - } - - state[ 0] = s00; - state[ 1] = s02; - state[ 2] = s04; - state[ 3] = s06; - state[ 4] = s01; - state[ 5] = s03; - state[ 6] = s05; - state[ 7] = s07; - state[ 8] = s08; - state[ 9] = s10; - state[10] = s12; - state[11] = s14; - state[12] = s09; - state[13] = s11; - state[14] = s13; - state[15] = s15; - } - } - -#if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER - [MethodImpl(MethodImplOptions.AggressiveInlining)] -#endif - private static void ArxBoxRound(uint rc, ref uint s00, ref uint s01) - { - s00 += Integers.RotateRight(s01, 31); - s01 ^= Integers.RotateRight(s00, 24); - s00 ^= rc; - s00 += Integers.RotateRight(s01, 17); - s01 ^= Integers.RotateRight(s00, 17); - s00 ^= rc; - s00 += s01; - s01 ^= Integers.RotateRight(s00, 31); - s00 ^= rc; - s00 += Integers.RotateRight(s01, 24); - s01 ^= Integers.RotateRight(s00, 16); 
- s00 ^= rc; } #if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER @@ -589,81 +299,5 @@ namespace Org.BouncyCastle.Crypto.Digests { return Integers.RotateRight(x, 16) ^ (x & 0xFFFFU); } - -#if NETCOREAPP3_0_OR_GREATER - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void ArxBoxRound(Vector128<uint> rc, ref Vector128<uint> s00, ref Vector128<uint> s01) - { - s00 = Sse2.Add(s00, Sse2.ShiftRightLogical(s01, 31)); - s00 = Sse2.Add(s00, Sse2.ShiftLeftLogical(s01, 1)); - - s01 = Sse2.Xor(s01, Sse2.ShiftRightLogical(s00, 24)); - s01 = Sse2.Xor(s01, Sse2.ShiftLeftLogical(s00, 8)); - - s00 = Sse2.Xor(s00, rc); - - s00 = Sse2.Add(s00, Sse2.ShiftRightLogical(s01, 17)); - s00 = Sse2.Add(s00, Sse2.ShiftLeftLogical(s01, 15)); - - s01 = Sse2.Xor(s01, Sse2.ShiftRightLogical(s00, 17)); - s01 = Sse2.Xor(s01, Sse2.ShiftLeftLogical(s00, 15)); - - s00 = Sse2.Xor(s00, rc); - - s00 = Sse2.Add(s00, s01); - - s01 = Sse2.Xor(s01, Sse2.ShiftRightLogical(s00, 31)); - s01 = Sse2.Xor(s01, Sse2.ShiftLeftLogical(s00, 1)); - - s00 = Sse2.Xor(s00, rc); - - s00 = Sse2.Add(s00, Sse2.ShiftRightLogical(s01, 24)); - s00 = Sse2.Add(s00, Sse2.ShiftLeftLogical(s01, 8)); - - s01 = Sse2.Xor(s01, Sse2.ShiftRightLogical(s00, 16)); - s01 = Sse2.Xor(s01, Sse2.ShiftLeftLogical(s00, 16)); - - s00 = Sse2.Xor(s00, rc); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector128<uint> ELL(Vector128<uint> x) - { - var t = Sse2.ShiftLeftLogical(x, 16); - var u = Sse2.Xor(x, t); - return Sse2.Xor(t, Sse2.ShiftRightLogical(u, 16)); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector128<uint> HorizontalXor(Vector128<uint> x) - { - var t = Sse2.Xor(x, Sse2.Shuffle(x, 0x1B)); - return Sse2.Xor(t, Sse2.Shuffle(t, 0xB1)); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector128<uint> Load128(ReadOnlySpan<uint> t) - { - if (BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<uint>>() == 16) - return 
MemoryMarshal.Read<Vector128<uint>>(MemoryMarshal.AsBytes(t)); - - return Vector128.Create(t[0], t[1], t[2], t[3]); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static void Store128(Vector128<uint> s, Span<uint> t) - { - var b = MemoryMarshal.AsBytes(t); - if (BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<uint>>() == 16) - { - MemoryMarshal.Write(b, ref s); - return; - } - - var u = s.AsUInt64(); - BinaryPrimitives.WriteUInt64LittleEndian(b[..8], u.GetElement(0)); - BinaryPrimitives.WriteUInt64LittleEndian(b[8..], u.GetElement(1)); - } -#endif } } diff --git a/crypto/src/crypto/engines/SparkleEngine.cs b/crypto/src/crypto/engines/SparkleEngine.cs new file mode 100644 index 000000000..34c9bf015 --- /dev/null +++ b/crypto/src/crypto/engines/SparkleEngine.cs @@ -0,0 +1,1394 @@ +using System; +using System.Diagnostics; +#if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER +using System.Runtime.CompilerServices; +#endif +#if NETCOREAPP3_0_OR_GREATER +using System.Buffers.Binary; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif + +using Org.BouncyCastle.Crypto.Modes; +using Org.BouncyCastle.Crypto.Parameters; +using Org.BouncyCastle.Crypto.Utilities; +using Org.BouncyCastle.Utilities; + +namespace Org.BouncyCastle.Crypto.Engines +{ + /// <summary>Sparkle v1.2, based on the current round 3 submission, https://sparkle-lwc.github.io/ .</summary> + /// <remarks> + /// Reference C implementation: https://github.com/cryptolu/sparkle.<br/> + /// Specification: + /// https://csrc.nist.gov/CSRC/media/Projects/lightweight-cryptography/documents/finalist-round/updated-spec-doc/sparkle-spec-final.pdf . 
+ /// </remarks> + public sealed class SparkleEngine + : IAeadCipher + { + public enum SparkleParameters + { + SCHWAEMM128_128, + SCHWAEMM256_128, + SCHWAEMM192_192, + SCHWAEMM256_256 + } + + private enum State + { + Uninitialized = 0, + EncInit = 1, + EncAad = 2, + EncData = 3, + EncFinal = 4, + DecInit = 5, + DecAad = 6, + DecData = 7, + DecFinal = 8, + } + + private static readonly uint[] RCON = { 0xB7E15162U, 0xBF715880U, 0x38B4DA56U, 0x324E7738U, 0xBB1185EBU, + 0x4F7C7B57U, 0xCFBFA1C8U, 0xC2B3293DU }; + + private string algorithmName; + private readonly uint[] state; + private readonly uint[] k; + private readonly uint[] npub; + private byte[] tag; + private bool encrypted; + private State m_state = State.Uninitialized; + private byte[] initialAssociatedText; + + private readonly int m_bufferSizeDecrypt; + private readonly byte[] m_buf; + private int m_bufPos = 0; + + private readonly int SCHWAEMM_KEY_LEN; + private readonly int SCHWAEMM_NONCE_LEN; + private readonly int SPARKLE_STEPS_SLIM; + private readonly int SPARKLE_STEPS_BIG; + private readonly int KEY_BYTES; + private readonly int KEY_WORDS; + private readonly int TAG_WORDS; + private readonly int TAG_BYTES; + private readonly int STATE_WORDS; + private readonly int RATE_WORDS; + private readonly int RATE_BYTES; + private readonly int CAP_MASK; + private readonly uint _A0; + private readonly uint _A1; + private readonly uint _M2; + private readonly uint _M3; + + public SparkleEngine(SparkleParameters sparkleParameters) + { + int SPARKLE_STATE; + int SCHWAEMM_TAG_LEN; + int SPARKLE_CAPACITY; + switch (sparkleParameters) + { + case SparkleParameters.SCHWAEMM128_128: + SCHWAEMM_KEY_LEN = 128; + SCHWAEMM_NONCE_LEN = 128; + SCHWAEMM_TAG_LEN = 128; + SPARKLE_STATE = 256; + SPARKLE_CAPACITY = 128; + SPARKLE_STEPS_SLIM = 7; + SPARKLE_STEPS_BIG = 10; + algorithmName = "SCHWAEMM128-128"; + break; + case SparkleParameters.SCHWAEMM256_128: + SCHWAEMM_KEY_LEN = 128; + SCHWAEMM_NONCE_LEN = 256; + SCHWAEMM_TAG_LEN = 
128; + SPARKLE_STATE = 384; + SPARKLE_CAPACITY = 128; + SPARKLE_STEPS_SLIM = 7; + SPARKLE_STEPS_BIG = 11; + algorithmName = "SCHWAEMM256-128"; + break; + case SparkleParameters.SCHWAEMM192_192: + SCHWAEMM_KEY_LEN = 192; + SCHWAEMM_NONCE_LEN = 192; + SCHWAEMM_TAG_LEN = 192; + SPARKLE_STATE = 384; + SPARKLE_CAPACITY = 192; + SPARKLE_STEPS_SLIM = 7; + SPARKLE_STEPS_BIG = 11; + algorithmName = "SCHWAEMM192-192"; + break; + case SparkleParameters.SCHWAEMM256_256: + SCHWAEMM_KEY_LEN = 256; + SCHWAEMM_NONCE_LEN = 256; + SCHWAEMM_TAG_LEN = 256; + SPARKLE_STATE = 512; + SPARKLE_CAPACITY = 256; + SPARKLE_STEPS_SLIM = 8; + SPARKLE_STEPS_BIG = 12; + algorithmName = "SCHWAEMM256-256"; + break; + default: + throw new ArgumentException("Invalid definition of SCHWAEMM instance"); + } + KEY_WORDS = SCHWAEMM_KEY_LEN >> 5; + KEY_BYTES = SCHWAEMM_KEY_LEN >> 3; + TAG_WORDS = SCHWAEMM_TAG_LEN >> 5; + TAG_BYTES = SCHWAEMM_TAG_LEN >> 3; + STATE_WORDS = SPARKLE_STATE >> 5; + RATE_WORDS = SCHWAEMM_NONCE_LEN >> 5; + RATE_BYTES = SCHWAEMM_NONCE_LEN >> 3; + int CAP_BRANS = SPARKLE_CAPACITY >> 6; + int CAP_WORDS = SPARKLE_CAPACITY >> 5; + CAP_MASK = RATE_WORDS > CAP_WORDS ? 
CAP_WORDS - 1 : -1; + _A0 = ((((1u << CAP_BRANS))) << 24); + _A1 = (((1u ^ (1u << CAP_BRANS))) << 24); + _M2 = (((2u ^ (1u << CAP_BRANS))) << 24); + _M3 = (((3u ^ (1u << CAP_BRANS))) << 24); + state = new uint[STATE_WORDS]; + tag = new byte[TAG_BYTES]; + k = new uint[KEY_WORDS]; + npub = new uint[RATE_WORDS]; + + m_bufferSizeDecrypt = RATE_BYTES + TAG_BYTES; + m_buf = new byte[m_bufferSizeDecrypt]; + + // Relied on by ProcessBytes methods for decryption + Debug.Assert(RATE_BYTES >= TAG_BYTES); + } + + public int GetKeyBytesSize() => KEY_BYTES; + + public int GetIVBytesSize() => RATE_BYTES; + + public string AlgorithmName => algorithmName; + + public void Init(bool forEncryption, ICipherParameters parameters) + { + KeyParameter key; +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + ReadOnlySpan<byte> iv; +#else + byte[] iv; +#endif + + if (parameters is AeadParameters aeadParameters) + { + key = aeadParameters.Key; +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + iv = aeadParameters.Nonce; +#else + iv = aeadParameters.GetNonce(); +#endif + initialAssociatedText = aeadParameters.GetAssociatedText(); + + int macSizeBits = aeadParameters.MacSize; + if (macSizeBits != TAG_BYTES * 8) + throw new ArgumentException("Invalid value for MAC size: " + macSizeBits); + } + else if (parameters is ParametersWithIV withIV) + { + key = withIV.Parameters as KeyParameter; +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + iv = withIV.IV; +#else + iv = withIV.GetIV(); +#endif + initialAssociatedText = null; + } + else + { + throw new ArgumentException("invalid parameters passed to Sparkle"); + } + + if (key == null) + throw new ArgumentException("Sparkle Init parameters must include a key"); + + int expectedKeyLength = KEY_WORDS * 4; + if (expectedKeyLength != key.KeyLength) + throw new ArgumentException(algorithmName + " requires exactly " + expectedKeyLength + " bytes of key"); + + int expectedIVLength = RATE_WORDS * 4; + if (expectedIVLength != 
iv.Length) + throw new ArgumentException(algorithmName + " requires exactly " + expectedIVLength + " bytes of IV"); + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + Pack.LE_To_UInt32(key.Key, k); + Pack.LE_To_UInt32(iv, npub); +#else + Pack.LE_To_UInt32(key.GetKey(), 0, k); + Pack.LE_To_UInt32(iv, 0, npub); +#endif + + m_state = forEncryption ? State.EncInit : State.DecInit; + + // TODO true might be better? + Reset(false); + } + + public void ProcessAadByte(byte input) + { + CheckAad(); + + if (m_bufPos == RATE_BYTES) + { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + ProcessBufferAad(m_buf); +#else + ProcessBufferAad(m_buf, 0); +#endif + m_bufPos = 0; + } + + m_buf[m_bufPos++] = input; + } + + public void ProcessAadBytes(byte[] inBytes, int inOff, int len) + { + Check.DataLength(inBytes, inOff, len, "input buffer too short"); + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + ProcessAadBytes(inBytes.AsSpan(inOff, len)); +#else + // Don't enter AAD state until we actually get input + if (len <= 0) + return; + + CheckAad(); + + if (m_bufPos > 0) + { + int available = RATE_BYTES - m_bufPos; + if (len <= available) + { + Array.Copy(inBytes, inOff, m_buf, m_bufPos, len); + m_bufPos += len; + return; + } + + Array.Copy(inBytes, inOff, m_buf, m_bufPos, available); + inOff += available; + len -= available; + + ProcessBufferAad(m_buf, 0); + //m_bufPos = 0; + } + + while (len > RATE_BYTES) + { + ProcessBufferAad(inBytes, inOff); + inOff += RATE_BYTES; + len -= RATE_BYTES; + } + + Array.Copy(inBytes, inOff, m_buf, 0, len); + m_bufPos = len; +#endif + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + public void ProcessAadBytes(ReadOnlySpan<byte> input) + { + // Don't enter AAD state until we actually get input + if (input.IsEmpty) + return; + + CheckAad(); + + if (m_bufPos > 0) + { + int available = RATE_BYTES - m_bufPos; + if (input.Length <= available) + { + input.CopyTo(m_buf.AsSpan(m_bufPos)); + m_bufPos += 
input.Length; + return; + } + + input[..available].CopyTo(m_buf.AsSpan(m_bufPos)); + input = input[available..]; + + ProcessBufferAad(m_buf); + //m_bufPos = 0; + } + + while (input.Length > RATE_BYTES) + { + ProcessBufferAad(input); + input = input[RATE_BYTES..]; + } + + input.CopyTo(m_buf); + m_bufPos = input.Length; + } +#endif + + public int ProcessByte(byte input, byte[] outBytes, int outOff) + { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ProcessByte(input, Spans.FromNullable(outBytes, outOff)); +#else + return ProcessBytes(new byte[]{ input }, 0, 1, outBytes, outOff); +#endif + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + public int ProcessByte(byte input, Span<byte> output) + { + Span<byte> singleByte = stackalloc byte[1]{ input }; + + return ProcessBytes(singleByte, output); + } +#endif + + public int ProcessBytes(byte[] inBytes, int inOff, int len, byte[] outBytes, int outOff) + { + Check.DataLength(inBytes, inOff, len, "input buffer too short"); + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + return ProcessBytes(inBytes.AsSpan(inOff, len), Spans.FromNullable(outBytes, outOff)); +#else + bool forEncryption = CheckData(); + + int resultLength = 0; + + if (forEncryption) + { + if (m_bufPos > 0) + { + int available = RATE_BYTES - m_bufPos; + if (len <= available) + { + Array.Copy(inBytes, inOff, m_buf, m_bufPos, len); + m_bufPos += len; + return 0; + } + + Array.Copy(inBytes, inOff, m_buf, m_bufPos, available); + inOff += available; + len -= available; + + ProcessBufferEncrypt(m_buf, 0, outBytes, outOff); + resultLength = RATE_BYTES; + //m_bufPos = 0; + } + + while (len > RATE_BYTES) + { + ProcessBufferEncrypt(inBytes, inOff, outBytes, outOff + resultLength); + inOff += RATE_BYTES; + len -= RATE_BYTES; + resultLength += RATE_BYTES; + } + } + else + { + int available = m_bufferSizeDecrypt - m_bufPos; + if (len <= available) + { + Array.Copy(inBytes, inOff, m_buf, m_bufPos, len); + m_bufPos += len; + 
return 0; + } + + if (m_bufPos > RATE_BYTES) + { + ProcessBufferDecrypt(m_buf, 0, outBytes, outOff); + m_bufPos -= RATE_BYTES; + Array.Copy(m_buf, RATE_BYTES, m_buf, 0, m_bufPos); + resultLength = RATE_BYTES; + + available += RATE_BYTES; + if (len <= available) + { + Array.Copy(inBytes, inOff, m_buf, m_bufPos, len); + m_bufPos += len; + return resultLength; + } + } + + available = RATE_BYTES - m_bufPos; + Array.Copy(inBytes, inOff, m_buf, m_bufPos, available); + inOff += available; + len -= available; + ProcessBufferDecrypt(m_buf, 0, outBytes, outOff + resultLength); + resultLength += RATE_BYTES; + //m_bufPos = 0; + + while (len > m_bufferSizeDecrypt) + { + ProcessBufferDecrypt(inBytes, inOff, outBytes, outOff + resultLength); + inOff += RATE_BYTES; + len -= RATE_BYTES; + resultLength += RATE_BYTES; + } + } + + Array.Copy(inBytes, inOff, m_buf, 0, len); + m_bufPos = len; + + return resultLength; +#endif + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + public int ProcessBytes(ReadOnlySpan<byte> input, Span<byte> output) + { + bool forEncryption = CheckData(); + + int resultLength = 0; + + if (forEncryption) + { + if (m_bufPos > 0) + { + int available = RATE_BYTES - m_bufPos; + if (input.Length <= available) + { + input.CopyTo(m_buf.AsSpan(m_bufPos)); + m_bufPos += input.Length; + return 0; + } + + input[..available].CopyTo(m_buf.AsSpan(m_bufPos)); + input = input[available..]; + + ProcessBufferEncrypt(m_buf, output); + resultLength = RATE_BYTES; + //m_bufPos = 0; + } + + while (input.Length > RATE_BYTES) + { + ProcessBufferEncrypt(input, output[resultLength..]); + input = input[RATE_BYTES..]; + resultLength += RATE_BYTES; + } + } + else + { + int available = m_bufferSizeDecrypt - m_bufPos; + if (input.Length <= available) + { + input.CopyTo(m_buf.AsSpan(m_bufPos)); + m_bufPos += input.Length; + return 0; + } + + if (m_bufPos > RATE_BYTES) + { + ProcessBufferDecrypt(m_buf, output); + m_bufPos -= RATE_BYTES; + m_buf.AsSpan(0, 
m_bufPos).CopyFrom(m_buf.AsSpan(RATE_BYTES)); + resultLength = RATE_BYTES; + + available += RATE_BYTES; + if (input.Length <= available) + { + input.CopyTo(m_buf.AsSpan(m_bufPos)); + m_bufPos += input.Length; + return resultLength; + } + } + + available = RATE_BYTES - m_bufPos; + input[..available].CopyTo(m_buf.AsSpan(m_bufPos)); + input = input[available..]; + ProcessBufferDecrypt(m_buf, output[resultLength..]); + resultLength += RATE_BYTES; + //m_bufPos = 0; + + while (input.Length > m_bufferSizeDecrypt) + { + ProcessBufferDecrypt(input, output[resultLength..]); + input = input[RATE_BYTES..]; + resultLength += RATE_BYTES; + } + } + + input.CopyTo(m_buf); + m_bufPos = input.Length; + + return resultLength; + } +#endif + + public int DoFinal(byte[] outBytes, int outOff) + { + bool forEncryption = CheckData(); + + int resultLength; + if (forEncryption) + { + resultLength = m_bufPos + TAG_BYTES; + Check.OutputLength(outBytes, outOff, resultLength, "output buffer too short"); + + } + else + { + if (m_bufPos < TAG_BYTES) + throw new InvalidCipherTextException("data too short"); + + m_bufPos -= TAG_BYTES; + + resultLength = m_bufPos; + Check.OutputLength(outBytes, outOff, resultLength, "output buffer too short"); + + } + + if (encrypted || m_bufPos > 0) + { + // Encryption of Last Block + // addition of constant M2 or M3 to the state + state[STATE_WORDS - 1] ^= (m_bufPos < RATE_BYTES) ? _M2 : _M3; + // combined Rho and rate-whitening (incl. padding) + // Rho and rate-whitening for the encryption of the last plaintext block. Since + // this last block may require padding, it is always copied to a buffer. 
+ uint[] buffer = new uint[RATE_WORDS]; + for (int i = 0; i < m_bufPos; ++i) + { + buffer[i >> 2] |= (uint)m_buf[i] << ((i & 3) << 3); + } + if (m_bufPos < RATE_BYTES) + { + if (!forEncryption) + { + int tmp = (m_bufPos & 3) << 3; + buffer[m_bufPos >> 2] |= (state[m_bufPos >> 2] >> tmp) << tmp; + tmp = (m_bufPos >> 2) + 1; + Array.Copy(state, tmp, buffer, tmp, RATE_WORDS - tmp); + } + buffer[m_bufPos >> 2] ^= 0x80U << ((m_bufPos & 3) << 3); + } + for (int i = 0; i < RATE_WORDS / 2; ++i) + { + int j = i + RATE_WORDS / 2; + + uint s_i = state[i]; + uint s_j = state[j]; + if (forEncryption) + { + state[i] = s_j ^ buffer[i] ^ state[RATE_WORDS + i]; + state[j] = s_i ^ s_j ^ buffer[j] ^ state[RATE_WORDS + (j & CAP_MASK)]; + } + else + { + state[i] = s_i ^ s_j ^ buffer[i] ^ state[RATE_WORDS + i]; + state[j] = s_i ^ buffer[j] ^ state[RATE_WORDS + (j & CAP_MASK)]; + } + buffer[i] ^= s_i; + buffer[j] ^= s_j; + } + for (int i = 0; i < m_bufPos; ++i) + { + outBytes[outOff++] = (byte)(buffer[i >> 2] >> ((i & 3) << 3)); + } + + SparkleOpt(state, SPARKLE_STEPS_BIG); + } + // add key to the capacity-part of the state + for (int i = 0; i < KEY_WORDS; i++) + { + state[RATE_WORDS + i] ^= k[i]; + } + tag = new byte[TAG_BYTES]; + Pack.UInt32_To_LE(state, RATE_WORDS, TAG_WORDS, tag, 0); + if (forEncryption) + { + Array.Copy(tag, 0, outBytes, outOff, TAG_BYTES); + } + else + { + if (!Arrays.FixedTimeEquals(TAG_BYTES, tag, 0, m_buf, m_bufPos)) + throw new InvalidCipherTextException("mac check in " + AlgorithmName + " failed"); + } + Reset(!forEncryption); + return resultLength; + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + public int DoFinal(Span<byte> output) + { + byte[] rv = new byte[GetOutputSize(0)]; + int len = DoFinal(rv, 0); + rv.AsSpan(0, len).CopyTo(output); + return len; + } +#endif + + public byte[] GetMac() + { + return tag; + } + + public int GetUpdateOutputSize(int len) + { + // The -1 is to account for the lazy processing of a full buffer + int total = 
System.Math.Max(0, len - 1); + + switch (m_state) + { + case State.DecInit: + case State.DecAad: + total = System.Math.Max(0, total - TAG_BYTES); + break; + case State.DecData: + case State.DecFinal: + total = System.Math.Max(0, total + m_bufPos - TAG_BYTES); + break; + case State.EncData: + case State.EncFinal: + total = System.Math.Max(0, total + m_bufPos); + break; + default: + break; + } + + return total - total % RATE_BYTES; + } + + public int GetOutputSize(int len) + { + int total = System.Math.Max(0, len); + + switch (m_state) + { + case State.DecInit: + case State.DecAad: + return System.Math.Max(0, total - TAG_BYTES); + case State.DecData: + case State.DecFinal: + return System.Math.Max(0, total + m_bufPos - TAG_BYTES); + case State.EncData: + case State.EncFinal: + return total + m_bufPos + TAG_BYTES; + default: + return total + TAG_BYTES; + } + } + + public void Reset() + { + Reset(true); + } + + private void CheckAad() + { + switch (m_state) + { + case State.DecInit: + m_state = State.DecAad; + break; + case State.EncInit: + m_state = State.EncAad; + break; + case State.DecAad: + case State.EncAad: + break; + case State.EncFinal: + throw new InvalidOperationException(AlgorithmName + " cannot be reused for encryption"); + default: + throw new InvalidOperationException(AlgorithmName + " needs to be initialized"); + } + } + + private bool CheckData() + { + switch (m_state) + { + case State.DecInit: + case State.DecAad: + FinishAad(State.DecData); + return false; + case State.EncInit: + case State.EncAad: + FinishAad(State.EncData); + return true; + case State.DecData: + return false; + case State.EncData: + return true; + case State.EncFinal: + throw new InvalidOperationException(AlgorithmName + " cannot be reused for encryption"); + default: + throw new InvalidOperationException(AlgorithmName + " needs to be initialized"); + } + } + + private void FinishAad(State nextState) + { + // State indicates whether we ever received AAD + switch (m_state) + { + 
case State.DecAad: + case State.EncAad: + { + ProcessFinalAad(); + break; + } + } + + m_bufPos = 0; + m_state = nextState; + } + + private void FinishData(State nextState) + { + // TODO + //switch (asconParameters) + //{ + //case AsconParameters.ascon128: + // x1 ^= K1; + // x2 ^= K2; + // break; + //case AsconParameters.ascon128a: + // x2 ^= K1; + // x3 ^= K2; + // break; + //case AsconParameters.ascon80pq: + // x1 ^= (K0 << 32 | K1 >> 32); + // x2 ^= (K1 << 32 | K2 >> 32); + // x3 ^= K2 << 32; + // break; + //default: + // throw new InvalidOperationException(); + //} + //P(12); + //x3 ^= K1; + //x4 ^= K2; + + m_state = nextState; + } + +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + private void ProcessBufferAad(ReadOnlySpan<byte> buffer) + { + for (int i = 0; i < RATE_WORDS / 2; ++i) + { + int j = i + (RATE_WORDS >> 1); + + uint si = state[i]; + uint sj = state[j]; + + uint d_i = Pack.LE_To_UInt32(buffer, i << 2); + uint d_j = Pack.LE_To_UInt32(buffer, j << 2); + + state[i] = sj ^ d_i ^ state[RATE_WORDS + i]; + state[j] = si ^ sj ^ d_j ^ state[RATE_WORDS + (j & CAP_MASK)]; + } + + SparkleOpt(state, SPARKLE_STEPS_SLIM); + } + + private void ProcessBufferDecrypt(ReadOnlySpan<byte> buffer, Span<byte> output) + { + Debug.Assert(buffer.Length >= RATE_BYTES); + + Check.OutputLength(output, RATE_BYTES, "output buffer too short"); + + for (int i = 0; i < RATE_WORDS / 2; ++i) + { + int j = i + (RATE_WORDS / 2); + + uint s_i = state[i]; + uint s_j = state[j]; + + uint d_i = Pack.LE_To_UInt32(buffer, i * 4); + uint d_j = Pack.LE_To_UInt32(buffer, j * 4); + + state[i] = s_i ^ s_j ^ d_i ^ state[RATE_WORDS + i]; + state[j] = s_i ^ d_j ^ state[RATE_WORDS + (j & CAP_MASK)]; + + Pack.UInt32_To_LE(d_i ^ s_i, output, i * 4); + Pack.UInt32_To_LE(d_j ^ s_j, output, j * 4); + } + + SparkleOpt(state, SPARKLE_STEPS_SLIM); + + encrypted = true; + } + + private void ProcessBufferEncrypt(ReadOnlySpan<byte> buffer, Span<byte> output) + { + Debug.Assert(buffer.Length >= 
RATE_BYTES); + + Check.OutputLength(output, RATE_BYTES, "output buffer too short"); + + for (int i = 0; i < RATE_WORDS / 2; ++i) + { + int j = i + (RATE_WORDS / 2); + + uint s_i = state[i]; + uint s_j = state[j]; + + uint d_i = Pack.LE_To_UInt32(buffer, i * 4); + uint d_j = Pack.LE_To_UInt32(buffer, j * 4); + + state[i] = s_j ^ d_i ^ state[RATE_WORDS + i]; + state[j] = s_i ^ s_j ^ d_j ^ state[RATE_WORDS + (j & CAP_MASK)]; + + Pack.UInt32_To_LE(d_i ^ s_i, output, i * 4); + Pack.UInt32_To_LE(d_j ^ s_j, output, j * 4); + } + + SparkleOpt(state, SPARKLE_STEPS_SLIM); + + encrypted = true; + } +#else + private void ProcessBufferAad(byte[] buffer, int bufOff) + { + for (int i = 0; i < RATE_WORDS / 2; ++i) + { + int j = i + (RATE_WORDS / 2); + + uint s_i = state[i]; + uint s_j = state[j]; + + uint d_i = Pack.LE_To_UInt32(buffer, bufOff + (i * 4)); + uint d_j = Pack.LE_To_UInt32(buffer, bufOff + (j * 4)); + + state[i] = s_j ^ d_i ^ state[RATE_WORDS + i]; + state[j] = s_i ^ s_j ^ d_j ^ state[RATE_WORDS + (j & CAP_MASK)]; + } + + SparkleOpt(state, SPARKLE_STEPS_SLIM); + } + + private void ProcessBufferDecrypt(byte[] buffer, int bufOff, byte[] output, int outOff) + { + Debug.Assert(bufOff <= buffer.Length - RATE_BYTES); + + Check.OutputLength(output, outOff, RATE_BYTES, "output buffer too short"); + + for (int i = 0; i < RATE_WORDS / 2; ++i) + { + int j = i + (RATE_WORDS / 2); + + uint s_i = state[i]; + uint s_j = state[j]; + + uint d_i = Pack.LE_To_UInt32(buffer, bufOff + (i * 4)); + uint d_j = Pack.LE_To_UInt32(buffer, bufOff + (j * 4)); + + state[i] = s_i ^ s_j ^ d_i ^ state[RATE_WORDS + i]; + state[j] = s_i ^ d_j ^ state[RATE_WORDS + (j & CAP_MASK)]; + + Pack.UInt32_To_LE(d_i ^ s_i, output, outOff + (i * 4)); + Pack.UInt32_To_LE(d_j ^ s_j, output, outOff + (j * 4)); + } + + SparkleOpt(state, SPARKLE_STEPS_SLIM); + + encrypted = true; + } + + private void ProcessBufferEncrypt(byte[] buffer, int bufOff, byte[] output, int outOff) + { + Debug.Assert(bufOff <= buffer.Length - 
RATE_BYTES); + + Check.OutputLength(output, outOff, RATE_BYTES, "output buffer too short"); + + for (int i = 0; i < RATE_WORDS / 2; ++i) + { + int j = i + (RATE_WORDS / 2); + + uint s_i = state[i]; + uint s_j = state[j]; + + uint d_i = Pack.LE_To_UInt32(buffer, bufOff + (i * 4)); + uint d_j = Pack.LE_To_UInt32(buffer, bufOff + (j * 4)); + + state[i] = s_j ^ d_i ^ state[RATE_WORDS + i]; + state[j] = s_i ^ s_j ^ d_j ^ state[RATE_WORDS + (j & CAP_MASK)]; + + Pack.UInt32_To_LE(d_i ^ s_i, output, outOff + (i * 4)); + Pack.UInt32_To_LE(d_j ^ s_j, output, outOff + (j * 4)); + } + + SparkleOpt(state, SPARKLE_STEPS_SLIM); + + encrypted = true; + } +#endif + + private void ProcessFinalAad() + { + // Authentication of Last Block + + // addition of constant A0 or A1 to the state + state[STATE_WORDS - 1] ^= (m_bufPos < RATE_BYTES) ? _A0 : _A1; + + // Rho and rate-whitening for the authentication of the last associated-data block. + uint[] buffer = new uint[RATE_WORDS]; + for (int i = 0; i < m_bufPos; ++i) + { + buffer[i >> 2] |= (uint)m_buf[i] << ((i & 3) << 3); + } + if (m_bufPos < RATE_BYTES) + { // padding + buffer[m_bufPos >> 2] |= 0x80U << ((m_bufPos & 3) << 3); + } + for (int i = 0, j = RATE_WORDS / 2; i < RATE_WORDS / 2; i++, j++) + { + uint tmp = state[i]; + state[i] = state[j] ^ buffer[i] ^ state[RATE_WORDS + i]; + state[j] ^= tmp ^ buffer[j] ^ state[RATE_WORDS + (j & CAP_MASK)]; + } + + SparkleOpt(state, SPARKLE_STEPS_BIG); + } + + private void Reset(bool clearMac) + { + if (clearMac) + { + tag = null; + } + + Arrays.Clear(m_buf); + m_bufPos = 0; + encrypted = false; + + switch (m_state) + { + case State.DecInit: + case State.EncInit: + break; + case State.DecAad: + case State.DecData: + case State.DecFinal: + m_state = State.DecInit; + break; + case State.EncAad: + case State.EncData: + case State.EncFinal: + m_state = State.EncFinal; + return; + default: + throw new InvalidOperationException(AlgorithmName + " needs to be initialized"); + } + + // The Initialize function 
loads nonce and key into the state and executes the + // SPARKLE permutation with the big number of steps. + // load nonce into the rate-part of the state + Array.Copy(npub, 0, state, 0, RATE_WORDS); + // load key into the capacity-part of the state + Array.Copy(k, 0, state, RATE_WORDS, KEY_WORDS); + + SparkleOpt(state, SPARKLE_STEPS_BIG); + + if (initialAssociatedText != null) + { +#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER + ProcessAadBytes(initialAssociatedText); +#else + ProcessAadBytes(initialAssociatedText, 0, initialAssociatedText.Length); +#endif + } + } + +#if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER + [MethodImpl(MethodImplOptions.AggressiveInlining)] +#endif + private static void ArxBox(uint rc, ref uint s00, ref uint s01) + { + s00 += Integers.RotateRight(s01, 31); + s01 ^= Integers.RotateRight(s00, 24); + s00 ^= rc; + s00 += Integers.RotateRight(s01, 17); + s01 ^= Integers.RotateRight(s00, 17); + s00 ^= rc; + s00 += s01; + s01 ^= Integers.RotateRight(s00, 31); + s00 ^= rc; + s00 += Integers.RotateRight(s01, 24); + s01 ^= Integers.RotateRight(s00, 16); + s00 ^= rc; + } + +#if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER + [MethodImpl(MethodImplOptions.AggressiveInlining)] +#endif + private static uint ELL(uint x) + { + return Integers.RotateRight(x, 16) ^ (x & 0xFFFFU); + } + +#if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER + [MethodImpl(MethodImplOptions.AggressiveInlining)] +#endif + private static void SparkleOpt(uint[] state, int steps) + { + switch (state.Length) + { + case 8: SparkleOpt8 (state, steps); break; + case 12: SparkleOpt12(state, steps); break; + case 16: SparkleOpt16(state, steps); break; + default: throw new InvalidOperationException(); + } + } + + internal static void SparkleOpt8(uint[] state, int steps) + { + uint s00 = state[0]; + uint s01 = state[1]; + uint s02 = state[2]; + uint s03 = state[3]; + uint s04 = state[4]; + uint s05 = state[5]; + uint s06 = state[6]; + uint s07 = state[7]; 
+ + for (int step = 0; step < steps; ++step) + { + // Add round constant + + s01 ^= RCON[step & 7]; + s03 ^= (uint)step; + + // ARXBOX layer + + ArxBox(RCON[0], ref s00, ref s01); + ArxBox(RCON[1], ref s02, ref s03); + ArxBox(RCON[2], ref s04, ref s05); + ArxBox(RCON[3], ref s06, ref s07); + + // Linear layer + + uint t02 = ELL(s00 ^ s02); + uint t13 = ELL(s01 ^ s03); + + uint u00 = s00 ^ s04; + uint u01 = s01 ^ s05; + uint u02 = s02 ^ s06; + uint u03 = s03 ^ s07; + + s04 = s00; + s05 = s01; + s06 = s02; + s07 = s03; + + s00 = u02 ^ t13; + s01 = u03 ^ t02; + s02 = u00 ^ t13; + s03 = u01 ^ t02; + } + + state[0] = s00; + state[1] = s01; + state[2] = s02; + state[3] = s03; + state[4] = s04; + state[5] = s05; + state[6] = s06; + state[7] = s07; + } + + internal static void SparkleOpt12(uint[] state, int steps) + { + uint s00 = state[0]; + uint s01 = state[1]; + uint s02 = state[2]; + uint s03 = state[3]; + uint s04 = state[4]; + uint s05 = state[5]; + uint s06 = state[6]; + uint s07 = state[7]; + uint s08 = state[8]; + uint s09 = state[9]; + uint s10 = state[10]; + uint s11 = state[11]; + + for (int step = 0; step < steps; ++step) + { + // Add round constant + + s01 ^= RCON[step & 7]; + s03 ^= (uint)step; + + // ARXBOX layer + + ArxBox(RCON[0], ref s00, ref s01); + ArxBox(RCON[1], ref s02, ref s03); + ArxBox(RCON[2], ref s04, ref s05); + ArxBox(RCON[3], ref s06, ref s07); + ArxBox(RCON[4], ref s08, ref s09); + ArxBox(RCON[5], ref s10, ref s11); + + // Linear layer + + uint t024 = ELL(s00 ^ s02 ^ s04); + uint t135 = ELL(s01 ^ s03 ^ s05); + + uint u00 = s00 ^ s06; + uint u01 = s01 ^ s07; + uint u02 = s02 ^ s08; + uint u03 = s03 ^ s09; + uint u04 = s04 ^ s10; + uint u05 = s05 ^ s11; + + s06 = s00; + s07 = s01; + s08 = s02; + s09 = s03; + s10 = s04; + s11 = s05; + + s00 = u02 ^ t135; + s01 = u03 ^ t024; + s02 = u04 ^ t135; + s03 = u05 ^ t024; + s04 = u00 ^ t135; + s05 = u01 ^ t024; + } + + state[0] = s00; + state[1] = s01; + state[2] = s02; + state[3] = s03; + state[4] = s04; + 
state[5] = s05; + state[6] = s06; + state[7] = s07; + state[8] = s08; + state[9] = s09; + state[10] = s10; + state[11] = s11; + } + + internal static void SparkleOpt16(uint[] state, int steps) + { + Debug.Assert((steps & 1) == 0); + +#if NETCOREAPP3_0_OR_GREATER + if (Sse2.IsSupported) + { + var s0246 = Vector128.Create(state[0], state[2], state[4], state[6]); + var s1357 = Vector128.Create(state[1], state[3], state[5], state[7]); + var s8ACE = Vector128.Create(state[8], state[10], state[12], state[14]); + var s9BDF = Vector128.Create(state[9], state[11], state[13], state[15]); + + var RC03 = Load128(RCON.AsSpan(0)); + var RC47 = Load128(RCON.AsSpan(4)); + + for (int step = 0; step < steps; ++step) + { + // Add round constant + + s1357 = Sse2.Xor(s1357, Vector128.Create(RCON[step & 7], (uint)step, 0U, 0U)); + + // ARXBOX layer + + ArxBox(RC03, ref s0246, ref s1357); + ArxBox(RC47, ref s8ACE, ref s9BDF); + + // Linear layer + + var t0246 = ELL(HorizontalXor(s0246)); + var t1357 = ELL(HorizontalXor(s1357)); + + var u0246 = Sse2.Xor(s0246, s8ACE); + var u1357 = Sse2.Xor(s1357, s9BDF); + + s8ACE = s0246; + s9BDF = s1357; + + s0246 = Sse2.Xor(t1357, Sse2.Shuffle(u0246, 0x39)); + s1357 = Sse2.Xor(t0246, Sse2.Shuffle(u1357, 0x39)); + } + + Store128(Sse2.UnpackLow (s0246, s1357), state.AsSpan(0)); + Store128(Sse2.UnpackHigh(s0246, s1357), state.AsSpan(4)); + Store128(Sse2.UnpackLow (s8ACE, s9BDF), state.AsSpan(8)); + Store128(Sse2.UnpackHigh(s8ACE, s9BDF), state.AsSpan(12)); + } + else +#endif + { + uint s00 = state[0]; + uint s01 = state[1]; + uint s02 = state[2]; + uint s03 = state[3]; + uint s04 = state[4]; + uint s05 = state[5]; + uint s06 = state[6]; + uint s07 = state[7]; + uint s08 = state[8]; + uint s09 = state[9]; + uint s10 = state[10]; + uint s11 = state[11]; + uint s12 = state[12]; + uint s13 = state[13]; + uint s14 = state[14]; + uint s15 = state[15]; + + int step = 0; + while (step < steps) + { + // STEP 1 + + // Add round constant + + s01 ^= RCON[step & 7]; + s03 ^= 
(uint)(step++); + + // ARXBOX layer + + ArxBox(RCON[0], ref s00, ref s01); + ArxBox(RCON[1], ref s02, ref s03); + ArxBox(RCON[2], ref s04, ref s05); + ArxBox(RCON[3], ref s06, ref s07); + ArxBox(RCON[4], ref s08, ref s09); + ArxBox(RCON[5], ref s10, ref s11); + ArxBox(RCON[6], ref s12, ref s13); + ArxBox(RCON[7], ref s14, ref s15); + + // Linear layer + + uint t0246 = ELL(s00 ^ s02 ^ s04 ^ s06); + uint t1357 = ELL(s01 ^ s03 ^ s05 ^ s07); + + uint u08 = s08; + uint u09 = s09; + + s08 = s02 ^ s10 ^ t1357; + s09 = s03 ^ s11 ^ t0246; + s10 = s04 ^ s12 ^ t1357; + s11 = s05 ^ s13 ^ t0246; + s12 = s06 ^ s14 ^ t1357; + s13 = s07 ^ s15 ^ t0246; + s14 = s00 ^ u08 ^ t1357; + s15 = s01 ^ u09 ^ t0246; + + // STEP 2 + + // Add round constant + + s09 ^= RCON[step & 7]; + s11 ^= (uint)(step++); + + // ARXBOX layer + + ArxBox(RCON[0], ref s08, ref s09); + ArxBox(RCON[1], ref s10, ref s11); + ArxBox(RCON[2], ref s12, ref s13); + ArxBox(RCON[3], ref s14, ref s15); + ArxBox(RCON[4], ref s00, ref s01); + ArxBox(RCON[5], ref s02, ref s03); + ArxBox(RCON[6], ref s04, ref s05); + ArxBox(RCON[7], ref s06, ref s07); + + // Linear layer + + uint t8ACE = ELL(s08 ^ s10 ^ s12 ^ s14); + uint t9BDF = ELL(s09 ^ s11 ^ s13 ^ s15); + + uint u00 = s00; + uint u01 = s01; + + s00 = s02 ^ s10 ^ t9BDF; + s01 = s03 ^ s11 ^ t8ACE; + s02 = s04 ^ s12 ^ t9BDF; + s03 = s05 ^ s13 ^ t8ACE; + s04 = s06 ^ s14 ^ t9BDF; + s05 = s07 ^ s15 ^ t8ACE; + s06 = u00 ^ s08 ^ t9BDF; + s07 = u01 ^ s09 ^ t8ACE; + } + + state[0] = s00; + state[1] = s01; + state[2] = s02; + state[3] = s03; + state[4] = s04; + state[5] = s05; + state[6] = s06; + state[7] = s07; + state[8] = s08; + state[9] = s09; + state[10] = s10; + state[11] = s11; + state[12] = s12; + state[13] = s13; + state[14] = s14; + state[15] = s15; + } + } + +#if NETCOREAPP3_0_OR_GREATER + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void ArxBox(Vector128<uint> rc, ref Vector128<uint> s00, ref Vector128<uint> s01) + { + s00 = Sse2.Add(s00, 
Sse2.ShiftRightLogical(s01, 31)); + s00 = Sse2.Add(s00, Sse2.ShiftLeftLogical(s01, 1)); + + s01 = Sse2.Xor(s01, Sse2.ShiftRightLogical(s00, 24)); + s01 = Sse2.Xor(s01, Sse2.ShiftLeftLogical(s00, 8)); + + s00 = Sse2.Xor(s00, rc); + + s00 = Sse2.Add(s00, Sse2.ShiftRightLogical(s01, 17)); + s00 = Sse2.Add(s00, Sse2.ShiftLeftLogical(s01, 15)); + + s01 = Sse2.Xor(s01, Sse2.ShiftRightLogical(s00, 17)); + s01 = Sse2.Xor(s01, Sse2.ShiftLeftLogical(s00, 15)); + + s00 = Sse2.Xor(s00, rc); + + s00 = Sse2.Add(s00, s01); + + s01 = Sse2.Xor(s01, Sse2.ShiftRightLogical(s00, 31)); + s01 = Sse2.Xor(s01, Sse2.ShiftLeftLogical(s00, 1)); + + s00 = Sse2.Xor(s00, rc); + + s00 = Sse2.Add(s00, Sse2.ShiftRightLogical(s01, 24)); + s00 = Sse2.Add(s00, Sse2.ShiftLeftLogical(s01, 8)); + + s01 = Sse2.Xor(s01, Sse2.ShiftRightLogical(s00, 16)); + s01 = Sse2.Xor(s01, Sse2.ShiftLeftLogical(s00, 16)); + + s00 = Sse2.Xor(s00, rc); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector128<uint> ELL(Vector128<uint> x) + { + var t = Sse2.ShiftLeftLogical(x, 16); + var u = Sse2.Xor(x, t); + return Sse2.Xor(t, Sse2.ShiftRightLogical(u, 16)); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector128<uint> HorizontalXor(Vector128<uint> x) + { + var t = Sse2.Xor(x, Sse2.Shuffle(x, 0x1B)); + return Sse2.Xor(t, Sse2.Shuffle(t, 0xB1)); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static Vector128<uint> Load128(ReadOnlySpan<uint> t) + { + if (BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<uint>>() == 16) + return MemoryMarshal.Read<Vector128<uint>>(MemoryMarshal.AsBytes(t)); + + return Vector128.Create(t[0], t[1], t[2], t[3]); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static void Store128(Vector128<uint> s, Span<uint> t) + { + var b = MemoryMarshal.AsBytes(t); + if (BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<uint>>() == 16) + { + MemoryMarshal.Write(b, ref s); + return; + } + + var u = 
s.AsUInt64(); + BinaryPrimitives.WriteUInt64LittleEndian(b[..8], u.GetElement(0)); + BinaryPrimitives.WriteUInt64LittleEndian(b[8..], u.GetElement(1)); + } +#endif + } +} |