diff options
author | Peter Dettman <peter.dettman@bouncycastle.org> | 2022-07-24 21:51:13 +0700 |
---|---|---|
committer | Peter Dettman <peter.dettman@bouncycastle.org> | 2022-07-24 21:51:13 +0700 |
commit | f802d60b4077d8c190995ea88885c46faa813d1a (patch) | |
tree | fa2bef08a7c5f98cbfc1e753838a4b3c8a1753d2 | |
parent | moved picnic GetParameters to Parameters (diff) | |
download | BouncyCastle.NET-ed25519-f802d60b4077d8c190995ea88885c46faa813d1a.tar.xz |
GCM perf. opts.
-rw-r--r-- | crypto/src/crypto/modes/GCMBlockCipher.cs | 432 | ||||
-rw-r--r-- | crypto/src/crypto/modes/gcm/GcmUtilities.cs | 358 | ||||
-rw-r--r-- | crypto/src/crypto/modes/gcm/Tables4kGcmMultiplier.cs | 3 | ||||
-rw-r--r-- | crypto/src/crypto/modes/gcm/Tables64kGcmMultiplier.cs | 3 | ||||
-rw-r--r-- | crypto/src/crypto/modes/gcm/Tables8kGcmMultiplier.cs | 3 |
5 files changed, 423 insertions, 376 deletions
diff --git a/crypto/src/crypto/modes/GCMBlockCipher.cs b/crypto/src/crypto/modes/GCMBlockCipher.cs index 9250097cd..ac54e9762 100644 --- a/crypto/src/crypto/modes/GCMBlockCipher.cs +++ b/crypto/src/crypto/modes/GCMBlockCipher.cs @@ -1,4 +1,9 @@ using System; +#if NETCOREAPP3_0_OR_GREATER +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +#endif using Org.BouncyCastle.Crypto.Modes.Gcm; using Org.BouncyCastle.Crypto.Parameters; @@ -48,6 +53,7 @@ namespace Org.BouncyCastle.Crypto.Modes private byte[] macBlock; private byte[] S, S_at, S_atPre; private byte[] counter; + private uint counter32; private uint blocksRemaining; private int bufOff; private ulong totalLength; @@ -210,6 +216,7 @@ namespace Org.BouncyCastle.Crypto.Modes this.atLength = 0; this.atLengthPre = 0; this.counter = Arrays.Clone(J0); + this.counter32 = Pack.BE_To_UInt32(counter, 12); this.blocksRemaining = uint.MaxValue - 1; // page 8, len(P) <= 2^39 - 256, 1 block used by tag this.bufOff = 0; this.totalLength = 0; @@ -273,17 +280,35 @@ namespace Org.BouncyCastle.Crypto.Modes { CheckStatus(); - for (int i = 0; i < len; ++i) + if (atBlockPos > 0) { - atBlock[atBlockPos] = inBytes[inOff + i]; - if (++atBlockPos == BlockSize) + int available = BlockSize - atBlockPos; + if (len < available) { - // Hash each block as it fills - gHASHBlock(S_at, atBlock); - atBlockPos = 0; - atLength += BlockSize; + Array.Copy(inBytes, inOff, atBlock, atBlockPos, len); + atBlockPos += len; + return; } + + Array.Copy(inBytes, inOff, atBlock, atBlockPos, available); + gHASHBlock(S_at, atBlock); + atLength += BlockSize; + inOff += available; + len -= available; + //atBlockPos = 0; + } + + int inLimit = inOff + len - BlockSize; + + while (inOff <= inLimit) + { + gHASHBlock(S_at, inBytes, inOff); + atLength += BlockSize; + inOff += BlockSize; } + + atBlockPos = BlockSize + inLimit - inOff; + Array.Copy(inBytes, inOff, atBlock, 0, atBlockPos); } private void InitCipher() @@ -307,23 +332,21 @@ namespace Org.BouncyCastle.Crypto.Modes } } - public virtual int ProcessByte( - byte input, - byte[] output, - int outOff) + public virtual int ProcessByte(byte input, byte[] output, int outOff) { CheckStatus(); bufBlock[bufOff] = input; if (++bufOff == bufBlock.Length) { - ProcessBlock(bufBlock, 0, output, outOff); if (forEncryption) { + EncryptBlock(bufBlock, 0, output, outOff); bufOff = 0; } else { + DecryptBlock(bufBlock, 0, output, outOff); Array.Copy(bufBlock, BlockSize, bufBlock, 0, macSize); bufOff = macSize; } @@ -332,12 +355,7 @@ namespace Org.BouncyCastle.Crypto.Modes return 0; } - public virtual int ProcessBytes( - byte[] input, - int inOff, - int len, - byte[] output, - int outOff) + public virtual int ProcessBytes(byte[] input, int inOff, int len, byte[] output, int outOff) { CheckStatus(); @@ -347,49 +365,95 @@ namespace Org.BouncyCastle.Crypto.Modes if (forEncryption) { - if (bufOff != 0) + if (bufOff > 0) { - while (len > 0) + int available = BlockSize - bufOff; + if (len < available) { - --len; - bufBlock[bufOff] = input[inOff++]; - if (++bufOff == BlockSize) - { - ProcessBlock(bufBlock, 0, output, outOff); - bufOff = 0; - resultLen += BlockSize; - break; - } + Array.Copy(input, inOff, bufBlock, bufOff, len); + bufOff += len; + return 0; } + + Array.Copy(input, inOff, bufBlock, bufOff, available); + EncryptBlock(bufBlock, 0, output, outOff); + inOff += available; + len -= available; + resultLen = BlockSize; + //bufOff = 0; } - while (len >= BlockSize) + int inLimit1 = inOff + len - BlockSize; + int inLimit2 = inLimit1 - BlockSize; + + while (inOff <= inLimit2) { - ProcessBlock(input, inOff, output, outOff + resultLen); - inOff += BlockSize; - len -= BlockSize; - resultLen += BlockSize; + EncryptBlocks2(input, inOff, output, outOff + resultLen); + inOff += BlockSize * 2; + resultLen += BlockSize * 2; } - if (len > 0) + if (inOff <= inLimit1) { - Array.Copy(input, inOff, bufBlock, 0, len); - bufOff = len; + EncryptBlock(input, inOff, output, outOff + resultLen); + inOff += BlockSize; + resultLen += BlockSize; } + + bufOff = BlockSize + inLimit1 - inOff; + Array.Copy(input, inOff, bufBlock, 0, bufOff); } else { - for (int i = 0; i < len; ++i) + int available = bufBlock.Length - bufOff; + if (len < available) + { + Array.Copy(input, inOff, bufBlock, bufOff, len); + bufOff += len; + return 0; + } + + if (bufOff >= BlockSize) { - bufBlock[bufOff] = input[inOff + i]; - if (++bufOff == bufBlock.Length) + DecryptBlock(bufBlock, 0, output, outOff); + Array.Copy(bufBlock, BlockSize, bufBlock, 0, bufOff -= BlockSize); + resultLen = BlockSize; + + available += BlockSize; + if (len < available) { - ProcessBlock(bufBlock, 0, output, outOff + resultLen); - Array.Copy(bufBlock, BlockSize, bufBlock, 0, macSize); - bufOff = macSize; - resultLen += BlockSize; + Array.Copy(input, inOff, bufBlock, bufOff, len); + bufOff += len; + return resultLen; } } + + int inLimit1 = inOff + len - bufBlock.Length; + int inLimit2 = inLimit1 - BlockSize; + + available = BlockSize - bufOff; + Array.Copy(input, inOff, bufBlock, bufOff, available); + DecryptBlock(bufBlock, 0, output, outOff + resultLen); + inOff += available; + resultLen += BlockSize; + //bufOff = 0; + + while (inOff <= inLimit2) + { + DecryptBlocks2(input, inOff, output, outOff + resultLen); + inOff += BlockSize * 2; + resultLen += BlockSize * 2; + } + + if (inOff <= inLimit1) + { + DecryptBlock(input, inOff, output, outOff + resultLen); + inOff += BlockSize; + resultLen += BlockSize; + } + + bufOff = bufBlock.Length + inLimit1 - inOff; + Array.Copy(input, inOff, bufBlock, 0, bufOff); } return resultLen; @@ -525,6 +589,7 @@ namespace Org.BouncyCastle.Crypto.Modes atLength = 0; atLengthPre = 0; counter = Arrays.Clone(J0); + counter32 = Pack.BE_To_UInt32(counter, 12); blocksRemaining = uint.MaxValue - 1; bufOff = 0; totalLength = 0; @@ -552,9 +617,9 @@ namespace Org.BouncyCastle.Crypto.Modes } } - private void ProcessBlock(byte[] buf, int bufOff, byte[] output, int outOff) + private void DecryptBlock(byte[] inBuf, int inOff, byte[] outBuf, int outOff) { - Check.OutputLength(output, outOff, BlockSize, "Output buffer too short"); + Check.OutputLength(outBuf, outOff, BlockSize, "Output buffer too short"); if (totalLength == 0) { @@ -562,23 +627,280 @@ namespace Org.BouncyCastle.Crypto.Modes } byte[] ctrBlock = new byte[BlockSize]; + GetNextCtrBlock(ctrBlock); +#if NETCOREAPP3_0_OR_GREATER + if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == BlockSize) + { + var t0 = Unsafe.ReadUnaligned<Vector128<byte>>(ref inBuf[inOff]); + var t1 = Unsafe.ReadUnaligned<Vector128<byte>>(ref ctrBlock[0]); + var t2 = Unsafe.ReadUnaligned<Vector128<byte>>(ref S[0]); - if (forEncryption) + t1 = Sse2.Xor(t1, t0); + t2 = Sse2.Xor(t2, t0); + + Unsafe.WriteUnaligned(ref outBuf[outOff], t1); + Unsafe.WriteUnaligned(ref S[0], t2); + } + else +#endif + { + for (int i = 0; i < BlockSize; i += 4) + { + byte c0 = inBuf[inOff + i + 0]; + byte c1 = inBuf[inOff + i + 1]; + byte c2 = inBuf[inOff + i + 2]; + byte c3 = inBuf[inOff + i + 3]; + + S[i + 0] ^= c0; + S[i + 1] ^= c1; + S[i + 2] ^= c2; + S[i + 3] ^= c3; + + outBuf[outOff + i + 0] = (byte)(c0 ^ ctrBlock[i + 0]); + outBuf[outOff + i + 1] = (byte)(c1 ^ ctrBlock[i + 1]); + outBuf[outOff + i + 2] = (byte)(c2 ^ ctrBlock[i + 2]); + outBuf[outOff + i + 3] = (byte)(c3 ^ ctrBlock[i + 3]); + } + } + multiplier.MultiplyH(S); + + totalLength += BlockSize; + } + + private void DecryptBlocks2(byte[] inBuf, int inOff, byte[] outBuf, int outOff) + { + Check.OutputLength(outBuf, outOff, BlockSize * 2, "Output buffer too short"); + + if (totalLength == 0) { - GcmUtilities.Xor(ctrBlock, buf, bufOff); - gHASHBlock(S, ctrBlock); - Array.Copy(ctrBlock, 0, output, outOff, BlockSize); + InitCipher(); + } + + byte[] ctrBlock = new byte[BlockSize]; + + GetNextCtrBlock(ctrBlock); +#if NETCOREAPP3_0_OR_GREATER + if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == BlockSize) + { + var t0 = Unsafe.ReadUnaligned<Vector128<byte>>(ref inBuf[inOff]); + var t1 = Unsafe.ReadUnaligned<Vector128<byte>>(ref ctrBlock[0]); + var t2 = Unsafe.ReadUnaligned<Vector128<byte>>(ref S[0]); + + t1 = Sse2.Xor(t1, t0); + t2 = Sse2.Xor(t2, t0); + + Unsafe.WriteUnaligned(ref outBuf[outOff], t1); + Unsafe.WriteUnaligned(ref S[0], t2); } else +#endif { - gHASHBlock(S, buf, bufOff); - GcmUtilities.Xor(ctrBlock, 0, buf, bufOff, output, outOff); + for (int i = 0; i < BlockSize; i += 4) + { + byte c0 = inBuf[inOff + i + 0]; + byte c1 = inBuf[inOff + i + 1]; + byte c2 = inBuf[inOff + i + 2]; + byte c3 = inBuf[inOff + i + 3]; + + S[i + 0] ^= c0; + S[i + 1] ^= c1; + S[i + 2] ^= c2; + S[i + 3] ^= c3; + + outBuf[outOff + i + 0] = (byte)(c0 ^ ctrBlock[i + 0]); + outBuf[outOff + i + 1] = (byte)(c1 ^ ctrBlock[i + 1]); + outBuf[outOff + i + 2] = (byte)(c2 ^ ctrBlock[i + 2]); + outBuf[outOff + i + 3] = (byte)(c3 ^ ctrBlock[i + 3]); + } } + multiplier.MultiplyH(S); + + inOff += BlockSize; + outOff += BlockSize; + + GetNextCtrBlock(ctrBlock); +#if NETCOREAPP3_0_OR_GREATER + if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == BlockSize) + { + var t0 = Unsafe.ReadUnaligned<Vector128<byte>>(ref inBuf[inOff]); + var t1 = Unsafe.ReadUnaligned<Vector128<byte>>(ref ctrBlock[0]); + var t2 = Unsafe.ReadUnaligned<Vector128<byte>>(ref S[0]); + + t1 = Sse2.Xor(t1, t0); + t2 = Sse2.Xor(t2, t0); + + Unsafe.WriteUnaligned(ref outBuf[outOff], t1); + Unsafe.WriteUnaligned(ref S[0], t2); + } + else +#endif + { + for (int i = 0; i < BlockSize; i += 4) + { + byte c0 = inBuf[inOff + i + 0]; + byte c1 = inBuf[inOff + i + 1]; + byte c2 = inBuf[inOff + i + 2]; + byte c3 = inBuf[inOff + i + 3]; + + S[i + 0] ^= c0; + S[i + 1] ^= c1; + S[i + 2] ^= c2; + S[i + 3] ^= c3; + + outBuf[outOff + i + 0] = (byte)(c0 ^ ctrBlock[i + 0]); + outBuf[outOff + i + 1] = (byte)(c1 ^ ctrBlock[i + 1]); + outBuf[outOff + i + 2] = (byte)(c2 ^ ctrBlock[i + 2]); + outBuf[outOff + i + 3] = (byte)(c3 ^ ctrBlock[i + 3]); + } + } + multiplier.MultiplyH(S); + + totalLength += BlockSize * 2; + } + + private void EncryptBlock(byte[] inBuf, int inOff, byte[] outBuf, int outOff) + { + Check.OutputLength(outBuf, outOff, BlockSize, "Output buffer too short"); + + if (totalLength == 0) + { + InitCipher(); + } + + byte[] ctrBlock = new byte[BlockSize]; + + GetNextCtrBlock(ctrBlock); +#if NETCOREAPP3_0_OR_GREATER + if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == BlockSize) + { + var t0 = Unsafe.ReadUnaligned<Vector128<byte>>(ref inBuf[inOff]); + var t1 = Unsafe.ReadUnaligned<Vector128<byte>>(ref ctrBlock[0]); + var t2 = Unsafe.ReadUnaligned<Vector128<byte>>(ref S[0]); + + t1 = Sse2.Xor(t1, t0); + t2 = Sse2.Xor(t2, t1); + + Unsafe.WriteUnaligned(ref outBuf[outOff], t1); + Unsafe.WriteUnaligned(ref S[0], t2); + } + else +#endif + { + for (int i = 0; i < BlockSize; i += 4) + { + byte c0 = (byte)(ctrBlock[i + 0] ^ inBuf[inOff + i + 0]); + byte c1 = (byte)(ctrBlock[i + 1] ^ inBuf[inOff + i + 1]); + byte c2 = (byte)(ctrBlock[i + 2] ^ inBuf[inOff + i + 2]); + byte c3 = (byte)(ctrBlock[i + 3] ^ inBuf[inOff + i + 3]); + + S[i + 0] ^= c0; + S[i + 1] ^= c1; + S[i + 2] ^= c2; + S[i + 3] ^= c3; + + outBuf[outOff + i + 0] = c0; + outBuf[outOff + i + 1] = c1; + outBuf[outOff + i + 2] = c2; + outBuf[outOff + i + 3] = c3; + } + } + multiplier.MultiplyH(S); totalLength += BlockSize; } + private void EncryptBlocks2(byte[] inBuf, int inOff, byte[] outBuf, int outOff) + { + Check.OutputLength(outBuf, outOff, BlockSize * 2, "Output buffer too short"); + + if (totalLength == 0) + { + InitCipher(); + } + + byte[] ctrBlock = new byte[BlockSize]; + + GetNextCtrBlock(ctrBlock); +#if NETCOREAPP3_0_OR_GREATER + if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == BlockSize) + { + var t0 = Unsafe.ReadUnaligned<Vector128<byte>>(ref inBuf[inOff]); + var t1 = Unsafe.ReadUnaligned<Vector128<byte>>(ref ctrBlock[0]); + var t2 = Unsafe.ReadUnaligned<Vector128<byte>>(ref S[0]); + + t1 = Sse2.Xor(t1, t0); + t2 = Sse2.Xor(t2, t1); + + Unsafe.WriteUnaligned(ref outBuf[outOff], t1); + Unsafe.WriteUnaligned(ref S[0], t2); + } + else +#endif + { + for (int i = 0; i < BlockSize; i += 4) + { + byte c0 = (byte)(ctrBlock[i + 0] ^ inBuf[inOff + i + 0]); + byte c1 = (byte)(ctrBlock[i + 1] ^ inBuf[inOff + i + 1]); + byte c2 = (byte)(ctrBlock[i + 2] ^ inBuf[inOff + i + 2]); + byte c3 = (byte)(ctrBlock[i + 3] ^ inBuf[inOff + i + 3]); + + S[i + 0] ^= c0; + S[i + 1] ^= c1; + S[i + 2] ^= c2; + S[i + 3] ^= c3; + + outBuf[outOff + i + 0] = c0; + outBuf[outOff + i + 1] = c1; + outBuf[outOff + i + 2] = c2; + outBuf[outOff + i + 3] = c3; + } + } + multiplier.MultiplyH(S); + + inOff += BlockSize; + outOff += BlockSize; + + GetNextCtrBlock(ctrBlock); +#if NETCOREAPP3_0_OR_GREATER + if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == BlockSize) + { + var t0 = Unsafe.ReadUnaligned<Vector128<byte>>(ref inBuf[inOff]); + var t1 = Unsafe.ReadUnaligned<Vector128<byte>>(ref ctrBlock[0]); + var t2 = Unsafe.ReadUnaligned<Vector128<byte>>(ref S[0]); + + t1 = Sse2.Xor(t1, t0); + t2 = Sse2.Xor(t2, t1); + + Unsafe.WriteUnaligned(ref outBuf[outOff], t1); + Unsafe.WriteUnaligned(ref S[0], t2); + } + else +#endif + { + for (int i = 0; i < BlockSize; i += 4) + { + byte c0 = (byte)(ctrBlock[i + 0] ^ inBuf[inOff + i + 0]); + byte c1 = (byte)(ctrBlock[i + 1] ^ inBuf[inOff + i + 1]); + byte c2 = (byte)(ctrBlock[i + 2] ^ inBuf[inOff + i + 2]); + byte c3 = (byte)(ctrBlock[i + 3] ^ inBuf[inOff + i + 3]); + + S[i + 0] ^= c0; + S[i + 1] ^= c1; + S[i + 2] ^= c2; + S[i + 3] ^= c3; + + outBuf[outOff + i + 0] = c0; + outBuf[outOff + i + 1] = c1; + outBuf[outOff + i + 2] = c2; + outBuf[outOff + i + 3] = c3; + } + } + multiplier.MultiplyH(S); + + totalLength += BlockSize * 2; + } + private void ProcessPartial(byte[] buf, int off, int len, byte[] output, int outOff) { byte[] ctrBlock = new byte[BlockSize]; @@ -633,11 +955,7 @@ namespace Org.BouncyCastle.Crypto.Modes blocksRemaining--; - uint c = 1; - c += counter[15]; counter[15] = (byte)c; c >>= 8; - c += counter[14]; counter[14] = (byte)c; c >>= 8; - c += counter[13]; counter[13] = (byte)c; c >>= 8; - c += counter[12]; counter[12] = (byte)c; + Pack.UInt32_To_BE(++counter32, counter, 12); cipher.ProcessBlock(counter, 0, block, 0); } diff --git a/crypto/src/crypto/modes/gcm/GcmUtilities.cs b/crypto/src/crypto/modes/gcm/GcmUtilities.cs index cf21ace23..4528e172a 100644 --- a/crypto/src/crypto/modes/gcm/GcmUtilities.cs +++ b/crypto/src/crypto/modes/gcm/GcmUtilities.cs @@ -1,5 +1,8 @@ using System; using System.Diagnostics; +#if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER +using System.Runtime.CompilerServices; +#endif #if NETCOREAPP3_0_OR_GREATER using System.Runtime.Intrinsics; using System.Runtime.Intrinsics.X86; @@ -13,6 +16,12 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm { internal abstract class GcmUtilities { +#if NETCOREAPP3_0_OR_GREATER + private static readonly Vector128<byte> EndianMask = Vector128.Create( + (byte)0x07, (byte)0x06, (byte)0x05, (byte)0x04, (byte)0x03, (byte)0x02, (byte)0x01, (byte)0x00, + (byte)0x0F, (byte)0x0E, (byte)0x0D, (byte)0x0C, (byte)0x0B, (byte)0x0A, (byte)0x09, (byte)0x08); +#endif + internal struct FieldElement { internal ulong n0, n1; @@ -27,128 +36,53 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm x.n1 = 0UL; } - internal static byte[] OneAsBytes() - { - byte[] tmp = new byte[16]; - tmp[0] = 0x80; - return tmp; - } - - internal static uint[] OneAsUints() - { - uint[] tmp = new uint[4]; - tmp[0] = 0x80000000; - return tmp; - } - - internal static ulong[] OneAsUlongs() - { - ulong[] tmp = new ulong[2]; - tmp[0] = 1UL << 63; - return tmp; - } - - internal static byte[] AsBytes(uint[] x) - { - return Pack.UInt32_To_BE(x); - } - - internal static void AsBytes(uint[] x, byte[] z) - { - Pack.UInt32_To_BE(x, z, 0); - } - - internal static byte[] AsBytes(ulong[] x) +#if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER + [MethodImpl(MethodImplOptions.AggressiveInlining)] +#endif + internal static void AsBytes(ulong x0, ulong x1, byte[] z) { - byte[] z = new byte[16]; - Pack.UInt64_To_BE(x, z, 0); - return z; - } +#if NETCOREAPP3_0_OR_GREATER + if (Ssse3.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<byte>>() == 16) + { + var X = Vector128.Create(x0, x1).AsByte(); + var Z = Ssse3.Shuffle(X, EndianMask); + Unsafe.WriteUnaligned(ref z[0], Z); + return; + } +#endif - internal static void AsBytes(ulong[] x, byte[] z) - { - Pack.UInt64_To_BE(x, z, 0); + Pack.UInt64_To_BE(x0, z, 0); + Pack.UInt64_To_BE(x1, z, 8); } +#if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER + [MethodImpl(MethodImplOptions.AggressiveInlining)] +#endif internal static void AsBytes(ref FieldElement x, byte[] z) { - Pack.UInt64_To_BE(x.n0, z, 0); - Pack.UInt64_To_BE(x.n1, z, 8); + AsBytes(x.n0, x.n1, z); } +#if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER + [MethodImpl(MethodImplOptions.AggressiveInlining)] +#endif internal static void AsFieldElement(byte[] x, out FieldElement z) { +#if NETCOREAPP3_0_OR_GREATER + if (Ssse3.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<byte>>() == 16) + { + var X = Unsafe.ReadUnaligned<Vector128<byte>>(ref x[0]); + var Z = Ssse3.Shuffle(X, EndianMask).AsUInt64(); + z.n0 = Z.GetElement(0); + z.n1 = Z.GetElement(1); + return; + } +#endif + z.n0 = Pack.BE_To_UInt64(x, 0); z.n1 = Pack.BE_To_UInt64(x, 8); } - internal static uint[] AsUints(byte[] bs) - { - uint[] output = new uint[4]; - Pack.BE_To_UInt32(bs, 0, output); - return output; - } - - internal static void AsUints(byte[] bs, uint[] output) - { - Pack.BE_To_UInt32(bs, 0, output); - } - - internal static ulong[] AsUlongs(byte[] x) - { - ulong[] z = new ulong[2]; - Pack.BE_To_UInt64(x, 0, z); - return z; - } - - internal static void AsUlongs(byte[] x, ulong[] z) - { - Pack.BE_To_UInt64(x, 0, z); - } - - internal static void AsUlongs(byte[] x, ulong[] z, int zOff) - { - Pack.BE_To_UInt64(x, 0, z, zOff, 2); - } - - internal static void Copy(uint[] x, uint[] z) - { - z[0] = x[0]; - z[1] = x[1]; - z[2] = x[2]; - z[3] = x[3]; - } - - internal static void Copy(ulong[] x, ulong[] z) - { - z[0] = x[0]; - z[1] = x[1]; - } - - internal static void Copy(ulong[] x, int xOff, ulong[] z, int zOff) - { - z[zOff + 0] = x[xOff + 0]; - z[zOff + 1] = x[xOff + 1]; - } - - internal static void DivideP(ulong[] x, ulong[] z) - { - ulong x0 = x[0], x1 = x[1]; - ulong m = (ulong)((long)x0 >> 63); - x0 ^= (m & E1UL); - z[0] = (x0 << 1) | (x1 >> 63); - z[1] = (x1 << 1) | (ulong)(-(long)m); - } - - internal static void DivideP(ulong[] x, int xOff, ulong[] z, int zOff) - { - ulong x0 = x[xOff + 0], x1 = x[xOff + 1]; - ulong m = (ulong)((long)x0 >> 63); - x0 ^= (m & E1UL); - z[zOff + 0] = (x0 << 1) | (x1 >> 63); - z[zOff + 1] = (x1 << 1) | (ulong)(-(long)m); - } - internal static void DivideP(ref FieldElement x, out FieldElement z) { ulong x0 = x.n0, x1 = x.n1; @@ -233,90 +167,6 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm x.n1 = z1; } - internal static void MultiplyP(uint[] x) - { - uint x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3]; - uint m = (uint)((int)(x3 << 31) >> 31); - x[0] = (x0 >> 1) ^ (m & E1); - x[1] = (x1 >> 1) | (x0 << 31); - x[2] = (x2 >> 1) | (x1 << 31); - x[3] = (x3 >> 1) | (x2 << 31); - } - - internal static void MultiplyP(uint[] x, uint[] z) - { - uint x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3]; - uint m = (uint)((int)(x3 << 31) >> 31); - z[0] = (x0 >> 1) ^ (m & E1); - z[1] = (x1 >> 1) | (x0 << 31); - z[2] = (x2 >> 1) | (x1 << 31); - z[3] = (x3 >> 1) | (x2 << 31); - } - - internal static void MultiplyP(ulong[] x) - { - ulong x0 = x[0], x1 = x[1]; - ulong m = (ulong)((long)(x1 << 63) >> 63); - x[0] = (x0 >> 1) ^ (m & E1UL); - x[1] = (x1 >> 1) | (x0 << 63); - } - - internal static void MultiplyP(ulong[] x, ulong[] z) - { - ulong x0 = x[0], x1 = x[1]; - ulong m = (ulong)((long)(x1 << 63) >> 63); - z[0] = (x0 >> 1) ^ (m & E1UL); - z[1] = (x1 >> 1) | (x0 << 63); - } - - internal static void MultiplyP3(ulong[] x, ulong[] z) - { - ulong x0 = x[0], x1 = x[1]; - ulong c = x1 << 61; - z[0] = (x0 >> 3) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7); - z[1] = (x1 >> 3) | (x0 << 61); - } - - internal static void MultiplyP3(ulong[] x, int xOff, ulong[] z, int zOff) - { - ulong x0 = x[xOff + 0], x1 = x[xOff + 1]; - ulong c = x1 << 61; - z[zOff + 0] = (x0 >> 3) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7); - z[zOff + 1] = (x1 >> 3) | (x0 << 61); - } - - internal static void MultiplyP4(ulong[] x, ulong[] z) - { - ulong x0 = x[0], x1 = x[1]; - ulong c = x1 << 60; - z[0] = (x0 >> 4) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7); - z[1] = (x1 >> 4) | (x0 << 60); - } - - internal static void MultiplyP4(ulong[] x, int xOff, ulong[] z, int zOff) - { - ulong x0 = x[xOff + 0], x1 = x[xOff + 1]; - ulong c = x1 << 60; - z[zOff + 0] = (x0 >> 4) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7); - z[zOff + 1] = (x1 >> 4) | (x0 << 60); - } - - internal static void MultiplyP7(ulong[] x, ulong[] z) - { - ulong x0 = x[0], x1 = x[1]; - ulong c = x1 << 57; - z[0] = (x0 >> 7) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7); - z[1] = (x1 >> 7) | (x0 << 57); - } - - internal static void MultiplyP7(ulong[] x, int xOff, ulong[] z, int zOff) - { - ulong x0 = x[xOff + 0], x1 = x[xOff + 1]; - ulong c = x1 << 57; - z[zOff + 0] = (x0 >> 7) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7); - z[zOff + 1] = (x1 >> 7) | (x0 << 57); - } - internal static void MultiplyP7(ref FieldElement x) { ulong x0 = x.n0, x1 = x.n1; @@ -325,50 +175,6 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm x.n1 = (x1 >> 7) | (x0 << 57); } - internal static void MultiplyP8(uint[] x) - { - uint x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3]; - uint c = x3 << 24; - x[0] = (x0 >> 8) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7); - x[1] = (x1 >> 8) | (x0 << 24); - x[2] = (x2 >> 8) | (x1 << 24); - x[3] = (x3 >> 8) | (x2 << 24); - } - - internal static void MultiplyP8(uint[] x, uint[] y) - { - uint x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3]; - uint c = x3 << 24; - y[0] = (x0 >> 8) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7); - y[1] = (x1 >> 8) | (x0 << 24); - y[2] = (x2 >> 8) | (x1 << 24); - y[3] = (x3 >> 8) | (x2 << 24); - } - - internal static void MultiplyP8(ulong[] x) - { - ulong x0 = x[0], x1 = x[1]; - ulong c = x1 << 56; - x[0] = (x0 >> 8) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7); - x[1] = (x1 >> 8) | (x0 << 56); - } - - internal static void MultiplyP8(ulong[] x, ulong[] y) - { - ulong x0 = x[0], x1 = x[1]; - ulong c = x1 << 56; - y[0] = (x0 >> 8) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7); - y[1] = (x1 >> 8) | (x0 << 56); - } - - internal static void MultiplyP8(ulong[] x, int xOff, ulong[] y, int yOff) - { - ulong x0 = x[xOff + 0], x1 = x[xOff + 1]; - ulong c = x1 << 56; - y[yOff + 0] = (x0 >> 8) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7); - y[yOff + 1] = (x1 >> 8) | (x0 << 56); - } - internal static void MultiplyP8(ref FieldElement x) { ulong x0 = x.n0, x1 = x.n1; @@ -385,14 +191,6 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm y.n1 = (x1 >> 8) | (x0 << 56); } - internal static void MultiplyP16(ulong[] x) - { - ulong x0 = x[0], x1 = x[1]; - ulong c = x1 << 48; - x[0] = (x0 >> 16) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7); - x[1] = (x1 >> 16) | (x0 << 48); - } - internal static void MultiplyP16(ref FieldElement x) { ulong x0 = x.n0, x1 = x.n1; @@ -448,19 +246,6 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm while (i < 16); } - internal static void Xor(byte[] x, int xOff, byte[] y, int yOff, byte[] z, int zOff) - { - int i = 0; - do - { - z[zOff + i] = (byte)(x[xOff + i] ^ y[yOff + i]); ++i; - z[zOff + i] = (byte)(x[xOff + i] ^ y[yOff + i]); ++i; - z[zOff + i] = (byte)(x[xOff + i] ^ y[yOff + i]); ++i; - z[zOff + i] = (byte)(x[xOff + i] ^ y[yOff + i]); ++i; - } - while (i < 16); - } - internal static void Xor(byte[] x, byte[] y, int yOff, int yLen) { while (--yLen >= 0) @@ -477,57 +262,10 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm } } - internal static void Xor(byte[] x, byte[] y, byte[] z) - { - int i = 0; - do - { - z[i] = (byte)(x[i] ^ y[i]); ++i; - z[i] = (byte)(x[i] ^ y[i]); ++i; - z[i] = (byte)(x[i] ^ y[i]); ++i; - z[i] = (byte)(x[i] ^ y[i]); ++i; - } - while (i < 16); - } - - internal static void Xor(uint[] x, uint[] y) - { - x[0] ^= y[0]; - x[1] ^= y[1]; - x[2] ^= y[2]; - x[3] ^= y[3]; - } - - internal static void Xor(uint[] x, uint[] y, uint[] z) - { - z[0] = x[0] ^ y[0]; - z[1] = x[1] ^ y[1]; - z[2] = x[2] ^ y[2]; - z[3] = x[3] ^ y[3]; - } - - internal static void Xor(ulong[] x, ulong[] y) - { - x[0] ^= y[0]; - x[1] ^= y[1]; - } - - internal static void Xor(ulong[] x, int xOff, ulong[] y, int yOff) - { - x[xOff + 0] ^= y[yOff + 0]; - x[xOff + 1] ^= y[yOff + 1]; - } - - internal static void Xor(ulong[] x, ulong[] y, ulong[] z) - { - z[0] = x[0] ^ y[0]; - z[1] = x[1] ^ y[1]; - } - - internal static void Xor(ulong[] x, int xOff, ulong[] y, int yOff, ulong[] z, int zOff) + internal static void Xor(ref FieldElement x, ref FieldElement y) { - z[zOff + 0] = x[xOff + 0] ^ y[yOff + 0]; - z[zOff + 1] = x[xOff + 1] ^ y[yOff + 1]; + x.n0 ^= y.n0; + x.n1 ^= y.n1; } internal static void Xor(ref FieldElement x, ref FieldElement y, out FieldElement z) @@ -536,12 +274,6 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm z.n1 = x.n1 ^ y.n1; } - internal static void Xor(ref FieldElement x, ref FieldElement y) - { - x.n0 ^= y.n0; - x.n1 ^= y.n1; - } - private static ulong ImplMul64(ulong x, ulong y) { ulong x0 = x & 0x1111111111111111UL; diff --git a/crypto/src/crypto/modes/gcm/Tables4kGcmMultiplier.cs b/crypto/src/crypto/modes/gcm/Tables4kGcmMultiplier.cs index 7867a0b99..117558b45 100644 --- a/crypto/src/crypto/modes/gcm/Tables4kGcmMultiplier.cs +++ b/crypto/src/crypto/modes/gcm/Tables4kGcmMultiplier.cs @@ -62,8 +62,7 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm z0 = T[pos].n0 ^ (z0 >> 8) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7); } - Pack.UInt64_To_BE(z0, x, 0); - Pack.UInt64_To_BE(z1, x, 8); + GcmUtilities.AsBytes(z0, z1, x); } } } diff --git a/crypto/src/crypto/modes/gcm/Tables64kGcmMultiplier.cs b/crypto/src/crypto/modes/gcm/Tables64kGcmMultiplier.cs index 364c070e7..52a9d4e82 100644 --- a/crypto/src/crypto/modes/gcm/Tables64kGcmMultiplier.cs +++ b/crypto/src/crypto/modes/gcm/Tables64kGcmMultiplier.cs @@ -74,8 +74,7 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm z1 ^= t[tPos].n1; } - Pack.UInt64_To_BE(z0, x, 0); - Pack.UInt64_To_BE(z1, x, 8); + GcmUtilities.AsBytes(z0, z1, x); } } } diff --git a/crypto/src/crypto/modes/gcm/Tables8kGcmMultiplier.cs b/crypto/src/crypto/modes/gcm/Tables8kGcmMultiplier.cs index 67a709a75..7fe122526 100644 --- a/crypto/src/crypto/modes/gcm/Tables8kGcmMultiplier.cs +++ b/crypto/src/crypto/modes/gcm/Tables8kGcmMultiplier.cs @@ -82,8 +82,7 @@ namespace Org.BouncyCastle.Crypto.Modes.Gcm z0 = T0[uPos].n0 ^ T1[vPos].n0 ^ (z0 >> 16) ^ c ^ (c >> 1) ^ (c >> 2) ^ (c >> 7); } - Pack.UInt64_To_BE(z0, x, 0); - Pack.UInt64_To_BE(z1, x, 8); + GcmUtilities.AsBytes(z0, z1, x); } } } |