summary refs log tree commit diff
diff options
context:
space:
mode:
author Peter Dettman <peter.dettman@bouncycastle.org> 2022-10-11 20:10:12 +0700
committer Peter Dettman <peter.dettman@bouncycastle.org> 2022-10-11 20:10:12 +0700
commit fe3881ecbf35eb8d09b1a367be09eb359ff8b050 (patch)
tree 9d7adfe060d13361c1e46e34dcd4a23c7124f3ba
parent Support custom round constants (diff)
download BouncyCastle.NET-ed25519-fe3881ecbf35eb8d09b1a367be09eb359ff8b050.tar.xz
Hardware-accelerate Haraka engine for SPHINCS+
-rw-r--r-- crypto/src/crypto/util/Pack.cs 30
-rw-r--r-- crypto/src/pqc/crypto/sphincsplus/HarakaS_X86.cs 203
-rw-r--r-- crypto/src/pqc/crypto/sphincsplus/SPHINCSPlusEngine.cs 129
-rw-r--r-- crypto/src/pqc/crypto/sphincsplus/SPHINCSPlusKeyPairGenerator.cs 6
-rw-r--r-- crypto/src/pqc/crypto/sphincsplus/SPHINCSPlusParameters.cs 5
5 files changed, 372 insertions, 1 deletions
diff --git a/crypto/src/crypto/util/Pack.cs b/crypto/src/crypto/util/Pack.cs
index e281f1818..7b9ce496f 100644
--- a/crypto/src/crypto/util/Pack.cs
+++ b/crypto/src/crypto/util/Pack.cs
@@ -503,6 +503,21 @@ namespace Org.BouncyCastle.Crypto.Utilities
         }
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static uint BE_To_UInt32_Partial(ReadOnlySpan<byte> bs) // big-endian decode of a 1..4 byte span into a uint
+        {
+            int len = bs.Length;
+            Debug.Assert(1 <= len && len <= 4); // debug-only check; an empty span would fault on bs[0] below
+
+            uint result = bs[0]; // first byte is the most significant one
+            for (int i = 1; i < len; ++i)
+            {
+                result <<= 8; // make room for the next, less significant byte
+                result |= bs[i];
+            }
+            return result; // right-aligned: a span shorter than 4 bytes leaves the high bits zero
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal static ulong BE_To_UInt64(ReadOnlySpan<byte> bs)
         {
             uint hi = BE_To_UInt32(bs);
@@ -521,6 +536,21 @@ namespace Org.BouncyCastle.Crypto.Utilities
         }
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static ulong BE_To_UInt64_Partial(ReadOnlySpan<byte> bs) // big-endian decode of a 1..8 byte span into a ulong
+        {
+            int len = bs.Length;
+            Debug.Assert(1 <= len && len <= 8); // debug-only check; an empty span would fault on bs[0] below
+
+            ulong result = bs[0]; // first byte is the most significant one
+            for (int i = 1; i < len; ++i)
+            {
+                result <<= 8; // make room for the next, less significant byte
+                result |= bs[i];
+            }
+            return result; // right-aligned: a span shorter than 8 bytes leaves the high bits zero
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal static uint LE_To_UInt32(ReadOnlySpan<byte> bs)
         {
             return      bs[0]
diff --git a/crypto/src/pqc/crypto/sphincsplus/HarakaS_X86.cs b/crypto/src/pqc/crypto/sphincsplus/HarakaS_X86.cs
new file mode 100644
index 000000000..a625cb32d
--- /dev/null
+++ b/crypto/src/pqc/crypto/sphincsplus/HarakaS_X86.cs
@@ -0,0 +1,203 @@
+#if NETCOREAPP3_0_OR_GREATER
+using System;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics;
+
+using Org.BouncyCastle.Crypto;
+using Org.BouncyCastle.Crypto.Digests;
+using Org.BouncyCastle.Utilities;
+
+namespace Org.BouncyCastle.Pqc.Crypto.SphincsPlus
+{
+    internal class HarakaS_X86 // sponge-style XOF over the Haraka-512 permutation: 64-byte state, 32 bytes absorbed/squeezed per permutation
+        : IXof
+    {
+        public static bool IsSupported => Haraka512_X86.IsSupported; // usable only where the underlying permutation is
+
+        private enum State { Absorbing, Squeezing };
+
+        private readonly Vector128<byte>[] m_roundConstants = new Vector128<byte>[40]; // seed-derived constants, filled once by the constructor
+
+        private readonly byte[] m_buf = new byte[64]; // permutation state; only bytes [0, 32) are touched by input/output
+        private int m_bufPos = 0; // next byte within [0, 32) to absorb into, or to hand out while squeezing
+        private State m_state = State.Absorbing;
+
+        internal HarakaS_X86(ReadOnlySpan<byte> pkSeed) // derives the 40 round constants from pkSeed; throws PlatformNotSupportedException if unsupported
+        {
+            if (!IsSupported)
+                throw new PlatformNotSupportedException(nameof(HarakaS_X86));
+
+            // Absorb PKSeed
+            Span<byte> buf = stackalloc byte[64]; // separate scratch state, so m_buf stays all-zero for later hashing
+            while (pkSeed.Length >= 32)
+            {
+                XorWith(pkSeed[..32], buf);
+                Haraka512_X86.Permute(buf, buf); // overload without constants; presumably the default Haraka round constants (confirm in Haraka512_X86)
+                pkSeed = pkSeed[32..];
+            }
+            XorWith(pkSeed, buf); // remaining tail, fewer than 32 bytes (possibly none)
+            buf[pkSeed.Length] ^= 0x1F; // padding marker directly after the last absorbed byte
+            buf[           31] ^= 0x80; // final-bit marker in the last byte of the 32-byte block
+
+            // Squeeze round constants
+            int rc = 0;
+            while (rc < 40) // 20 permutations, each yielding two 16-byte constants
+            {
+                Haraka512_X86.Permute(buf, buf);
+                m_roundConstants[rc++] = Load128(buf[  ..16]);
+                m_roundConstants[rc++] = Load128(buf[16..32]);
+            }
+        }
+
+        internal ReadOnlySpan<Vector128<byte>> RoundConstants => m_roundConstants; // read-only view handed to the Haraka256/512 helpers
+
+        public string AlgorithmName => "HarakaS";
+
+        public int GetDigestSize() => 32; // byte count produced by the fixed-length DoFinal overloads
+
+        public int GetByteLength() => 32; // bytes absorbed per permutation
+
+        public void Update(byte input) // absorbs one byte; throws InvalidOperationException once squeezing has begun
+        {
+            if (m_state != State.Absorbing)
+                throw new InvalidOperationException();
+
+            m_buf[m_bufPos++] ^= input;
+            if (m_bufPos == 32) // block full: permute and start the next block
+            {
+                Haraka512_X86.Permute(m_buf, m_buf, m_roundConstants);
+                m_bufPos = 0;
+            }
+        }
+
+        public void BlockUpdate(byte[] input, int inOff, int inLen) // array overload; forwards to the span version
+        {
+            BlockUpdate(input.AsSpan(inOff, inLen));
+        }
+
+        public void BlockUpdate(ReadOnlySpan<byte> input) // absorbs input; throws InvalidOperationException once squeezing has begun
+        {
+            if (m_state != State.Absorbing)
+                throw new InvalidOperationException();
+
+            int available = 32 - m_bufPos; // room left in the current block
+            if (input.Length < available) // fits without completing the block: no permutation needed
+            {
+                XorWith(input, m_buf.AsSpan(m_bufPos));
+                m_bufPos += input.Length;
+                return;
+            }
+
+            XorWith(input[..available], m_buf.AsSpan(m_bufPos)); // complete the partially filled block first
+            input = input[available..];
+            Haraka512_X86.Permute(m_buf, m_buf, m_roundConstants);
+
+            while (input.Length >= 32) // whole blocks
+            {
+                XorWith(input[..32], m_buf);
+                input = input[32..];
+                Haraka512_X86.Permute(m_buf, m_buf, m_roundConstants);
+            }
+
+            XorWith(input, m_buf); // leftover tail (possibly empty) starts a fresh block
+            m_bufPos = input.Length;
+        }
+
+        public int DoFinal(byte[] output, int outOff) // writes 32 bytes at outOff, then resets
+        {
+            return OutputFinal(output.AsSpan(outOff, 32));
+        }
+
+        public int DoFinal(Span<byte> output) // writes 32 bytes to the start of output, then resets
+        {
+            return OutputFinal(output[..32]);
+        }
+
+        public int DoOutput(byte[] output, int outOff, int outLen) // array overload of Output; does not reset
+        {
+            return Output(output.AsSpan(outOff, outLen));
+        }
+
+        public int Output(Span<byte> output) // fills output with the next bytes of the squeeze stream; returns output.Length; no reset
+        {
+            if (m_state != State.Squeezing) // first output call: pad the absorbed input and switch phase
+            {
+                m_buf[m_bufPos] ^= 0x1F;
+                m_buf[31] ^= 0x80;
+                m_bufPos = 32; // nothing squeezed yet, so the first non-empty request permutes before copying
+                m_state = State.Squeezing;
+            }
+
+            int result = output.Length;
+
+            int available = 32 - m_bufPos; // squeezed bytes not yet handed out
+            if (output.Length <= available)
+            {
+                output.CopyFrom(m_buf.AsSpan(m_bufPos)); // CopyFrom is a project extension; presumably copies output.Length bytes
+                m_bufPos += output.Length; // advance only by what was handed out so the rest of the block feeds the next call
+                return result;
+            }
+
+            output[..available].CopyFrom(m_buf.AsSpan(m_bufPos)); // drain what is left of the current block
+            output = output[available..];
+            Debug.Assert(!output.IsEmpty);
+
+            while (output.Length > 32) // whole blocks, always leaving 1..32 bytes for the final step
+            {
+                Haraka512_X86.Permute(m_buf, m_buf, m_roundConstants);
+                output[..32].CopyFrom(m_buf);
+                output = output[32..];
+            }
+
+            Haraka512_X86.Permute(m_buf, m_buf, m_roundConstants);
+            output.CopyFrom(m_buf);
+            m_bufPos = output.Length; // remember how much of this block has been consumed
+
+            return result;
+        }
+
+        public int DoFinal(byte[] output, int outOff, int outLen) // array overload of OutputFinal
+        {
+            return OutputFinal(output.AsSpan(outOff, outLen));
+        }
+
+        public int OutputFinal(Span<byte> output) // like Output, then resets so the instance is ready for a new message
+        {
+            int result = Output(output);
+            Reset();
+            return result;
+        }
+
+        public void Reset() // clears the state and returns to absorbing; the round constants are kept
+        {
+            Array.Clear(m_buf, 0, m_buf.Length); // 3-argument overload: Array.Clear(Array) only exists from .NET 6
+            m_bufPos = 0;
+            m_state = State.Absorbing;
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static Vector128<byte> Load128(ReadOnlySpan<byte> t) // loads the first 16 bytes of t into a vector
+        {
+#if NET7_0_OR_GREATER
+            return Vector128.Create<byte>(t);
+#else
+            if (BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<byte>>() == 16)
+                return Unsafe.ReadUnaligned<Vector128<byte>>(ref Unsafe.AsRef(t[0])); // direct unaligned read of the 16 bytes
+
+            return Vector128.Create(t[0], t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8], t[9], t[10], t[11], t[12],
+                t[13], t[14], t[15]); // element-by-element fallback when the fast path's conditions do not hold
+#endif
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static void XorWith(ReadOnlySpan<byte> x, Span<byte> z) // z[i] ^= x[i] for every byte of x; z must be at least as long
+        {
+            for (int i = 0; i < x.Length; i++)
+            {
+                z[i] ^= x[i];
+            }
+        }
+    }
+}
+#endif
diff --git a/crypto/src/pqc/crypto/sphincsplus/SPHINCSPlusEngine.cs b/crypto/src/pqc/crypto/sphincsplus/SPHINCSPlusEngine.cs
index f7617f3c1..86e93383c 100644
--- a/crypto/src/pqc/crypto/sphincsplus/SPHINCSPlusEngine.cs
+++ b/crypto/src/pqc/crypto/sphincsplus/SPHINCSPlusEngine.cs
@@ -628,5 +628,134 @@ namespace Org.BouncyCastle.Pqc.Crypto.SphincsPlus
                 return m;
             }
         }
+
+#if NETCOREAPP3_0_OR_GREATER
+        internal class HarakaSEngine_X86 // SPHINCS+ engine whose hash functions are built on the X86 Haraka helpers
+            : SphincsPlusEngine
+        {
+            public static bool IsSupported => Haraka256_X86.IsSupported && Haraka512_X86.IsSupported
+                && HarakaS_X86.IsSupported; // all three primitives must be usable
+
+            private HarakaS_X86 m_harakaS; // created by Init; null until then
+
+            public HarakaSEngine_X86(bool robust, int n, uint w, uint d, int a, int k, uint h)
+                : base(robust, n, w, d, a, k, h)
+            {
+            }
+
+            public override void Init(byte[] pkSeed) // builds the XOF, which derives the seed-dependent round constants
+            {
+                m_harakaS = new HarakaS_X86(pkSeed);
+            }
+
+            public override byte[] F(byte[] pkSeed, Adrs adrs, byte[] m1) // pkSeed is not read here; m_harakaS's round constants are used instead
+            {
+                Span<byte> buf = stackalloc byte[64]; // layout: address at offset 0, message at offset 32
+                adrs.value.CopyTo(buf);
+
+                if (robust)
+                {
+                    Span<byte> mask = stackalloc byte[32];
+                    Haraka256_X86.Hash(adrs.value, mask, m_harakaS.RoundConstants); // mask is derived from the address alone
+                    for (int i = 0; i < m1.Length; ++i)
+                    {
+                        buf[32 + i] = (byte)(m1[i] ^ mask[i]);
+                    }
+                }
+                else
+                {
+                    m1.CopyTo(buf[32..]);
+                }
+                Haraka512_X86.Hash(buf, buf, m_harakaS.RoundConstants); // hashed in place
+                return buf[..N].ToArray(); // result is the first N bytes
+            }
+
+            public override byte[] H(byte[] pkSeed, Adrs adrs, byte[] m1, byte[] m2) // pkSeed is not read here
+            {
+                Span<byte> m = stackalloc byte[m1.Length + m2.Length]; // m1 || m2 in a local copy, so the inputs are never modified
+                m1.CopyTo(m);
+                m2.CopyTo(m[m1.Length..]);
+                Bitmask(adrs, m);
+
+                byte[] rv = new byte[N];
+                m_harakaS.BlockUpdate(adrs.value);
+                m_harakaS.BlockUpdate(m);
+                m_harakaS.OutputFinal(rv); // squeezes N bytes and resets the XOF
+                return rv;
+            }
+
+            public override IndexedDigest H_msg(byte[] prf, byte[] pkSeed, byte[] pkRoot, byte[] message) // pkSeed is not read here
+            {
+                int forsMsgBytes = ((A * K) + 7) >> 3; // A*K bits, rounded up to whole bytes
+                int leafBits = (int)(FH / D);
+                int treeBits = (int)FH - leafBits;
+                int leafBytes = (leafBits + 7) >> 3;
+                int treeBytes = (treeBits + 7) >> 3;
+
+                byte[] output = new byte[forsMsgBytes];
+                Span<byte> indices = stackalloc byte[treeBytes + leafBytes]; // tree index bytes followed by leaf index bytes
+
+                m_harakaS.BlockUpdate(prf);
+                m_harakaS.BlockUpdate(pkRoot);
+                m_harakaS.BlockUpdate(message);
+                m_harakaS.Output(output); // first part of the squeeze stream, no reset
+                m_harakaS.OutputFinal(indices); // second read from the same XOF, then reset
+
+                // tree index
+                // currently, only indexes up to 64 bits are supported
+                ulong treeIndex = Pack.BE_To_UInt64_Partial(indices[..treeBytes])
+                                & ulong.MaxValue >> (64 - treeBits); // >> binds tighter than &: keeps the low treeBits bits
+
+                uint leafIndex = Pack.BE_To_UInt32_Partial(indices[treeBytes..])
+                               & uint.MaxValue >> (32 - leafBits); // keeps the low leafBits bits
+
+                return new IndexedDigest(treeIndex, leafIndex, output);
+            }
+
+            public override byte[] T_l(byte[] pkSeed, Adrs adrs, byte[] m) // pkSeed is not read here
+            {
+                Bitmask(adrs, m); // NOTE(review): the byte[] is passed as a span, so in robust mode the caller's array is masked in place
+
+                byte[] rv = new byte[N];
+                m_harakaS.BlockUpdate(adrs.value);
+                m_harakaS.BlockUpdate(m);
+                m_harakaS.OutputFinal(rv); // squeezes N bytes and resets the XOF
+                return rv;
+            }
+
+            public override void PRF(byte[] pkSeed, byte[] skSeed, Adrs adrs, byte[] prf, int prfOff) // writes N bytes into prf at prfOff; pkSeed is not read
+            {
+                Span<byte> buf = stackalloc byte[64]; // layout: address at offset 0, secret seed at offset 32
+                adrs.value.CopyTo(buf);
+                skSeed.CopyTo(buf[32..]);
+                Haraka512_X86.Hash(buf, buf, m_harakaS.RoundConstants); // hashed in place
+                buf[..N].CopyTo(prf.AsSpan(prfOff));
+            }
+
+            public override byte[] PRF_msg(byte[] prf, byte[] randomiser, byte[] message) // N-byte XOF output over prf, randomiser, message in that order
+            {
+                byte[] rv = new byte[N];
+                m_harakaS.BlockUpdate(prf);
+                m_harakaS.BlockUpdate(randomiser);
+                m_harakaS.BlockUpdate(message);
+                m_harakaS.OutputFinal(rv); // squeezes N bytes and resets the XOF
+                return rv;
+            }
+
+            protected void Bitmask(Adrs adrs, Span<byte> m) // robust mode only: XORs m in place with an address-derived mask
+            {
+                if (robust)
+                {
+                    Span<byte> mask = stackalloc byte[m.Length]; // mask is as long as the message
+                    m_harakaS.BlockUpdate(adrs.value);
+                    m_harakaS.OutputFinal(mask); // resets the XOF, so callers can start absorbing afterwards
+                    for (int i = 0; i < m.Length; ++i)
+                    {
+                        m[i] ^= mask[i];
+                    }
+                }
+            }
+        }
+#endif
     }
 }
diff --git a/crypto/src/pqc/crypto/sphincsplus/SPHINCSPlusKeyPairGenerator.cs b/crypto/src/pqc/crypto/sphincsplus/SPHINCSPlusKeyPairGenerator.cs
index ed96b70e5..86493657d 100644
--- a/crypto/src/pqc/crypto/sphincsplus/SPHINCSPlusKeyPairGenerator.cs
+++ b/crypto/src/pqc/crypto/sphincsplus/SPHINCSPlusKeyPairGenerator.cs
@@ -24,7 +24,11 @@ namespace Org.BouncyCastle.Pqc.Crypto.SphincsPlus
             byte[] pkSeed;
             SK sk;
 
-            if (engine is SphincsPlusEngine.HarakaSEngine)
+            if (engine is SphincsPlusEngine.HarakaSEngine
+#if NETCOREAPP3_0_OR_GREATER
+                || engine is SphincsPlusEngine.HarakaSEngine_X86
+#endif
+                )
             {
                 // required to pass kat tests
                 byte[] tmparray = SecRand(engine.N * 3);
diff --git a/crypto/src/pqc/crypto/sphincsplus/SPHINCSPlusParameters.cs b/crypto/src/pqc/crypto/sphincsplus/SPHINCSPlusParameters.cs
index 97a9fe71a..e8a95fd2f 100644
--- a/crypto/src/pqc/crypto/sphincsplus/SPHINCSPlusParameters.cs
+++ b/crypto/src/pqc/crypto/sphincsplus/SPHINCSPlusParameters.cs
@@ -373,6 +373,11 @@ namespace Org.BouncyCastle.Pqc.Crypto.SphincsPlus
 
         public SphincsPlusEngine Get() // returns a new Haraka-based engine built from this provider's parameters
         {
+#if NETCOREAPP3_0_OR_GREATER
+            if (SphincsPlusEngine.HarakaSEngine_X86.IsSupported) // prefer the X86 variant whenever it reports itself usable
+                return new SphincsPlusEngine.HarakaSEngine_X86(robust, n, w, d, a, k, h);
+#endif
+
             return new SphincsPlusEngine.HarakaSEngine(robust, n, w, d, a, k, h); // fallback engine, same constructor arguments
         }
     }