summary refs log tree commit diff
path: root/crypto/src/math/raw
diff options
context:
space:
mode:
authorPeter Dettman <peter.dettman@bouncycastle.org>2015-03-23 19:05:36 +0700
committerPeter Dettman <peter.dettman@bouncycastle.org>2015-03-23 19:05:36 +0700
commit94bd14f0d17e617ca1a8b9c4b188416758820c78 (patch)
tree103e2ecf5dc888408790700493cfac98634a6db1 /crypto/src/math/raw
parentImprove handling of extensions for session resumption (diff)
downloadBouncyCastle.NET-ed25519-94bd14f0d17e617ca1a8b9c4b188416758820c78.tar.xz
Math.Raw support for custom binary curves
- Also migrate the 'raw' stuff into Math.Raw following Java build
Diffstat (limited to 'crypto/src/math/raw')
-rw-r--r--crypto/src/math/raw/Interleave.cs70
-rw-r--r--crypto/src/math/raw/Mod.cs184
-rw-r--r--crypto/src/math/raw/Nat.cs1053
-rw-r--r--crypto/src/math/raw/Nat128.cs856
-rw-r--r--crypto/src/math/raw/Nat160.cs874
-rw-r--r--crypto/src/math/raw/Nat192.cs1048
-rw-r--r--crypto/src/math/raw/Nat224.cs1176
-rw-r--r--crypto/src/math/raw/Nat256.cs1387
-rw-r--r--crypto/src/math/raw/Nat320.cs98
-rw-r--r--crypto/src/math/raw/Nat384.cs46
-rw-r--r--crypto/src/math/raw/Nat448.cs100
-rw-r--r--crypto/src/math/raw/Nat512.cs46
-rw-r--r--crypto/src/math/raw/Nat576.cs102
13 files changed, 7040 insertions, 0 deletions
diff --git a/crypto/src/math/raw/Interleave.cs b/crypto/src/math/raw/Interleave.cs
new file mode 100644
index 000000000..9755c9d6f
--- /dev/null
+++ b/crypto/src/math/raw/Interleave.cs
@@ -0,0 +1,70 @@
+using System;
+
+namespace Org.BouncyCastle.Math.Raw
+{
+    internal abstract class Interleave
+    {
+        /*
+         * This expands 8 bit indices into 16 bit contents (high bit 14), by inserting 0s between bits.
+         * In a binary field, this operation is the same as squaring an 8 bit number.
+         */
+        private static readonly ushort[] INTERLEAVE2_TABLE = new ushort[]
+        {
+            0x0000, 0x0001, 0x0004, 0x0005, 0x0010, 0x0011, 0x0014, 0x0015,
+            0x0040, 0x0041, 0x0044, 0x0045, 0x0050, 0x0051, 0x0054, 0x0055,
+            0x0100, 0x0101, 0x0104, 0x0105, 0x0110, 0x0111, 0x0114, 0x0115,
+            0x0140, 0x0141, 0x0144, 0x0145, 0x0150, 0x0151, 0x0154, 0x0155,
+            0x0400, 0x0401, 0x0404, 0x0405, 0x0410, 0x0411, 0x0414, 0x0415,
+            0x0440, 0x0441, 0x0444, 0x0445, 0x0450, 0x0451, 0x0454, 0x0455,
+            0x0500, 0x0501, 0x0504, 0x0505, 0x0510, 0x0511, 0x0514, 0x0515,
+            0x0540, 0x0541, 0x0544, 0x0545, 0x0550, 0x0551, 0x0554, 0x0555,
+            0x1000, 0x1001, 0x1004, 0x1005, 0x1010, 0x1011, 0x1014, 0x1015,
+            0x1040, 0x1041, 0x1044, 0x1045, 0x1050, 0x1051, 0x1054, 0x1055,
+            0x1100, 0x1101, 0x1104, 0x1105, 0x1110, 0x1111, 0x1114, 0x1115,
+            0x1140, 0x1141, 0x1144, 0x1145, 0x1150, 0x1151, 0x1154, 0x1155,
+            0x1400, 0x1401, 0x1404, 0x1405, 0x1410, 0x1411, 0x1414, 0x1415,
+            0x1440, 0x1441, 0x1444, 0x1445, 0x1450, 0x1451, 0x1454, 0x1455,
+            0x1500, 0x1501, 0x1504, 0x1505, 0x1510, 0x1511, 0x1514, 0x1515,
+            0x1540, 0x1541, 0x1544, 0x1545, 0x1550, 0x1551, 0x1554, 0x1555,
+            0x4000, 0x4001, 0x4004, 0x4005, 0x4010, 0x4011, 0x4014, 0x4015,
+            0x4040, 0x4041, 0x4044, 0x4045, 0x4050, 0x4051, 0x4054, 0x4055,
+            0x4100, 0x4101, 0x4104, 0x4105, 0x4110, 0x4111, 0x4114, 0x4115,
+            0x4140, 0x4141, 0x4144, 0x4145, 0x4150, 0x4151, 0x4154, 0x4155,
+            0x4400, 0x4401, 0x4404, 0x4405, 0x4410, 0x4411, 0x4414, 0x4415,
+            0x4440, 0x4441, 0x4444, 0x4445, 0x4450, 0x4451, 0x4454, 0x4455,
+            0x4500, 0x4501, 0x4504, 0x4505, 0x4510, 0x4511, 0x4514, 0x4515,
+            0x4540, 0x4541, 0x4544, 0x4545, 0x4550, 0x4551, 0x4554, 0x4555,
+            0x5000, 0x5001, 0x5004, 0x5005, 0x5010, 0x5011, 0x5014, 0x5015,
+            0x5040, 0x5041, 0x5044, 0x5045, 0x5050, 0x5051, 0x5054, 0x5055,
+            0x5100, 0x5101, 0x5104, 0x5105, 0x5110, 0x5111, 0x5114, 0x5115,
+            0x5140, 0x5141, 0x5144, 0x5145, 0x5150, 0x5151, 0x5154, 0x5155,
+            0x5400, 0x5401, 0x5404, 0x5405, 0x5410, 0x5411, 0x5414, 0x5415,
+            0x5440, 0x5441, 0x5444, 0x5445, 0x5450, 0x5451, 0x5454, 0x5455,
+            0x5500, 0x5501, 0x5504, 0x5505, 0x5510, 0x5511, 0x5514, 0x5515,
+            0x5540, 0x5541, 0x5544, 0x5545, 0x5550, 0x5551, 0x5554, 0x5555
+        };
+
+        internal static uint Expand8to16(uint x)
+        {
+            return INTERLEAVE2_TABLE[x & 0xFF];
+        }
+
+        internal static uint Expand16to32(uint x)
+        {
+            return (uint)(INTERLEAVE2_TABLE[x & 0xFF] | INTERLEAVE2_TABLE[(x >> 8) & 0xFF] << 16);
+        }
+
+        internal static ulong Expand32to64(uint x)
+        {
+            uint r00 = (uint)(INTERLEAVE2_TABLE[x & 0xFF] | INTERLEAVE2_TABLE[(x >> 8) & 0xFF] << 16);
+            uint r32 = (uint)(INTERLEAVE2_TABLE[(x >> 16) & 0xFF] | INTERLEAVE2_TABLE[x >> 24] << 16);
+            return (ulong)r32 << 32 | (ulong)r00;
+        }
+
+        internal static void Expand64To128(ulong x, ulong[] z, int zOff)
+        {
+            z[zOff    ] = Expand32to64((uint)x);
+            z[zOff + 1] = Expand32to64((uint)(x >> 32));
+        }
+    }
+}
diff --git a/crypto/src/math/raw/Mod.cs b/crypto/src/math/raw/Mod.cs
new file mode 100644
index 000000000..63467e668
--- /dev/null
+++ b/crypto/src/math/raw/Mod.cs
@@ -0,0 +1,184 @@
+using System;
+using System.Diagnostics;
+
+using Org.BouncyCastle.Crypto.Utilities;
+using Org.BouncyCastle.Utilities;
+
+namespace Org.BouncyCastle.Math.Raw
+{
+    internal abstract class Mod
+    {
+        public static void Invert(uint[] p, uint[] x, uint[] z)
+        {
+            int len = p.Length;
+            if (Nat.IsZero(len, x))
+                throw new ArgumentException("cannot be 0", "x");
+            if (Nat.IsOne(len, x))
+            {
+                Array.Copy(x, 0, z, 0, len);
+                return;
+            }
+
+            uint[] u = Nat.Copy(len, x);
+            uint[] a = Nat.Create(len);
+            a[0] = 1;
+            int ac = 0;
+
+            if ((u[0] & 1) == 0)
+            {
+                InversionStep(p, u, len, a, ref ac);
+            }
+            if (Nat.IsOne(len, u))
+            {
+                InversionResult(p, ac, a, z);
+                return;
+            }
+
+            uint[] v = Nat.Copy(len, p);
+            uint[] b = Nat.Create(len);
+            int bc = 0;
+
+            int uvLen = len;
+
+            for (;;)
+            {
+                while (u[uvLen - 1] == 0 && v[uvLen - 1] == 0)
+                {
+                    --uvLen;
+                }
+
+                if (Nat.Gte(len, u, v))
+                {
+                    Nat.SubFrom(len, v, u);
+                    Debug.Assert((u[0] & 1) == 0);
+                    ac += Nat.SubFrom(len, b, a) - bc;
+                    InversionStep(p, u, uvLen, a, ref ac);
+                    if (Nat.IsOne(len, u))
+                    {
+                        InversionResult(p, ac, a, z);
+                        return;
+                    }
+                }
+                else
+                {
+                    Nat.SubFrom(len, u, v);
+                    Debug.Assert((v[0] & 1) == 0);
+                    bc += Nat.SubFrom(len, a, b) - ac;
+                    InversionStep(p, v, uvLen, b, ref bc);
+                    if (Nat.IsOne(len, v))
+                    {
+                        InversionResult(p, bc, b, z);
+                        return;
+                    }
+                }
+            }
+        }
+
+        public static uint[] Random(uint[] p)
+        {
+            int len = p.Length;
+            Random rand = new Random();
+            uint[] s = Nat.Create(len);
+
+            uint m = p[len - 1];
+            m |= m >> 1;
+            m |= m >> 2;
+            m |= m >> 4;
+            m |= m >> 8;
+            m |= m >> 16;
+
+            do
+            {
+                byte[] bytes = new byte[len << 2];
+                rand.NextBytes(bytes);
+                Pack.BE_To_UInt32(bytes, 0, s);
+                s[len - 1] &= m;
+            }
+            while (Nat.Gte(len, s, p));
+
+            return s;
+        }
+
+        public static void Add(uint[] p, uint[] x, uint[] y, uint[] z)
+        {
+            int len = p.Length;
+            uint c = Nat.Add(len, x, y, z);
+            if (c != 0)
+            {
+                Nat.SubFrom(len, p, z);
+            }
+        }
+
+        public static void Subtract(uint[] p, uint[] x, uint[] y, uint[] z)
+        {
+            int len = p.Length;
+            int c = Nat.Sub(len, x, y, z);
+            if (c != 0)
+            {
+                Nat.AddTo(len, p, z);
+            }
+        }
+
+        private static void InversionResult(uint[] p, int ac, uint[] a, uint[] z)
+        {
+            if (ac < 0)
+            {
+                Nat.Add(p.Length, a, p, z);
+            }
+            else
+            {
+                Array.Copy(a, 0, z, 0, p.Length);
+            }
+        }
+
+        private static void InversionStep(uint[] p, uint[] u, int uLen, uint[] x, ref int xc)
+        {
+            int len = p.Length;
+            int count = 0;
+            while (u[0] == 0)
+            {
+                Nat.ShiftDownWord(uLen, u, 0);
+                count += 32;
+            }
+
+            {
+                int zeroes = GetTrailingZeroes(u[0]);
+                if (zeroes > 0)
+                {
+                    Nat.ShiftDownBits(uLen, u, zeroes, 0);
+                    count += zeroes;
+                }
+            }
+
+            for (int i = 0; i < count; ++i)
+            {
+                if ((x[0] & 1) != 0)
+                {
+                    if (xc < 0)
+                    {
+                        xc += (int)Nat.AddTo(len, p, x);
+                    }
+                    else
+                    {
+                        xc += Nat.SubFrom(len, p, x);
+                    }
+                }
+
+                Debug.Assert(xc == 0 || xc == -1);
+                Nat.ShiftDownBit(len, x, (uint)xc);
+            }
+        }
+
+        private static int GetTrailingZeroes(uint x)
+        {
+            Debug.Assert(x != 0);
+            int count = 0;
+            while ((x & 1) == 0)
+            {
+                x >>= 1;
+                ++count;
+            }
+            return count;
+        }
+    }
+}
diff --git a/crypto/src/math/raw/Nat.cs b/crypto/src/math/raw/Nat.cs
new file mode 100644
index 000000000..1f9ab00ec
--- /dev/null
+++ b/crypto/src/math/raw/Nat.cs
@@ -0,0 +1,1053 @@
+using System;
+using System.Diagnostics;
+
+using Org.BouncyCastle.Crypto.Utilities;
+
+namespace Org.BouncyCastle.Math.Raw
+{
+    internal abstract class Nat
+    {
+        private const ulong M = 0xFFFFFFFFUL;
+
+        public static uint Add(int len, uint[] x, uint[] y, uint[] z)
+        {
+            ulong c = 0;
+            for (int i = 0; i < len; ++i)
+            {
+                c += (ulong)x[i] + y[i];
+                z[i] = (uint)c;
+                c >>= 32;
+            }
+            return (uint)c;
+        }
+
+        public static uint Add33At(int len, uint x, uint[] z, int zPos)
+        {
+            Debug.Assert(zPos <= (len - 2));
+            ulong c = (ulong)z[zPos + 0] + x;
+            z[zPos + 0] = (uint)c;
+            c >>= 32;
+            c += (ulong)z[zPos + 1] + 1;
+            z[zPos + 1] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : IncAt(len, z, zPos + 2);
+        }
+
+        public static uint Add33At(int len, uint x, uint[] z, int zOff, int zPos)
+        {
+            Debug.Assert(zPos <= (len - 2));
+            ulong c = (ulong)z[zOff + zPos] + x;
+            z[zOff + zPos] = (uint)c;
+            c >>= 32;
+            c += (ulong)z[zOff + zPos + 1] + 1;
+            z[zOff + zPos + 1] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : IncAt(len, z, zOff, zPos + 2);
+        }
+
+        public static uint Add33To(int len, uint x, uint[] z)
+        {
+            ulong c = (ulong)z[0] + x;
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (ulong)z[1] + 1;
+            z[1] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : IncAt(len, z, 2);
+        }
+
+        public static uint Add33To(int len, uint x, uint[] z, int zOff)
+        {
+            ulong c = (ulong)z[zOff + 0] + x;
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += (ulong)z[zOff + 1] + 1;
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : IncAt(len, z, zOff, 2);
+        }
+
+        public static uint AddBothTo(int len, uint[] x, uint[] y, uint[] z)
+        {
+            ulong c = 0;
+            for (int i = 0; i < len; ++i)
+            {
+                c += (ulong)x[i] + y[i] + z[i];
+                z[i] = (uint)c;
+                c >>= 32;
+            }
+            return (uint)c;
+        }
+
+        public static uint AddBothTo(int len, uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            ulong c = 0;
+            for (int i = 0; i < len; ++i)
+            {
+                c += (ulong)x[xOff + i] + y[yOff + i] + z[zOff + i];
+                z[zOff + i] = (uint)c;
+                c >>= 32;
+            }
+            return (uint)c;
+        }
+
+        public static uint AddDWordAt(int len, ulong x, uint[] z, int zPos)
+        {
+            Debug.Assert(zPos <= (len - 2));
+            ulong c = (ulong)z[zPos + 0] + (x & M);
+            z[zPos + 0] = (uint)c;
+            c >>= 32;
+            c += (ulong)z[zPos + 1] + (x >> 32);
+            z[zPos + 1] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : IncAt(len, z, zPos + 2);
+        }
+
+        public static uint AddDWordAt(int len, ulong x, uint[] z, int zOff, int zPos)
+        {
+            Debug.Assert(zPos <= (len - 2));
+            ulong c = (ulong)z[zOff + zPos] + (x & M);
+            z[zOff + zPos] = (uint)c;
+            c >>= 32;
+            c += (ulong)z[zOff + zPos + 1] + (x >> 32);
+            z[zOff + zPos + 1] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : IncAt(len, z, zOff, zPos + 2);
+        }
+
+        public static uint AddDWordTo(int len, ulong x, uint[] z)
+        {
+            ulong c = (ulong)z[0] + (x & M);
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (ulong)z[1] + (x >> 32);
+            z[1] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : IncAt(len, z, 2);
+        }
+
+        public static uint AddDWordTo(int len, ulong x, uint[] z, int zOff)
+        {
+            ulong c = (ulong)z[zOff + 0] + (x & M);
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += (ulong)z[zOff + 1] + (x >> 32);
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : IncAt(len, z, zOff, 2);
+        }
+
+        public static uint AddTo(int len, uint[] x, uint[] z)
+        {
+            ulong c = 0;
+            for (int i = 0; i < len; ++i)
+            {
+                c += (ulong)x[i] + z[i];
+                z[i] = (uint)c;
+                c >>= 32;
+            }
+            return (uint)c;
+        }
+
+        public static uint AddTo(int len, uint[] x, int xOff, uint[] z, int zOff)
+        {
+            ulong c = 0;
+            for (int i = 0; i < len; ++i)
+            {
+                c += (ulong)x[xOff + i] + z[zOff + i];
+                z[zOff + i] = (uint)c;
+                c >>= 32;
+            }
+            return (uint)c;
+        }
+
+        public static uint AddWordAt(int len, uint x, uint[] z, int zPos)
+        {
+            Debug.Assert(zPos <= (len - 1));
+            ulong c = (ulong)x + z[zPos];
+            z[zPos] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : IncAt(len, z, zPos + 1);
+        }
+
+        public static uint AddWordAt(int len, uint x, uint[] z, int zOff, int zPos)
+        {
+            Debug.Assert(zPos <= (len - 1));
+            ulong c = (ulong)x + z[zOff + zPos];
+            z[zOff + zPos] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : IncAt(len, z, zOff, zPos + 1);
+        }
+
+        public static uint AddWordTo(int len, uint x, uint[] z)
+        {
+            ulong c = (ulong)x + z[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : IncAt(len, z, 1);
+        }
+
+        public static uint AddWordTo(int len, uint x, uint[] z, int zOff)
+        {
+            ulong c = (ulong)x + z[zOff];
+            z[zOff] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : IncAt(len, z, zOff, 1);
+        }
+
+        public static void Copy(int len, uint[] x, uint[] z)
+        {
+            Array.Copy(x, 0, z, 0, len);
+        }
+
+        public static uint[] Copy(int len, uint[] x)
+        {
+            uint[] z = new uint[len];
+            Array.Copy(x, 0, z, 0, len);
+            return z;
+        }
+
+        public static uint[] Create(int len)
+        {
+            return new uint[len];
+        }
+
+        public static ulong[] Create64(int len)
+        {
+            return new ulong[len];
+        }
+
+        public static int Dec(int len, uint[] z)
+        {
+            for (int i = 0; i < len; ++i)
+            {
+                if (--z[i] != uint.MaxValue)
+                {
+                    return 0;
+                }
+            }
+            return -1;
+        }
+
+        public static int Dec(int len, uint[] x, uint[] z)
+        {
+            int i = 0;
+            while (i < len)
+            {
+                uint c = x[i] - 1;
+                z[i] = c;
+                ++i;
+                if (c != uint.MaxValue)
+                {
+                    while (i < len)
+                    {
+                        z[i] = x[i];
+                        ++i;
+                    }
+                    return 0;
+                }
+            }
+            return -1;
+        }
+
+        public static int DecAt(int len, uint[] z, int zPos)
+        {
+            Debug.Assert(zPos <= len);
+            for (int i = zPos; i < len; ++i)
+            {
+                if (--z[i] != uint.MaxValue)
+                {
+                    return 0;
+                }
+            }
+            return -1;
+        }
+
+        public static int DecAt(int len, uint[] z, int zOff, int zPos)
+        {
+            Debug.Assert(zPos <= len);
+            for (int i = zPos; i < len; ++i)
+            {
+                if (--z[zOff + i] != uint.MaxValue)
+                {
+                    return 0;
+                }
+            }
+            return -1;
+        }
+
+        public static bool Eq(int len, uint[] x, uint[] y)
+        {
+            for (int i = len - 1; i >= 0; --i)
+            {
+                if (x[i] != y[i])
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static uint[] FromBigInteger(int bits, BigInteger x)
+        {
+            if (x.SignValue < 0 || x.BitLength > bits)
+                throw new ArgumentException();
+
+            int len = (bits + 31) >> 5;
+            uint[] z = Create(len);
+            int i = 0;
+            while (x.SignValue != 0)
+            {
+                z[i++] = (uint)x.IntValue;
+                x = x.ShiftRight(32);
+            }
+            return z;
+        }
+
+        public static uint GetBit(uint[] x, int bit)
+        {
+            if (bit == 0)
+            {
+                return x[0] & 1;
+            }
+            int w = bit >> 5;
+            if (w < 0 || w >= x.Length)
+            {
+                return 0;
+            }
+            int b = bit & 31;
+            return (x[w] >> b) & 1;
+        }
+
+        public static bool Gte(int len, uint[] x, uint[] y)
+        {
+            for (int i = len - 1; i >= 0; --i)
+            {
+                uint x_i = x[i], y_i = y[i];
+                if (x_i < y_i)
+                    return false;
+                if (x_i > y_i)
+                    return true;
+            }
+            return true;
+        }
+
+        public static uint Inc(int len, uint[] z)
+        {
+            for (int i = 0; i < len; ++i)
+            {
+                if (++z[i] != uint.MinValue)
+                {
+                    return 0;
+                }
+            }
+            return 1;
+        }
+
+        public static uint Inc(int len, uint[] x, uint[] z)
+        {
+            int i = 0;
+            while (i < len)
+            {
+                uint c = x[i] + 1;
+                z[i] = c;
+                ++i;
+                if (c != 0)
+                {
+                    while (i < len)
+                    {
+                        z[i] = x[i];
+                        ++i;
+                    }
+                    return 0;
+                }
+            }
+            return 1;
+        }
+
+        public static uint IncAt(int len, uint[] z, int zPos)
+        {
+            Debug.Assert(zPos <= len);
+            for (int i = zPos; i < len; ++i)
+            {
+                if (++z[i] != uint.MinValue)
+                {
+                    return 0;
+                }
+            }
+            return 1;
+        }
+
+        public static uint IncAt(int len, uint[] z, int zOff, int zPos)
+        {
+            Debug.Assert(zPos <= len);
+            for (int i = zPos; i < len; ++i)
+            {
+                if (++z[zOff + i] != uint.MinValue)
+                {
+                    return 0;
+                }
+            }
+            return 1;
+        }
+
+        public static bool IsOne(int len, uint[] x)
+        {
+            if (x[0] != 1)
+            {
+                return false;
+            }
+            for (int i = 1; i < len; ++i)
+            {
+                if (x[i] != 0)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static bool IsZero(int len, uint[] x)
+        {
+            if (x[0] != 0)
+            {
+                return false;
+            }
+            for (int i = 1; i < len; ++i)
+            {
+                if (x[i] != 0)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static void Mul(int len, uint[] x, uint[] y, uint[] zz)
+        {
+            zz[len] = (uint)MulWord(len, x[0], y, zz);
+
+            for (int i = 1; i < len; ++i)
+            {
+                zz[i + len] = (uint)MulWordAddTo(len, x[i], y, 0, zz, i);
+            }
+        }
+
+        public static void Mul(int len, uint[] x, int xOff, uint[] y, int yOff, uint[] zz, int zzOff)
+        {
+            zz[zzOff + len] = (uint)MulWord(len, x[xOff], y, yOff, zz, zzOff);
+
+            for (int i = 1; i < len; ++i)
+            {
+                zz[zzOff + i + len] = (uint)MulWordAddTo(len, x[xOff + i], y, yOff, zz, zzOff + i);
+            }
+        }
+
+        public static uint Mul31BothAdd(int len, uint a, uint[] x, uint b, uint[] y, uint[] z, int zOff)
+        {
+            ulong c = 0, aVal = (ulong)a, bVal = (ulong)b;
+            int i = 0;
+            do
+            {
+                c += aVal * x[i] + bVal * y[i] + z[zOff + i];
+                z[zOff + i] = (uint)c;
+                c >>= 32;
+            }
+            while (++i < len);
+            return (uint)c;
+        }
+
+        public static uint MulWord(int len, uint x, uint[] y, uint[] z)
+        {
+            ulong c = 0, xVal = (ulong)x;
+            int i = 0;
+            do
+            {
+                c += xVal * y[i];
+                z[i] = (uint)c;
+                c >>= 32;
+            }
+            while (++i < len);
+            return (uint)c;
+        }
+
+        public static uint MulWord(int len, uint x, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            ulong c = 0, xVal = (ulong)x;
+            int i = 0;
+            do
+            {
+                c += xVal * y[yOff + i];
+                z[zOff + i] = (uint)c;
+                c >>= 32;
+            }
+            while (++i < len);
+            return (uint)c;
+        }
+
+        public static uint MulWordAddTo(int len, uint x, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            ulong c = 0, xVal = (ulong)x;
+            int i = 0;
+            do
+            {
+                c += xVal * y[yOff + i] + z[zOff + i];
+                z[zOff + i] = (uint)c;
+                c >>= 32;
+            }
+            while (++i < len);
+            return (uint)c;
+        }
+
+        public static uint MulWordDwordAddAt(int len, uint x, ulong y, uint[] z, int zPos)
+        {
+            Debug.Assert(zPos <= (len - 3));
+            ulong c = 0, xVal = (ulong)x;
+            c += xVal * (uint)y + z[zPos + 0];
+            z[zPos + 0] = (uint)c;
+            c >>= 32;
+            c += xVal * (y >> 32) + z[zPos + 1];
+            z[zPos + 1] = (uint)c;
+            c >>= 32;
+            c += (ulong)z[zPos + 2];
+            z[zPos + 2] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : IncAt(len, z, zPos + 3);
+        }
+
+        public static uint ShiftDownBit(int len, uint[] z, uint c)
+        {
+            int i = len;
+            while (--i >= 0)
+            {
+                uint next = z[i];
+                z[i] = (next >> 1) | (c << 31);
+                c = next;
+            }
+            return c << 31;
+        }
+
+        public static uint ShiftDownBit(int len, uint[] z, int zOff, uint c)
+        {
+            int i = len;
+            while (--i >= 0)
+            {
+                uint next = z[zOff + i];
+                z[zOff + i] = (next >> 1) | (c << 31);
+                c = next;
+            }
+            return c << 31;
+        }
+
+        public static uint ShiftDownBit(int len, uint[] x, uint c, uint[] z)
+        {
+            int i = len;
+            while (--i >= 0)
+            {
+                uint next = x[i];
+                z[i] = (next >> 1) | (c << 31);
+                c = next;
+            }
+            return c << 31;
+        }
+
+        public static uint ShiftDownBit(int len, uint[] x, int xOff, uint c, uint[] z, int zOff)
+        {
+            int i = len;
+            while (--i >= 0)
+            {
+                uint next = x[xOff + i];
+                z[zOff + i] = (next >> 1) | (c << 31);
+                c = next;
+            }
+            return c << 31;
+        }
+
+        public static uint ShiftDownBits(int len, uint[] z, int bits, uint c)
+        {
+            Debug.Assert(bits > 0 && bits < 32);
+            int i = len;
+            while (--i >= 0)
+            {
+                uint next = z[i];
+                z[i] = (next >> bits) | (c << -bits);
+                c = next;
+            }
+            return c << -bits;
+        }
+
+        public static uint ShiftDownBits(int len, uint[] z, int zOff, int bits, uint c)
+        {
+            Debug.Assert(bits > 0 && bits < 32);
+            int i = len;
+            while (--i >= 0)
+            {
+                uint next = z[zOff + i];
+                z[zOff + i] = (next >> bits) | (c << -bits);
+                c = next;
+            }
+            return c << -bits;
+        }
+
+        public static uint ShiftDownBits(int len, uint[] x, int bits, uint c, uint[] z)
+        {
+            Debug.Assert(bits > 0 && bits < 32);
+            int i = len;
+            while (--i >= 0)
+            {
+                uint next = x[i];
+                z[i] = (next >> bits) | (c << -bits);
+                c = next;
+            }
+            return c << -bits;
+        }
+
+        public static uint ShiftDownBits(int len, uint[] x, int xOff, int bits, uint c, uint[] z, int zOff)
+        {
+            Debug.Assert(bits > 0 && bits < 32);
+            int i = len;
+            while (--i >= 0)
+            {
+                uint next = x[xOff + i];
+                z[zOff + i] = (next >> bits) | (c << -bits);
+                c = next;
+            }
+            return c << -bits;
+        }
+
+        public static uint ShiftDownWord(int len, uint[] z, uint c)
+        {
+            int i = len;
+            while (--i >= 0)
+            {
+                uint next = z[i];
+                z[i] = c;
+                c = next;
+            }
+            return c;
+        }
+
+        public static uint ShiftUpBit(int len, uint[] z, uint c)
+        {
+            for (int i = 0; i < len; ++i)
+            {
+                uint next = z[i];
+                z[i] = (next << 1) | (c >> 31);
+                c = next;
+            }
+            return c >> 31;
+        }
+
+        public static uint ShiftUpBit(int len, uint[] z, int zOff, uint c)
+        {
+            for (int i = 0; i < len; ++i)
+            {
+                uint next = z[zOff + i];
+                z[zOff + i] = (next << 1) | (c >> 31);
+                c = next;
+            }
+            return c >> 31;
+        }
+
+        public static uint ShiftUpBit(int len, uint[] x, uint c, uint[] z)
+        {
+            for (int i = 0; i < len; ++i)
+            {
+                uint next = x[i];
+                z[i] = (next << 1) | (c >> 31);
+                c = next;
+            }
+            return c >> 31;
+        }
+
+        public static uint ShiftUpBit(int len, uint[] x, int xOff, uint c, uint[] z, int zOff)
+        {
+            for (int i = 0; i < len; ++i)
+            {
+                uint next = x[xOff + i];
+                z[zOff + i] = (next << 1) | (c >> 31);
+                c = next;
+            }
+            return c >> 31;
+        }
+
+        public static ulong ShiftUpBit64(int len, ulong[] x, int xOff, ulong c, ulong[] z, int zOff)
+        {
+            for (int i = 0; i < len; ++i)
+            {
+                ulong next = x[xOff + i];
+                z[zOff + i] = (next << 1) | (c >> 63);
+                c = next;
+            }
+            return c >> 63;
+        }
+
+        public static uint ShiftUpBits(int len, uint[] z, int bits, uint c)
+        {
+            Debug.Assert(bits > 0 && bits < 32);
+            for (int i = 0; i < len; ++i)
+            {
+                uint next = z[i];
+                z[i] = (next << bits) | (c >> -bits);
+                c = next;
+            }
+            return c >> -bits;
+        }
+
+        public static uint ShiftUpBits(int len, uint[] z, int zOff, int bits, uint c)
+        {
+            Debug.Assert(bits > 0 && bits < 32);
+            for (int i = 0; i < len; ++i)
+            {
+                uint next = z[zOff + i];
+                z[zOff + i] = (next << bits) | (c >> -bits);
+                c = next;
+            }
+            return c >> -bits;
+        }
+
+        public static ulong ShiftUpBits64(int len, ulong[] z, int zOff, int bits, ulong c)
+        {
+            Debug.Assert(bits > 0 && bits < 64);
+            for (int i = 0; i < len; ++i)
+            {
+                ulong next = z[zOff + i];
+                z[zOff + i] = (next << bits) | (c >> -bits);
+                c = next;
+            }
+            return c >> -bits;
+        }
+
+        public static uint ShiftUpBits(int len, uint[] x, int bits, uint c, uint[] z)
+        {
+            Debug.Assert(bits > 0 && bits < 32);
+            for (int i = 0; i < len; ++i)
+            {
+                uint next = x[i];
+                z[i] = (next << bits) | (c >> -bits);
+                c = next;
+            }
+            return c >> -bits;
+        }
+
+        public static uint ShiftUpBits(int len, uint[] x, int xOff, int bits, uint c, uint[] z, int zOff)
+        {
+            Debug.Assert(bits > 0 && bits < 32);
+            for (int i = 0; i < len; ++i)
+            {
+                uint next = x[xOff + i];
+                z[zOff + i] = (next << bits) | (c >> -bits);
+                c = next;
+            }
+            return c >> -bits;
+        }
+
+        public static ulong ShiftUpBits64(int len, ulong[] x, int xOff, int bits, ulong c, ulong[] z, int zOff)
+        {
+            Debug.Assert(bits > 0 && bits < 64);
+            for (int i = 0; i < len; ++i)
+            {
+                ulong next = x[xOff + i];
+                z[zOff + i] = (next << bits) | (c >> -bits);
+                c = next;
+            }
+            return c >> -bits;
+        }
+
+        public static void Square(int len, uint[] x, uint[] zz)
+        {
+            int extLen = len << 1;
+            uint c = 0;
+            int j = len, k = extLen;
+            do
+            {
+                ulong xVal = (ulong)x[--j];
+                ulong p = xVal * xVal;
+                zz[--k] = (c << 31) | (uint)(p >> 33);
+                zz[--k] = (uint)(p >> 1);
+                c = (uint)p;
+            }
+            while (j > 0);
+
+            for (int i = 1; i < len; ++i)
+            {
+                c = SquareWordAdd(x, i, zz);
+                AddWordAt(extLen, c, zz, i << 1);
+            }
+
+            ShiftUpBit(extLen, zz, x[0] << 31);
+        }
+
+        public static void Square(int len, uint[] x, int xOff, uint[] zz, int zzOff)
+        {
+            int extLen = len << 1;
+            uint c = 0;
+            int j = len, k = extLen;
+            do
+            {
+                ulong xVal = (ulong)x[xOff + --j];
+                ulong p = xVal * xVal;
+                zz[zzOff + --k] = (c << 31) | (uint)(p >> 33);
+                zz[zzOff + --k] = (uint)(p >> 1);
+                c = (uint)p;
+            }
+            while (j > 0);
+
+            for (int i = 1; i < len; ++i)
+            {
+                c = SquareWordAdd(x, xOff, i, zz, zzOff);
+                AddWordAt(extLen, c, zz, zzOff, i << 1);
+            }
+
+            ShiftUpBit(extLen, zz, zzOff, x[xOff] << 31);
+        }
+
+        public static uint SquareWordAdd(uint[] x, int xPos, uint[] z)
+        {
+            ulong c = 0, xVal = (ulong)x[xPos];
+            int i = 0;
+            do
+            {
+                c += xVal * x[i] + z[xPos + i];
+                z[xPos + i] = (uint)c;
+                c >>= 32;
+            }
+            while (++i < xPos);
+            return (uint)c;
+        }
+
+        public static uint SquareWordAdd(uint[] x, int xOff, int xPos, uint[] z, int zOff)
+        {
+            ulong c = 0, xVal = (ulong)x[xOff + xPos];
+            int i = 0;
+            do
+            {
+                c += xVal * (x[xOff + i] & M) + (z[xPos + zOff] & M);
+                z[xPos + zOff] = (uint)c;
+                c >>= 32;
+                ++zOff;
+            }
+            while (++i < xPos);
+            return (uint)c;
+        }
+
+        public static int Sub(int len, uint[] x, uint[] y, uint[] z)
+        {
+            long c = 0;
+            for (int i = 0; i < len; ++i)
+            {
+                c += (long)x[i] - y[i];
+                z[i] = (uint)c;
+                c >>= 32;
+            }
+            return (int)c;
+        }
+
+        public static int Sub(int len, uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            long c = 0;
+            for (int i = 0; i < len; ++i)
+            {
+                c += (long)x[xOff + i] - y[yOff + i];
+                z[zOff + i] = (uint)c;
+                c >>= 32;
+            }
+            return (int)c;
+        }
+        public static int Sub33At(int len, uint x, uint[] z, int zPos)
+        {
+            Debug.Assert(zPos <= (len - 2));
+            long c = (long)z[zPos + 0] - x;
+            z[zPos + 0] = (uint)c;
+            c >>= 32;
+            c += (long)z[zPos + 1] - 1;
+            z[zPos + 1] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : DecAt(len, z, zPos + 2);
+        }
+
+        public static int Sub33At(int len, uint x, uint[] z, int zOff, int zPos)
+        {
+            Debug.Assert(zPos <= (len - 2));
+            long c = (long)z[zOff + zPos] - x;
+            z[zOff + zPos] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + zPos + 1] - 1;
+            z[zOff + zPos + 1] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : DecAt(len, z, zOff, zPos + 2);
+        }
+
+        public static int Sub33From(int len, uint x, uint[] z)
+        {
+            long c = (long)z[0] - x;
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (long)z[1] - 1;
+            z[1] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : DecAt(len, z, 2);
+        }
+
+        public static int Sub33From(int len, uint x, uint[] z, int zOff)
+        {
+            long c = (long)z[zOff + 0] - x;
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 1] - 1;
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : DecAt(len, z, zOff, 2);
+        }
+
+        public static int SubBothFrom(int len, uint[] x, uint[] y, uint[] z)
+        {
+            long c = 0;
+            for (int i = 0; i < len; ++i)
+            {
+                c += (long)z[i] - x[i] - y[i];
+                z[i] = (uint)c;
+                c >>= 32;
+            }
+            return (int)c;
+        }
+
+        public static int SubBothFrom(int len, uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            long c = 0;
+            for (int i = 0; i < len; ++i)
+            {
+                c += (long)z[zOff + i] - x[xOff + i] - y[yOff + i];
+                z[zOff + i] = (uint)c;
+                c >>= 32;
+            }
+            return (int)c;
+        }
+
+        public static int SubDWordAt(int len, ulong x, uint[] z, int zPos)
+        {
+            Debug.Assert(zPos <= (len - 2));
+            long c = (long)z[zPos + 0] - (long)(x & M);
+            z[zPos + 0] = (uint)c;
+            c >>= 32;
+            c += (long)z[zPos + 1] - (long)(x >> 32);
+            z[zPos + 1] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : DecAt(len, z, zPos + 2);
+        }
+
+        public static int SubDWordAt(int len, ulong x, uint[] z, int zOff, int zPos)
+        {
+            Debug.Assert(zPos <= (len - 2));
+            long c = (long)z[zOff + zPos] - (long)(x & M);
+            z[zOff + zPos] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + zPos + 1] - (long)(x >> 32);
+            z[zOff + zPos + 1] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : DecAt(len, z,  zOff, zPos + 2);
+        }
+
+        public static int SubDWordFrom(int len, ulong x, uint[] z)
+        {
+            long c = (long)z[0] - (long)(x & M);
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (long)z[1] - (long)(x >> 32);
+            z[1] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : DecAt(len, z, 2);
+        }
+
+        public static int SubDWordFrom(int len, ulong x, uint[] z, int zOff)
+        {
+            long c = (long)z[zOff + 0] - (long)(x & M);
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 1] - (long)(x >> 32);
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : DecAt(len, z, zOff, 2);
+        }
+
+        public static int SubFrom(int len, uint[] x, uint[] z)
+        {
+            long c = 0;
+            for (int i = 0; i < len; ++i)
+            {
+                c += (long)z[i] - x[i];
+                z[i] = (uint)c;
+                c >>= 32;
+            }
+            return (int)c;
+        }
+
+        public static int SubFrom(int len, uint[] x, int xOff, uint[] z, int zOff)
+        {
+            long c = 0;
+            for (int i = 0; i < len; ++i)
+            {
+                c += (long)z[zOff + i] - x[xOff + i];
+                z[zOff + i] = (uint)c;
+                c >>= 32;
+            }
+            return (int)c;
+        }
+
+        public static int SubWordAt(int len, uint x, uint[] z, int zPos)
+        {
+            Debug.Assert(zPos <= (len - 1));
+            long c = (long)z[zPos] - x;
+            z[zPos] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : DecAt(len, z, zPos + 1);
+        }
+
+        public static int SubWordAt(int len, uint x, uint[] z, int zOff, int zPos)
+        {
+            Debug.Assert(zPos <= (len - 1));
+            long c = (long)z[zOff + zPos] - x;
+            z[zOff + zPos] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : DecAt(len, z, zOff, zPos + 1);
+        }
+
+        public static int SubWordFrom(int len, uint x, uint[] z)
+        {
+            long c = (long)z[0] - x;
+            z[0] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : DecAt(len, z, 1);
+        }
+
+        public static int SubWordFrom(int len, uint x, uint[] z, int zOff)
+        {
+            long c = (long)z[zOff + 0] - x;
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : DecAt(len, z, zOff, 1);
+        }
+
+        public static BigInteger ToBigInteger(int len, uint[] x)
+        {
+            byte[] bs = new byte[len << 2];
+            for (int i = 0; i < len; ++i)
+            {
+                uint x_i = x[i];
+                if (x_i != 0)
+                {
+                    Pack.UInt32_To_BE(x_i, bs, (len - 1 - i) << 2);
+                }
+            }
+            return new BigInteger(1, bs);
+        }
+
+        public static void Zero(int len, uint[] z)
+        {
+            for (int i = 0; i < len; ++i)
+            {
+                z[i] = 0;
+            }
+        }
+    }
+}
diff --git a/crypto/src/math/raw/Nat128.cs b/crypto/src/math/raw/Nat128.cs
new file mode 100644
index 000000000..819c52062
--- /dev/null
+++ b/crypto/src/math/raw/Nat128.cs
@@ -0,0 +1,856 @@
+using System;
+using System.Diagnostics;
+
+using Org.BouncyCastle.Crypto.Utilities;
+
+namespace Org.BouncyCastle.Math.Raw
+{
+    internal abstract class Nat128
+    {
+        private const ulong M = 0xFFFFFFFFUL;
+
+        public static uint Add(uint[] x, uint[] y, uint[] z)
+        {
+            ulong c = 0;
+            c += (ulong)x[0] + y[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[1] + y[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[2] + y[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[3] + y[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint AddBothTo(uint[] x, uint[] y, uint[] z)
+        {
+            ulong c = 0;
+            c += (ulong)x[0] + y[0] + z[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[1] + y[1] + z[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[2] + y[2] + z[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[3] + y[3] + z[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint AddTo(uint[] x, uint[] z)
+        {
+            ulong c = 0;
+            c += (ulong)x[0] + z[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[1] + z[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[2] + z[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[3] + z[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint AddTo(uint[] x, int xOff, uint[] z, int zOff, uint cIn)
+        {
+            ulong c = cIn;
+            c += (ulong)x[xOff + 0] + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 1] + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 2] + z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 3] + z[zOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint AddToEachOther(uint[] u, int uOff, uint[] v, int vOff)
+        {
+            ulong c = 0;
+            c += (ulong)u[uOff + 0] + v[vOff + 0];
+            u[uOff + 0] = (uint)c;
+            v[vOff + 0] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 1] + v[vOff + 1];
+            u[uOff + 1] = (uint)c;
+            v[vOff + 1] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 2] + v[vOff + 2];
+            u[uOff + 2] = (uint)c;
+            v[vOff + 2] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 3] + v[vOff + 3];
+            u[uOff + 3] = (uint)c;
+            v[vOff + 3] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static void Copy(uint[] x, uint[] z)
+        {
+            z[0] = x[0];
+            z[1] = x[1];
+            z[2] = x[2];
+            z[3] = x[3];
+        }
+
+        public static void Copy64(ulong[] x, ulong[] z)
+        {
+            z[0] = x[0];
+            z[1] = x[1];
+        }
+
+        public static uint[] Create()
+        {
+            return new uint[4];
+        }
+
+        public static ulong[] Create64()
+        {
+            return new ulong[2];
+        }
+
+        public static uint[] CreateExt()
+        {
+            return new uint[8];
+        }
+
+        public static ulong[] CreateExt64()
+        {
+            return new ulong[4];
+        }
+
+        public static bool Diff(uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            bool pos = Gte(x, xOff, y, yOff);
+            if (pos)
+            {
+                Sub(x, xOff, y, yOff, z, zOff);
+            }
+            else
+            {
+                Sub(y, yOff, x, xOff, z, zOff);
+            }
+            return pos;
+        }
+
+        public static bool Eq(uint[] x, uint[] y)
+        {
+            for (int i = 3; i >= 0; --i)
+            {
+                if (x[i] != y[i])
+                    return false;
+            }
+            return true;
+        }
+
+        public static bool Eq64(ulong[] x, ulong[] y)
+        {
+            for (int i = 1; i >= 0; --i)
+            {
+                if (x[i] != y[i])
+                    return false;
+            }
+            return true;
+        }
+
+        public static uint[] FromBigInteger(BigInteger x)
+        {
+            if (x.SignValue < 0 || x.BitLength > 128)
+                throw new ArgumentException();
+
+            uint[] z = Create();
+            int i = 0;
+            while (x.SignValue != 0)
+            {
+                z[i++] = (uint)x.IntValue;
+                x = x.ShiftRight(32);
+            }
+            return z;
+        }
+
+        public static ulong[] FromBigInteger64(BigInteger x)
+        {
+            if (x.SignValue < 0 || x.BitLength > 128)
+                throw new ArgumentException();
+
+            ulong[] z = Create64();
+            int i = 0;
+            while (x.SignValue != 0)
+            {
+                z[i++] = (ulong)x.LongValue;
+                x = x.ShiftRight(64);
+            }
+            return z;
+        }
+
+        public static uint GetBit(uint[] x, int bit)
+        {
+            if (bit == 0)
+            {
+                return x[0] & 1;
+            }
+            if ((bit & 127) != bit)
+            {
+                return 0;
+            }
+            int w = bit >> 5;
+            int b = bit & 31;
+            return (x[w] >> b) & 1;
+        }
+
+        public static bool Gte(uint[] x, uint[] y)
+        {
+            for (int i = 3; i >= 0; --i)
+            {
+                uint x_i = x[i], y_i = y[i];
+                if (x_i < y_i)
+                    return false;
+                if (x_i > y_i)
+                    return true;
+            }
+            return true;
+        }
+
+        public static bool Gte(uint[] x, int xOff, uint[] y, int yOff)
+        {
+            for (int i = 3; i >= 0; --i)
+            {
+                uint x_i = x[xOff + i], y_i = y[yOff + i];
+                if (x_i < y_i)
+                    return false;
+                if (x_i > y_i)
+                    return true;
+            }
+            return true;
+        }
+
+        public static bool IsOne(uint[] x)
+        {
+            if (x[0] != 1)
+            {
+                return false;
+            }
+            for (int i = 1; i < 4; ++i)
+            {
+                if (x[i] != 0)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static bool IsOne64(ulong[] x)
+        {
+            if (x[0] != 1UL)
+            {
+                return false;
+            }
+            for (int i = 1; i < 2; ++i)
+            {
+                if (x[i] != 0UL)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static bool IsZero(uint[] x)
+        {
+            for (int i = 0; i < 4; ++i)
+            {
+                if (x[i] != 0)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static bool IsZero64(ulong[] x)
+        {
+            for (int i = 0; i < 2; ++i)
+            {
+                if (x[i] != 0UL)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static void Mul(uint[] x, uint[] y, uint[] zz)
+        {
+            ulong y_0 = y[0];
+            ulong y_1 = y[1];
+            ulong y_2 = y[2];
+            ulong y_3 = y[3];
+
+            {
+                ulong c = 0, x_0 = x[0];
+                c += x_0 * y_0;
+                zz[0] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_1;
+                zz[1] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_2;
+                zz[2] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_3;
+                zz[3] = (uint)c;
+                c >>= 32;
+                zz[4] = (uint)c;
+            }
+
+            for (int i = 1; i < 4; ++i)
+            {
+                ulong c = 0, x_i = x[i];
+                c += x_i * y_0 + zz[i + 0];
+                zz[i + 0] = (uint)c;
+                c >>= 32;
+                c += x_i * y_1 + zz[i + 1];
+                zz[i + 1] = (uint)c;
+                c >>= 32;
+                c += x_i * y_2 + zz[i + 2];
+                zz[i + 2] = (uint)c;
+                c >>= 32;
+                c += x_i * y_3 + zz[i + 3];
+                zz[i + 3] = (uint)c;
+                c >>= 32;
+                zz[i + 4] = (uint)c;
+            }
+        }
+
+        public static void Mul(uint[] x, int xOff, uint[] y, int yOff, uint[] zz, int zzOff)
+        {
+            ulong y_0 = y[yOff + 0];
+            ulong y_1 = y[yOff + 1];
+            ulong y_2 = y[yOff + 2];
+            ulong y_3 = y[yOff + 3];
+
+            {
+                ulong c = 0, x_0 = x[xOff + 0];
+                c += x_0 * y_0;
+                zz[zzOff + 0] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_1;
+                zz[zzOff + 1] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_2;
+                zz[zzOff + 2] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_3;
+                zz[zzOff + 3] = (uint)c;
+                c >>= 32;
+                zz[zzOff + 4] = (uint)c;
+            }
+
+            for (int i = 1; i < 4; ++i)
+            {
+                ++zzOff;
+                ulong c = 0, x_i = x[xOff + i];
+                c += x_i * y_0 + zz[zzOff + 0];
+                zz[zzOff + 0] = (uint)c;
+                c >>= 32;
+                c += x_i * y_1 + zz[zzOff + 1];
+                zz[zzOff + 1] = (uint)c;
+                c >>= 32;
+                c += x_i * y_2 + zz[zzOff + 2];
+                zz[zzOff + 2] = (uint)c;
+                c >>= 32;
+                c += x_i * y_3 + zz[zzOff + 3];
+                zz[zzOff + 3] = (uint)c;
+                c >>= 32;
+                zz[zzOff + 4] = (uint)c;
+            }
+        }
+
+        public static uint MulAddTo(uint[] x, uint[] y, uint[] zz)
+        {
+            ulong y_0 = y[0];
+            ulong y_1 = y[1];
+            ulong y_2 = y[2];
+            ulong y_3 = y[3];
+
+            ulong zc = 0;
+            for (int i = 0; i < 4; ++i)
+            {
+                ulong c = 0, x_i = x[i];
+                c += x_i * y_0 + zz[i + 0];
+                zz[i + 0] = (uint)c;
+                c >>= 32;
+                c += x_i * y_1 + zz[i + 1];
+                zz[i + 1] = (uint)c;
+                c >>= 32;
+                c += x_i * y_2 + zz[i + 2];
+                zz[i + 2] = (uint)c;
+                c >>= 32;
+                c += x_i * y_3 + zz[i + 3];
+                zz[i + 3] = (uint)c;
+                c >>= 32;
+                c += zc + zz[i + 4];
+                zz[i + 4] = (uint)c;
+                zc = c >> 32;
+            }
+            return (uint)zc;
+        }
+
+        public static uint MulAddTo(uint[] x, int xOff, uint[] y, int yOff, uint[] zz, int zzOff)
+        {
+            ulong y_0 = y[yOff + 0];
+            ulong y_1 = y[yOff + 1];
+            ulong y_2 = y[yOff + 2];
+            ulong y_3 = y[yOff + 3];
+
+            ulong zc = 0;
+            for (int i = 0; i < 4; ++i)
+            {
+                ulong c = 0, x_i = x[xOff + i];
+                c += x_i * y_0 + zz[zzOff + 0];
+                zz[zzOff + 0] = (uint)c;
+                c >>= 32;
+                c += x_i * y_1 + zz[zzOff + 1];
+                zz[zzOff + 1] = (uint)c;
+                c >>= 32;
+                c += x_i * y_2 + zz[zzOff + 2];
+                zz[zzOff + 2] = (uint)c;
+                c >>= 32;
+                c += x_i * y_3 + zz[zzOff + 3];
+                zz[zzOff + 3] = (uint)c;
+                c >>= 32;
+                c += zc + zz[zzOff + 4];
+                zz[zzOff + 4] = (uint)c;
+                zc = c >> 32;
+                ++zzOff;
+            }
+            return (uint)zc;
+        }
+
+        public static ulong Mul33Add(uint w, uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            Debug.Assert(w >> 31 == 0);
+
+            ulong c = 0, wVal = w;
+            ulong x0 = x[xOff + 0];
+            c += wVal * x0 + y[yOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            ulong x1 = x[xOff + 1];
+            c += wVal * x1 + x0 + y[yOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            ulong x2 = x[xOff + 2];
+            c += wVal * x2 + x1 + y[yOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            ulong x3 = x[xOff + 3];
+            c += wVal * x3 + x2 + y[yOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            c += x3;
+            return c;
+        }
+
+        public static uint MulWordAddExt(uint x, uint[] yy, int yyOff, uint[] zz, int zzOff)
+        {
+            Debug.Assert(yyOff <= 4);
+            Debug.Assert(zzOff <= 4);
+
+            ulong c = 0, xVal = x;
+            c += xVal * yy[yyOff + 0] + zz[zzOff + 0];
+            zz[zzOff + 0] = (uint)c;
+            c >>= 32;
+            c += xVal * yy[yyOff + 1] + zz[zzOff + 1];
+            zz[zzOff + 1] = (uint)c;
+            c >>= 32;
+            c += xVal * yy[yyOff + 2] + zz[zzOff + 2];
+            zz[zzOff + 2] = (uint)c;
+            c >>= 32;
+            c += xVal * yy[yyOff + 3] + zz[zzOff + 3];
+            zz[zzOff + 3] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint Mul33DWordAdd(uint x, ulong y, uint[] z, int zOff)
+        {
+            Debug.Assert(x >> 31 == 0);
+            Debug.Assert(zOff <= 0);
+            ulong c = 0, xVal = x;
+            ulong y00 = y & M;
+            c += xVal * y00 + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            ulong y01 = y >> 32;
+            c += xVal * y01 + y00 + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += y01 + z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += z[zOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint Mul33WordAdd(uint x, uint y, uint[] z, int zOff)
+        {
+            Debug.Assert(x >> 31 == 0);
+            Debug.Assert(zOff <= 1);
+            ulong c = 0, yVal = y;
+            c += yVal * x + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += yVal + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : Nat.IncAt(4, z, zOff, 3);
+        }
+
+        public static uint MulWordDwordAdd(uint x, ulong y, uint[] z, int zOff)
+        {
+            Debug.Assert(zOff <= 1);
+            ulong c = 0, xVal = x;
+            c += xVal * y + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += xVal * (y >> 32) + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : Nat.IncAt(4, z, zOff, 3);
+        }
+
+        public static uint MulWordsAdd(uint x, uint y, uint[] z, int zOff)
+        {
+            Debug.Assert(zOff <= 2);
+
+            ulong c = 0, xVal = x, yVal = y;
+            c += yVal * xVal + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : Nat.IncAt(4, z, zOff, 2);
+        }
+
+        public static uint MulWord(uint x, uint[] y, uint[] z, int zOff)
+        {
+            ulong c = 0, xVal = x;
+            int i = 0;
+            do
+            {
+                c += xVal * y[i];
+                z[zOff + i] = (uint)c;
+                c >>= 32;
+            }
+            while (++i < 4);
+            return (uint)c;
+        }
+
+        public static void Square(uint[] x, uint[] zz)
+        {
+            ulong x_0 = x[0];
+            ulong zz_1;
+
+            uint c = 0, w;
+            {
+                int i = 3, j = 8;
+                do
+                {
+                    ulong xVal = x[i--];
+                    ulong p = xVal * xVal;
+                    zz[--j] = (c << 31) | (uint)(p >> 33);
+                    zz[--j] = (uint)(p >> 1);
+                    c = (uint)p;
+                }
+                while (i > 0);
+
+                {
+                    ulong p = x_0 * x_0;
+                    zz_1 = (ulong)(c << 31) | (p >> 33);
+                    zz[0] = (uint)p;
+                    c = (uint)(p >> 32) & 1;
+                }
+            }
+
+            ulong x_1 = x[1];
+            ulong zz_2 = zz[2];
+
+            {
+                zz_1 += x_1 * x_0;
+                w = (uint)zz_1;
+                zz[1] = (w << 1) | c;
+                c = w >> 31;
+                zz_2 += zz_1 >> 32;
+            }
+
+            ulong x_2 = x[2];
+            ulong zz_3 = zz[3];
+            ulong zz_4 = zz[4];
+            {
+                zz_2 += x_2 * x_0;
+                w = (uint)zz_2;
+                zz[2] = (w << 1) | c;
+                c = w >> 31;
+                zz_3 += (zz_2 >> 32) + x_2 * x_1;
+                zz_4 += zz_3 >> 32;
+                zz_3 &= M;
+            }
+
+            ulong x_3 = x[3];
+            ulong zz_5 = zz[5];
+            ulong zz_6 = zz[6];
+            {
+                zz_3 += x_3 * x_0;
+                w = (uint)zz_3;
+                zz[3] = (w << 1) | c;
+                c = w >> 31;
+                zz_4 += (zz_3 >> 32) + x_3 * x_1;
+                zz_5 += (zz_4 >> 32) + x_3 * x_2;
+                zz_6 += zz_5 >> 32;
+            }
+
+            w = (uint)zz_4;
+            zz[4] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_5;
+            zz[5] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_6;
+            zz[6] = (w << 1) | c;
+            c = w >> 31;
+            w = zz[7] + (uint)(zz_6 >> 32);
+            zz[7] = (w << 1) | c;
+        }
+
+        public static void Square(uint[] x, int xOff, uint[] zz, int zzOff)
+        {
+            ulong x_0 = x[xOff + 0];
+            ulong zz_1;
+
+            uint c = 0, w;
+            {
+                int i = 3, j = 8;
+                do
+                {
+                    ulong xVal = x[xOff + i--];
+                    ulong p = xVal * xVal;
+                    zz[zzOff + --j] = (c << 31) | (uint)(p >> 33);
+                    zz[zzOff + --j] = (uint)(p >> 1);
+                    c = (uint)p;
+                }
+                while (i > 0);
+
+                {
+                    ulong p = x_0 * x_0;
+                    zz_1 = (ulong)(c << 31) | (p >> 33);
+                    zz[zzOff + 0] = (uint)p;
+                    c = (uint)(p >> 32) & 1;
+                }
+            }
+
+            ulong x_1 = x[xOff + 1];
+            ulong zz_2 = zz[zzOff + 2];
+
+            {
+                zz_1 += x_1 * x_0;
+                w = (uint)zz_1;
+                zz[zzOff + 1] = (w << 1) | c;
+                c = w >> 31;
+                zz_2 += zz_1 >> 32;
+            }
+
+            ulong x_2 = x[xOff + 2];
+            ulong zz_3 = zz[zzOff + 3];
+            ulong zz_4 = zz[zzOff + 4];
+            {
+                zz_2 += x_2 * x_0;
+                w = (uint)zz_2;
+                zz[zzOff + 2] = (w << 1) | c;
+                c = w >> 31;
+                zz_3 += (zz_2 >> 32) + x_2 * x_1;
+                zz_4 += zz_3 >> 32;
+                zz_3 &= M;
+            }
+
+            ulong x_3 = x[xOff + 3];
+            ulong zz_5 = zz[zzOff + 5];
+            ulong zz_6 = zz[zzOff + 6];
+            {
+                zz_3 += x_3 * x_0;
+                w = (uint)zz_3;
+                zz[zzOff + 3] = (w << 1) | c;
+                c = w >> 31;
+                zz_4 += (zz_3 >> 32) + x_3 * x_1;
+                zz_5 += (zz_4 >> 32) + x_3 * x_2;
+                zz_6 += zz_5 >> 32;
+            }
+
+            w = (uint)zz_4;
+            zz[zzOff + 4] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_5;
+            zz[zzOff + 5] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_6;
+            zz[zzOff + 6] = (w << 1) | c;
+            c = w >> 31;
+            w = zz[zzOff + 7] + (uint)(zz_6 >> 32);
+            zz[zzOff + 7] = (w << 1) | c;
+        }
+
+        public static int Sub(uint[] x, uint[] y, uint[] z)
+        {
+            long c = 0;
+            c += (long)x[0] - y[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (long)x[1] - y[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (long)x[2] - y[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (long)x[3] - y[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static int Sub(uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            long c = 0;
+            c += (long)x[xOff + 0] - y[yOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 1] - y[yOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 2] - y[yOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 3] - y[yOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static int SubBothFrom(uint[] x, uint[] y, uint[] z)
+        {
+            long c = 0;
+            c += (long)z[0] - x[0] - y[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (long)z[1] - x[1] - y[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (long)z[2] - x[2] - y[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (long)z[3] - x[3] - y[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static int SubFrom(uint[] x, uint[] z)
+        {
+            long c = 0;
+            c += (long)z[0] - x[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (long)z[1] - x[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (long)z[2] - x[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (long)z[3] - x[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static int SubFrom(uint[] x, int xOff, uint[] z, int zOff)
+        {
+            long c = 0;
+            c += (long)z[zOff + 0] - x[xOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 1] - x[xOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 2] - x[xOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 3] - x[xOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static BigInteger ToBigInteger(uint[] x)
+        {
+            byte[] bs = new byte[16];
+            for (int i = 0; i < 4; ++i)
+            {
+                uint x_i = x[i];
+                if (x_i != 0)
+                {
+                    Pack.UInt32_To_BE(x_i, bs, (3 - i) << 2);
+                }
+            }
+            return new BigInteger(1, bs);
+        }
+
+        public static BigInteger ToBigInteger64(ulong[] x)
+        {
+            byte[] bs = new byte[16];
+            for (int i = 0; i < 2; ++i)
+            {
+                ulong x_i = x[i];
+                if (x_i != 0UL)
+                {
+                    Pack.UInt64_To_BE(x_i, bs, (1 - i) << 3);
+                }
+            }
+            return new BigInteger(1, bs);
+        }
+
+        public static void Zero(uint[] z)
+        {
+            z[0] = 0;
+            z[1] = 0;
+            z[2] = 0;
+            z[3] = 0;
+        }
+    }
+}
diff --git a/crypto/src/math/raw/Nat160.cs b/crypto/src/math/raw/Nat160.cs
new file mode 100644
index 000000000..153ac0a43
--- /dev/null
+++ b/crypto/src/math/raw/Nat160.cs
@@ -0,0 +1,874 @@
+using System;
+using System.Diagnostics;
+
+using Org.BouncyCastle.Crypto.Utilities;
+
+namespace Org.BouncyCastle.Math.Raw
+{
+    internal abstract class Nat160
+    {
+        private const ulong M = 0xFFFFFFFFUL;
+
+        public static uint Add(uint[] x, uint[] y, uint[] z)
+        {
+            ulong c = 0;
+            c += (ulong)x[0] + y[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[1] + y[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[2] + y[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[3] + y[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[4] + y[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint AddBothTo(uint[] x, uint[] y, uint[] z)
+        {
+            ulong c = 0;
+            c += (ulong)x[0] + y[0] + z[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[1] + y[1] + z[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[2] + y[2] + z[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[3] + y[3] + z[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[4] + y[4] + z[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint AddTo(uint[] x, uint[] z)
+        {
+            ulong c = 0;
+            c += (ulong)x[0] + z[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[1] + z[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[2] + z[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[3] + z[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[4] + z[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint AddTo(uint[] x, int xOff, uint[] z, int zOff, uint cIn)
+        {
+            ulong c = cIn;
+            c += (ulong)x[xOff + 0] + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 1] + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 2] + z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 3] + z[zOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 4] + z[zOff + 4];
+            z[zOff + 4] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 5] + z[zOff + 5];
+            return (uint)c;
+        }
+
+        public static uint AddToEachOther(uint[] u, int uOff, uint[] v, int vOff)
+        {
+            ulong c = 0;
+            c += (ulong)u[uOff + 0] + v[vOff + 0];
+            u[uOff + 0] = (uint)c;
+            v[vOff + 0] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 1] + v[vOff + 1];
+            u[uOff + 1] = (uint)c;
+            v[vOff + 1] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 2] + v[vOff + 2];
+            u[uOff + 2] = (uint)c;
+            v[vOff + 2] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 3] + v[vOff + 3];
+            u[uOff + 3] = (uint)c;
+            v[vOff + 3] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 4] + v[vOff + 4];
+            u[uOff + 4] = (uint)c;
+            v[vOff + 4] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static void Copy(uint[] x, uint[] z)
+        {
+            z[0] = x[0];
+            z[1] = x[1];
+            z[2] = x[2];
+            z[3] = x[3];
+            z[4] = x[4];
+        }
+
+        public static uint[] Create()
+        {
+            return new uint[5];
+        }
+
+        public static uint[] CreateExt()
+        {
+            return new uint[10];
+        }
+
+        public static bool Diff(uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            bool pos = Gte(x, xOff, y, yOff);
+            if (pos)
+            {
+                Sub(x, xOff, y, yOff, z, zOff);
+            }
+            else
+            {
+                Sub(y, yOff, x, xOff, z, zOff);
+            }
+            return pos;
+        }
+
+        public static bool Eq(uint[] x, uint[] y)
+        {
+            for (int i = 4; i >= 0; --i)
+            {
+                if (x[i] != y[i])
+                    return false;
+            }
+            return true;
+        }
+
+        public static uint[] FromBigInteger(BigInteger x)
+        {
+            if (x.SignValue < 0 || x.BitLength > 160)
+                throw new ArgumentException();
+
+            uint[] z = Create();
+            int i = 0;
+            while (x.SignValue != 0)
+            {
+                z[i++] = (uint)x.IntValue;
+                x = x.ShiftRight(32);
+            }
+            return z;
+        }
+
+        public static uint GetBit(uint[] x, int bit)
+        {
+            if (bit == 0)
+            {
+                return x[0] & 1;
+            }
+            int w = bit >> 5;
+            if (w < 0 || w >= 5)
+            {
+                return 0;
+            }
+            int b = bit & 31;
+            return (x[w] >> b) & 1;
+        }
+
+        public static bool Gte(uint[] x, uint[] y)
+        {
+            for (int i = 4; i >= 0; --i)
+            {
+                uint x_i = x[i], y_i = y[i];
+                if (x_i < y_i)
+                    return false;
+                if (x_i > y_i)
+                    return true;
+            }
+            return true;
+        }
+
+        public static bool Gte(uint[] x, int xOff, uint[] y, int yOff)
+        {
+            for (int i = 4; i >= 0; --i)
+            {
+                uint x_i = x[xOff + i], y_i = y[yOff + i];
+                if (x_i < y_i)
+                    return false;
+                if (x_i > y_i)
+                    return true;
+            }
+            return true;
+        }
+
+        public static bool IsOne(uint[] x)
+        {
+            if (x[0] != 1)
+            {
+                return false;
+            }
+            for (int i = 1; i < 5; ++i)
+            {
+                if (x[i] != 0)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static bool IsZero(uint[] x)
+        {
+            for (int i = 0; i < 5; ++i)
+            {
+                if (x[i] != 0)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static void Mul(uint[] x, uint[] y, uint[] zz)
+        {
+            ulong y_0 = y[0];
+            ulong y_1 = y[1];
+            ulong y_2 = y[2];
+            ulong y_3 = y[3];
+            ulong y_4 = y[4];
+
+            {
+                ulong c = 0, x_0 = x[0];
+                c += x_0 * y_0;
+                zz[0] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_1;
+                zz[1] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_2;
+                zz[2] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_3;
+                zz[3] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_4;
+                zz[4] = (uint)c;
+                c >>= 32;
+                zz[5] = (uint)c;
+            }
+
+            for (int i = 1; i < 5; ++i)
+            {
+                ulong c = 0, x_i = x[i];
+                c += x_i * y_0 + zz[i + 0];
+                zz[i + 0] = (uint)c;
+                c >>= 32;
+                c += x_i * y_1 + zz[i + 1];
+                zz[i + 1] = (uint)c;
+                c >>= 32;
+                c += x_i * y_2 + zz[i + 2];
+                zz[i + 2] = (uint)c;
+                c >>= 32;
+                c += x_i * y_3 + zz[i + 3];
+                zz[i + 3] = (uint)c;
+                c >>= 32;
+                c += x_i * y_4 + zz[i + 4];
+                zz[i + 4] = (uint)c;
+                c >>= 32;
+                zz[i + 5] = (uint)c;
+            }
+        }
+
+        public static void Mul(uint[] x, int xOff, uint[] y, int yOff, uint[] zz, int zzOff)
+        {
+            ulong y_0 = y[yOff + 0];
+            ulong y_1 = y[yOff + 1];
+            ulong y_2 = y[yOff + 2];
+            ulong y_3 = y[yOff + 3];
+            ulong y_4 = y[yOff + 4];
+
+            {
+                ulong c = 0, x_0 = x[xOff + 0];
+                c += x_0 * y_0;
+                zz[zzOff + 0] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_1;
+                zz[zzOff + 1] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_2;
+                zz[zzOff + 2] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_3;
+                zz[zzOff + 3] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_4;
+                zz[zzOff + 4] = (uint)c;
+                c >>= 32;
+                zz[zzOff + 5] = (uint)c;
+            }
+
+            for (int i = 1; i < 5; ++i)
+            {
+                ++zzOff;
+                ulong c = 0, x_i = x[xOff + i];
+                c += x_i * y_0 + zz[zzOff + 0];
+                zz[zzOff + 0] = (uint)c;
+                c >>= 32;
+                c += x_i * y_1 + zz[zzOff + 1];
+                zz[zzOff + 1] = (uint)c;
+                c >>= 32;
+                c += x_i * y_2 + zz[zzOff + 2];
+                zz[zzOff + 2] = (uint)c;
+                c >>= 32;
+                c += x_i * y_3 + zz[zzOff + 3];
+                zz[zzOff + 3] = (uint)c;
+                c >>= 32;
+                c += x_i * y_4 + zz[zzOff + 4];
+                zz[zzOff + 4] = (uint)c;
+                c >>= 32;
+                zz[zzOff + 5] = (uint)c;
+            }
+        }
+
+        public static uint MulAddTo(uint[] x, uint[] y, uint[] zz)
+        {
+            ulong y_0 = y[0];
+            ulong y_1 = y[1];
+            ulong y_2 = y[2];
+            ulong y_3 = y[3];
+            ulong y_4 = y[4];
+
+            ulong zc = 0;
+            for (int i = 0; i < 5; ++i)
+            {
+                ulong c = 0, x_i = x[i];
+                c += x_i * y_0 + zz[i + 0];
+                zz[i + 0] = (uint)c;
+                c >>= 32;
+                c += x_i * y_1 + zz[i + 1];
+                zz[i + 1] = (uint)c;
+                c >>= 32;
+                c += x_i * y_2 + zz[i + 2];
+                zz[i + 2] = (uint)c;
+                c >>= 32;
+                c += x_i * y_3 + zz[i + 3];
+                zz[i + 3] = (uint)c;
+                c >>= 32;
+                c += x_i * y_4 + zz[i + 4];
+                zz[i + 4] = (uint)c;
+                c >>= 32;
+                c += zc + zz[i + 5];
+                zz[i + 5] = (uint)c;
+                zc = c >> 32;
+            }
+            return (uint)zc;
+        }
+
+        public static uint MulAddTo(uint[] x, int xOff, uint[] y, int yOff, uint[] zz, int zzOff)
+        {
+            ulong y_0 = y[yOff + 0];
+            ulong y_1 = y[yOff + 1];
+            ulong y_2 = y[yOff + 2];
+            ulong y_3 = y[yOff + 3];
+            ulong y_4 = y[yOff + 4];
+
+            ulong zc = 0;
+            for (int i = 0; i < 5; ++i)
+            {
+                ulong c = 0, x_i = x[xOff + i];
+                c += x_i * y_0 + zz[zzOff + 0];
+                zz[zzOff + 0] = (uint)c;
+                c >>= 32;
+                c += x_i * y_1 + zz[zzOff + 1];
+                zz[zzOff + 1] = (uint)c;
+                c >>= 32;
+                c += x_i * y_2 + zz[zzOff + 2];
+                zz[zzOff + 2] = (uint)c;
+                c >>= 32;
+                c += x_i * y_3 + zz[zzOff + 3];
+                zz[zzOff + 3] = (uint)c;
+                c >>= 32;
+                c += x_i * y_4 + zz[zzOff + 4];
+                zz[zzOff + 4] = (uint)c;
+                c >>= 32;
+                c += zc + zz[zzOff + 5];
+                zz[zzOff + 5] = (uint)c;
+                zc = c >> 32;
+                ++zzOff;
+            }
+            return (uint)zc;
+        }
+
+        public static ulong Mul33Add(uint w, uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            Debug.Assert(w >> 31 == 0);
+
+            ulong c = 0, wVal = w;
+            ulong x0 = x[xOff + 0];
+            c += wVal * x0 + y[yOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            ulong x1 = x[xOff + 1];
+            c += wVal * x1 + x0 + y[yOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            ulong x2 = x[xOff + 2];
+            c += wVal * x2 + x1 + y[yOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            ulong x3 = x[xOff + 3];
+            c += wVal * x3 + x2 + y[yOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            ulong x4 = x[xOff + 4];
+            c += wVal * x4 + x3 + y[yOff + 4];
+            z[zOff + 4] = (uint)c;
+            c >>= 32;
+            c += x4;
+            return c;
+        }
+
+        public static uint MulWordAddExt(uint x, uint[] yy, int yyOff, uint[] zz, int zzOff)
+        {
+            Debug.Assert(yyOff <= 5);
+            Debug.Assert(zzOff <= 5);
+
+            ulong c = 0, xVal = x;
+            c += xVal * yy[yyOff + 0] + zz[zzOff + 0];
+            zz[zzOff + 0] = (uint)c;
+            c >>= 32;
+            c += xVal * yy[yyOff + 1] + zz[zzOff + 1];
+            zz[zzOff + 1] = (uint)c;
+            c >>= 32;
+            c += xVal * yy[yyOff + 2] + zz[zzOff + 2];
+            zz[zzOff + 2] = (uint)c;
+            c >>= 32;
+            c += xVal * yy[yyOff + 3] + zz[zzOff + 3];
+            zz[zzOff + 3] = (uint)c;
+            c >>= 32;
+            c += xVal * yy[yyOff + 4] + zz[zzOff + 4];
+            zz[zzOff + 4] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint Mul33DWordAdd(uint x, ulong y, uint[] z, int zOff)
+        {
+            Debug.Assert(x >> 31 == 0);
+            Debug.Assert(zOff <= 1);
+            ulong c = 0, xVal = x;
+            ulong y00 = y & M;
+            c += xVal * y00 + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            ulong y01 = y >> 32;
+            c += xVal * y01 + y00 + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += y01 + z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += z[zOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : Nat.IncAt(5, z, zOff, 4);
+        }
+
+        public static uint Mul33WordAdd(uint x, uint y, uint[] z, int zOff)
+        {
+            Debug.Assert(x >> 31 == 0);
+            Debug.Assert(zOff <= 2);
+            ulong c = 0, yVal = y;
+            c += yVal * x + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += yVal + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : Nat.IncAt(5, z, zOff, 3);
+        }
+
+        public static uint MulWordDwordAdd(uint x, ulong y, uint[] z, int zOff)
+        {
+            Debug.Assert(zOff <= 2);
+            ulong c = 0, xVal = x;
+            c += xVal * y + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += xVal * (y >> 32) + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : Nat.IncAt(5, z, zOff, 3);
+        }
+
+        public static uint MulWordsAdd(uint x, uint y, uint[] z, int zOff)
+        {
+            Debug.Assert(zOff <= 3);
+
+            ulong c = 0, xVal = x, yVal = y;
+            c += yVal * xVal + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : Nat.IncAt(5, z, zOff, 2);
+        }
+
+        public static uint MulWord(uint x, uint[] y, uint[] z, int zOff)
+        {
+            ulong c = 0, xVal = x;
+            int i = 0;
+            do
+            {
+                c += xVal * y[i];
+                z[zOff + i] = (uint)c;
+                c >>= 32;
+            }
+            while (++i < 5);
+            return (uint)c;
+        }
+
+        public static void Square(uint[] x, uint[] zz)
+        {
+            ulong x_0 = x[0];
+            ulong zz_1;
+
+            uint c = 0, w;
+            {
+                int i = 4, j = 10;
+                do
+                {
+                    ulong xVal = x[i--];
+                    ulong p = xVal * xVal;
+                    zz[--j] = (c << 31) | (uint)(p >> 33);
+                    zz[--j] = (uint)(p >> 1);
+                    c = (uint)p;
+                }
+                while (i > 0);
+
+                {
+                    ulong p = x_0 * x_0;
+                    zz_1 = (ulong)(c << 31) | (p >> 33);
+                    zz[0] = (uint)p;
+                    c = (uint)(p >> 32) & 1;
+                }
+            }
+
+            ulong x_1 = x[1];
+            ulong zz_2 = zz[2];
+
+            {
+                zz_1 += x_1 * x_0;
+                w = (uint)zz_1;
+                zz[1] = (w << 1) | c;
+                c = w >> 31;
+                zz_2 += zz_1 >> 32;
+            }
+
+            ulong x_2 = x[2];
+            ulong zz_3 = zz[3];
+            ulong zz_4 = zz[4];
+            {
+                zz_2 += x_2 * x_0;
+                w = (uint)zz_2;
+                zz[2] = (w << 1) | c;
+                c = w >> 31;
+                zz_3 += (zz_2 >> 32) + x_2 * x_1;
+                zz_4 += zz_3 >> 32;
+                zz_3 &= M;
+            }
+
+            ulong x_3 = x[3];
+            ulong zz_5 = zz[5];
+            ulong zz_6 = zz[6];
+            {
+                zz_3 += x_3 * x_0;
+                w = (uint)zz_3;
+                zz[3] = (w << 1) | c;
+                c = w >> 31;
+                zz_4 += (zz_3 >> 32) + x_3 * x_1;
+                zz_5 += (zz_4 >> 32) + x_3 * x_2;
+                zz_4 &= M;
+                zz_6 += zz_5 >> 32;
+                zz_5 &= M;
+            }
+
+            ulong x_4 = x[4];
+            ulong zz_7 = zz[7];
+            ulong zz_8 = zz[8];
+            {
+                zz_4 += x_4 * x_0;
+                w = (uint)zz_4;
+                zz[4] = (w << 1) | c;
+                c = w >> 31;
+                zz_5 += (zz_4 >> 32) + x_4 * x_1;
+                zz_6 += (zz_5 >> 32) + x_4 * x_2;
+                zz_7 += (zz_6 >> 32) + x_4 * x_3;
+                zz_8 += zz_7 >> 32;
+            }
+
+            w = (uint)zz_5;
+            zz[5] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_6;
+            zz[6] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_7;
+            zz[7] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_8;
+            zz[8] = (w << 1) | c;
+            c = w >> 31;
+            w = zz[9] + (uint)(zz_8 >> 32);
+            zz[9] = (w << 1) | c;
+        }
+
+        public static void Square(uint[] x, int xOff, uint[] zz, int zzOff)
+        {
+            ulong x_0 = x[xOff + 0];
+            ulong zz_1;
+
+            uint c = 0, w;
+            {
+                int i = 4, j = 10;
+                do
+                {
+                    ulong xVal = x[xOff + i--];
+                    ulong p = xVal * xVal;
+                    zz[zzOff + --j] = (c << 31) | (uint)(p >> 33);
+                    zz[zzOff + --j] = (uint)(p >> 1);
+                    c = (uint)p;
+                }
+                while (i > 0);
+
+                {
+                    ulong p = x_0 * x_0;
+                    zz_1 = (ulong)(c << 31) | (p >> 33);
+                    zz[zzOff + 0] = (uint)p;
+                    c = (uint)(p >> 32) & 1;
+                }
+            }
+
+            ulong x_1 = x[xOff + 1];
+            ulong zz_2 = zz[zzOff + 2];
+
+            {
+                zz_1 += x_1 * x_0;
+                w = (uint)zz_1;
+                zz[zzOff + 1] = (w << 1) | c;
+                c = w >> 31;
+                zz_2 += zz_1 >> 32;
+            }
+
+            ulong x_2 = x[xOff + 2];
+            ulong zz_3 = zz[zzOff + 3];
+            ulong zz_4 = zz[zzOff + 4];
+            {
+                zz_2 += x_2 * x_0;
+                w = (uint)zz_2;
+                zz[zzOff + 2] = (w << 1) | c;
+                c = w >> 31;
+                zz_3 += (zz_2 >> 32) + x_2 * x_1;
+                zz_4 += zz_3 >> 32;
+                zz_3 &= M;
+            }
+
+            ulong x_3 = x[xOff + 3];
+            ulong zz_5 = zz[zzOff + 5];
+            ulong zz_6 = zz[zzOff + 6];
+            {
+                zz_3 += x_3 * x_0;
+                w = (uint)zz_3;
+                zz[zzOff + 3] = (w << 1) | c;
+                c = w >> 31;
+                zz_4 += (zz_3 >> 32) + x_3 * x_1;
+                zz_5 += (zz_4 >> 32) + x_3 * x_2;
+                zz_4 &= M;
+                zz_6 += zz_5 >> 32;
+                zz_5 &= M;
+            }
+
+            ulong x_4 = x[xOff + 4];
+            ulong zz_7 = zz[zzOff + 7];
+            ulong zz_8 = zz[zzOff + 8];
+            {
+                zz_4 += x_4 * x_0;
+                w = (uint)zz_4;
+                zz[zzOff + 4] = (w << 1) | c;
+                c = w >> 31;
+                zz_5 += (zz_4 >> 32) + x_4 * x_1;
+                zz_6 += (zz_5 >> 32) + x_4 * x_2;
+                zz_7 += (zz_6 >> 32) + x_4 * x_3;
+                zz_8 += zz_7 >> 32;
+            }
+
+            w = (uint)zz_5;
+            zz[zzOff + 5] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_6;
+            zz[zzOff + 6] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_7;
+            zz[zzOff + 7] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_8;
+            zz[zzOff + 8] = (w << 1) | c;
+            c = w >> 31;
+            w = zz[zzOff + 9] + (uint)(zz_8 >> 32);
+            zz[zzOff + 9] = (w << 1) | c;
+        }
+
+        public static int Sub(uint[] x, uint[] y, uint[] z)
+        {
+            long c = 0;
+            c += (long)x[0] - y[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (long)x[1] - y[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (long)x[2] - y[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (long)x[3] - y[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (long)x[4] - y[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static int Sub(uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            long c = 0;
+            c += (long)x[xOff + 0] - y[yOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 1] - y[yOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 2] - y[yOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 3] - y[yOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 4] - y[yOff + 4];
+            z[zOff + 4] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static int SubBothFrom(uint[] x, uint[] y, uint[] z)
+        {
+            long c = 0;
+            c += (long)z[0] - x[0] - y[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (long)z[1] - x[1] - y[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (long)z[2] - x[2] - y[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (long)z[3] - x[3] - y[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (long)z[4] - x[4] - y[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static int SubFrom(uint[] x, uint[] z)
+        {
+            long c = 0;
+            c += (long)z[0] - x[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (long)z[1] - x[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (long)z[2] - x[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (long)z[3] - x[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (long)z[4] - x[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static int SubFrom(uint[] x, int xOff, uint[] z, int zOff)
+        {
+            long c = 0;
+            c += (long)z[zOff + 0] - x[xOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 1] - x[xOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 2] - x[xOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 3] - x[xOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 4] - x[xOff + 4];
+            z[zOff + 4] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static BigInteger ToBigInteger(uint[] x)
+        {
+            byte[] bs = new byte[20];
+            for (int i = 0; i < 5; ++i)
+            {
+                uint x_i = x[i];
+                if (x_i != 0)
+                {
+                    Pack.UInt32_To_BE(x_i, bs, (4 - i) << 2);
+                }
+            }
+            return new BigInteger(1, bs);
+        }
+
+        public static void Zero(uint[] z)
+        {
+            z[0] = 0;
+            z[1] = 0;
+            z[2] = 0;
+            z[3] = 0;
+            z[4] = 0;
+        }
+    }
+}
diff --git a/crypto/src/math/raw/Nat192.cs b/crypto/src/math/raw/Nat192.cs
new file mode 100644
index 000000000..4797609ee
--- /dev/null
+++ b/crypto/src/math/raw/Nat192.cs
@@ -0,0 +1,1048 @@
+using System;
+using System.Diagnostics;
+
+using Org.BouncyCastle.Crypto.Utilities;
+
+namespace Org.BouncyCastle.Math.Raw
+{
+    internal abstract class Nat192
+    {
+        private const ulong M = 0xFFFFFFFFUL;
+
+        public static uint Add(uint[] x, uint[] y, uint[] z)
+        {
+            ulong c = 0;
+            c += (ulong)x[0] + y[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[1] + y[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[2] + y[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[3] + y[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[4] + y[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[5] + y[5];
+            z[5] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint AddBothTo(uint[] x, uint[] y, uint[] z)
+        {
+            ulong c = 0;
+            c += (ulong)x[0] + y[0] + z[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[1] + y[1] + z[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[2] + y[2] + z[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[3] + y[3] + z[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[4] + y[4] + z[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[5] + y[5] + z[5];
+            z[5] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint AddTo(uint[] x, uint[] z)
+        {
+            ulong c = 0;
+            c += (ulong)x[0] + z[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[1] + z[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[2] + z[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[3] + z[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[4] + z[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[5] + z[5];
+            z[5] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint AddTo(uint[] x, int xOff, uint[] z, int zOff, uint cIn)
+        {
+            ulong c = cIn;
+            c += (ulong)x[xOff + 0] + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 1] + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 2] + z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 3] + z[zOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 4] + z[zOff + 4];
+            z[zOff + 4] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 5] + z[zOff + 5];
+            z[zOff + 5] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint AddToEachOther(uint[] u, int uOff, uint[] v, int vOff)
+        {
+            ulong c = 0;
+            c += (ulong)u[uOff + 0] + v[vOff + 0];
+            u[uOff + 0] = (uint)c;
+            v[vOff + 0] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 1] + v[vOff + 1];
+            u[uOff + 1] = (uint)c;
+            v[vOff + 1] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 2] + v[vOff + 2];
+            u[uOff + 2] = (uint)c;
+            v[vOff + 2] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 3] + v[vOff + 3];
+            u[uOff + 3] = (uint)c;
+            v[vOff + 3] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 4] + v[vOff + 4];
+            u[uOff + 4] = (uint)c;
+            v[vOff + 4] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 5] + v[vOff + 5];
+            u[uOff + 5] = (uint)c;
+            v[vOff + 5] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static void Copy(uint[] x, uint[] z)
+        {
+            z[0] = x[0];
+            z[1] = x[1];
+            z[2] = x[2];
+            z[3] = x[3];
+            z[4] = x[4];
+            z[5] = x[5];
+        }
+
+        public static void Copy64(ulong[] x, ulong[] z)
+        {
+            z[0] = x[0];
+            z[1] = x[1];
+            z[2] = x[2];
+        }
+
+        public static uint[] Create()
+        {
+            return new uint[6];
+        }
+
+        public static ulong[] Create64()
+        {
+            return new ulong[3];
+        }
+
+        public static uint[] CreateExt()
+        {
+            return new uint[12];
+        }
+
+        public static ulong[] CreateExt64()
+        {
+            return new ulong[6];
+        }
+
+        public static bool Diff(uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            bool pos = Gte(x, xOff, y, yOff);
+            if (pos)
+            {
+                Sub(x, xOff, y, yOff, z, zOff);
+            }
+            else
+            {
+                Sub(y, yOff, x, xOff, z, zOff);
+            }
+            return pos;
+        }
+
+        public static bool Eq(uint[] x, uint[] y)
+        {
+            for (int i = 5; i >= 0; --i)
+            {
+                if (x[i] != y[i])
+                    return false;
+            }
+            return true;
+        }
+
+        public static bool Eq64(ulong[] x, ulong[] y)
+        {
+            for (int i = 2; i >= 0; --i)
+            {
+                if (x[i] != y[i])
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static uint[] FromBigInteger(BigInteger x)
+        {
+            if (x.SignValue < 0 || x.BitLength > 192)
+                throw new ArgumentException();
+
+            uint[] z = Create();
+            int i = 0;
+            while (x.SignValue != 0)
+            {
+                z[i++] = (uint)x.IntValue;
+                x = x.ShiftRight(32);
+            }
+            return z;
+        }
+
+        public static ulong[] FromBigInteger64(BigInteger x)
+        {
+            if (x.SignValue < 0 || x.BitLength > 192)
+                throw new ArgumentException();
+
+            ulong[] z = Create64();
+            int i = 0;
+            while (x.SignValue != 0)
+            {
+                z[i++] = (ulong)x.LongValue;
+                x = x.ShiftRight(64);
+            }
+            return z;
+        }
+
+        public static uint GetBit(uint[] x, int bit)
+        {
+            if (bit == 0)
+            {
+                return x[0] & 1;
+            }
+            int w = bit >> 5;
+            if (w < 0 || w >= 6)
+            {
+                return 0;
+            }
+            int b = bit & 31;
+            return (x[w] >> b) & 1;
+        }
+
+        public static bool Gte(uint[] x, uint[] y)
+        {
+            for (int i = 5; i >= 0; --i)
+            {
+                uint x_i = x[i], y_i = y[i];
+                if (x_i < y_i)
+                    return false;
+                if (x_i > y_i)
+                    return true;
+            }
+            return true;
+        }
+
+        public static bool Gte(uint[] x, int xOff, uint[] y, int yOff)
+        {
+            for (int i = 5; i >= 0; --i)
+            {
+                uint x_i = x[xOff + i], y_i = y[yOff + i];
+                if (x_i < y_i)
+                    return false;
+                if (x_i > y_i)
+                    return true;
+            }
+            return true;
+        }
+
+        public static bool IsOne(uint[] x)
+        {
+            if (x[0] != 1)
+            {
+                return false;
+            }
+            for (int i = 1; i < 6; ++i)
+            {
+                if (x[i] != 0)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static bool IsOne64(ulong[] x)
+        {
+            if (x[0] != 1UL)
+            {
+                return false;
+            }
+            for (int i = 1; i < 3; ++i)
+            {
+                if (x[i] != 0UL)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static bool IsZero(uint[] x)
+        {
+            for (int i = 0; i < 6; ++i)
+            {
+                if (x[i] != 0)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static bool IsZero64(ulong[] x)
+        {
+            for (int i = 0; i < 3; ++i)
+            {
+                if (x[i] != 0UL)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static void Mul(uint[] x, uint[] y, uint[] zz)
+        {
+            ulong y_0 = y[0];
+            ulong y_1 = y[1];
+            ulong y_2 = y[2];
+            ulong y_3 = y[3];
+            ulong y_4 = y[4];
+            ulong y_5 = y[5];
+
+            {
+                ulong c = 0, x_0 = x[0];
+                c += x_0 * y_0;
+                zz[0] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_1;
+                zz[1] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_2;
+                zz[2] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_3;
+                zz[3] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_4;
+                zz[4] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_5;
+                zz[5] = (uint)c;
+                c >>= 32;
+                zz[6] = (uint)c;
+            }
+
+            for (int i = 1; i < 6; ++i)
+            {
+                ulong c = 0, x_i = x[i];
+                c += x_i * y_0 + zz[i + 0];
+                zz[i + 0] = (uint)c;
+                c >>= 32;
+                c += x_i * y_1 + zz[i + 1];
+                zz[i + 1] = (uint)c;
+                c >>= 32;
+                c += x_i * y_2 + zz[i + 2];
+                zz[i + 2] = (uint)c;
+                c >>= 32;
+                c += x_i * y_3 + zz[i + 3];
+                zz[i + 3] = (uint)c;
+                c >>= 32;
+                c += x_i * y_4 + zz[i + 4];
+                zz[i + 4] = (uint)c;
+                c >>= 32;
+                c += x_i * y_5 + zz[i + 5];
+                zz[i + 5] = (uint)c;
+                c >>= 32;
+                zz[i + 6] = (uint)c;
+            }
+        }
+
+        public static void Mul(uint[] x, int xOff, uint[] y, int yOff, uint[] zz, int zzOff)
+        {
+            ulong y_0 = y[yOff + 0];
+            ulong y_1 = y[yOff + 1];
+            ulong y_2 = y[yOff + 2];
+            ulong y_3 = y[yOff + 3];
+            ulong y_4 = y[yOff + 4];
+            ulong y_5 = y[yOff + 5];
+
+            {
+                ulong c = 0, x_0 = x[xOff + 0];
+                c += x_0 * y_0;
+                zz[zzOff + 0] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_1;
+                zz[zzOff + 1] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_2;
+                zz[zzOff + 2] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_3;
+                zz[zzOff + 3] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_4;
+                zz[zzOff + 4] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_5;
+                zz[zzOff + 5] = (uint)c;
+                c >>= 32;
+                zz[zzOff + 6] = (uint)c;
+            }
+
+            for (int i = 1; i < 6; ++i)
+            {
+                ++zzOff;
+                ulong c = 0, x_i = x[xOff + i];
+                c += x_i * y_0 + zz[zzOff + 0];
+                zz[zzOff + 0] = (uint)c;
+                c >>= 32;
+                c += x_i * y_1 + zz[zzOff + 1];
+                zz[zzOff + 1] = (uint)c;
+                c >>= 32;
+                c += x_i * y_2 + zz[zzOff + 2];
+                zz[zzOff + 2] = (uint)c;
+                c >>= 32;
+                c += x_i * y_3 + zz[zzOff + 3];
+                zz[zzOff + 3] = (uint)c;
+                c >>= 32;
+                c += x_i * y_4 + zz[zzOff + 4];
+                zz[zzOff + 4] = (uint)c;
+                c >>= 32;
+                c += x_i * y_5 + zz[zzOff + 5];
+                zz[zzOff + 5] = (uint)c;
+                c >>= 32;
+                zz[zzOff + 6] = (uint)c;
+            }
+        }
+
+        public static uint MulAddTo(uint[] x, uint[] y, uint[] zz)
+        {
+            ulong y_0 = y[0];
+            ulong y_1 = y[1];
+            ulong y_2 = y[2];
+            ulong y_3 = y[3];
+            ulong y_4 = y[4];
+            ulong y_5 = y[5];
+
+            ulong zc = 0;
+            for (int i = 0; i < 6; ++i)
+            {
+                ulong c = 0, x_i = x[i];
+                c += x_i * y_0 + zz[i + 0];
+                zz[i + 0] = (uint)c;
+                c >>= 32;
+                c += x_i * y_1 + zz[i + 1];
+                zz[i + 1] = (uint)c;
+                c >>= 32;
+                c += x_i * y_2 + zz[i + 2];
+                zz[i + 2] = (uint)c;
+                c >>= 32;
+                c += x_i * y_3 + zz[i + 3];
+                zz[i + 3] = (uint)c;
+                c >>= 32;
+                c += x_i * y_4 + zz[i + 4];
+                zz[i + 4] = (uint)c;
+                c >>= 32;
+                c += x_i * y_5 + zz[i + 5];
+                zz[i + 5] = (uint)c;
+                c >>= 32;
+                c += zc + zz[i + 6];
+                zz[i + 6] = (uint)c;
+                zc = c >> 32;
+            }
+            return (uint)zc;
+        }
+
+        public static uint MulAddTo(uint[] x, int xOff, uint[] y, int yOff, uint[] zz, int zzOff)
+        {
+            ulong y_0 = y[yOff + 0];
+            ulong y_1 = y[yOff + 1];
+            ulong y_2 = y[yOff + 2];
+            ulong y_3 = y[yOff + 3];
+            ulong y_4 = y[yOff + 4];
+            ulong y_5 = y[yOff + 5];
+
+            ulong zc = 0;
+            for (int i = 0; i < 6; ++i)
+            {
+                ulong c = 0, x_i = x[xOff + i];
+                c += x_i * y_0 + zz[zzOff + 0];
+                zz[zzOff + 0] = (uint)c;
+                c >>= 32;
+                c += x_i * y_1 + zz[zzOff + 1];
+                zz[zzOff + 1] = (uint)c;
+                c >>= 32;
+                c += x_i * y_2 + zz[zzOff + 2];
+                zz[zzOff + 2] = (uint)c;
+                c >>= 32;
+                c += x_i * y_3 + zz[zzOff + 3];
+                zz[zzOff + 3] = (uint)c;
+                c >>= 32;
+                c += x_i * y_4 + zz[zzOff + 4];
+                zz[zzOff + 4] = (uint)c;
+                c >>= 32;
+                c += x_i * y_5 + zz[zzOff + 5];
+                zz[zzOff + 5] = (uint)c;
+                c >>= 32;
+                c += zc + zz[zzOff + 6];
+                zz[zzOff + 6] = (uint)c;
+                zc = c >> 32;
+                ++zzOff;
+            }
+            return (uint)zc;
+        }
+
+        public static ulong Mul33Add(uint w, uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            Debug.Assert(w >> 31 == 0);
+
+            ulong c = 0, wVal = w;
+            ulong x0 = x[xOff + 0];
+            c += wVal * x0 + y[yOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            ulong x1 = x[xOff + 1];
+            c += wVal * x1 + x0 + y[yOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            ulong x2 = x[xOff + 2];
+            c += wVal * x2 + x1 + y[yOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            ulong x3 = x[xOff + 3];
+            c += wVal * x3 + x2 + y[yOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            ulong x4 = x[xOff + 4];
+            c += wVal * x4 + x3 + y[yOff + 4];
+            z[zOff + 4] = (uint)c;
+            c >>= 32;
+            ulong x5 = x[xOff + 5];
+            c += wVal * x5 + x4 + y[yOff + 5];
+            z[zOff + 5] = (uint)c;
+            c >>= 32;
+            c += x5;
+            return c;
+        }
+
+        public static uint MulWordAddExt(uint x, uint[] yy, int yyOff, uint[] zz, int zzOff)
+        {
+            Debug.Assert(yyOff <= 6);
+            Debug.Assert(zzOff <= 6);
+            ulong c = 0, xVal = x;
+            c += xVal * yy[yyOff + 0] + zz[zzOff + 0];
+            zz[zzOff + 0] = (uint)c;
+            c >>= 32;
+            c += xVal * yy[yyOff + 1] + zz[zzOff + 1];
+            zz[zzOff + 1] = (uint)c;
+            c >>= 32;
+            c += xVal * yy[yyOff + 2] + zz[zzOff + 2];
+            zz[zzOff + 2] = (uint)c;
+            c >>= 32;
+            c += xVal * yy[yyOff + 3] + zz[zzOff + 3];
+            zz[zzOff + 3] = (uint)c;
+            c >>= 32;
+            c += xVal * yy[yyOff + 4] + zz[zzOff + 4];
+            zz[zzOff + 4] = (uint)c;
+            c >>= 32;
+            c += xVal * yy[yyOff + 5] + zz[zzOff + 5];
+            zz[zzOff + 5] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint Mul33DWordAdd(uint x, ulong y, uint[] z, int zOff)
+        {
+            Debug.Assert(x >> 31 == 0);
+            Debug.Assert(zOff <= 2);
+            ulong c = 0, xVal = x;
+            ulong y00 = y & M;
+            c += xVal * y00 + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            ulong y01 = y >> 32;
+            c += xVal * y01 + y00 + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += y01 + z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += z[zOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : Nat.IncAt(6, z, zOff, 4);
+        }
+
+        public static uint Mul33WordAdd(uint x, uint y, uint[] z, int zOff)
+        {
+            Debug.Assert(x >> 31 == 0);
+            Debug.Assert(zOff <=3);
+            ulong c = 0, yVal = y;
+            c += yVal * x + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += yVal + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : Nat.IncAt(6, z, zOff, 3);
+        }
+
+        public static uint MulWordDwordAdd(uint x, ulong y, uint[] z, int zOff)
+        {
+            Debug.Assert(zOff <= 3);
+            ulong c = 0, xVal = x;
+            c += xVal * y + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += xVal * (y >> 32) + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : Nat.IncAt(6, z, zOff, 3);
+        }
+
+        public static uint MulWord(uint x, uint[] y, uint[] z, int zOff)
+        {
+            ulong c = 0, xVal = x;
+            int i = 0;
+            do
+            {
+                c += xVal * y[i];
+                z[zOff + i] = (uint)c;
+                c >>= 32;
+            }
+            while (++i < 6);
+            return (uint)c;
+        }
+
+        public static void Square(uint[] x, uint[] zz)
+        {
+            ulong x_0 = x[0];
+            ulong zz_1;
+
+            uint c = 0, w;
+            {
+                int i = 5, j = 12;
+                do
+                {
+                    ulong xVal = x[i--];
+                    ulong p = xVal * xVal;
+                    zz[--j] = (c << 31) | (uint)(p >> 33);
+                    zz[--j] = (uint)(p >> 1);
+                    c = (uint)p;
+                }
+                while (i > 0);
+
+                {
+                    ulong p = x_0 * x_0;
+                    zz_1 = (ulong)(c << 31) | (p >> 33);
+                    zz[0] = (uint)p;
+                    c = (uint)(p >> 32) & 1;
+                }
+            }
+
+            ulong x_1 = x[1];
+            ulong zz_2 = zz[2];
+
+            {
+                zz_1 += x_1 * x_0;
+                w = (uint)zz_1;
+                zz[1] = (w << 1) | c;
+                c = w >> 31;
+                zz_2 += zz_1 >> 32;
+            }
+
+            ulong x_2 = x[2];
+            ulong zz_3 = zz[3];
+            ulong zz_4 = zz[4];
+            {
+                zz_2 += x_2 * x_0;
+                w = (uint)zz_2;
+                zz[2] = (w << 1) | c;
+                c = w >> 31;
+                zz_3 += (zz_2 >> 32) + x_2 * x_1;
+                zz_4 += zz_3 >> 32;
+                zz_3 &= M;
+            }
+
+            ulong x_3 = x[3];
+            ulong zz_5 = zz[5];
+            ulong zz_6 = zz[6];
+            {
+                zz_3 += x_3 * x_0;
+                w = (uint)zz_3;
+                zz[3] = (w << 1) | c;
+                c = w >> 31;
+                zz_4 += (zz_3 >> 32) + x_3 * x_1;
+                zz_5 += (zz_4 >> 32) + x_3 * x_2;
+                zz_4 &= M;
+                zz_6 += zz_5 >> 32;
+                zz_5 &= M;
+            }
+
+            ulong x_4 = x[4];
+            ulong zz_7 = zz[7];
+            ulong zz_8 = zz[8];
+            {
+                zz_4 += x_4 * x_0;
+                w = (uint)zz_4;
+                zz[4] = (w << 1) | c;
+                c = w >> 31;
+                zz_5 += (zz_4 >> 32) + x_4 * x_1;
+                zz_6 += (zz_5 >> 32) + x_4 * x_2;
+                zz_5 &= M;
+                zz_7 += (zz_6 >> 32) + x_4 * x_3;
+                zz_6 &= M;
+                zz_8 += zz_7 >> 32;
+                zz_7 &= M;
+            }
+
+            ulong x_5 = x[5];
+            ulong zz_9 = zz[9];
+            ulong zz_10 = zz[10];
+            {
+                zz_5 += x_5 * x_0;
+                w = (uint)zz_5;
+                zz[5] = (w << 1) | c;
+                c = w >> 31;
+                zz_6 += (zz_5 >> 32) + x_5 * x_1;
+                zz_7 += (zz_6 >> 32) + x_5 * x_2;
+                zz_8 += (zz_7 >> 32) + x_5 * x_3;
+                zz_9 += (zz_8 >> 32) + x_5 * x_4;
+                zz_10 += zz_9 >> 32;
+            }
+
+            w = (uint)zz_6;
+            zz[6] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_7;
+            zz[7] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_8;
+            zz[8] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_9;
+            zz[9] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_10;
+            zz[10] = (w << 1) | c;
+            c = w >> 31;
+            w = zz[11] + (uint)(zz_10 >> 32);
+            zz[11] = (w << 1) | c;
+        }
+
+        public static void Square(uint[] x, int xOff, uint[] zz, int zzOff)
+        {
+            ulong x_0 = x[xOff + 0];
+            ulong zz_1;
+
+            uint c = 0, w;
+            {
+                int i = 5, j = 12;
+                do
+                {
+                    ulong xVal = x[xOff + i--];
+                    ulong p = xVal * xVal;
+                    zz[zzOff + --j] = (c << 31) | (uint)(p >> 33);
+                    zz[zzOff + --j] = (uint)(p >> 1);
+                    c = (uint)p;
+                }
+                while (i > 0);
+
+                {
+                    ulong p = x_0 * x_0;
+                    zz_1 = (ulong)(c << 31) | (p >> 33);
+                    zz[zzOff + 0] = (uint)p;
+                    c = (uint)(p >> 32) & 1;
+                }
+            }
+
+            ulong x_1 = x[xOff + 1];
+            ulong zz_2 = zz[zzOff + 2];
+
+            {
+                zz_1 += x_1 * x_0;
+                w = (uint)zz_1;
+                zz[zzOff + 1] = (w << 1) | c;
+                c = w >> 31;
+                zz_2 += zz_1 >> 32;
+            }
+
+            ulong x_2 = x[xOff + 2];
+            ulong zz_3 = zz[zzOff + 3];
+            ulong zz_4 = zz[zzOff + 4];
+            {
+                zz_2 += x_2 * x_0;
+                w = (uint)zz_2;
+                zz[zzOff + 2] = (w << 1) | c;
+                c = w >> 31;
+                zz_3 += (zz_2 >> 32) + x_2 * x_1;
+                zz_4 += zz_3 >> 32;
+                zz_3 &= M;
+            }
+
+            ulong x_3 = x[xOff + 3];
+            ulong zz_5 = zz[zzOff + 5];
+            ulong zz_6 = zz[zzOff + 6];
+            {
+                zz_3 += x_3 * x_0;
+                w = (uint)zz_3;
+                zz[zzOff + 3] = (w << 1) | c;
+                c = w >> 31;
+                zz_4 += (zz_3 >> 32) + x_3 * x_1;
+                zz_5 += (zz_4 >> 32) + x_3 * x_2;
+                zz_4 &= M;
+                zz_6 += zz_5 >> 32;
+                zz_5 &= M;
+            }
+
+            ulong x_4 = x[xOff + 4];
+            ulong zz_7 = zz[zzOff + 7];
+            ulong zz_8 = zz[zzOff + 8];
+            {
+                zz_4 += x_4 * x_0;
+                w = (uint)zz_4;
+                zz[zzOff + 4] = (w << 1) | c;
+                c = w >> 31;
+                zz_5 += (zz_4 >> 32) + x_4 * x_1;
+                zz_6 += (zz_5 >> 32) + x_4 * x_2;
+                zz_5 &= M;
+                zz_7 += (zz_6 >> 32) + x_4 * x_3;
+                zz_6 &= M;
+                zz_8 += zz_7 >> 32;
+                zz_7 &= M;
+            }
+
+            ulong x_5 = x[xOff + 5];
+            ulong zz_9 = zz[zzOff + 9];
+            ulong zz_10 = zz[zzOff + 10];
+            {
+                zz_5 += x_5 * x_0;
+                w = (uint)zz_5;
+                zz[zzOff + 5] = (w << 1) | c;
+                c = w >> 31;
+                zz_6 += (zz_5 >> 32) + x_5 * x_1;
+                zz_7 += (zz_6 >> 32) + x_5 * x_2;
+                zz_8 += (zz_7 >> 32) + x_5 * x_3;
+                zz_9 += (zz_8 >> 32) + x_5 * x_4;
+                zz_10 += zz_9 >> 32;
+            }
+
+            w = (uint)zz_6;
+            zz[zzOff + 6] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_7;
+            zz[zzOff + 7] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_8;
+            zz[zzOff + 8] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_9;
+            zz[zzOff + 9] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_10;
+            zz[zzOff + 10] = (w << 1) | c;
+            c = w >> 31;
+            w = zz[zzOff + 11] + (uint)(zz_10 >> 32);
+            zz[zzOff + 11] = (w << 1) | c;
+        }
+
+        public static int Sub(uint[] x, uint[] y, uint[] z)
+        {
+            long c = 0;
+            c += (long)x[0] - y[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (long)x[1] - y[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (long)x[2] - y[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (long)x[3] - y[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (long)x[4] - y[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            c += (long)x[5] - y[5];
+            z[5] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static int Sub(uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            long c = 0;
+            c += (long)x[xOff + 0] - y[yOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 1] - y[yOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 2] - y[yOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 3] - y[yOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 4] - y[yOff + 4];
+            z[zOff + 4] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 5] - y[yOff + 5];
+            z[zOff + 5] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static int SubBothFrom(uint[] x, uint[] y, uint[] z)
+        {
+            long c = 0;
+            c += (long)z[0] - x[0] - y[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (long)z[1] - x[1] - y[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (long)z[2] - x[2] - y[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (long)z[3] - x[3] - y[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (long)z[4] - x[4] - y[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            c += (long)z[5] - x[5] - y[5];
+            z[5] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static int SubFrom(uint[] x, uint[] z)
+        {
+            long c = 0;
+            c += (long)z[0] - x[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (long)z[1] - x[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (long)z[2] - x[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (long)z[3] - x[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (long)z[4] - x[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            c += (long)z[5] - x[5];
+            z[5] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static int SubFrom(uint[] x, int xOff, uint[] z, int zOff)
+        {
+            long c = 0;
+            c += (long)z[zOff + 0] - x[xOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 1] - x[xOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 2] - x[xOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 3] - x[xOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 4] - x[xOff + 4];
+            z[zOff + 4] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 5] - x[xOff + 5];
+            z[zOff + 5] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static BigInteger ToBigInteger(uint[] x)
+        {
+            byte[] bs = new byte[24];
+            for (int i = 0; i < 6; ++i)
+            {
+                uint x_i = x[i];
+                if (x_i != 0)
+                {
+                    Pack.UInt32_To_BE(x_i, bs, (5 - i) << 2);
+                }
+            }
+            return new BigInteger(1, bs);
+        }
+
+        public static BigInteger ToBigInteger64(ulong[] x)
+        {
+            byte[] bs = new byte[24];
+            for (int i = 0; i < 3; ++i)
+            {
+                ulong x_i = x[i];
+                if (x_i != 0L)
+                {
+                    Pack.UInt64_To_BE(x_i, bs, (2 - i) << 3);
+                }
+            }
+            return new BigInteger(1, bs);
+        }
+
+        public static void Zero(uint[] z)
+        {
+            z[0] = 0;
+            z[1] = 0;
+            z[2] = 0;
+            z[3] = 0;
+            z[4] = 0;
+            z[5] = 0;
+        }
+    }
+}
diff --git a/crypto/src/math/raw/Nat224.cs b/crypto/src/math/raw/Nat224.cs
new file mode 100644
index 000000000..940e930ac
--- /dev/null
+++ b/crypto/src/math/raw/Nat224.cs
@@ -0,0 +1,1176 @@
+using System;
+using System.Diagnostics;
+
+using Org.BouncyCastle.Crypto.Utilities;
+
+namespace Org.BouncyCastle.Math.Raw
+{
+    internal abstract class Nat224
+    {
+        private const ulong M = 0xFFFFFFFFUL;
+
+        public static uint Add(uint[] x, uint[] y, uint[] z)
+        {
+            ulong c = 0;
+            c += (ulong)x[0] + y[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[1] + y[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[2] + y[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[3] + y[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[4] + y[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[5] + y[5];
+            z[5] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[6] + y[6];
+            z[6] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint Add(uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            ulong c = 0;
+            c += (ulong)x[xOff + 0] + y[yOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 1] + y[yOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 2] + y[yOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 3] + y[yOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 4] + y[yOff + 4];
+            z[zOff + 4] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 5] + y[yOff + 5];
+            z[zOff + 5] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 6] + y[yOff + 6];
+            z[zOff + 6] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint AddBothTo(uint[] x, uint[] y, uint[] z)
+        {
+            ulong c = 0;
+            c += (ulong)x[0] + y[0] + z[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[1] + y[1] + z[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[2] + y[2] + z[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[3] + y[3] + z[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[4] + y[4] + z[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[5] + y[5] + z[5];
+            z[5] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[6] + y[6] + z[6];
+            z[6] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint AddBothTo(uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            ulong c = 0;
+            c += (ulong)x[xOff + 0] + y[yOff + 0] + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 1] + y[yOff + 1] + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 2] + y[yOff + 2] + z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 3] + y[yOff + 3] + z[zOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 4] + y[yOff + 4] + z[zOff + 4];
+            z[zOff + 4] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 5] + y[yOff + 5] + z[zOff + 5];
+            z[zOff + 5] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 6] + y[yOff + 6] + z[zOff + 6];
+            z[zOff + 6] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint AddTo(uint[] x, uint[] z)
+        {
+            ulong c = 0;
+            c += (ulong)x[0] + z[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[1] + z[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[2] + z[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[3] + z[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[4] + z[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[5] + z[5];
+            z[5] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[6] + z[6];
+            z[6] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint AddTo(uint[] x, int xOff, uint[] z, int zOff, uint cIn)
+        {
+            ulong c = cIn;
+            c += (ulong)x[xOff + 0] + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 1] + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 2] + z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 3] + z[zOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 4] + z[zOff + 4];
+            z[zOff + 4] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 5] + z[zOff + 5];
+            z[zOff + 5] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 6] + z[zOff + 6];
+            z[zOff + 6] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint AddToEachOther(uint[] u, int uOff, uint[] v, int vOff)
+        {
+            ulong c = 0;
+            c += (ulong)u[uOff + 0] + v[vOff + 0];
+            u[uOff + 0] = (uint)c;
+            v[vOff + 0] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 1] + v[vOff + 1];
+            u[uOff + 1] = (uint)c;
+            v[vOff + 1] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 2] + v[vOff + 2];
+            u[uOff + 2] = (uint)c;
+            v[vOff + 2] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 3] + v[vOff + 3];
+            u[uOff + 3] = (uint)c;
+            v[vOff + 3] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 4] + v[vOff + 4];
+            u[uOff + 4] = (uint)c;
+            v[vOff + 4] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 5] + v[vOff + 5];
+            u[uOff + 5] = (uint)c;
+            v[vOff + 5] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 6] + v[vOff + 6];
+            u[uOff + 6] = (uint)c;
+            v[vOff + 6] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static void Copy(uint[] x, uint[] z)
+        {
+            z[0] = x[0];
+            z[1] = x[1];
+            z[2] = x[2];
+            z[3] = x[3];
+            z[4] = x[4];
+            z[5] = x[5];
+            z[6] = x[6];
+        }
+
+        public static uint[] Create()
+        {
+            return new uint[7];
+        }
+
+        public static uint[] CreateExt()
+        {
+            return new uint[14];
+        }
+
+        public static bool Diff(uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            bool pos = Gte(x, xOff, y, yOff);
+            if (pos)
+            {
+                Sub(x, xOff, y, yOff, z, zOff);
+            }
+            else
+            {
+                Sub(y, yOff, x, xOff, z, zOff);
+            }
+            return pos;
+        }
+
+        public static bool Eq(uint[] x, uint[] y)
+        {
+            for (int i = 6; i >= 0; --i)
+            {
+                if (x[i] != y[i])
+                    return false;
+            }
+            return true;
+        }
+
+        public static uint[] FromBigInteger(BigInteger x)
+        {
+            if (x.SignValue < 0 || x.BitLength > 224)
+                throw new ArgumentException();
+
+            uint[] z = Create();
+            int i = 0;
+            while (x.SignValue != 0)
+            {
+                z[i++] = (uint)x.IntValue;
+                x = x.ShiftRight(32);
+            }
+            return z;
+        }
+
+        public static uint GetBit(uint[] x, int bit)
+        {
+            if (bit == 0)
+            {
+                return x[0] & 1;
+            }
+            int w = bit >> 5;
+            if (w < 0 || w >= 7)
+            {
+                return 0;
+            }
+            int b = bit & 31;
+            return (x[w] >> b) & 1;
+        }
+
+        public static bool Gte(uint[] x, uint[] y)
+        {
+            for (int i = 6; i >= 0; --i)
+            {
+                uint x_i = x[i], y_i = y[i];
+                if (x_i < y_i)
+                    return false;
+                if (x_i > y_i)
+                    return true;
+            }
+            return true;
+        }
+
+        public static bool Gte(uint[] x, int xOff, uint[] y, int yOff)
+        {
+            for (int i = 6; i >= 0; --i)
+            {
+                uint x_i = x[xOff + i], y_i = y[yOff + i];
+                if (x_i < y_i)
+                    return false;
+                if (x_i > y_i)
+                    return true;
+            }
+            return true;
+        }
+
+        public static bool IsOne(uint[] x)
+        {
+            if (x[0] != 1)
+            {
+                return false;
+            }
+            for (int i = 1; i < 7; ++i)
+            {
+                if (x[i] != 0)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static bool IsZero(uint[] x)
+        {
+            for (int i = 0; i < 7; ++i)
+            {
+                if (x[i] != 0)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static void Mul(uint[] x, uint[] y, uint[] zz)
+        {
+            ulong y_0 = y[0];
+            ulong y_1 = y[1];
+            ulong y_2 = y[2];
+            ulong y_3 = y[3];
+            ulong y_4 = y[4];
+            ulong y_5 = y[5];
+            ulong y_6 = y[6];
+
+            {
+                ulong c = 0, x_0 = x[0];
+                c += x_0 * y_0;
+                zz[0] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_1;
+                zz[1] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_2;
+                zz[2] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_3;
+                zz[3] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_4;
+                zz[4] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_5;
+                zz[5] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_6;
+                zz[6] = (uint)c;
+                c >>= 32;
+                zz[7] = (uint)c;
+            }
+
+            for (int i = 1; i < 7; ++i)
+            {
+                ulong c = 0, x_i = x[i];
+                c += x_i * y_0 + zz[i + 0];
+                zz[i + 0] = (uint)c;
+                c >>= 32;
+                c += x_i * y_1 + zz[i + 1];
+                zz[i + 1] = (uint)c;
+                c >>= 32;
+                c += x_i * y_2 + zz[i + 2];
+                zz[i + 2] = (uint)c;
+                c >>= 32;
+                c += x_i * y_3 + zz[i + 3];
+                zz[i + 3] = (uint)c;
+                c >>= 32;
+                c += x_i * y_4 + zz[i + 4];
+                zz[i + 4] = (uint)c;
+                c >>= 32;
+                c += x_i * y_5 + zz[i + 5];
+                zz[i + 5] = (uint)c;
+                c >>= 32;
+                c += x_i * y_6 + zz[i + 6];
+                zz[i + 6] = (uint)c;
+                c >>= 32;
+                zz[i + 7] = (uint)c;
+            }
+        }
+
+        public static void Mul(uint[] x, int xOff, uint[] y, int yOff, uint[] zz, int zzOff)
+        {
+            ulong y_0 = y[yOff + 0];
+            ulong y_1 = y[yOff + 1];
+            ulong y_2 = y[yOff + 2];
+            ulong y_3 = y[yOff + 3];
+            ulong y_4 = y[yOff + 4];
+            ulong y_5 = y[yOff + 5];
+            ulong y_6 = y[yOff + 6];
+
+            {
+                ulong c = 0, x_0 = x[xOff + 0];
+                c += x_0 * y_0;
+                zz[zzOff + 0] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_1;
+                zz[zzOff + 1] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_2;
+                zz[zzOff + 2] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_3;
+                zz[zzOff + 3] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_4;
+                zz[zzOff + 4] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_5;
+                zz[zzOff + 5] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_6;
+                zz[zzOff + 6] = (uint)c;
+                c >>= 32;
+                zz[zzOff + 7] = (uint)c;
+            }
+
+            for (int i = 1; i < 7; ++i)
+            {
+                ++zzOff;
+                ulong c = 0, x_i = x[xOff + i];
+                c += x_i * y_0 + zz[zzOff + 0];
+                zz[zzOff + 0] = (uint)c;
+                c >>= 32;
+                c += x_i * y_1 + zz[zzOff + 1];
+                zz[zzOff + 1] = (uint)c;
+                c >>= 32;
+                c += x_i * y_2 + zz[zzOff + 2];
+                zz[zzOff + 2] = (uint)c;
+                c >>= 32;
+                c += x_i * y_3 + zz[zzOff + 3];
+                zz[zzOff + 3] = (uint)c;
+                c >>= 32;
+                c += x_i * y_4 + zz[zzOff + 4];
+                zz[zzOff + 4] = (uint)c;
+                c >>= 32;
+                c += x_i * y_5 + zz[zzOff + 5];
+                zz[zzOff + 5] = (uint)c;
+                c >>= 32;
+                c += x_i * y_6 + zz[zzOff + 6];
+                zz[zzOff + 6] = (uint)c;
+                c >>= 32;
+                zz[zzOff + 7] = (uint)c;
+            }
+        }
+
+        public static uint MulAddTo(uint[] x, uint[] y, uint[] zz)
+        {
+            ulong y_0 = y[0];
+            ulong y_1 = y[1];
+            ulong y_2 = y[2];
+            ulong y_3 = y[3];
+            ulong y_4 = y[4];
+            ulong y_5 = y[5];
+            ulong y_6 = y[6];
+
+            ulong zc = 0;
+            for (int i = 0; i < 7; ++i)
+            {
+                ulong c = 0, x_i = x[i];
+                c += x_i * y_0 + zz[i + 0];
+                zz[i + 0] = (uint)c;
+                c >>= 32;
+                c += x_i * y_1 + zz[i + 1];
+                zz[i + 1] = (uint)c;
+                c >>= 32;
+                c += x_i * y_2 + zz[i + 2];
+                zz[i + 2] = (uint)c;
+                c >>= 32;
+                c += x_i * y_3 + zz[i + 3];
+                zz[i + 3] = (uint)c;
+                c >>= 32;
+                c += x_i * y_4 + zz[i + 4];
+                zz[i + 4] = (uint)c;
+                c >>= 32;
+                c += x_i * y_5 + zz[i + 5];
+                zz[i + 5] = (uint)c;
+                c >>= 32;
+                c += x_i * y_6 + zz[i + 6];
+                zz[i + 6] = (uint)c;
+                c >>= 32;
+                c += zc + zz[i + 7];
+                zz[i + 7] = (uint)c;
+                zc = c >> 32;
+            }
+            return (uint)zc;
+        }
+
+        public static uint MulAddTo(uint[] x, int xOff, uint[] y, int yOff, uint[] zz, int zzOff)
+        {
+            ulong y_0 = y[yOff + 0];
+            ulong y_1 = y[yOff + 1];
+            ulong y_2 = y[yOff + 2];
+            ulong y_3 = y[yOff + 3];
+            ulong y_4 = y[yOff + 4];
+            ulong y_5 = y[yOff + 5];
+            ulong y_6 = y[yOff + 6];
+
+            ulong zc = 0;
+            for (int i = 0; i < 7; ++i)
+            {
+                ulong c = 0, x_i = x[xOff + i];
+                c += x_i * y_0 + zz[zzOff + 0];
+                zz[zzOff + 0] = (uint)c;
+                c >>= 32;
+                c += x_i * y_1 + zz[zzOff + 1];
+                zz[zzOff + 1] = (uint)c;
+                c >>= 32;
+                c += x_i * y_2 + zz[zzOff + 2];
+                zz[zzOff + 2] = (uint)c;
+                c >>= 32;
+                c += x_i * y_3 + zz[zzOff + 3];
+                zz[zzOff + 3] = (uint)c;
+                c >>= 32;
+                c += x_i * y_4 + zz[zzOff + 4];
+                zz[zzOff + 4] = (uint)c;
+                c >>= 32;
+                c += x_i * y_5 + zz[zzOff + 5];
+                zz[zzOff + 5] = (uint)c;
+                c >>= 32;
+                c += x_i * y_6 + zz[zzOff + 6];
+                zz[zzOff + 6] = (uint)c;
+                c >>= 32;
+                c += zc + zz[zzOff + 7];
+                zz[zzOff + 7] = (uint)c;
+                zc = c >> 32;
+                ++zzOff;
+            }
+            return (uint)zc;
+        }
+
+        public static ulong Mul33Add(uint w, uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            Debug.Assert(w >> 31 == 0);
+
+            ulong c = 0, wVal = w;
+            ulong x0 = x[xOff + 0];
+            c += wVal * x0 + y[yOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            ulong x1 = x[xOff + 1];
+            c += wVal * x1 + x0 + y[yOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            ulong x2 = x[xOff + 2];
+            c += wVal * x2 + x1 + y[yOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            ulong x3 = x[xOff + 3];
+            c += wVal * x3 + x2 + y[yOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            ulong x4 = x[xOff + 4];
+            c += wVal * x4 + x3 + y[yOff + 4];
+            z[zOff + 4] = (uint)c;
+            c >>= 32;
+            ulong x5 = x[xOff + 5];
+            c += wVal * x5 + x4 + y[yOff + 5];
+            z[zOff + 5] = (uint)c;
+            c >>= 32;
+            ulong x6 = x[xOff + 6];
+            c += wVal * x6 + x5 + y[yOff + 6];
+            z[zOff + 6] = (uint)c;
+            c >>= 32;
+            c += x6;
+            return c;
+        }
+
+        public static uint MulByWord(uint x, uint[] z)
+        {
+            ulong c = 0, xVal = x;
+            c += xVal * (ulong)z[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[5];
+            z[5] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[6];
+            z[6] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint MulByWordAddTo(uint x, uint[] y, uint[] z)
+        {
+            ulong c = 0, xVal = x;
+            c += xVal * (ulong)z[0] + y[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[1] + y[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[2] + y[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[3] + y[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[4] + y[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[5] + y[5];
+            z[5] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[6] + y[6];
+            z[6] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint MulWordAddTo(uint x, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            ulong c = 0, xVal = x;
+            c += xVal * y[yOff + 0] + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += xVal * y[yOff + 1] + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += xVal * y[yOff + 2] + z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += xVal * y[yOff + 3] + z[zOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            c += xVal * y[yOff + 4] + z[zOff + 4];
+            z[zOff + 4] = (uint)c;
+            c >>= 32;
+            c += xVal * y[yOff + 5] + z[zOff + 5];
+            z[zOff + 5] = (uint)c;
+            c >>= 32;
+            c += xVal * y[yOff + 6] + z[zOff + 6];
+            z[zOff + 6] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint Mul33DWordAdd(uint x, ulong y, uint[] z, int zOff)
+        {
+            Debug.Assert(x >> 31 == 0);
+            Debug.Assert(zOff <= 3);
+            ulong c = 0, xVal = x;
+            ulong y00 = y & M;
+            c += xVal * y00 + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            ulong y01 = y >> 32;
+            c += xVal * y01 + y00 + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += y01 + z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += z[zOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : Nat.IncAt(7, z, zOff, 4);
+        }
+
+        public static uint Mul33WordAdd(uint x, uint y, uint[] z, int zOff)
+        {
+            Debug.Assert(x >> 31 == 0);
+            Debug.Assert(zOff <= 4);
+            ulong c = 0, yVal = y;
+            c += yVal * x + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += yVal + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : Nat.IncAt(7, z, zOff, 3);
+        }
+
+        public static uint MulWordDwordAdd(uint x, ulong y, uint[] z, int zOff)
+        {
+            Debug.Assert(zOff <= 4);
+            ulong c = 0, xVal = x;
+            c += xVal * y + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += xVal * (y >> 32) + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : Nat.IncAt(7, z, zOff, 3);
+        }
+
+        public static uint MulWord(uint x, uint[] y, uint[] z, int zOff)
+        {
+            ulong c = 0, xVal = x;
+            int i = 0;
+            do
+            {
+                c += xVal * y[i];
+                z[zOff + i] = (uint)c;
+                c >>= 32;
+            }
+            while (++i < 7);
+            return (uint)c;
+        }
+
+        public static void Square(uint[] x, uint[] zz)
+        {
+            ulong x_0 = x[0];
+            ulong zz_1;
+
+            uint c = 0, w;
+            {
+                int i = 6, j = 14;
+                do
+                {
+                    ulong xVal = x[i--];
+                    ulong p = xVal * xVal;
+                    zz[--j] = (c << 31) | (uint)(p >> 33);
+                    zz[--j] = (uint)(p >> 1);
+                    c = (uint)p;
+                }
+                while (i > 0);
+
+                {
+                    ulong p = x_0 * x_0;
+                    zz_1 = (ulong)(c << 31) | (p >> 33);
+                    zz[0] = (uint)p;
+                    c = (uint)(p >> 32) & 1;
+                }
+            }
+
+            ulong x_1 = x[1];
+            ulong zz_2 = zz[2];
+
+            {
+                zz_1 += x_1 * x_0;
+                w = (uint)zz_1;
+                zz[1] = (w << 1) | c;
+                c = w >> 31;
+                zz_2 += zz_1 >> 32;
+            }
+
+            ulong x_2 = x[2];
+            ulong zz_3 = zz[3];
+            ulong zz_4 = zz[4];
+            {
+                zz_2 += x_2 * x_0;
+                w = (uint)zz_2;
+                zz[2] = (w << 1) | c;
+                c = w >> 31;
+                zz_3 += (zz_2 >> 32) + x_2 * x_1;
+                zz_4 += zz_3 >> 32;
+                zz_3 &= M;
+            }
+
+            ulong x_3 = x[3];
+            ulong zz_5 = zz[5];
+            ulong zz_6 = zz[6];
+            {
+                zz_3 += x_3 * x_0;
+                w = (uint)zz_3;
+                zz[3] = (w << 1) | c;
+                c = w >> 31;
+                zz_4 += (zz_3 >> 32) + x_3 * x_1;
+                zz_5 += (zz_4 >> 32) + x_3 * x_2;
+                zz_4 &= M;
+                zz_6 += zz_5 >> 32;
+                zz_5 &= M;
+            }
+
+            ulong x_4 = x[4];
+            ulong zz_7 = zz[7];
+            ulong zz_8 = zz[8];
+            {
+                zz_4 += x_4 * x_0;
+                w = (uint)zz_4;
+                zz[4] = (w << 1) | c;
+                c = w >> 31;
+                zz_5 += (zz_4 >> 32) + x_4 * x_1;
+                zz_6 += (zz_5 >> 32) + x_4 * x_2;
+                zz_5 &= M;
+                zz_7 += (zz_6 >> 32) + x_4 * x_3;
+                zz_6 &= M;
+                zz_8 += zz_7 >> 32;
+                zz_7 &= M;
+            }
+
+            ulong x_5 = x[5];
+            ulong zz_9 = zz[9];
+            ulong zz_10 = zz[10];
+            {
+                zz_5 += x_5 * x_0;
+                w = (uint)zz_5;
+                zz[5] = (w << 1) | c;
+                c = w >> 31;
+                zz_6 += (zz_5 >> 32) + x_5 * x_1;
+                zz_7 += (zz_6 >> 32) + x_5 * x_2;
+                zz_6 &= M;
+                zz_8 += (zz_7 >> 32) + x_5 * x_3;
+                zz_7 &= M;
+                zz_9 += (zz_8 >> 32) + x_5 * x_4;
+                zz_8 &= M;
+                zz_10 += zz_9 >> 32;
+                zz_9 &= M;
+            }
+
+            ulong x_6 = x[6];
+            ulong zz_11 = zz[11];
+            ulong zz_12 = zz[12];
+            {
+                zz_6 += x_6 * x_0;
+                w = (uint)zz_6;
+                zz[6] = (w << 1) | c;
+                c = w >> 31;
+                zz_7 += (zz_6 >> 32) + x_6 * x_1;
+                zz_8 += (zz_7 >> 32) + x_6 * x_2;
+                zz_9 += (zz_8 >> 32) + x_6 * x_3;
+                zz_10 += (zz_9 >> 32) + x_6 * x_4;
+                zz_11 += (zz_10 >> 32) + x_6 * x_5;
+                zz_12 += zz_11 >> 32;
+            }
+
+            w = (uint)zz_7;
+            zz[7] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_8;
+            zz[8] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_9;
+            zz[9] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_10;
+            zz[10] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_11;
+            zz[11] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_12;
+            zz[12] = (w << 1) | c;
+            c = w >> 31;
+            w = zz[13] + (uint)(zz_12 >> 32);
+            zz[13] = (w << 1) | c;
+        }
+
+        public static void Square(uint[] x, int xOff, uint[] zz, int zzOff)
+        {
+            ulong x_0 = x[xOff + 0];
+            ulong zz_1;
+
+            uint c = 0, w;
+            {
+                int i = 6, j = 14;
+                do
+                {
+                    ulong xVal = x[xOff + i--];
+                    ulong p = xVal * xVal;
+                    zz[zzOff + --j] = (c << 31) | (uint)(p >> 33);
+                    zz[zzOff + --j] = (uint)(p >> 1);
+                    c = (uint)p;
+                }
+                while (i > 0);
+
+                {
+                    ulong p = x_0 * x_0;
+                    zz_1 = (ulong)(c << 31) | (p >> 33);
+                    zz[zzOff + 0] = (uint)p;
+                    c = (uint)(p >> 32) & 1;
+                }
+            }
+
+            ulong x_1 = x[xOff + 1];
+            ulong zz_2 = zz[zzOff + 2];
+
+            {
+                zz_1 += x_1 * x_0;
+                w = (uint)zz_1;
+                zz[zzOff + 1] = (w << 1) | c;
+                c = w >> 31;
+                zz_2 += zz_1 >> 32;
+            }
+
+            ulong x_2 = x[xOff + 2];
+            ulong zz_3 = zz[zzOff + 3];
+            ulong zz_4 = zz[zzOff + 4];
+            {
+                zz_2 += x_2 * x_0;
+                w = (uint)zz_2;
+                zz[zzOff + 2] = (w << 1) | c;
+                c = w >> 31;
+                zz_3 += (zz_2 >> 32) + x_2 * x_1;
+                zz_4 += zz_3 >> 32;
+                zz_3 &= M;
+            }
+
+            ulong x_3 = x[xOff + 3];
+            ulong zz_5 = zz[zzOff + 5];
+            ulong zz_6 = zz[zzOff + 6];
+            {
+                zz_3 += x_3 * x_0;
+                w = (uint)zz_3;
+                zz[zzOff + 3] = (w << 1) | c;
+                c = w >> 31;
+                zz_4 += (zz_3 >> 32) + x_3 * x_1;
+                zz_5 += (zz_4 >> 32) + x_3 * x_2;
+                zz_4 &= M;
+                zz_6 += zz_5 >> 32;
+                zz_5 &= M;
+            }
+
+            ulong x_4 = x[xOff + 4];
+            ulong zz_7 = zz[zzOff + 7];
+            ulong zz_8 = zz[zzOff + 8];
+            {
+                zz_4 += x_4 * x_0;
+                w = (uint)zz_4;
+                zz[zzOff + 4] = (w << 1) | c;
+                c = w >> 31;
+                zz_5 += (zz_4 >> 32) + x_4 * x_1;
+                zz_6 += (zz_5 >> 32) + x_4 * x_2;
+                zz_5 &= M;
+                zz_7 += (zz_6 >> 32) + x_4 * x_3;
+                zz_6 &= M;
+                zz_8 += zz_7 >> 32;
+                zz_7 &= M;
+            }
+
+            ulong x_5 = x[xOff + 5];
+            ulong zz_9 = zz[zzOff + 9];
+            ulong zz_10 = zz[zzOff + 10];
+            {
+                zz_5 += x_5 * x_0;
+                w = (uint)zz_5;
+                zz[zzOff + 5] = (w << 1) | c;
+                c = w >> 31;
+                zz_6 += (zz_5 >> 32) + x_5 * x_1;
+                zz_7 += (zz_6 >> 32) + x_5 * x_2;
+                zz_6 &= M;
+                zz_8 += (zz_7 >> 32) + x_5 * x_3;
+                zz_7 &= M;
+                zz_9 += (zz_8 >> 32) + x_5 * x_4;
+                zz_8 &= M;
+                zz_10 += zz_9 >> 32;
+                zz_9 &= M;
+            }
+
+            ulong x_6 = x[xOff + 6];
+            ulong zz_11 = zz[zzOff + 11];
+            ulong zz_12 = zz[zzOff + 12];
+            {
+                zz_6 += x_6 * x_0;
+                w = (uint)zz_6;
+                zz[zzOff + 6] = (w << 1) | c;
+                c = w >> 31;
+                zz_7 += (zz_6 >> 32) + x_6 * x_1;
+                zz_8 += (zz_7 >> 32) + x_6 * x_2;
+                zz_9 += (zz_8 >> 32) + x_6 * x_3;
+                zz_10 += (zz_9 >> 32) + x_6 * x_4;
+                zz_11 += (zz_10 >> 32) + x_6 * x_5;
+                zz_12 += zz_11 >> 32;
+            }
+
+            w = (uint)zz_7;
+            zz[zzOff + 7] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_8;
+            zz[zzOff + 8] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_9;
+            zz[zzOff + 9] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_10;
+            zz[zzOff + 10] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_11;
+            zz[zzOff + 11] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_12;
+            zz[zzOff + 12] = (w << 1) | c;
+            c = w >> 31;
+            w = zz[zzOff + 13] + (uint)(zz_12 >> 32);
+            zz[zzOff + 13] = (w << 1) | c;
+        }
+
+        public static int Sub(uint[] x, uint[] y, uint[] z)
+        {
+            long c = 0;
+            c += (long)x[0] - y[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (long)x[1] - y[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (long)x[2] - y[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (long)x[3] - y[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (long)x[4] - y[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            c += (long)x[5] - y[5];
+            z[5] = (uint)c;
+            c >>= 32;
+            c += (long)x[6] - y[6];
+            z[6] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static int Sub(uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            long c = 0;
+            c += (long)x[xOff + 0] - y[yOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 1] - y[yOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 2] - y[yOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 3] - y[yOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 4] - y[yOff + 4];
+            z[zOff + 4] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 5] - y[yOff + 5];
+            z[zOff + 5] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 6] - y[yOff + 6];
+            z[zOff + 6] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static int SubBothFrom(uint[] x, uint[] y, uint[] z)
+        {
+            long c = 0;
+            c += (long)z[0] - x[0] - y[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (long)z[1] - x[1] - y[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (long)z[2] - x[2] - y[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (long)z[3] - x[3] - y[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (long)z[4] - x[4] - y[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            c += (long)z[5] - x[5] - y[5];
+            z[5] = (uint)c;
+            c >>= 32;
+            c += (long)z[6] - x[6] - y[6];
+            z[6] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static int SubFrom(uint[] x, uint[] z)
+        {
+            long c = 0;
+            c += (long)z[0] - x[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (long)z[1] - x[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (long)z[2] - x[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (long)z[3] - x[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (long)z[4] - x[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            c += (long)z[5] - x[5];
+            z[5] = (uint)c;
+            c >>= 32;
+            c += (long)z[6] - x[6];
+            z[6] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static int SubFrom(uint[] x, int xOff, uint[] z, int zOff)
+        {
+            long c = 0;
+            c += (long)z[zOff + 0] - x[xOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 1] - x[xOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 2] - x[xOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 3] - x[xOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 4] - x[xOff + 4];
+            z[zOff + 4] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 5] - x[xOff + 5];
+            z[zOff + 5] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 6] - x[xOff + 6];
+            z[zOff + 6] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static BigInteger ToBigInteger(uint[] x)
+        {
+            byte[] bs = new byte[28];
+            for (int i = 0; i < 7; ++i)
+            {
+                uint x_i = x[i];
+                if (x_i != 0)
+                {
+                    Pack.UInt32_To_BE(x_i, bs, (6 - i) << 2);
+                }
+            }
+            return new BigInteger(1, bs);
+        }
+
+        public static void Zero(uint[] z)
+        {
+            z[0] = 0;
+            z[1] = 0;
+            z[2] = 0;
+            z[3] = 0;
+            z[4] = 0;
+            z[5] = 0;
+            z[6] = 0;
+        }
+    }
+}
diff --git a/crypto/src/math/raw/Nat256.cs b/crypto/src/math/raw/Nat256.cs
new file mode 100644
index 000000000..19455031a
--- /dev/null
+++ b/crypto/src/math/raw/Nat256.cs
@@ -0,0 +1,1387 @@
+using System;
+using System.Diagnostics;
+
+using Org.BouncyCastle.Crypto.Utilities;
+
+namespace Org.BouncyCastle.Math.Raw
+{
+    internal abstract class Nat256
+    {
+        private const ulong M = 0xFFFFFFFFUL;
+
+        public static uint Add(uint[] x, uint[] y, uint[] z)
+        {
+            ulong c = 0;
+            c += (ulong)x[0] + y[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[1] + y[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[2] + y[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[3] + y[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[4] + y[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[5] + y[5];
+            z[5] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[6] + y[6];
+            z[6] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[7] + y[7];
+            z[7] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint Add(uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            ulong c = 0;
+            c += (ulong)x[xOff + 0] + y[yOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 1] + y[yOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 2] + y[yOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 3] + y[yOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 4] + y[yOff + 4];
+            z[zOff + 4] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 5] + y[yOff + 5];
+            z[zOff + 5] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 6] + y[yOff + 6];
+            z[zOff + 6] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 7] + y[yOff + 7];
+            z[zOff + 7] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint AddBothTo(uint[] x, uint[] y, uint[] z)
+        {
+            ulong c = 0;
+            c += (ulong)x[0] + y[0] + z[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[1] + y[1] + z[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[2] + y[2] + z[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[3] + y[3] + z[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[4] + y[4] + z[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[5] + y[5] + z[5];
+            z[5] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[6] + y[6] + z[6];
+            z[6] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[7] + y[7] + z[7];
+            z[7] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint AddBothTo(uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            ulong c = 0;
+            c += (ulong)x[xOff + 0] + y[yOff + 0] + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 1] + y[yOff + 1] + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 2] + y[yOff + 2] + z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 3] + y[yOff + 3] + z[zOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 4] + y[yOff + 4] + z[zOff + 4];
+            z[zOff + 4] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 5] + y[yOff + 5] + z[zOff + 5];
+            z[zOff + 5] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 6] + y[yOff + 6] + z[zOff + 6];
+            z[zOff + 6] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 7] + y[yOff + 7] + z[zOff + 7];
+            z[zOff + 7] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint AddTo(uint[] x, uint[] z)
+        {
+            ulong c = 0;
+            c += (ulong)x[0] + z[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[1] + z[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[2] + z[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[3] + z[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[4] + z[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[5] + z[5];
+            z[5] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[6] + z[6];
+            z[6] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[7] + z[7];
+            z[7] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint AddTo(uint[] x, int xOff, uint[] z, int zOff, uint cIn)
+        {
+            ulong c = cIn;
+            c += (ulong)x[xOff + 0] + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 1] + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 2] + z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 3] + z[zOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 4] + z[zOff + 4];
+            z[zOff + 4] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 5] + z[zOff + 5];
+            z[zOff + 5] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 6] + z[zOff + 6];
+            z[zOff + 6] = (uint)c;
+            c >>= 32;
+            c += (ulong)x[xOff + 7] + z[zOff + 7];
+            z[zOff + 7] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint AddToEachOther(uint[] u, int uOff, uint[] v, int vOff)
+        {
+            ulong c = 0;
+            c += (ulong)u[uOff + 0] + v[vOff + 0];
+            u[uOff + 0] = (uint)c;
+            v[vOff + 0] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 1] + v[vOff + 1];
+            u[uOff + 1] = (uint)c;
+            v[vOff + 1] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 2] + v[vOff + 2];
+            u[uOff + 2] = (uint)c;
+            v[vOff + 2] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 3] + v[vOff + 3];
+            u[uOff + 3] = (uint)c;
+            v[vOff + 3] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 4] + v[vOff + 4];
+            u[uOff + 4] = (uint)c;
+            v[vOff + 4] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 5] + v[vOff + 5];
+            u[uOff + 5] = (uint)c;
+            v[vOff + 5] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 6] + v[vOff + 6];
+            u[uOff + 6] = (uint)c;
+            v[vOff + 6] = (uint)c;
+            c >>= 32;
+            c += (ulong)u[uOff + 7] + v[vOff + 7];
+            u[uOff + 7] = (uint)c;
+            v[vOff + 7] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static void Copy(uint[] x, uint[] z)
+        {
+            z[0] = x[0];
+            z[1] = x[1];
+            z[2] = x[2];
+            z[3] = x[3];
+            z[4] = x[4];
+            z[5] = x[5];
+            z[6] = x[6];
+            z[7] = x[7];
+        }
+
+        public static void Copy64(ulong[] x, ulong[] z)
+        {
+            z[0] = x[0];
+            z[1] = x[1];
+            z[2] = x[2];
+            z[3] = x[3];
+        }
+
+        public static uint[] Create()
+        {
+            return new uint[8];
+        }
+
+        public static ulong[] Create64()
+        {
+            return new ulong[4];
+        }
+
+        public static uint[] CreateExt()
+        {
+            return new uint[16];
+        }
+
+        public static ulong[] CreateExt64()
+        {
+            return new ulong[8];
+        }
+
+        public static bool Diff(uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            bool pos = Gte(x, xOff, y, yOff);
+            if (pos)
+            {
+                Sub(x, xOff, y, yOff, z, zOff);
+            }
+            else
+            {
+                Sub(y, yOff, x, xOff, z, zOff);
+            }
+            return pos;
+        }
+
+        public static bool Eq(uint[] x, uint[] y)
+        {
+            for (int i = 7; i >= 0; --i)
+            {
+                if (x[i] != y[i])
+                    return false;
+            }
+            return true;
+        }
+
+        public static bool Eq64(ulong[] x, ulong[] y)
+        {
+            for (int i = 3; i >= 0; --i)
+            {
+                if (x[i] != y[i])
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static uint[] FromBigInteger(BigInteger x)
+        {
+            if (x.SignValue < 0 || x.BitLength > 256)
+                throw new ArgumentException();
+
+            uint[] z = Create();
+            int i = 0;
+            while (x.SignValue != 0)
+            {
+                z[i++] = (uint)x.IntValue;
+                x = x.ShiftRight(32);
+            }
+            return z;
+        }
+
+        public static ulong[] FromBigInteger64(BigInteger x)
+        {
+            if (x.SignValue < 0 || x.BitLength > 256)
+                throw new ArgumentException();
+
+            ulong[] z = Create64();
+            int i = 0;
+            while (x.SignValue != 0)
+            {
+                z[i++] = (ulong)x.LongValue;
+                x = x.ShiftRight(64);
+            }
+            return z;
+        }
+
+        public static uint GetBit(uint[] x, int bit)
+        {
+            if (bit == 0)
+            {
+                return x[0] & 1;
+            }
+            if ((bit & 255) != bit)
+            {
+                return 0;
+            }
+            int w = bit >> 5;
+            int b = bit & 31;
+            return (x[w] >> b) & 1;
+        }
+
+        public static bool Gte(uint[] x, uint[] y)
+        {
+            for (int i = 7; i >= 0; --i)
+            {
+                uint x_i = x[i], y_i = y[i];
+                if (x_i < y_i)
+                    return false;
+                if (x_i > y_i)
+                    return true;
+            }
+            return true;
+        }
+
+        public static bool Gte(uint[] x, int xOff, uint[] y, int yOff)
+        {
+            for (int i = 7; i >= 0; --i)
+            {
+                uint x_i = x[xOff + i], y_i = y[yOff + i];
+                if (x_i < y_i)
+                    return false;
+                if (x_i > y_i)
+                    return true;
+            }
+            return true;
+        }
+
+        public static bool IsOne(uint[] x)
+        {
+            if (x[0] != 1)
+            {
+                return false;
+            }
+            for (int i = 1; i < 8; ++i)
+            {
+                if (x[i] != 0)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static bool IsOne64(ulong[] x)
+        {
+            if (x[0] != 1UL)
+            {
+                return false;
+            }
+            for (int i = 1; i < 4; ++i)
+            {
+                if (x[i] != 0UL)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static bool IsZero(uint[] x)
+        {
+            for (int i = 0; i < 8; ++i)
+            {
+                if (x[i] != 0)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static bool IsZero64(ulong[] x)
+        {
+            for (int i = 0; i < 4; ++i)
+            {
+                if (x[i] != 0UL)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static void Mul(uint[] x, uint[] y, uint[] zz)
+        {
+            ulong y_0 = y[0];
+            ulong y_1 = y[1];
+            ulong y_2 = y[2];
+            ulong y_3 = y[3];
+            ulong y_4 = y[4];
+            ulong y_5 = y[5];
+            ulong y_6 = y[6];
+            ulong y_7 = y[7];
+
+            {
+                ulong c = 0, x_0 = x[0];
+                c += x_0 * y_0;
+                zz[0] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_1;
+                zz[1] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_2;
+                zz[2] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_3;
+                zz[3] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_4;
+                zz[4] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_5;
+                zz[5] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_6;
+                zz[6] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_7;
+                zz[7] = (uint)c;
+                c >>= 32;
+                zz[8] = (uint)c;
+            }
+
+            for (int i = 1; i < 8; ++i)
+            {
+                ulong c = 0, x_i = x[i];
+                c += x_i * y_0 + zz[i + 0];
+                zz[i + 0] = (uint)c;
+                c >>= 32;
+                c += x_i * y_1 + zz[i + 1];
+                zz[i + 1] = (uint)c;
+                c >>= 32;
+                c += x_i * y_2 + zz[i + 2];
+                zz[i + 2] = (uint)c;
+                c >>= 32;
+                c += x_i * y_3 + zz[i + 3];
+                zz[i + 3] = (uint)c;
+                c >>= 32;
+                c += x_i * y_4 + zz[i + 4];
+                zz[i + 4] = (uint)c;
+                c >>= 32;
+                c += x_i * y_5 + zz[i + 5];
+                zz[i + 5] = (uint)c;
+                c >>= 32;
+                c += x_i * y_6 + zz[i + 6];
+                zz[i + 6] = (uint)c;
+                c >>= 32;
+                c += x_i * y_7 + zz[i + 7];
+                zz[i + 7] = (uint)c;
+                c >>= 32;
+                zz[i + 8] = (uint)c;
+            }
+        }
+
+        public static void Mul(uint[] x, int xOff, uint[] y, int yOff, uint[] zz, int zzOff)
+        {
+            ulong y_0 = y[yOff + 0];
+            ulong y_1 = y[yOff + 1];
+            ulong y_2 = y[yOff + 2];
+            ulong y_3 = y[yOff + 3];
+            ulong y_4 = y[yOff + 4];
+            ulong y_5 = y[yOff + 5];
+            ulong y_6 = y[yOff + 6];
+            ulong y_7 = y[yOff + 7];
+
+            {
+                ulong c = 0, x_0 = x[xOff + 0];
+                c += x_0 * y_0;
+                zz[zzOff + 0] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_1;
+                zz[zzOff + 1] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_2;
+                zz[zzOff + 2] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_3;
+                zz[zzOff + 3] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_4;
+                zz[zzOff + 4] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_5;
+                zz[zzOff + 5] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_6;
+                zz[zzOff + 6] = (uint)c;
+                c >>= 32;
+                c += x_0 * y_7;
+                zz[zzOff + 7] = (uint)c;
+                c >>= 32;
+                zz[zzOff + 8] = (uint)c;
+            }
+
+            for (int i = 1; i < 8; ++i)
+            {
+                ++zzOff;
+                ulong c = 0, x_i = x[xOff + i];
+                c += x_i * y_0 + zz[zzOff + 0];
+                zz[zzOff + 0] = (uint)c;
+                c >>= 32;
+                c += x_i * y_1 + zz[zzOff + 1];
+                zz[zzOff + 1] = (uint)c;
+                c >>= 32;
+                c += x_i * y_2 + zz[zzOff + 2];
+                zz[zzOff + 2] = (uint)c;
+                c >>= 32;
+                c += x_i * y_3 + zz[zzOff + 3];
+                zz[zzOff + 3] = (uint)c;
+                c >>= 32;
+                c += x_i * y_4 + zz[zzOff + 4];
+                zz[zzOff + 4] = (uint)c;
+                c >>= 32;
+                c += x_i * y_5 + zz[zzOff + 5];
+                zz[zzOff + 5] = (uint)c;
+                c >>= 32;
+                c += x_i * y_6 + zz[zzOff + 6];
+                zz[zzOff + 6] = (uint)c;
+                c >>= 32;
+                c += x_i * y_7 + zz[zzOff + 7];
+                zz[zzOff + 7] = (uint)c;
+                c >>= 32;
+                zz[zzOff + 8] = (uint)c;
+            }
+        }
+
+        public static uint MulAddTo(uint[] x, uint[] y, uint[] zz)
+        {
+            ulong y_0 = y[0];
+            ulong y_1 = y[1];
+            ulong y_2 = y[2];
+            ulong y_3 = y[3];
+            ulong y_4 = y[4];
+            ulong y_5 = y[5];
+            ulong y_6 = y[6];
+            ulong y_7 = y[7];
+
+            ulong zc = 0;
+            for (int i = 0; i < 8; ++i)
+            {
+                ulong c = 0, x_i = x[i];
+                c += x_i * y_0 + zz[i + 0];
+                zz[i + 0] = (uint)c;
+                c >>= 32;
+                c += x_i * y_1 + zz[i + 1];
+                zz[i + 1] = (uint)c;
+                c >>= 32;
+                c += x_i * y_2 + zz[i + 2];
+                zz[i + 2] = (uint)c;
+                c >>= 32;
+                c += x_i * y_3 + zz[i + 3];
+                zz[i + 3] = (uint)c;
+                c >>= 32;
+                c += x_i * y_4 + zz[i + 4];
+                zz[i + 4] = (uint)c;
+                c >>= 32;
+                c += x_i * y_5 + zz[i + 5];
+                zz[i + 5] = (uint)c;
+                c >>= 32;
+                c += x_i * y_6 + zz[i + 6];
+                zz[i + 6] = (uint)c;
+                c >>= 32;
+                c += x_i * y_7 + zz[i + 7];
+                zz[i + 7] = (uint)c;
+                c >>= 32;
+                c += zc + zz[i + 8];
+                zz[i + 8] = (uint)c;
+                zc = c >> 32;
+            }
+            return (uint)zc;
+        }
+
+        public static uint MulAddTo(uint[] x, int xOff, uint[] y, int yOff, uint[] zz, int zzOff)
+        {
+            ulong y_0 = y[yOff + 0];
+            ulong y_1 = y[yOff + 1];
+            ulong y_2 = y[yOff + 2];
+            ulong y_3 = y[yOff + 3];
+            ulong y_4 = y[yOff + 4];
+            ulong y_5 = y[yOff + 5];
+            ulong y_6 = y[yOff + 6];
+            ulong y_7 = y[yOff + 7];
+
+            ulong zc = 0;
+            for (int i = 0; i < 8; ++i)
+            {
+                ulong c = 0, x_i = x[xOff + i];
+                c += x_i * y_0 + zz[zzOff + 0];
+                zz[zzOff + 0] = (uint)c;
+                c >>= 32;
+                c += x_i * y_1 + zz[zzOff + 1];
+                zz[zzOff + 1] = (uint)c;
+                c >>= 32;
+                c += x_i * y_2 + zz[zzOff + 2];
+                zz[zzOff + 2] = (uint)c;
+                c >>= 32;
+                c += x_i * y_3 + zz[zzOff + 3];
+                zz[zzOff + 3] = (uint)c;
+                c >>= 32;
+                c += x_i * y_4 + zz[zzOff + 4];
+                zz[zzOff + 4] = (uint)c;
+                c >>= 32;
+                c += x_i * y_5 + zz[zzOff + 5];
+                zz[zzOff + 5] = (uint)c;
+                c >>= 32;
+                c += x_i * y_6 + zz[zzOff + 6];
+                zz[zzOff + 6] = (uint)c;
+                c >>= 32;
+                c += x_i * y_7 + zz[zzOff + 7];
+                zz[zzOff + 7] = (uint)c;
+                c >>= 32;
+                c += zc + zz[zzOff + 8];
+                zz[zzOff + 8] = (uint)c;
+                zc = c >> 32;
+                ++zzOff;
+            }
+            return (uint)zc;
+        }
+
+        public static ulong Mul33Add(uint w, uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            Debug.Assert(w >> 31 == 0);
+
+            ulong c = 0, wVal = w;
+            ulong x0 = x[xOff + 0];
+            c += wVal * x0 + y[yOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            ulong x1 = x[xOff + 1];
+            c += wVal * x1 + x0 + y[yOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            ulong x2 = x[xOff + 2];
+            c += wVal * x2 + x1 + y[yOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            ulong x3 = x[xOff + 3];
+            c += wVal * x3 + x2 + y[yOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            ulong x4 = x[xOff + 4];
+            c += wVal * x4 + x3 + y[yOff + 4];
+            z[zOff + 4] = (uint)c;
+            c >>= 32;
+            ulong x5 = x[xOff + 5];
+            c += wVal * x5 + x4 + y[yOff + 5];
+            z[zOff + 5] = (uint)c;
+            c >>= 32;
+            ulong x6 = x[xOff + 6];
+            c += wVal * x6 + x5 + y[yOff + 6];
+            z[zOff + 6] = (uint)c;
+            c >>= 32;
+            ulong x7 = x[xOff + 7];
+            c += wVal * x7 + x6 + y[yOff + 7];
+            z[zOff + 7] = (uint)c;
+            c >>= 32;
+            c += x7;
+            return c;
+        }
+
+        public static uint MulByWord(uint x, uint[] z)
+        {
+            ulong c = 0, xVal = x;
+            c += xVal * (ulong)z[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[5];
+            z[5] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[6];
+            z[6] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[7];
+            z[7] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint MulByWordAddTo(uint x, uint[] y, uint[] z)
+        {
+            ulong c = 0, xVal = x;
+            c += xVal * (ulong)z[0] + y[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[1] + y[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[2] + y[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[3] + y[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[4] + y[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[5] + y[5];
+            z[5] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[6] + y[6];
+            z[6] = (uint)c;
+            c >>= 32;
+            c += xVal * (ulong)z[7] + y[7];
+            z[7] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint MulWordAddTo(uint x, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            ulong c = 0, xVal = x;
+            c += xVal * y[yOff + 0] + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += xVal * y[yOff + 1] + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += xVal * y[yOff + 2] + z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += xVal * y[yOff + 3] + z[zOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            c += xVal * y[yOff + 4] + z[zOff + 4];
+            z[zOff + 4] = (uint)c;
+            c >>= 32;
+            c += xVal * y[yOff + 5] + z[zOff + 5];
+            z[zOff + 5] = (uint)c;
+            c >>= 32;
+            c += xVal * y[yOff + 6] + z[zOff + 6];
+            z[zOff + 6] = (uint)c;
+            c >>= 32;
+            c += xVal * y[yOff + 7] + z[zOff + 7];
+            z[zOff + 7] = (uint)c;
+            c >>= 32;
+            return (uint)c;
+        }
+
+        public static uint Mul33DWordAdd(uint x, ulong y, uint[] z, int zOff)
+        {
+            Debug.Assert(x >> 31 == 0);
+            Debug.Assert(zOff <= 4);
+            ulong c = 0, xVal = x;
+            ulong y00 = y & M;
+            c += xVal * y00 + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            ulong y01 = y >> 32;
+            c += xVal * y01 + y00 + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += y01 + z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += z[zOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : Nat.IncAt(8, z, zOff, 4);
+        }
+
+        public static uint Mul33WordAdd(uint x, uint y, uint[] z, int zOff)
+        {
+            Debug.Assert(x >> 31 == 0);
+            Debug.Assert(zOff <= 5);
+            ulong c = 0, yVal = y;
+            c += yVal * x + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += yVal + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : Nat.IncAt(8, z, zOff, 3);
+        }
+
+        public static uint MulWordDwordAdd(uint x, ulong y, uint[] z, int zOff)
+        {
+            Debug.Assert(zOff <= 5);
+            ulong c = 0, xVal = x;
+            c += xVal * y + z[zOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += xVal * (y >> 32) + z[zOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += z[zOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            return c == 0 ? 0 : Nat.IncAt(8, z, zOff, 3);
+        }
+
+        public static uint MulWord(uint x, uint[] y, uint[] z, int zOff)
+        {
+            ulong c = 0, xVal = x;
+            int i = 0;
+            do
+            {
+                c += xVal * y[i];
+                z[zOff + i] = (uint)c;
+                c >>= 32;
+            }
+            while (++i < 8);
+            return (uint)c;
+        }
+
+        public static void Square(uint[] x, uint[] zz)
+        {
+            ulong x_0 = x[0];
+            ulong zz_1;
+
+            uint c = 0, w;
+            {
+                int i = 7, j = 16;
+                do
+                {
+                    ulong xVal = x[i--];
+                    ulong p = xVal * xVal;
+                    zz[--j] = (c << 31) | (uint)(p >> 33);
+                    zz[--j] = (uint)(p >> 1);
+                    c = (uint)p;
+                }
+                while (i > 0);
+
+                {
+                    ulong p = x_0 * x_0;
+                    zz_1 = (ulong)(c << 31) | (p >> 33);
+                    zz[0] = (uint)p;
+                    c = (uint)(p >> 32) & 1;
+                }
+            }
+
+            ulong x_1 = x[1];
+            ulong zz_2 = zz[2];
+
+            {
+                zz_1 += x_1 * x_0;
+                w = (uint)zz_1;
+                zz[1] = (w << 1) | c;
+                c = w >> 31;
+                zz_2 += zz_1 >> 32;
+            }
+
+            ulong x_2 = x[2];
+            ulong zz_3 = zz[3];
+            ulong zz_4 = zz[4];
+            {
+                zz_2 += x_2 * x_0;
+                w = (uint)zz_2;
+                zz[2] = (w << 1) | c;
+                c = w >> 31;
+                zz_3 += (zz_2 >> 32) + x_2 * x_1;
+                zz_4 += zz_3 >> 32;
+                zz_3 &= M;
+            }
+
+            ulong x_3 = x[3];
+            ulong zz_5 = zz[5];
+            ulong zz_6 = zz[6];
+            {
+                zz_3 += x_3 * x_0;
+                w = (uint)zz_3;
+                zz[3] = (w << 1) | c;
+                c = w >> 31;
+                zz_4 += (zz_3 >> 32) + x_3 * x_1;
+                zz_5 += (zz_4 >> 32) + x_3 * x_2;
+                zz_4 &= M;
+                zz_6 += zz_5 >> 32;
+                zz_5 &= M;
+            }
+
+            ulong x_4 = x[4];
+            ulong zz_7 = zz[7];
+            ulong zz_8 = zz[8];
+            {
+                zz_4 += x_4 * x_0;
+                w = (uint)zz_4;
+                zz[4] = (w << 1) | c;
+                c = w >> 31;
+                zz_5 += (zz_4 >> 32) + x_4 * x_1;
+                zz_6 += (zz_5 >> 32) + x_4 * x_2;
+                zz_5 &= M;
+                zz_7 += (zz_6 >> 32) + x_4 * x_3;
+                zz_6 &= M;
+                zz_8 += zz_7 >> 32;
+                zz_7 &= M;
+            }
+
+            ulong x_5 = x[5];
+            ulong zz_9 = zz[9];
+            ulong zz_10 = zz[10];
+            {
+                zz_5 += x_5 * x_0;
+                w = (uint)zz_5;
+                zz[5] = (w << 1) | c;
+                c = w >> 31;
+                zz_6 += (zz_5 >> 32) + x_5 * x_1;
+                zz_7 += (zz_6 >> 32) + x_5 * x_2;
+                zz_6 &= M;
+                zz_8 += (zz_7 >> 32) + x_5 * x_3;
+                zz_7 &= M;
+                zz_9 += (zz_8 >> 32) + x_5 * x_4;
+                zz_8 &= M;
+                zz_10 += zz_9 >> 32;
+                zz_9 &= M;
+            }
+
+            ulong x_6 = x[6];
+            ulong zz_11 = zz[11];
+            ulong zz_12 = zz[12];
+            {
+                zz_6 += x_6 * x_0;
+                w = (uint)zz_6;
+                zz[6] = (w << 1) | c;
+                c = w >> 31;
+                zz_7 += (zz_6 >> 32) + x_6 * x_1;
+                zz_8 += (zz_7 >> 32) + x_6 * x_2;
+                zz_7 &= M;
+                zz_9 += (zz_8 >> 32) + x_6 * x_3;
+                zz_8 &= M;
+                zz_10 += (zz_9 >> 32) + x_6 * x_4;
+                zz_9 &= M;
+                zz_11 += (zz_10 >> 32) + x_6 * x_5;
+                zz_10 &= M;
+                zz_12 += zz_11 >> 32;
+                zz_11 &= M;
+            }
+
+            ulong x_7 = x[7];
+            ulong zz_13 = zz[13];
+            ulong zz_14 = zz[14];
+            {
+                zz_7 += x_7 * x_0;
+                w = (uint)zz_7;
+                zz[7] = (w << 1) | c;
+                c = w >> 31;
+                zz_8 += (zz_7 >> 32) + x_7 * x_1;
+                zz_9 += (zz_8 >> 32) + x_7 * x_2;
+                zz_10 += (zz_9 >> 32) + x_7 * x_3;
+                zz_11 += (zz_10 >> 32) + x_7 * x_4;
+                zz_12 += (zz_11 >> 32) + x_7 * x_5;
+                zz_13 += (zz_12 >> 32) + x_7 * x_6;
+                zz_14 += zz_13 >> 32;
+            }
+
+            w = (uint)zz_8;
+            zz[8] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_9;
+            zz[9] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_10;
+            zz[10] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_11;
+            zz[11] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_12;
+            zz[12] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_13;
+            zz[13] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_14;
+            zz[14] = (w << 1) | c;
+            c = w >> 31;
+            w = zz[15] + (uint)(zz_14 >> 32);
+            zz[15] = (w << 1) | c;
+        }
+
+        public static void Square(uint[] x, int xOff, uint[] zz, int zzOff)
+        {
+            ulong x_0 = x[xOff + 0];
+            ulong zz_1;
+
+            uint c = 0, w;
+            {
+                int i = 7, j = 16;
+                do
+                {
+                    ulong xVal = x[xOff + i--];
+                    ulong p = xVal * xVal;
+                    zz[zzOff + --j] = (c << 31) | (uint)(p >> 33);
+                    zz[zzOff + --j] = (uint)(p >> 1);
+                    c = (uint)p;
+                }
+                while (i > 0);
+
+                {
+                    ulong p = x_0 * x_0;
+                    zz_1 = (ulong)(c << 31) | (p >> 33);
+                    zz[zzOff + 0] = (uint)p;
+                    c = (uint)(p >> 32) & 1;
+                }
+            }
+
+            ulong x_1 = x[xOff + 1];
+            ulong zz_2 = zz[zzOff + 2];
+
+            {
+                zz_1 += x_1 * x_0;
+                w = (uint)zz_1;
+                zz[zzOff + 1] = (w << 1) | c;
+                c = w >> 31;
+                zz_2 += zz_1 >> 32;
+            }
+
+            ulong x_2 = x[xOff + 2];
+            ulong zz_3 = zz[zzOff + 3];
+            ulong zz_4 = zz[zzOff + 4];
+            {
+                zz_2 += x_2 * x_0;
+                w = (uint)zz_2;
+                zz[zzOff + 2] = (w << 1) | c;
+                c = w >> 31;
+                zz_3 += (zz_2 >> 32) + x_2 * x_1;
+                zz_4 += zz_3 >> 32;
+                zz_3 &= M;
+            }
+
+            ulong x_3 = x[xOff + 3];
+            ulong zz_5 = zz[zzOff + 5];
+            ulong zz_6 = zz[zzOff + 6];
+            {
+                zz_3 += x_3 * x_0;
+                w = (uint)zz_3;
+                zz[zzOff + 3] = (w << 1) | c;
+                c = w >> 31;
+                zz_4 += (zz_3 >> 32) + x_3 * x_1;
+                zz_5 += (zz_4 >> 32) + x_3 * x_2;
+                zz_4 &= M;
+                zz_6 += zz_5 >> 32;
+                zz_5 &= M;
+            }
+
+            ulong x_4 = x[xOff + 4];
+            ulong zz_7 = zz[zzOff + 7];
+            ulong zz_8 = zz[zzOff + 8];
+            {
+                zz_4 += x_4 * x_0;
+                w = (uint)zz_4;
+                zz[zzOff + 4] = (w << 1) | c;
+                c = w >> 31;
+                zz_5 += (zz_4 >> 32) + x_4 * x_1;
+                zz_6 += (zz_5 >> 32) + x_4 * x_2;
+                zz_5 &= M;
+                zz_7 += (zz_6 >> 32) + x_4 * x_3;
+                zz_6 &= M;
+                zz_8 += zz_7 >> 32;
+                zz_7 &= M;
+            }
+
+            ulong x_5 = x[xOff + 5];
+            ulong zz_9 = zz[zzOff + 9];
+            ulong zz_10 = zz[zzOff + 10];
+            {
+                zz_5 += x_5 * x_0;
+                w = (uint)zz_5;
+                zz[zzOff + 5] = (w << 1) | c;
+                c = w >> 31;
+                zz_6 += (zz_5 >> 32) + x_5 * x_1;
+                zz_7 += (zz_6 >> 32) + x_5 * x_2;
+                zz_6 &= M;
+                zz_8 += (zz_7 >> 32) + x_5 * x_3;
+                zz_7 &= M;
+                zz_9 += (zz_8 >> 32) + x_5 * x_4;
+                zz_8 &= M;
+                zz_10 += zz_9 >> 32;
+                zz_9 &= M;
+            }
+
+            ulong x_6 = x[xOff + 6];
+            ulong zz_11 = zz[zzOff + 11];
+            ulong zz_12 = zz[zzOff + 12];
+            {
+                zz_6 += x_6 * x_0;
+                w = (uint)zz_6;
+                zz[zzOff + 6] = (w << 1) | c;
+                c = w >> 31;
+                zz_7 += (zz_6 >> 32) + x_6 * x_1;
+                zz_8 += (zz_7 >> 32) + x_6 * x_2;
+                zz_7 &= M;
+                zz_9 += (zz_8 >> 32) + x_6 * x_3;
+                zz_8 &= M;
+                zz_10 += (zz_9 >> 32) + x_6 * x_4;
+                zz_9 &= M;
+                zz_11 += (zz_10 >> 32) + x_6 * x_5;
+                zz_10 &= M;
+                zz_12 += zz_11 >> 32;
+                zz_11 &= M;
+            }
+
+            ulong x_7 = x[xOff + 7];
+            ulong zz_13 = zz[zzOff + 13];
+            ulong zz_14 = zz[zzOff + 14];
+            {
+                zz_7 += x_7 * x_0;
+                w = (uint)zz_7;
+                zz[zzOff + 7] = (w << 1) | c;
+                c = w >> 31;
+                zz_8 += (zz_7 >> 32) + x_7 * x_1;
+                zz_9 += (zz_8 >> 32) + x_7 * x_2;
+                zz_10 += (zz_9 >> 32) + x_7 * x_3;
+                zz_11 += (zz_10 >> 32) + x_7 * x_4;
+                zz_12 += (zz_11 >> 32) + x_7 * x_5;
+                zz_13 += (zz_12 >> 32) + x_7 * x_6;
+                zz_14 += zz_13 >> 32;
+            }
+
+            w = (uint)zz_8;
+            zz[zzOff + 8] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_9;
+            zz[zzOff + 9] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_10;
+            zz[zzOff + 10] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_11;
+            zz[zzOff + 11] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_12;
+            zz[zzOff + 12] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_13;
+            zz[zzOff + 13] = (w << 1) | c;
+            c = w >> 31;
+            w = (uint)zz_14;
+            zz[zzOff + 14] = (w << 1) | c;
+            c = w >> 31;
+            w = zz[zzOff + 15] + (uint)(zz_14 >> 32);
+            zz[zzOff + 15] = (w << 1) | c;
+        }
+
+        public static int Sub(uint[] x, uint[] y, uint[] z)
+        {
+            long c = 0;
+            c += (long)x[0] - y[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (long)x[1] - y[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (long)x[2] - y[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (long)x[3] - y[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (long)x[4] - y[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            c += (long)x[5] - y[5];
+            z[5] = (uint)c;
+            c >>= 32;
+            c += (long)x[6] - y[6];
+            z[6] = (uint)c;
+            c >>= 32;
+            c += (long)x[7] - y[7];
+            z[7] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static int Sub(uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
+        {
+            long c = 0;
+            c += (long)x[xOff + 0] - y[yOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 1] - y[yOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 2] - y[yOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 3] - y[yOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 4] - y[yOff + 4];
+            z[zOff + 4] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 5] - y[yOff + 5];
+            z[zOff + 5] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 6] - y[yOff + 6];
+            z[zOff + 6] = (uint)c;
+            c >>= 32;
+            c += (long)x[xOff + 7] - y[yOff + 7];
+            z[zOff + 7] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static int SubBothFrom(uint[] x, uint[] y, uint[] z)
+        {
+            long c = 0;
+            c += (long)z[0] - x[0] - y[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (long)z[1] - x[1] - y[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (long)z[2] - x[2] - y[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (long)z[3] - x[3] - y[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (long)z[4] - x[4] - y[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            c += (long)z[5] - x[5] - y[5];
+            z[5] = (uint)c;
+            c >>= 32;
+            c += (long)z[6] - x[6] - y[6];
+            z[6] = (uint)c;
+            c >>= 32;
+            c += (long)z[7] - x[7] - y[7];
+            z[7] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static int SubFrom(uint[] x, uint[] z)
+        {
+            long c = 0;
+            c += (long)z[0] - x[0];
+            z[0] = (uint)c;
+            c >>= 32;
+            c += (long)z[1] - x[1];
+            z[1] = (uint)c;
+            c >>= 32;
+            c += (long)z[2] - x[2];
+            z[2] = (uint)c;
+            c >>= 32;
+            c += (long)z[3] - x[3];
+            z[3] = (uint)c;
+            c >>= 32;
+            c += (long)z[4] - x[4];
+            z[4] = (uint)c;
+            c >>= 32;
+            c += (long)z[5] - x[5];
+            z[5] = (uint)c;
+            c >>= 32;
+            c += (long)z[6] - x[6];
+            z[6] = (uint)c;
+            c >>= 32;
+            c += (long)z[7] - x[7];
+            z[7] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static int SubFrom(uint[] x, int xOff, uint[] z, int zOff)
+        {
+            long c = 0;
+            c += (long)z[zOff + 0] - x[xOff + 0];
+            z[zOff + 0] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 1] - x[xOff + 1];
+            z[zOff + 1] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 2] - x[xOff + 2];
+            z[zOff + 2] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 3] - x[xOff + 3];
+            z[zOff + 3] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 4] - x[xOff + 4];
+            z[zOff + 4] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 5] - x[xOff + 5];
+            z[zOff + 5] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 6] - x[xOff + 6];
+            z[zOff + 6] = (uint)c;
+            c >>= 32;
+            c += (long)z[zOff + 7] - x[xOff + 7];
+            z[zOff + 7] = (uint)c;
+            c >>= 32;
+            return (int)c;
+        }
+
+        public static BigInteger ToBigInteger(uint[] x)
+        {
+            byte[] bs = new byte[32];
+            for (int i = 0; i < 8; ++i)
+            {
+                uint x_i = x[i];
+                if (x_i != 0)
+                {
+                    Pack.UInt32_To_BE(x_i, bs, (7 - i) << 2);
+                }
+            }
+            return new BigInteger(1, bs);
+        }
+
+        public static BigInteger ToBigInteger64(ulong[] x)
+        {
+            byte[] bs = new byte[32];
+            for (int i = 0; i < 4; ++i)
+            {
+                ulong x_i = x[i];
+                if (x_i != 0L)
+                {
+                    Pack.UInt64_To_BE(x_i, bs, (3 - i) << 3);
+                }
+            }
+            return new BigInteger(1, bs);
+        }
+
+        public static void Zero(uint[] z)
+        {
+            z[0] = 0;
+            z[1] = 0;
+            z[2] = 0;
+            z[3] = 0;
+            z[4] = 0;
+            z[5] = 0;
+            z[6] = 0;
+            z[7] = 0;
+        }
+    }
+}
diff --git a/crypto/src/math/raw/Nat320.cs b/crypto/src/math/raw/Nat320.cs
new file mode 100644
index 000000000..c7daa71e2
--- /dev/null
+++ b/crypto/src/math/raw/Nat320.cs
@@ -0,0 +1,98 @@
+using System;
+using System.Diagnostics;
+
+using Org.BouncyCastle.Crypto.Utilities;
+
+namespace Org.BouncyCastle.Math.Raw
+{
+    internal abstract class Nat320
+    {
+        public static void Copy64(ulong[] x, ulong[] z)
+        {
+            z[0] = x[0];
+            z[1] = x[1];
+            z[2] = x[2];
+            z[3] = x[3];
+            z[4] = x[4];
+        }
+
+        public static ulong[] Create64()
+        {
+            return new ulong[5];
+        }
+
+        public static ulong[] CreateExt64()
+        {
+            return new ulong[10];
+        }
+
+        public static bool Eq64(ulong[] x, ulong[] y)
+        {
+            for (int i = 4; i >= 0; --i)
+            {
+                if (x[i] != y[i])
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static ulong[] FromBigInteger64(BigInteger x)
+        {
+            if (x.SignValue < 0 || x.BitLength > 320)
+                throw new ArgumentException();
+
+            ulong[] z = Create64();
+            int i = 0;
+            while (x.SignValue != 0)
+            {
+                z[i++] = (ulong)x.LongValue;
+                x = x.ShiftRight(64);
+            }
+            return z;
+        }
+
+        public static bool IsOne64(ulong[] x)
+        {
+            if (x[0] != 1UL)
+            {
+                return false;
+            }
+            for (int i = 1; i < 5; ++i)
+            {
+                if (x[i] != 0UL)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static bool IsZero64(ulong[] x)
+        {
+            for (int i = 0; i < 5; ++i)
+            {
+                if (x[i] != 0UL)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static BigInteger ToBigInteger64(ulong[] x)
+        {
+            byte[] bs = new byte[40];
+            for (int i = 0; i < 5; ++i)
+            {
+                ulong x_i = x[i];
+                if (x_i != 0L)
+                {
+                    Pack.UInt64_To_BE(x_i, bs, (4 - i) << 3);
+                }
+            }
+            return new BigInteger(1, bs);
+        }
+    }
+}
diff --git a/crypto/src/math/raw/Nat384.cs b/crypto/src/math/raw/Nat384.cs
new file mode 100644
index 000000000..ed1c47e8c
--- /dev/null
+++ b/crypto/src/math/raw/Nat384.cs
@@ -0,0 +1,46 @@
+using System;
+using System.Diagnostics;
+
+namespace Org.BouncyCastle.Math.Raw
+{
+    internal abstract class Nat384
+    {
+        public static void Mul(uint[] x, uint[] y, uint[] zz)
+        {
+            Nat192.Mul(x, y, zz);
+            Nat192.Mul(x, 6, y, 6, zz, 12);
+
+            uint c18 = Nat192.AddToEachOther(zz, 6, zz, 12);
+            uint c12 = c18 + Nat192.AddTo(zz, 0, zz, 6, 0);
+            c18 += Nat192.AddTo(zz, 18, zz, 12, c12);
+
+            uint[] dx = Nat192.Create(), dy = Nat192.Create();
+            bool neg = Nat192.Diff(x, 6, x, 0, dx, 0) != Nat192.Diff(y, 6, y, 0, dy, 0);
+
+            uint[] tt = Nat192.CreateExt();
+            Nat192.Mul(dx, dy, tt);
+
+            c18 += neg ? Nat.AddTo(12, tt, 0, zz, 6) : (uint)Nat.SubFrom(12, tt, 0, zz, 6);
+            Nat.AddWordAt(24, c18, zz, 18);
+        }
+
+        public static void Square(uint[] x, uint[] zz)
+        {
+            Nat192.Square(x, zz);
+            Nat192.Square(x, 6, zz, 12);
+
+            uint c18 = Nat192.AddToEachOther(zz, 6, zz, 12);
+            uint c12 = c18 + Nat192.AddTo(zz, 0, zz, 6, 0);
+            c18 += Nat192.AddTo(zz, 18, zz, 12, c12);
+
+            uint[] dx = Nat192.Create();
+            Nat192.Diff(x, 6, x, 0, dx, 0);
+
+            uint[] m = Nat192.CreateExt();
+            Nat192.Square(dx, m);
+
+            c18 += (uint)Nat.SubFrom(12, m, 0, zz, 6);
+            Nat.AddWordAt(24, c18, zz, 18);
+        }
+    }
+}
diff --git a/crypto/src/math/raw/Nat448.cs b/crypto/src/math/raw/Nat448.cs
new file mode 100644
index 000000000..52a253f1b
--- /dev/null
+++ b/crypto/src/math/raw/Nat448.cs
@@ -0,0 +1,100 @@
+using System;
+using System.Diagnostics;
+
+using Org.BouncyCastle.Crypto.Utilities;
+
+namespace Org.BouncyCastle.Math.Raw
+{
+    internal abstract class Nat448
+    {
+        public static void Copy64(ulong[] x, ulong[] z)
+        {
+            z[0] = x[0];
+            z[1] = x[1];
+            z[2] = x[2];
+            z[3] = x[3];
+            z[4] = x[4];
+            z[5] = x[5];
+            z[6] = x[6];
+        }
+
+        public static ulong[] Create64()
+        {
+            return new ulong[7];
+        }
+
+        public static ulong[] CreateExt64()
+        {
+            return new ulong[14];
+        }
+
+        public static bool Eq64(ulong[] x, ulong[] y)
+        {
+            for (int i = 6; i >= 0; --i)
+            {
+                if (x[i] != y[i])
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static ulong[] FromBigInteger64(BigInteger x)
+        {
+            if (x.SignValue < 0 || x.BitLength > 448)
+                throw new ArgumentException();
+
+            ulong[] z = Create64();
+            int i = 0;
+            while (x.SignValue != 0)
+            {
+                z[i++] = (ulong)x.LongValue;
+                x = x.ShiftRight(64);
+            }
+            return z;
+        }
+
+        public static bool IsOne64(ulong[] x)
+        {
+            if (x[0] != 1UL)
+            {
+                return false;
+            }
+            for (int i = 1; i < 7; ++i)
+            {
+                if (x[i] != 0UL)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static bool IsZero64(ulong[] x)
+        {
+            for (int i = 0; i < 7; ++i)
+            {
+                if (x[i] != 0UL)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static BigInteger ToBigInteger64(ulong[] x)
+        {
+            byte[] bs = new byte[56];
+            for (int i = 0; i < 7; ++i)
+            {
+                ulong x_i = x[i];
+                if (x_i != 0L)
+                {
+                    Pack.UInt64_To_BE(x_i, bs, (6 - i) << 3);
+                }
+            }
+            return new BigInteger(1, bs);
+        }
+    }
+}
diff --git a/crypto/src/math/raw/Nat512.cs b/crypto/src/math/raw/Nat512.cs
new file mode 100644
index 000000000..a9ef2b3b6
--- /dev/null
+++ b/crypto/src/math/raw/Nat512.cs
@@ -0,0 +1,46 @@
+using System;
+using System.Diagnostics;
+
+namespace Org.BouncyCastle.Math.Raw
+{
+    internal abstract class Nat512
+    {
+        public static void Mul(uint[] x, uint[] y, uint[] zz)
+        {
+            Nat256.Mul(x, y, zz);
+            Nat256.Mul(x, 8, y, 8, zz, 16);
+
+            uint c24 = Nat256.AddToEachOther(zz, 8, zz, 16);
+            uint c16 = c24 + Nat256.AddTo(zz, 0, zz, 8, 0);
+            c24 += Nat256.AddTo(zz, 24, zz, 16, c16);
+
+            uint[] dx = Nat256.Create(), dy = Nat256.Create();
+            bool neg = Nat256.Diff(x, 8, x, 0, dx, 0) != Nat256.Diff(y, 8, y, 0, dy, 0);
+
+            uint[] tt = Nat256.CreateExt();
+            Nat256.Mul(dx, dy, tt);
+
+            c24 += neg ? Nat.AddTo(16, tt, 0, zz, 8) : (uint)Nat.SubFrom(16, tt, 0, zz, 8);
+            Nat.AddWordAt(32, c24, zz, 24); 
+        }
+
+        public static void Square(uint[] x, uint[] zz)
+        {
+            Nat256.Square(x, zz);
+            Nat256.Square(x, 8, zz, 16);
+
+            uint c24 = Nat256.AddToEachOther(zz, 8, zz, 16);
+            uint c16 = c24 + Nat256.AddTo(zz, 0, zz, 8, 0);
+            c24 += Nat256.AddTo(zz, 24, zz, 16, c16);
+
+            uint[] dx = Nat256.Create();
+            Nat256.Diff(x, 8, x, 0, dx, 0);
+
+            uint[] m = Nat256.CreateExt();
+            Nat256.Square(dx, m);
+
+            c24 += (uint)Nat.SubFrom(16, m, 0, zz, 8);
+            Nat.AddWordAt(32, c24, zz, 24); 
+        }
+    }
+}
diff --git a/crypto/src/math/raw/Nat576.cs b/crypto/src/math/raw/Nat576.cs
new file mode 100644
index 000000000..813fb86be
--- /dev/null
+++ b/crypto/src/math/raw/Nat576.cs
@@ -0,0 +1,102 @@
+using System;
+using System.Diagnostics;
+
+using Org.BouncyCastle.Crypto.Utilities;
+
+namespace Org.BouncyCastle.Math.Raw
+{
+    internal abstract class Nat576
+    {
+        public static void Copy64(ulong[] x, ulong[] z)
+        {
+            z[0] = x[0];
+            z[1] = x[1];
+            z[2] = x[2];
+            z[3] = x[3];
+            z[4] = x[4];
+            z[5] = x[5];
+            z[6] = x[6];
+            z[7] = x[7];
+            z[8] = x[8];
+        }
+
+        public static ulong[] Create64()
+        {
+            return new ulong[9];
+        }
+
+        public static ulong[] CreateExt64()
+        {
+            return new ulong[18];
+        }
+
+        public static bool Eq64(ulong[] x, ulong[] y)
+        {
+            for (int i = 8; i >= 0; --i)
+            {
+                if (x[i] != y[i])
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static ulong[] FromBigInteger64(BigInteger x)
+        {
+            if (x.SignValue < 0 || x.BitLength > 576)
+                throw new ArgumentException();
+
+            ulong[] z = Create64();
+            int i = 0;
+            while (x.SignValue != 0)
+            {
+                z[i++] = (ulong)x.LongValue;
+                x = x.ShiftRight(64);
+            }
+            return z;
+        }
+
+        public static bool IsOne64(ulong[] x)
+        {
+            if (x[0] != 1UL)
+            {
+                return false;
+            }
+            for (int i = 1; i < 9; ++i)
+            {
+                if (x[i] != 0UL)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static bool IsZero64(ulong[] x)
+        {
+            for (int i = 0; i < 9; ++i)
+            {
+                if (x[i] != 0UL)
+                {
+                    return false;
+                }
+            }
+            return true;
+        }
+
+        public static BigInteger ToBigInteger64(ulong[] x)
+        {
+            byte[] bs = new byte[72];
+            for (int i = 0; i < 9; ++i)
+            {
+                ulong x_i = x[i];
+                if (x_i != 0L)
+                {
+                    Pack.UInt64_To_BE(x_i, bs, (8 - i) << 3);
+                }
+            }
+            return new BigInteger(1, bs);
+        }
+    }
+}