diff --git a/crypto/src/math/ec/rfc7748/X25519.cs b/crypto/src/math/ec/rfc7748/X25519.cs
new file mode 100644
index 000000000..862b8e0d0
--- /dev/null
+++ b/crypto/src/math/ec/rfc7748/X25519.cs
@@ -0,0 +1,249 @@
+using System;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+
+namespace Org.BouncyCastle.Math.EC.Rfc7748
+{
+ public abstract class X25519
+ {
+ private const int C_A = 486662;
+ private const int C_A24 = (C_A + 2)/4;
+
+ // 0x1
+ //private static readonly int[] S_x = new int[] { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+ // 0x215132111D8354CB52385F46DCA2B71D440F6A51EB4D1207816B1E0137D48290
+ private static readonly int[] PsubS_x = new int[]{ 0x03D48290, 0x02C7804D, 0x01207816, 0x028F5A68, 0x00881ED4, 0x00A2B71D,
+ 0x0217D1B7, 0x014CB523, 0x0088EC1A, 0x0042A264 };
+
+ private static int[] precompBase = null;
+
+ private static uint Decode32(byte[] bs, int off)
+ {
+ uint n = bs[off];
+ n |= (uint)bs[++off] << 8;
+ n |= (uint)bs[++off] << 16;
+ n |= (uint)bs[++off] << 24;
+ return n;
+ }
+
+ private static void DecodeScalar(byte[] k, int kOff, uint[] n)
+ {
+ for (int i = 0; i < 8; ++i)
+ {
+ n[i] = Decode32(k, kOff + i * 4);
+ }
+
+ n[0] &= 0xFFFFFFF8U;
+ n[7] &= 0x7FFFFFFFU;
+ n[7] |= 0x40000000U;
+ }
+
+ private static void PointDouble(int[] x, int[] z)
+ {
+ int[] A = X25519Field.Create();
+ int[] B = X25519Field.Create();
+
+ X25519Field.Apm(x, z, A, B);
+ X25519Field.Sqr(A, A);
+ X25519Field.Sqr(B, B);
+ X25519Field.Mul(A, B, x);
+ X25519Field.Sub(A, B, A);
+ X25519Field.Mul(A, C_A24, z);
+ X25519Field.Add(z, B, z);
+ X25519Field.Mul(z, A, z);
+ }
+
+ [MethodImpl(MethodImplOptions.Synchronized)]
+ public static void Precompute()
+ {
+ if (precompBase != null)
+ return;
+
+ precompBase = new int[X25519Field.Size * 252];
+
+ int[] xs = precompBase;
+ int[] zs = new int[X25519Field.Size * 251];
+
+ int[] x = X25519Field.Create(); x[0] = 9;
+ int[] z = X25519Field.Create(); z[0] = 1;
+
+ int[] n = X25519Field.Create();
+ int[] d = X25519Field.Create();
+
+ X25519Field.Apm(x, z, n, d);
+
+ int[] c = X25519Field.Create(); X25519Field.Copy(d, 0, c, 0);
+
+ int off = 0;
+ for (;;)
+ {
+ X25519Field.Copy(n, 0, xs, off);
+
+ if (off == (X25519Field.Size * 251))
+ break;
+
+ PointDouble(x, z);
+
+ X25519Field.Apm(x, z, n, d);
+ X25519Field.Mul(n, c, n);
+ X25519Field.Mul(c, d, c);
+
+ X25519Field.Copy(d, 0, zs, off);
+
+ off += X25519Field.Size;
+ }
+
+ int[] u = X25519Field.Create();
+ X25519Field.Inv(c, u);
+
+ for (;;)
+ {
+ X25519Field.Copy(xs, off, x, 0);
+
+ X25519Field.Mul(x, u, x);
+ //X25519Field.Normalize(x);
+ X25519Field.Copy(x, 0, precompBase, off);
+
+ if (off == 0)
+ break;
+
+ off -= X25519Field.Size;
+ X25519Field.Copy(zs, off, z, 0);
+ X25519Field.Mul(u, z, u);
+ }
+ }
+
+ public static void ScalarMult(byte[] k, int kOff, byte[] u, int uOff, byte[] r, int rOff)
+ {
+ uint[] n = new uint[8]; DecodeScalar(k, kOff, n);
+
+ int[] x1 = X25519Field.Create(); X25519Field.Decode(u, uOff, x1);
+ int[] x2 = X25519Field.Create(); X25519Field.Copy(x1, 0, x2, 0);
+ int[] z2 = X25519Field.Create(); z2[0] = 1;
+ int[] x3 = X25519Field.Create(); x3[0] = 1;
+ int[] z3 = X25519Field.Create();
+
+ int[] t1 = X25519Field.Create();
+ int[] t2 = X25519Field.Create();
+
+ Debug.Assert(n[7] >> 30 == 1U);
+
+ int bit = 254, swap = 1;
+ do
+ {
+ X25519Field.Apm(x3, z3, t1, x3);
+ X25519Field.Apm(x2, z2, z3, x2);
+ X25519Field.Mul(t1, x2, t1);
+ X25519Field.Mul(x3, z3, x3);
+ X25519Field.Sqr(z3, z3);
+ X25519Field.Sqr(x2, x2);
+
+ X25519Field.Sub(z3, x2, t2);
+ X25519Field.Mul(t2, C_A24, z2);
+ X25519Field.Add(z2, x2, z2);
+ X25519Field.Mul(z2, t2, z2);
+ X25519Field.Mul(x2, z3, x2);
+
+ X25519Field.Apm(t1, x3, x3, z3);
+ X25519Field.Sqr(x3, x3);
+ X25519Field.Sqr(z3, z3);
+ X25519Field.Mul(z3, x1, z3);
+
+ --bit;
+
+ int word = bit >> 5, shift = bit & 0x1F;
+ int kt = (int)(n[word] >> shift) & 1;
+ swap ^= kt;
+ X25519Field.CSwap(swap, x2, x3);
+ X25519Field.CSwap(swap, z2, z3);
+ swap = kt;
+ }
+ while (bit >= 3);
+
+ Debug.Assert(swap == 0);
+
+ for (int i = 0; i < 3; ++i)
+ {
+ PointDouble(x2, z2);
+ }
+
+ X25519Field.Inv(z2, z2);
+ X25519Field.Mul(x2, z2, x2);
+
+ X25519Field.Normalize(x2);
+ X25519Field.Encode(x2, r, rOff);
+ }
+
+ private static void Dump(string label, int[] x)
+ {
+ int[] y = X25519Field.Create();
+ X25519Field.Copy(x, 0, y, 0);
+ X25519Field.Normalize(y);
+
+ byte[] r = new byte[32];
+ X25519Field.Encode(y, r, 0);
+ string val = Org.BouncyCastle.Utilities.Encoders.Hex.ToHexString(r);
+ Console.WriteLine(" " + label + ": " + val);
+ }
+
+ public static void ScalarMultBase(byte[] k, int kOff, byte[] r, int rOff)
+ {
+ Precompute();
+
+ uint[] n = new uint[8]; DecodeScalar(k, kOff, n);
+
+ int[] x0 = X25519Field.Create();
+ //int[] x1 = X25519Field.Create(); X25519Field.Copy(S_x, 0, x1, 0);
+ int[] x1 = X25519Field.Create(); x1[0] = 1;
+ int[] z1 = X25519Field.Create(); z1[0] = 1;
+ int[] x2 = X25519Field.Create(); X25519Field.Copy(PsubS_x, 0, x2, 0);
+ int[] z2 = X25519Field.Create(); z2[0] = 1;
+
+ int[] A = x1;
+ int[] B = z1;
+ int[] C = x0;
+ int[] D = A;
+ int[] E = B;
+
+ Debug.Assert(n[7] >> 30 == 1U);
+
+ int off = 0, bit = 3, swap = 1;
+ do
+ {
+ X25519Field.Copy(precompBase, off, x0, 0);
+ off += X25519Field.Size;
+
+ int word = bit >> 5, shift = bit & 0x1F;
+ int kt = (int)(n[word] >> shift) & 1;
+ swap ^= kt;
+ X25519Field.CSwap(swap, x1, x2);
+ X25519Field.CSwap(swap, z1, z2);
+ swap = kt;
+
+ X25519Field.Apm(x1, z1, A, B);
+ X25519Field.Mul(x0, B, C);
+ X25519Field.Carry(A);
+ X25519Field.Apm(A, C, D, E);
+ X25519Field.Sqr(D, D);
+ X25519Field.Sqr(E, E);
+ X25519Field.Mul(z2, D, x1);
+ X25519Field.Mul(x2, E, z1);
+ }
+ while (++bit < 255);
+
+ Debug.Assert(swap == 1);
+
+ for (int i = 0; i < 3; ++i)
+ {
+ PointDouble(x1, z1);
+ }
+
+ X25519Field.Inv(z1, z1);
+ X25519Field.Mul(x1, z1, x1);
+
+ X25519Field.Normalize(x1);
+ X25519Field.Encode(x1, r, rOff);
+ }
+ }
+}
diff --git a/crypto/src/math/ec/rfc7748/X25519Field.cs b/crypto/src/math/ec/rfc7748/X25519Field.cs
new file mode 100644
index 000000000..282f41628
--- /dev/null
+++ b/crypto/src/math/ec/rfc7748/X25519Field.cs
@@ -0,0 +1,520 @@
+using System;
+using System.Diagnostics;
+
+namespace Org.BouncyCastle.Math.EC.Rfc7748
+{
+ public abstract class X25519Field
+ {
+ public const int Size = 10;
+
+ private const int M24 = 0x00FFFFFF;
+ private const int M25 = 0x01FFFFFF;
+ private const int M26 = 0x03FFFFFF;
+
+ private X25519Field() {}
+
+ public static void Add(int[] x, int[] y, int[] z)
+ {
+ for (int i = 0; i < Size; ++i)
+ {
+ z[i] = x[i] + y[i];
+ }
+ }
+
+ public static void Apm(int[] x, int[] y, int[] zp, int[] zm)
+ {
+ for (int i = 0; i < Size; ++i)
+ {
+ int xi = x[i], yi = y[i];
+ zp[i] = xi + yi;
+ zm[i] = xi - yi;
+ }
+ }
+
+ public static void Carry(int[] z)
+ {
+ int z0 = z[0], z1 = z[1], z2 = z[2], z3 = z[3], z4 = z[4];
+ int z5 = z[5], z6 = z[6], z7 = z[7], z8 = z[8], z9 = z[9];
+
+ z3 += (z2 >> 25); z2 &= M25;
+ z5 += (z4 >> 25); z4 &= M25;
+ z8 += (z7 >> 25); z7 &= M25;
+ //z0 += (z9 >> 24) * 19; z9 &= M24;
+ z0 += (z9 >> 25) * 38; z9 &= M25;
+
+ z1 += (z0 >> 26); z0 &= M26;
+ z6 += (z5 >> 26); z5 &= M26;
+
+ z2 += (z1 >> 26); z1 &= M26;
+ z4 += (z3 >> 26); z3 &= M26;
+ z7 += (z6 >> 26); z6 &= M26;
+ z9 += (z8 >> 26); z8 &= M26;
+
+ z[0] = z0; z[1] = z1; z[2] = z2; z[3] = z3; z[4] = z4;
+ z[5] = z5; z[6] = z6; z[7] = z7; z[8] = z8; z[9] = z9;
+ }
+
+ public static void Copy(int[] x, int xOff, int[] z, int zOff)
+ {
+ for (int i = 0; i < Size; ++i)
+ {
+ z[zOff + i] = x[xOff + i];
+ }
+ }
+
+ public static int[] Create()
+ {
+ return new int[Size];
+ }
+
+ public static void CSwap(int swap, int[] a, int[] b)
+ {
+ Debug.Assert(swap >> 1 == 0);
+ Debug.Assert(a != b);
+
+ int mask = 0 - swap;
+ for (int i = 0; i < Size; ++i)
+ {
+ int ai = a[i], bi = b[i];
+ int dummy = mask & (ai ^ bi);
+ a[i] = ai ^ dummy;
+ b[i] = bi ^ dummy;
+ }
+ }
+
+ public static void Decode(byte[] x, int xOff, int[] z)
+ {
+ Decode128(x, xOff, z, 0);
+ Decode128(x, xOff + 16, z, 5);
+ z[9] &= M24;
+ }
+
+ private static void Decode128(byte[] bs, int off, int[] z, int zOff)
+ {
+ uint t0 = Decode32(bs, off + 0);
+ uint t1 = Decode32(bs, off + 4);
+ uint t2 = Decode32(bs, off + 8);
+ uint t3 = Decode32(bs, off + 12);
+
+ z[zOff + 0] = (int)t0 & M26;
+ z[zOff + 1] = (int)((t1 << 6) | (t0 >> 26)) & M26;
+ z[zOff + 2] = (int)((t2 << 12) | (t1 >> 20)) & M25;
+ z[zOff + 3] = (int)((t3 << 19) | (t2 >> 13)) & M26;
+ z[zOff + 4] = (int)(t3 >> 7);
+ }
+
+ private static uint Decode32(byte[] bs, int off)
+ {
+ uint n = bs[off];
+ n |= (uint)bs[++off] << 8;
+ n |= (uint)bs[++off] << 16;
+ n |= (uint)bs[++off] << 24;
+ return n;
+ }
+
+ public static void Encode(int[] x, byte[] z, int zOff)
+ {
+ Encode128(x, 0, z, zOff);
+ Encode128(x, 5, z, zOff + 16);
+ }
+
+ private static void Encode128(int[] x, int xOff, byte[] bs, int off)
+ {
+ uint x0 = (uint)x[xOff + 0], x1 = (uint)x[xOff + 1], x2 = (uint)x[xOff + 2];
+ uint x3 = (uint)x[xOff + 3], x4 = (uint)x[xOff + 4];
+
+ uint t0 = x0 | (x1 << 26); Encode32(t0, bs, off + 0);
+ uint t1 = (x1 >> 6) | (x2 << 20); Encode32(t1, bs, off + 4);
+ uint t2 = (x2 >> 12) | (x3 << 13); Encode32(t2, bs, off + 8);
+ uint t3 = (x3 >> 19) | (x4 << 7); Encode32(t3, bs, off + 12);
+ }
+
+ private static void Encode32(uint n, byte[] bs, int off)
+ {
+ bs[ off] = (byte)(n );
+ bs[++off] = (byte)(n >> 8);
+ bs[++off] = (byte)(n >> 16);
+ bs[++off] = (byte)(n >> 24);
+ }
+
+ public static void Inv(int[] x, int[] z)
+ {
+ // z = x^(p-2) = x^7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEB
+ // (250 1s) (1 0s) (1 1s) (1 0s) (2 1s)
+ // Addition chain: [1] [2] 3 5 10 15 25 50 75 125 [250]
+
+ int[] x2 = Create(); Sqr(x, x2); Mul(x, x2, x2);
+ int[] x3 = Create(); Sqr(x2, x3); Mul(x, x3, x3);
+ int[] x5 = x3; Sqr(x3, 2, x5); Mul(x2, x5, x5);
+ int[] x10 = Create(); Sqr(x5, 5, x10); Mul(x5, x10, x10);
+ int[] x15 = Create(); Sqr(x10, 5, x15); Mul(x5, x15, x15);
+ int[] x25 = x5; Sqr(x15, 10, x25); Mul(x10, x25, x25);
+ int[] x50 = x10; Sqr(x25, 25, x50); Mul(x25, x50, x50);
+ int[] x75 = x15; Sqr(x50, 25, x75); Mul(x25, x75, x75);
+ int[] x125 = x25; Sqr(x75, 50, x125); Mul(x50, x125, x125);
+ int[] x250 = x50; Sqr(x125, 125, x250); Mul(x125, x250, x250);
+
+ int[] t = x125;
+ Sqr(x250, 2, t);
+ Mul(t, x, t);
+ Sqr(t, 3, t);
+ Mul(t, x2, z);
+ }
+
+ public static void Mul(int[] x, int y, int[] z)
+ {
+ int x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4];
+ int x5 = x[5], x6 = x[6], x7 = x[7], x8 = x[8], x9 = x[9];
+ long c0, c1, c2, c3;
+
+ c0 = (long)x2 * y; x2 = (int)c0 & M25; c0 >>= 25;
+ c1 = (long)x4 * y; x4 = (int)c1 & M25; c1 >>= 25;
+ c2 = (long)x7 * y; x7 = (int)c2 & M25; c2 >>= 25;
+ //c3 = (long)x9 * y; x9 = (int)c3 & M24; c3 >>= 24;
+ //c3 *= 19;
+ c3 = (long)x9 * y; x9 = (int)c3 & M25; c3 >>= 25;
+ c3 *= 38;
+
+ c3 += (long)x0 * y; z[0] = (int)c3 & M26; c3 >>= 26;
+ c1 += (long)x5 * y; z[5] = (int)c1 & M26; c1 >>= 26;
+
+ c3 += (long)x1 * y; z[1] = (int)c3 & M26; c3 >>= 26;
+ c0 += (long)x3 * y; z[3] = (int)c0 & M26; c0 >>= 26;
+ c1 += (long)x6 * y; z[6] = (int)c1 & M26; c1 >>= 26;
+ c2 += (long)x8 * y; z[8] = (int)c2 & M26; c2 >>= 26;
+
+ z[2] = x2 + (int)c3;
+ z[4] = x4 + (int)c0;
+ z[7] = x7 + (int)c1;
+ z[9] = x9 + (int)c2;
+ }
+
+ public static void Mul(int[] x, int[] y, int[] z)
+ {
+ int x0 = x[0], y0 = y[0];
+ int x1 = x[1], y1 = y[1];
+ int x2 = x[2], y2 = y[2];
+ int x3 = x[3], y3 = y[3];
+ int x4 = x[4], y4 = y[4];
+
+ int u0 = x[5], v0 = y[5];
+ int u1 = x[6], v1 = y[6];
+ int u2 = x[7], v2 = y[7];
+ int u3 = x[8], v3 = y[8];
+ int u4 = x[9], v4 = y[9];
+
+ long a0 = (long)x0 * y0;
+ long a1 = (long)x0 * y1
+ + (long)x1 * y0;
+ long a2 = (long)x0 * y2
+ + (long)x1 * y1
+ + (long)x2 * y0;
+ long a3 = (long)x1 * y2
+ + (long)x2 * y1;
+ a3 <<= 1;
+ a3 += (long)x0 * y3
+ + (long)x3 * y0;
+ long a4 = (long)x2 * y2;
+ a4 <<= 1;
+ a4 += (long)x0 * y4
+ + (long)x1 * y3
+ + (long)x3 * y1
+ + (long)x4 * y0;
+ long a5 = (long)x1 * y4
+ + (long)x2 * y3
+ + (long)x3 * y2
+ + (long)x4 * y1;
+ a5 <<= 1;
+ long a6 = (long)x2 * y4
+ + (long)x4 * y2;
+ a6 <<= 1;
+ a6 += (long)x3 * y3;
+ long a7 = (long)x3 * y4
+ + (long)x4 * y3;
+ long a8 = (long)x4 * y4;
+ a8 <<= 1;
+
+ long b0 = (long)u0 * v0;
+ long b1 = (long)u0 * v1
+ + (long)u1 * v0;
+ long b2 = (long)u0 * v2
+ + (long)u1 * v1
+ + (long)u2 * v0;
+ long b3 = (long)u1 * v2
+ + (long)u2 * v1;
+ b3 <<= 1;
+ b3 += (long)u0 * v3
+ + (long)u3 * v0;
+ long b4 = (long)u2 * v2;
+ b4 <<= 1;
+ b4 += (long)u0 * v4
+ + (long)u1 * v3
+ + (long)u3 * v1
+ + (long)u4 * v0;
+ long b5 = (long)u1 * v4
+ + (long)u2 * v3
+ + (long)u3 * v2
+ + (long)u4 * v1;
+ //b5 <<= 1;
+ long b6 = (long)u2 * v4
+ + (long)u4 * v2;
+ b6 <<= 1;
+ b6 += (long)u3 * v3;
+ long b7 = (long)u3 * v4
+ + (long)u4 * v3;
+ long b8 = (long)u4 * v4;
+ //b8 <<= 1;
+
+ a0 -= b5 * 76;
+ a1 -= b6 * 38;
+ a2 -= b7 * 38;
+ a3 -= b8 * 76;
+
+ a5 -= b0;
+ a6 -= b1;
+ a7 -= b2;
+ a8 -= b3;
+ //long a9 = -b4;
+
+ x0 += u0; y0 += v0;
+ x1 += u1; y1 += v1;
+ x2 += u2; y2 += v2;
+ x3 += u3; y3 += v3;
+ x4 += u4; y4 += v4;
+
+ long c0 = (long)x0 * y0;
+ long c1 = (long)x0 * y1
+ + (long)x1 * y0;
+ long c2 = (long)x0 * y2
+ + (long)x1 * y1
+ + (long)x2 * y0;
+ long c3 = (long)x1 * y2
+ + (long)x2 * y1;
+ c3 <<= 1;
+ c3 += (long)x0 * y3
+ + (long)x3 * y0;
+ long c4 = (long)x2 * y2;
+ c4 <<= 1;
+ c4 += (long)x0 * y4
+ + (long)x1 * y3
+ + (long)x3 * y1
+ + (long)x4 * y0;
+ long c5 = (long)x1 * y4
+ + (long)x2 * y3
+ + (long)x3 * y2
+ + (long)x4 * y1;
+ c5 <<= 1;
+ long c6 = (long)x2 * y4
+ + (long)x4 * y2;
+ c6 <<= 1;
+ c6 += (long)x3 * y3;
+ long c7 = (long)x3 * y4
+ + (long)x4 * y3;
+ long c8 = (long)x4 * y4;
+ c8 <<= 1;
+
+ int z8, z9;
+ long t;
+
+ t = a8 + (c3 - a3);
+ z8 = (int)t & M26; t >>= 26;
+ //t += a9 + (c4 - a4);
+ t += (c4 - a4) - b4;
+ //z9 = (int)t & M24; t >>= 24;
+ //t = a0 + (t + ((c5 - a5) << 1)) * 19;
+ z9 = (int)t & M25; t >>= 25;
+ t = a0 + (t + c5 - a5) * 38;
+ z[0] = (int)t & M26; t >>= 26;
+ t += a1 + (c6 - a6) * 38;
+ z[1] = (int)t & M26; t >>= 26;
+ t += a2 + (c7 - a7) * 38;
+ z[2] = (int)t & M25; t >>= 25;
+ t += a3 + (c8 - a8) * 38;
+ z[3] = (int)t & M26; t >>= 26;
+ //t += a4 - a9 * 38;
+ t += a4 + b4 * 38;
+ z[4] = (int)t & M25; t >>= 25;
+ t += a5 + (c0 - a0);
+ z[5] = (int)t & M26; t >>= 26;
+ t += a6 + (c1 - a1);
+ z[6] = (int)t & M26; t >>= 26;
+ t += a7 + (c2 - a2);
+ z[7] = (int)t & M25; t >>= 25;
+ t += z8;
+ z[8] = (int)t & M26; t >>= 26;
+ z[9] = z9 + (int)t;
+ }
+
+ public static void Normalize(int[] z)
+ {
+ int x = (z[9] >> 23) & 1;
+ Reduce(z, x);
+ Reduce(z, -x);
+ Debug.Assert(z[9] >> 24 == 0);
+ }
+
+ private static void Reduce(int[] z, int c)
+ {
+ int z9 = z[9], t = z9;
+ z9 = t & M24; t >>= 24;
+ t += c;
+ t *= 19;
+ t += z[0]; z[0] = t & M26; t >>= 26;
+ t += z[1]; z[1] = t & M26; t >>= 26;
+ t += z[2]; z[2] = t & M25; t >>= 25;
+ t += z[3]; z[3] = t & M26; t >>= 26;
+ t += z[4]; z[4] = t & M25; t >>= 25;
+ t += z[5]; z[5] = t & M26; t >>= 26;
+ t += z[6]; z[6] = t & M26; t >>= 26;
+ t += z[7]; z[7] = t & M25; t >>= 25;
+ t += z[8]; z[8] = t & M26; t >>= 26;
+ t += z9; z[9] = t;
+ }
+
+ public static void Sqr(int[] x, int[] z)
+ {
+ int x0 = x[0];
+ int x1 = x[1];
+ int x2 = x[2];
+ int x3 = x[3];
+ int x4 = x[4];
+
+ int u0 = x[5];
+ int u1 = x[6];
+ int u2 = x[7];
+ int u3 = x[8];
+ int u4 = x[9];
+
+ int x1_2 = x1 * 2;
+ int x2_2 = x2 * 2;
+ int x3_2 = x3 * 2;
+ int x4_2 = x4 * 2;
+
+ long a0 = (long)x0 * x0;
+ long a1 = (long)x0 * x1_2;
+ long a2 = (long)x0 * x2_2
+ + (long)x1 * x1;
+ long a3 = (long)x1_2 * x2_2
+ + (long)x0 * x3_2;
+ long a4 = (long)x2 * x2_2
+ + (long)x0 * x4_2
+ + (long)x1 * x3_2;
+ long a5 = (long)x1_2 * x4_2
+ + (long)x2_2 * x3_2;
+ long a6 = (long)x2_2 * x4_2
+ + (long)x3 * x3;
+ long a7 = (long)x3 * x4_2;
+ long a8 = (long)x4 * x4_2;
+
+ int u1_2 = u1 * 2;
+ int u2_2 = u2 * 2;
+ int u3_2 = u3 * 2;
+ int u4_2 = u4 * 2;
+
+ long b0 = (long)u0 * u0;
+ long b1 = (long)u0 * u1_2;
+ long b2 = (long)u0 * u2_2
+ + (long)u1 * u1;
+ long b3 = (long)u1_2 * u2_2
+ + (long)u0 * u3_2;
+ long b4 = (long)u2 * u2_2
+ + (long)u0 * u4_2
+ + (long)u1 * u3_2;
+ long b5 = (long)u1_2 * u4_2
+ + (long)u2_2 * u3_2;
+ long b6 = (long)u2_2 * u4_2
+ + (long)u3 * u3;
+ long b7 = (long)u3 * u4_2;
+ long b8 = (long)u4 * u4_2;
+
+ a0 -= b5 * 38;
+ a1 -= b6 * 38;
+ a2 -= b7 * 38;
+ a3 -= b8 * 38;
+
+ a5 -= b0;
+ a6 -= b1;
+ a7 -= b2;
+ a8 -= b3;
+ //long a9 = -b4;
+
+ x0 += u0;
+ x1 += u1;
+ x2 += u2;
+ x3 += u3;
+ x4 += u4;
+
+ x1_2 = x1 * 2;
+ x2_2 = x2 * 2;
+ x3_2 = x3 * 2;
+ x4_2 = x4 * 2;
+
+ long c0 = (long)x0 * x0;
+ long c1 = (long)x0 * x1_2;
+ long c2 = (long)x0 * x2_2
+ + (long)x1 * x1;
+ long c3 = (long)x1_2 * x2_2
+ + (long)x0 * x3_2;
+ long c4 = (long)x2 * x2_2
+ + (long)x0 * x4_2
+ + (long)x1 * x3_2;
+ long c5 = (long)x1_2 * x4_2
+ + (long)x2_2 * x3_2;
+ long c6 = (long)x2_2 * x4_2
+ + (long)x3 * x3;
+ long c7 = (long)x3 * x4_2;
+ long c8 = (long)x4 * x4_2;
+
+ int z8, z9;
+ long t;
+
+ t = a8 + (c3 - a3);
+ z8 = (int)t & M26; t >>= 26;
+ //t += a9 + (c4 - a4);
+ t += (c4 - a4) - b4;
+ //z9 = (int)t & M24; t >>= 24;
+ //t = a0 + (t + ((c5 - a5) << 1)) * 19;
+ z9 = (int)t & M25; t >>= 25;
+ t = a0 + (t + c5 - a5) * 38;
+ z[0] = (int)t & M26; t >>= 26;
+ t += a1 + (c6 - a6) * 38;
+ z[1] = (int)t & M26; t >>= 26;
+ t += a2 + (c7 - a7) * 38;
+ z[2] = (int)t & M25; t >>= 25;
+ t += a3 + (c8 - a8) * 38;
+ z[3] = (int)t & M26; t >>= 26;
+ //t += a4 - a9 * 38;
+ t += a4 + b4 * 38;
+ z[4] = (int)t & M25; t >>= 25;
+ t += a5 + (c0 - a0);
+ z[5] = (int)t & M26; t >>= 26;
+ t += a6 + (c1 - a1);
+ z[6] = (int)t & M26; t >>= 26;
+ t += a7 + (c2 - a2);
+ z[7] = (int)t & M25; t >>= 25;
+ t += z8;
+ z[8] = (int)t & M26; t >>= 26;
+ z[9] = z9 + (int)t;
+ }
+
+ public static void Sqr(int[] x, int n, int[] z)
+ {
+ Debug.Assert(n > 0);
+
+ Sqr(x, z);
+
+ while (--n > 0)
+ {
+ Sqr(z, z);
+ }
+ }
+
+ public static void Sub(int[] x, int[] y, int[] z)
+ {
+ for (int i = 0; i < Size; ++i)
+ {
+ z[i] = x[i] - y[i];
+ }
+ }
+ }
+}
diff --git a/crypto/src/math/ec/rfc7748/X448.cs b/crypto/src/math/ec/rfc7748/X448.cs
new file mode 100644
index 000000000..32a4a9e2a
--- /dev/null
+++ b/crypto/src/math/ec/rfc7748/X448.cs
@@ -0,0 +1,255 @@
+using System;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+
+namespace Org.BouncyCastle.Math.EC.Rfc7748
+{
+ public abstract class X448
+ {
+ private const uint C_A = 156326;
+ private const uint C_A24 = (C_A + 2)/4;
+
+ // 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE
+ private static readonly uint[] S_x = new uint[]{ 0x0FFFFFFEU, 0x0FFFFFFFU, 0x0FFFFFFFU, 0x0FFFFFFFU, 0x0FFFFFFFU, 0x0FFFFFFFU,
+ 0x0FFFFFFFU, 0x0FFFFFFFU, 0x0FFFFFFEU, 0x0FFFFFFFU, 0x0FFFFFFFU, 0x0FFFFFFFU, 0x0FFFFFFFU, 0x0FFFFFFFU, 0x0FFFFFFFU,
+ 0x0FFFFFFFU };
+
+ // 0xF0FAB725013244423ACF03881AFFEB7BDACDD1031C81B9672954459D84C1F823F1BD65643ACE1B5123AC33FF1C69BAF8ACB1197DC99D2720
+ private static readonly uint[] PsubS_x = new uint[]{ 0x099d2720U, 0x0b1197dcU, 0x09baf8acU, 0x033ff1c6U, 0x0b5123acU,
+ 0x0643ace1U, 0x03f1bd65U, 0x084c1f82U, 0x0954459dU, 0x081b9672U, 0x0dd1031cU, 0x0eb7bdacU, 0x03881affU, 0x0423acf0U,
+ 0x05013244U, 0x0f0fab72U };
+
+ private static uint[] precompBase = null;
+
+ private static uint Decode32(byte[] bs, int off)
+ {
+ uint n = bs[off];
+ n |= (uint)bs[++off] << 8;
+ n |= (uint)bs[++off] << 16;
+ n |= (uint)bs[++off] << 24;
+ return n;
+ }
+
+ private static void DecodeScalar(byte[] k, int kOff, uint[] n)
+ {
+ for (int i = 0; i < 14; ++i)
+ {
+ n[i] = Decode32(k, kOff + i * 4);
+ }
+
+ n[ 0] &= 0xFFFFFFFCU;
+ n[13] |= 0x80000000U;
+ }
+
+ private static void PointDouble(uint[] x, uint[] z)
+ {
+ uint[] A = X448Field.Create();
+ uint[] B = X448Field.Create();
+
+ //X448Field.Apm(x, z, A, B);
+ X448Field.Add(x, z, A);
+ X448Field.Sub(x, z, B);
+ X448Field.Sqr(A, A);
+ X448Field.Sqr(B, B);
+ X448Field.Mul(A, B, x);
+ X448Field.Sub(A, B, A);
+ X448Field.Mul(A, C_A24, z);
+ X448Field.Add(z, B, z);
+ X448Field.Mul(z, A, z);
+ }
+
+ [MethodImpl(MethodImplOptions.Synchronized)]
+ public static void Precompute()
+ {
+ if (precompBase != null)
+ return;
+
+ precompBase = new uint[X448Field.Size * 446];
+
+ uint[] xs = precompBase;
+ uint[] zs = new uint[X448Field.Size * 445];
+
+ uint[] x = X448Field.Create(); x[0] = 5;
+ uint[] z = X448Field.Create(); z[0] = 1;
+
+ uint[] n = X448Field.Create();
+ uint[] d = X448Field.Create();
+
+ //X448Field.Apm(x, z, n, d);
+ X448Field.Add(x, z, n);
+ X448Field.Sub(x, z, d);
+
+ uint[] c = X448Field.Create(); X448Field.Copy(d, 0, c, 0);
+
+ int off = 0;
+ for (;;)
+ {
+ X448Field.Copy(n, 0, xs, off);
+
+ if (off == (X448Field.Size * 445))
+ break;
+
+ PointDouble(x, z);
+
+ //X448Field.Apm(x, z, n, d);
+ X448Field.Add(x, z, n);
+ X448Field.Sub(x, z, d);
+ X448Field.Mul(n, c, n);
+ X448Field.Mul(c, d, c);
+
+ X448Field.Copy(d, 0, zs, off);
+
+ off += X448Field.Size;
+ }
+
+ uint[] u = X448Field.Create();
+ X448Field.Inv(c, u);
+
+ for (;;)
+ {
+ X448Field.Copy(xs, off, x, 0);
+
+ X448Field.Mul(x, u, x);
+ //X448Field.Normalize(x);
+ X448Field.Copy(x, 0, precompBase, off);
+
+ if (off == 0)
+ break;
+
+ off -= X448Field.Size;
+ X448Field.Copy(zs, off, z, 0);
+ X448Field.Mul(u, z, u);
+ }
+ }
+
+ public static void ScalarMult(byte[] k, int kOff, byte[] u, int uOff, byte[] r, int rOff)
+ {
+ uint[] n = new uint[14]; DecodeScalar(k, kOff, n);
+
+ uint[] x1 = X448Field.Create(); X448Field.Decode(u, uOff, x1);
+ uint[] x2 = X448Field.Create(); X448Field.Copy(x1, 0, x2, 0);
+ uint[] z2 = X448Field.Create(); z2[0] = 1;
+ uint[] x3 = X448Field.Create(); x3[0] = 1;
+ uint[] z3 = X448Field.Create();
+
+ uint[] t1 = X448Field.Create();
+ uint[] t2 = X448Field.Create();
+
+ Debug.Assert(n[13] >> 31 == 1U);
+
+ int bit = 447, swap = 1;
+ do
+ {
+ //X448Field.Apm(x3, z3, t1, x3);
+ X448Field.Add(x3, z3, t1);
+ X448Field.Sub(x3, z3, x3);
+ //X448Field.Apm(x2, z2, z3, x2);
+ X448Field.Add(x2, z2, z3);
+ X448Field.Sub(x2, z2, x2);
+
+ X448Field.Mul(t1, x2, t1);
+ X448Field.Mul(x3, z3, x3);
+ X448Field.Sqr(z3, z3);
+ X448Field.Sqr(x2, x2);
+
+ X448Field.Sub(z3, x2, t2);
+ X448Field.Mul(t2, C_A24, z2);
+ X448Field.Add(z2, x2, z2);
+ X448Field.Mul(z2, t2, z2);
+ X448Field.Mul(x2, z3, x2);
+
+ //X448Field.Apm(t1, x3, x3, z3);
+ X448Field.Sub(t1, x3, z3);
+ X448Field.Add(t1, x3, x3);
+ X448Field.Sqr(x3, x3);
+ X448Field.Sqr(z3, z3);
+ X448Field.Mul(z3, x1, z3);
+
+ --bit;
+
+ int word = bit >> 5, shift = bit & 0x1F;
+ int kt = (int)(n[word] >> shift) & 1;
+ swap ^= kt;
+ X448Field.CSwap(swap, x2, x3);
+ X448Field.CSwap(swap, z2, z3);
+ swap = kt;
+ }
+ while (bit >= 2);
+
+ Debug.Assert(swap == 0);
+
+ for (int i = 0; i < 2; ++i)
+ {
+ PointDouble(x2, z2);
+ }
+
+ X448Field.Inv(z2, z2);
+ X448Field.Mul(x2, z2, x2);
+
+ X448Field.Normalize(x2);
+ X448Field.Encode(x2, r, rOff);
+ }
+
+ public static void ScalarMultBase(byte[] k, int kOff, byte[] r, int rOff)
+ {
+ Precompute();
+
+ uint[] n = new uint[14]; DecodeScalar(k, kOff, n);
+
+ uint[] x0 = X448Field.Create();
+ uint[] x1 = X448Field.Create(); X448Field.Copy(S_x, 0, x1, 0);
+ uint[] z1 = X448Field.Create(); z1[0] = 1;
+ uint[] x2 = X448Field.Create(); X448Field.Copy(PsubS_x, 0, x2, 0);
+ uint[] z2 = X448Field.Create(); z2[0] = 1;
+
+ uint[] A = X448Field.Create();
+ uint[] B = z1;
+ uint[] C = x0;
+ uint[] D = x1;
+ uint[] E = B;
+
+ Debug.Assert(n[13] >> 31 == 1U);
+
+ int off = 0, bit = 2, swap = 1;
+ do
+ {
+ X448Field.Copy(precompBase, off, x0, 0);
+ off += X448Field.Size;
+
+ int word = bit >> 5, shift = bit & 0x1F;
+ int kt = (int)(n[word] >> shift) & 1;
+ swap ^= kt;
+ X448Field.CSwap(swap, x1, x2);
+ X448Field.CSwap(swap, z1, z2);
+ swap = kt;
+
+ //X448Field.Apm(x1, z1, A, B);
+ X448Field.Add(x1, z1, A);
+ X448Field.Sub(x1, z1, B);
+ X448Field.Mul(x0, B, C);
+ X448Field.Carry(A);
+ //X448Field.Apm(A, C, D, E);
+ X448Field.Add(A, C, D);
+ X448Field.Sub(A, C, E);
+ X448Field.Sqr(D, D);
+ X448Field.Sqr(E, E);
+ X448Field.Mul(z2, D, x1);
+ X448Field.Mul(x2, E, z1);
+ }
+ while (++bit < 448);
+
+ Debug.Assert(swap == 1);
+
+ for (int i = 0; i < 2; ++i)
+ {
+ PointDouble(x1, z1);
+ }
+
+ X448Field.Inv(z1, z1);
+ X448Field.Mul(x1, z1, x1);
+
+ X448Field.Normalize(x1);
+ X448Field.Encode(x1, r, rOff);
+ }
+ }
+}
diff --git a/crypto/src/math/ec/rfc7748/X448Field.cs b/crypto/src/math/ec/rfc7748/X448Field.cs
new file mode 100644
index 000000000..0c44f1eb5
--- /dev/null
+++ b/crypto/src/math/ec/rfc7748/X448Field.cs
@@ -0,0 +1,904 @@
+using System;
+using System.Diagnostics;
+
+namespace Org.BouncyCastle.Math.EC.Rfc7748
+{
+ [CLSCompliantAttribute(false)]
+ public abstract class X448Field
+ {
+ public const int Size = 16;
+
+ private const uint M28 = 0x0FFFFFFFU;
+
+ private X448Field() {}
+
+ public static void Add(uint[] x, uint[] y, uint[] z)
+ {
+ for (int i = 0; i < Size; ++i)
+ {
+ z[i] = x[i] + y[i];
+ }
+ }
+
+ //public static void Apm(int[] x, int[] y, int[] zp, int[] zm)
+ //{
+ // for (int i = 0; i < Size; ++i)
+ // {
+ // int xi = x[i], yi = y[i];
+ // zp[i] = xi + yi;
+ // zm[i] = xi - yi;
+ // }
+ //}
+
+ public static void Carry(uint[] z)
+ {
+ uint z0 = z[0], z1 = z[1], z2 = z[2], z3 = z[3], z4 = z[4], z5 = z[5], z6 = z[6], z7 = z[7];
+ uint z8 = z[8], z9 = z[9], z10 = z[10], z11 = z[11], z12 = z[12], z13 = z[13], z14 = z[14], z15 = z[15];
+
+ z2 += (z1 >> 28); z1 &= M28;
+ z6 += (z5 >> 28); z5 &= M28;
+ z10 += (z9 >> 28); z9 &= M28;
+ z14 += (z13 >> 28); z13 &= M28;
+
+ z3 += (z2 >> 28); z2 &= M28;
+ z7 += (z6 >> 28); z6 &= M28;
+ z11 += (z10 >> 28); z10 &= M28;
+ z15 += (z14 >> 28); z14 &= M28;
+
+ uint t = z15 >> 28; z15 &= M28;
+ z0 += t;
+ z8 += t;
+
+ z4 += (z3 >> 28); z3 &= M28;
+ z8 += (z7 >> 28); z7 &= M28;
+ z12 += (z11 >> 28); z11 &= M28;
+
+ z1 += (z0 >> 28); z0 &= M28;
+ z5 += (z4 >> 28); z4 &= M28;
+ z9 += (z8 >> 28); z8 &= M28;
+ z13 += (z12 >> 28); z12 &= M28;
+
+ z[0] = z0; z[1] = z1; z[2] = z2; z[3] = z3; z[4] = z4; z[5] = z5; z[6] = z6; z[7] = z7;
+ z[8] = z8; z[9] = z9; z[10] = z10; z[11] = z11; z[12] = z12; z[13] = z13; z[14] = z14; z[15] = z15;
+ }
+
+ public static void Copy(uint[] x, int xOff, uint[] z, int zOff)
+ {
+ for (int i = 0; i < Size; ++i)
+ {
+ z[zOff + i] = x[xOff + i];
+ }
+ }
+
+ public static uint[] Create()
+ {
+ return new uint[Size];
+ }
+
+ public static void CSwap(int swap, uint[] a, uint[] b)
+ {
+ Debug.Assert(swap >> 1 == 0);
+ Debug.Assert(a != b);
+
+ uint mask = (uint)(0 - swap);
+ for (int i = 0; i < Size; ++i)
+ {
+ uint ai = a[i], bi = b[i];
+ uint dummy = mask & (ai ^ bi);
+ a[i] = ai ^ dummy;
+ b[i] = bi ^ dummy;
+ }
+ }
+
+ public static void Decode(byte[] x, int xOff, uint[] z)
+ {
+ Decode56(x, xOff, z, 0);
+ Decode56(x, xOff + 7, z, 2);
+ Decode56(x, xOff + 14, z, 4);
+ Decode56(x, xOff + 21, z, 6);
+ Decode56(x, xOff + 28, z, 8);
+ Decode56(x, xOff + 35, z, 10);
+ Decode56(x, xOff + 42, z, 12);
+ Decode56(x, xOff + 49, z, 14);
+ }
+
+ private static uint Decode24(byte[] bs, int off)
+ {
+ uint n = bs[off];
+ n |= (uint)bs[++off] << 8;
+ n |= (uint)bs[++off] << 16;
+ return n;
+ }
+
+ private static uint Decode32(byte[] bs, int off)
+ {
+ uint n = bs[off];
+ n |= (uint)bs[++off] << 8;
+ n |= (uint)bs[++off] << 16;
+ n |= (uint)bs[++off] << 24;
+ return n;
+ }
+
+ private static void Decode56(byte[] bs, int off, uint[] z, int zOff)
+ {
+ uint lo = Decode32(bs, off);
+ uint hi = Decode24(bs, off + 4);
+ z[zOff] = lo & M28;
+ z[zOff + 1] = (lo >> 28) | (hi << 4);
+ }
+
+ public static void Encode(uint[] x, byte[] z, int zOff)
+ {
+ Encode56(x, 0, z, zOff);
+ Encode56(x, 2, z, zOff + 7);
+ Encode56(x, 4, z, zOff + 14);
+ Encode56(x, 6, z, zOff + 21);
+ Encode56(x, 8, z, zOff + 28);
+ Encode56(x, 10, z, zOff + 35);
+ Encode56(x, 12, z, zOff + 42);
+ Encode56(x, 14, z, zOff + 49);
+ }
+
+ private static void Encode24(uint n, byte[] bs, int off)
+ {
+ bs[ off] = (byte)(n );
+ bs[++off] = (byte)(n >> 8);
+ bs[++off] = (byte)(n >> 16);
+ }
+
+ private static void Encode32(uint n, byte[] bs, int off)
+ {
+ bs[ off] = (byte)(n );
+ bs[++off] = (byte)(n >> 8);
+ bs[++off] = (byte)(n >> 16);
+ bs[++off] = (byte)(n >> 24);
+ }
+
+ private static void Encode56(uint[] x, int xOff, byte[] bs, int off)
+ {
+ uint lo = x[xOff], hi = x[xOff + 1];
+ Encode32(lo | (hi << 28), bs, off);
+ Encode24(hi >> 4, bs, off + 4);
+ }
+
+ public static void Inv(uint[] x, uint[] z)
+ {
+ // z = x^(p-2) = x^(2^448 - 2^224 - 3)
+ // (223 1s) (1 0s) (222 1s) (1 0s) (1 1s)
+ // Addition chain: [1] 2 3 6 9 18 19 37 74 111 [222] [223]
+ uint[] x2 = Create(); Sqr(x, x2); Mul(x, x2, x2);
+ uint[] x3 = Create(); Sqr(x2, x3); Mul(x, x3, x3);
+ uint[] x6 = Create(); Sqr(x3, 3, x6); Mul(x3, x6, x6);
+ uint[] x9 = Create(); Sqr(x6, 3, x9); Mul(x3, x9, x9);
+ uint[] x18 = Create(); Sqr(x9, 9, x18); Mul(x9, x18, x18);
+ uint[] x19 = Create(); Sqr(x18, x19); Mul(x, x19, x19);
+ uint[] x37 = Create(); Sqr(x19, 18, x37); Mul(x18, x37, x37);
+ uint[] x74 = Create(); Sqr(x37, 37, x74); Mul(x37, x74, x74);
+ uint[] x111 = Create(); Sqr(x74, 37, x111); Mul(x37, x111, x111);
+ uint[] x222 = Create(); Sqr(x111, 111, x222); Mul(x111, x222, x222);
+ uint[] x223 = Create(); Sqr(x222, x223); Mul(x, x223, x223);
+
+ uint[] t = Create();
+ Sqr(x223, 223, t);
+ Mul(t, x222, t);
+ Sqr(t, 2, t);
+ Mul(t, x, z);
+ }
+
+ public static void Mul(uint[] x, uint y, uint[] z)
+ {
+ uint x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7];
+ uint x8 = x[8], x9 = x[9], x10 = x[10], x11 = x[11], x12 = x[12], x13 = x[13], x14 = x[14], x15 = x[15];
+
+ uint z1, z5, z9, z13;
+ ulong c, d, e, f;
+
+ c = (ulong)x1 * y;
+ z1 = (uint)c & M28; c >>= 28;
+ d = (ulong)x5 * y;
+ z5 = (uint)d & M28; d >>= 28;
+ e = (ulong)x9 * y;
+ z9 = (uint)e & M28; e >>= 28;
+ f = (ulong)x13 * y;
+ z13 = (uint)f & M28; f >>= 28;
+
+ c += (ulong)x2 * y;
+ z[2] = (uint)c & M28; c >>= 28;
+ d += (ulong)x6 * y;
+ z[6] = (uint)d & M28; d >>= 28;
+ e += (ulong)x10 * y;
+ z[10] = (uint)e & M28; e >>= 28;
+ f += (ulong)x14 * y;
+ z[14] = (uint)f & M28; f >>= 28;
+
+ c += (ulong)x3 * y;
+ z[3] = (uint)c & M28; c >>= 28;
+ d += (ulong)x7 * y;
+ z[7] = (uint)d & M28; d >>= 28;
+ e += (ulong)x11 * y;
+ z[11] = (uint)e & M28; e >>= 28;
+ f += (ulong)x15 * y;
+ z[15] = (uint)f & M28; f >>= 28;
+
+ d += f;
+
+ c += (ulong)x4 * y;
+ z[4] = (uint)c & M28; c >>= 28;
+ d += (ulong)x8 * y;
+ z[8] = (uint)d & M28; d >>= 28;
+ e += (ulong)x12 * y;
+ z[12] = (uint)e & M28; e >>= 28;
+ f += (ulong)x0 * y;
+ z[0] = (uint)f & M28; f >>= 28;
+
+ z[1] = z1 + (uint)f;
+ z[5] = z5 + (uint)c;
+ z[9] = z9 + (uint)d;
+ z[13] = z13 + (uint)e;
+ }
+
+ public static void Mul(uint[] x, uint[] y, uint[] z)
+ {
+ uint x0 = x[0];
+ uint x1 = x[1];
+ uint x2 = x[2];
+ uint x3 = x[3];
+ uint x4 = x[4];
+ uint x5 = x[5];
+ uint x6 = x[6];
+ uint x7 = x[7];
+
+ uint u0 = x[8];
+ uint u1 = x[9];
+ uint u2 = x[10];
+ uint u3 = x[11];
+ uint u4 = x[12];
+ uint u5 = x[13];
+ uint u6 = x[14];
+ uint u7 = x[15];
+
+ uint y0 = y[0];
+ uint y1 = y[1];
+ uint y2 = y[2];
+ uint y3 = y[3];
+ uint y4 = y[4];
+ uint y5 = y[5];
+ uint y6 = y[6];
+ uint y7 = y[7];
+
+ uint v0 = y[8];
+ uint v1 = y[9];
+ uint v2 = y[10];
+ uint v3 = y[11];
+ uint v4 = y[12];
+ uint v5 = y[13];
+ uint v6 = y[14];
+ uint v7 = y[15];
+
+ uint s0 = x0 + u0;
+ uint s1 = x1 + u1;
+ uint s2 = x2 + u2;
+ uint s3 = x3 + u3;
+ uint s4 = x4 + u4;
+ uint s5 = x5 + u5;
+ uint s6 = x6 + u6;
+ uint s7 = x7 + u7;
+
+ uint t0 = y0 + v0;
+ uint t1 = y1 + v1;
+ uint t2 = y2 + v2;
+ uint t3 = y3 + v3;
+ uint t4 = y4 + v4;
+ uint t5 = y5 + v5;
+ uint t6 = y6 + v6;
+ uint t7 = y7 + v7;
+
+ uint z0, z1, z2, z3, z4, z5, z6, z7, z8, z9, z10, z11, z12, z13, z14, z15;
+ ulong c, d;
+
+ ulong f0 = (ulong)x0 * y0;
+ ulong f8 = (ulong)x7 * y1
+ + (ulong)x6 * y2
+ + (ulong)x5 * y3
+ + (ulong)x4 * y4
+ + (ulong)x3 * y5
+ + (ulong)x2 * y6
+ + (ulong)x1 * y7;
+ ulong g0 = (ulong)u0 * v0;
+ ulong g8 = (ulong)u7 * v1
+ + (ulong)u6 * v2
+ + (ulong)u5 * v3
+ + (ulong)u4 * v4
+ + (ulong)u3 * v5
+ + (ulong)u2 * v6
+ + (ulong)u1 * v7;
+ ulong h0 = (ulong)s0 * t0;
+ ulong h8 = (ulong)s7 * t1
+ + (ulong)s6 * t2
+ + (ulong)s5 * t3
+ + (ulong)s4 * t4
+ + (ulong)s3 * t5
+ + (ulong)s2 * t6
+ + (ulong)s1 * t7;
+
+ c = f0 + g0 + h8 - f8;
+ z0 = (uint)c & M28; c >>= 28;
+ d = g8 + h0 - f0 + h8;
+ z8 = (uint)d & M28; d >>= 28;
+
+ ulong f1 = (ulong)x1 * y0
+ + (ulong)x0 * y1;
+ ulong f9 = (ulong)x7 * y2
+ + (ulong)x6 * y3
+ + (ulong)x5 * y4
+ + (ulong)x4 * y5
+ + (ulong)x3 * y6
+ + (ulong)x2 * y7;
+ ulong g1 = (ulong)u1 * v0
+ + (ulong)u0 * v1;
+ ulong g9 = (ulong)u7 * v2
+ + (ulong)u6 * v3
+ + (ulong)u5 * v4
+ + (ulong)u4 * v5
+ + (ulong)u3 * v6
+ + (ulong)u2 * v7;
+ ulong h1 = (ulong)s1 * t0
+ + (ulong)s0 * t1;
+ ulong h9 = (ulong)s7 * t2
+ + (ulong)s6 * t3
+ + (ulong)s5 * t4
+ + (ulong)s4 * t5
+ + (ulong)s3 * t6
+ + (ulong)s2 * t7;
+
+ c += f1 + g1 + h9 - f9;
+ z1 = (uint)c & M28; c >>= 28;
+ d += g9 + h1 - f1 + h9;
+ z9 = (uint)d & M28; d >>= 28;
+
+ ulong f2 = (ulong)x2 * y0
+ + (ulong)x1 * y1
+ + (ulong)x0 * y2;
+ ulong f10 = (ulong)x7 * y3
+ + (ulong)x6 * y4
+ + (ulong)x5 * y5
+ + (ulong)x4 * y6
+ + (ulong)x3 * y7;
+ ulong g2 = (ulong)u2 * v0
+ + (ulong)u1 * v1
+ + (ulong)u0 * v2;
+ ulong g10 = (ulong)u7 * v3
+ + (ulong)u6 * v4
+ + (ulong)u5 * v5
+ + (ulong)u4 * v6
+ + (ulong)u3 * v7;
+ ulong h2 = (ulong)s2 * t0
+ + (ulong)s1 * t1
+ + (ulong)s0 * t2;
+ ulong h10 = (ulong)s7 * t3
+ + (ulong)s6 * t4
+ + (ulong)s5 * t5
+ + (ulong)s4 * t6
+ + (ulong)s3 * t7;
+
+ c += f2 + g2 + h10 - f10;
+ z2 = (uint)c & M28; c >>= 28;
+ d += g10 + h2 - f2 + h10;
+ z10 = (uint)d & M28; d >>= 28;
+
+ ulong f3 = (ulong)x3 * y0
+ + (ulong)x2 * y1
+ + (ulong)x1 * y2
+ + (ulong)x0 * y3;
+ ulong f11 = (ulong)x7 * y4
+ + (ulong)x6 * y5
+ + (ulong)x5 * y6
+ + (ulong)x4 * y7;
+ ulong g3 = (ulong)u3 * v0
+ + (ulong)u2 * v1
+ + (ulong)u1 * v2
+ + (ulong)u0 * v3;
+ ulong g11 = (ulong)u7 * v4
+ + (ulong)u6 * v5
+ + (ulong)u5 * v6
+ + (ulong)u4 * v7;
+ ulong h3 = (ulong)s3 * t0
+ + (ulong)s2 * t1
+ + (ulong)s1 * t2
+ + (ulong)s0 * t3;
+ ulong h11 = (ulong)s7 * t4
+ + (ulong)s6 * t5
+ + (ulong)s5 * t6
+ + (ulong)s4 * t7;
+
+ c += f3 + g3 + h11 - f11;
+ z3 = (uint)c & M28; c >>= 28;
+ d += g11 + h3 - f3 + h11;
+ z11 = (uint)d & M28; d >>= 28;
+
+ ulong f4 = (ulong)x4 * y0
+ + (ulong)x3 * y1
+ + (ulong)x2 * y2
+ + (ulong)x1 * y3
+ + (ulong)x0 * y4;
+ ulong f12 = (ulong)x7 * y5
+ + (ulong)x6 * y6
+ + (ulong)x5 * y7;
+ ulong g4 = (ulong)u4 * v0
+ + (ulong)u3 * v1
+ + (ulong)u2 * v2
+ + (ulong)u1 * v3
+ + (ulong)u0 * v4;
+ ulong g12 = (ulong)u7 * v5
+ + (ulong)u6 * v6
+ + (ulong)u5 * v7;
+ ulong h4 = (ulong)s4 * t0
+ + (ulong)s3 * t1
+ + (ulong)s2 * t2
+ + (ulong)s1 * t3
+ + (ulong)s0 * t4;
+ ulong h12 = (ulong)s7 * t5
+ + (ulong)s6 * t6
+ + (ulong)s5 * t7;
+
+ c += f4 + g4 + h12 - f12;
+ z4 = (uint)c & M28; c >>= 28;
+ d += g12 + h4 - f4 + h12;
+ z12 = (uint)d & M28; d >>= 28;
+
+ ulong f5 = (ulong)x5 * y0
+ + (ulong)x4 * y1
+ + (ulong)x3 * y2
+ + (ulong)x2 * y3
+ + (ulong)x1 * y4
+ + (ulong)x0 * y5;
+ ulong f13 = (ulong)x7 * y6
+ + (ulong)x6 * y7;
+ ulong g5 = (ulong)u5 * v0
+ + (ulong)u4 * v1
+ + (ulong)u3 * v2
+ + (ulong)u2 * v3
+ + (ulong)u1 * v4
+ + (ulong)u0 * v5;
+ ulong g13 = (ulong)u7 * v6
+ + (ulong)u6 * v7;
+ ulong h5 = (ulong)s5 * t0
+ + (ulong)s4 * t1
+ + (ulong)s3 * t2
+ + (ulong)s2 * t3
+ + (ulong)s1 * t4
+ + (ulong)s0 * t5;
+ ulong h13 = (ulong)s7 * t6
+ + (ulong)s6 * t7;
+
+ c += f5 + g5 + h13 - f13;
+ z5 = (uint)c & M28; c >>= 28;
+ d += g13 + h5 - f5 + h13;
+ z13 = (uint)d & M28; d >>= 28;
+
+ ulong f6 = (ulong)x6 * y0
+ + (ulong)x5 * y1
+ + (ulong)x4 * y2
+ + (ulong)x3 * y3
+ + (ulong)x2 * y4
+ + (ulong)x1 * y5
+ + (ulong)x0 * y6;
+ ulong f14 = (ulong)x7 * y7;
+ ulong g6 = (ulong)u6 * v0
+ + (ulong)u5 * v1
+ + (ulong)u4 * v2
+ + (ulong)u3 * v3
+ + (ulong)u2 * v4
+ + (ulong)u1 * v5
+ + (ulong)u0 * v6;
+ ulong g14 = (ulong)u7 * v7;
+ ulong h6 = (ulong)s6 * t0
+ + (ulong)s5 * t1
+ + (ulong)s4 * t2
+ + (ulong)s3 * t3
+ + (ulong)s2 * t4
+ + (ulong)s1 * t5
+ + (ulong)s0 * t6;
+ ulong h14 = (ulong)s7 * t7;
+
+ c += f6 + g6 + h14 - f14;
+ z6 = (uint)c & M28; c >>= 28;
+ d += g14 + h6 - f6 + h14;
+ z14 = (uint)d & M28; d >>= 28;
+
+ ulong f7 = (ulong)x7 * y0
+ + (ulong)x6 * y1
+ + (ulong)x5 * y2
+ + (ulong)x4 * y3
+ + (ulong)x3 * y4
+ + (ulong)x2 * y5
+ + (ulong)x1 * y6
+ + (ulong)x0 * y7;
+ ulong g7 = (ulong)u7 * v0
+ + (ulong)u6 * v1
+ + (ulong)u5 * v2
+ + (ulong)u4 * v3
+ + (ulong)u3 * v4
+ + (ulong)u2 * v5
+ + (ulong)u1 * v6
+ + (ulong)u0 * v7;
+ ulong h7 = (ulong)s7 * t0
+ + (ulong)s6 * t1
+ + (ulong)s5 * t2
+ + (ulong)s4 * t3
+ + (ulong)s3 * t4
+ + (ulong)s2 * t5
+ + (ulong)s1 * t6
+ + (ulong)s0 * t7;
+
+ c += f7 + g7;
+ z7 = (uint)c & M28; c >>= 28;
+ d += h7 - f7;
+ z15 = (uint)d & M28; d >>= 28;
+
+ c += d;
+
+ c += z8;
+ z8 = (uint)c & M28; c >>= 28;
+ d += z0;
+ z0 = (uint)d & M28; d >>= 28;
+ z9 += (uint)c;
+ z1 += (uint)d;
+
+ z[0] = z0;
+ z[1] = z1;
+ z[2] = z2;
+ z[3] = z3;
+ z[4] = z4;
+ z[5] = z5;
+ z[6] = z6;
+ z[7] = z7;
+ z[8] = z8;
+ z[9] = z9;
+ z[10] = z10;
+ z[11] = z11;
+ z[12] = z12;
+ z[13] = z13;
+ z[14] = z14;
+ z[15] = z15;
+ }
+
+ public static void Normalize(uint[] z)
+ {
+ //int x = (z[15] >> (28 - 1)) & 1;
+ Reduce(z, 1);
+ Reduce(z, -1);
+ Debug.Assert(z[15] >> 28 == 0U);
+ }
+
+ private static void Reduce(uint[] z, int c)
+ {
+ uint z15 = z[15];
+ long t = z15;
+ z15 &= M28;
+ t = (t >> 28) + c;
+ z[8] += (uint)t;
+ for (int i = 0; i < 15; ++i)
+ {
+ t += z[i]; z[i] = (uint)t & M28; t >>= 28;
+ }
+ z[15] = z15 + (uint)t;
+ }
+
+ public static void Sqr(uint[] x, uint[] z)
+ {
+ uint x0 = x[0];
+ uint x1 = x[1];
+ uint x2 = x[2];
+ uint x3 = x[3];
+ uint x4 = x[4];
+ uint x5 = x[5];
+ uint x6 = x[6];
+ uint x7 = x[7];
+
+ uint u0 = x[8];
+ uint u1 = x[9];
+ uint u2 = x[10];
+ uint u3 = x[11];
+ uint u4 = x[12];
+ uint u5 = x[13];
+ uint u6 = x[14];
+ uint u7 = x[15];
+
+ uint x0_2 = x0 * 2;
+ uint x1_2 = x1 * 2;
+ uint x2_2 = x2 * 2;
+ uint x3_2 = x3 * 2;
+ uint x4_2 = x4 * 2;
+ uint x5_2 = x5 * 2;
+ uint x6_2 = x6 * 2;
+
+ uint u0_2 = u0 * 2;
+ uint u1_2 = u1 * 2;
+ uint u2_2 = u2 * 2;
+ uint u3_2 = u3 * 2;
+ uint u4_2 = u4 * 2;
+ uint u5_2 = u5 * 2;
+ uint u6_2 = u6 * 2;
+
+ uint s0 = x0 + u0;
+ uint s1 = x1 + u1;
+ uint s2 = x2 + u2;
+ uint s3 = x3 + u3;
+ uint s4 = x4 + u4;
+ uint s5 = x5 + u5;
+ uint s6 = x6 + u6;
+ uint s7 = x7 + u7;
+
+ uint s0_2 = s0 * 2;
+ uint s1_2 = s1 * 2;
+ uint s2_2 = s2 * 2;
+ uint s3_2 = s3 * 2;
+ uint s4_2 = s4 * 2;
+ uint s5_2 = s5 * 2;
+ uint s6_2 = s6 * 2;
+
+ uint z0, z1, z2, z3, z4, z5, z6, z7, z8, z9, z10, z11, z12, z13, z14, z15;
+ ulong c, d;
+
+ ulong f0 = (ulong)x0 * x0;
+ ulong f8 = (ulong)x7 * x1_2
+ + (ulong)x6 * x2_2
+ + (ulong)x5 * x3_2
+ + (ulong)x4 * x4;
+ ulong g0 = (ulong)u0 * u0;
+ ulong g8 = (ulong)u7 * u1_2
+ + (ulong)u6 * u2_2
+ + (ulong)u5 * u3_2
+ + (ulong)u4 * u4;
+ ulong h0 = (ulong)s0 * s0;
+ ulong h8 = (ulong)s7 * s1_2
+ + (ulong)s6 * s2_2
+ + (ulong)s5 * s3_2
+ + (ulong)s4 * s4;
+
+ c = f0 + g0 + h8 - f8;
+ z0 = (uint)c & M28; c >>= 28;
+ d = g8 + h0 - f0 + h8;
+ z8 = (uint)d & M28; d >>= 28;
+
+ ulong f1 = (ulong)x1 * x0_2;
+ ulong f9 = (ulong)x7 * x2_2
+ + (ulong)x6 * x3_2
+ + (ulong)x5 * x4_2;
+ ulong g1 = (ulong)u1 * u0_2;
+ ulong g9 = (ulong)u7 * u2_2
+ + (ulong)u6 * u3_2
+ + (ulong)u5 * u4_2;
+ ulong h1 = (ulong)s1 * s0_2;
+ ulong h9 = (ulong)s7 * s2_2
+ + (ulong)s6 * s3_2
+ + (ulong)s5 * s4_2;
+
+ c += f1 + g1 + h9 - f9;
+ z1 = (uint)c & M28; c >>= 28;
+ d += g9 + h1 - f1 + h9;
+ z9 = (uint)d & M28; d >>= 28;
+
+ ulong f2 = (ulong)x2 * x0_2
+ + (ulong)x1 * x1;
+ ulong f10 = (ulong)x7 * x3_2
+ + (ulong)x6 * x4_2
+ + (ulong)x5 * x5;
+ ulong g2 = (ulong)u2 * u0_2
+ + (ulong)u1 * u1;
+ ulong g10 = (ulong)u7 * u3_2
+ + (ulong)u6 * u4_2
+ + (ulong)u5 * u5;
+ ulong h2 = (ulong)s2 * s0_2
+ + (ulong)s1 * s1;
+ ulong h10 = (ulong)s7 * s3_2
+ + (ulong)s6 * s4_2
+ + (ulong)s5 * s5;
+
+ c += f2 + g2 + h10 - f10;
+ z2 = (uint)c & M28; c >>= 28;
+ d += g10 + h2 - f2 + h10;
+ z10 = (uint)d & M28; d >>= 28;
+
+ ulong f3 = (ulong)x3 * x0_2
+ + (ulong)x2 * x1_2;
+ ulong f11 = (ulong)x7 * x4_2
+ + (ulong)x6 * x5_2;
+ ulong g3 = (ulong)u3 * u0_2
+ + (ulong)u2 * u1_2;
+ ulong g11 = (ulong)u7 * u4_2
+ + (ulong)u6 * u5_2;
+ ulong h3 = (ulong)s3 * s0_2
+ + (ulong)s2 * s1_2;
+ ulong h11 = (ulong)s7 * s4_2
+ + (ulong)s6 * s5_2;
+
+ c += f3 + g3 + h11 - f11;
+ z3 = (uint)c & M28; c >>= 28;
+ d += g11 + h3 - f3 + h11;
+ z11 = (uint)d & M28; d >>= 28;
+
+ ulong f4 = (ulong)x4 * x0_2
+ + (ulong)x3 * x1_2
+ + (ulong)x2 * x2;
+ ulong f12 = (ulong)x7 * x5_2
+ + (ulong)x6 * x6;
+ ulong g4 = (ulong)u4 * u0_2
+ + (ulong)u3 * u1_2
+ + (ulong)u2 * u2;
+ ulong g12 = (ulong)u7 * u5_2
+ + (ulong)u6 * u6;
+ ulong h4 = (ulong)s4 * s0_2
+ + (ulong)s3 * s1_2
+ + (ulong)s2 * s2;
+ ulong h12 = (ulong)s7 * s5_2
+ + (ulong)s6 * s6;
+
+ c += f4 + g4 + h12 - f12;
+ z4 = (uint)c & M28; c >>= 28;
+ d += g12 + h4 - f4 + h12;
+ z12 = (uint)d & M28; d >>= 28;
+
+ ulong f5 = (ulong)x5 * x0_2
+ + (ulong)x4 * x1_2
+ + (ulong)x3 * x2_2;
+ ulong f13 = (ulong)x7 * x6_2;
+ ulong g5 = (ulong)u5 * u0_2
+ + (ulong)u4 * u1_2
+ + (ulong)u3 * u2_2;
+ ulong g13 = (ulong)u7 * u6_2;
+ ulong h5 = (ulong)s5 * s0_2
+ + (ulong)s4 * s1_2
+ + (ulong)s3 * s2_2;
+ ulong h13 = (ulong)s7 * s6_2;
+
+ c += f5 + g5 + h13 - f13;
+ z5 = (uint)c & M28; c >>= 28;
+ d += g13 + h5 - f5 + h13;
+ z13 = (uint)d & M28; d >>= 28;
+
+ ulong f6 = (ulong)x6 * x0_2
+ + (ulong)x5 * x1_2
+ + (ulong)x4 * x2_2
+ + (ulong)x3 * x3;
+ ulong f14 = (ulong)x7 * x7;
+ ulong g6 = (ulong)u6 * u0_2
+ + (ulong)u5 * u1_2
+ + (ulong)u4 * u2_2
+ + (ulong)u3 * u3;
+ ulong g14 = (ulong)u7 * u7;
+ ulong h6 = (ulong)s6 * s0_2
+ + (ulong)s5 * s1_2
+ + (ulong)s4 * s2_2
+ + (ulong)s3 * s3;
+ ulong h14 = (ulong)s7 * s7;
+
+ c += f6 + g6 + h14 - f14;
+ z6 = (uint)c & M28; c >>= 28;
+ d += g14 + h6 - f6 + h14;
+ z14 = (uint)d & M28; d >>= 28;
+
+ ulong f7 = (ulong)x7 * x0_2
+ + (ulong)x6 * x1_2
+ + (ulong)x5 * x2_2
+ + (ulong)x4 * x3_2;
+ ulong g7 = (ulong)u7 * u0_2
+ + (ulong)u6 * u1_2
+ + (ulong)u5 * u2_2
+ + (ulong)u4 * u3_2;
+ ulong h7 = (ulong)s7 * s0_2
+ + (ulong)s6 * s1_2
+ + (ulong)s5 * s2_2
+ + (ulong)s4 * s3_2;
+
+ c += f7 + g7;
+ z7 = (uint)c & M28; c >>= 28;
+ d += h7 - f7;
+ z15 = (uint)d & M28; d >>= 28;
+
+ c += d;
+
+ c += z8;
+ z8 = (uint)c & M28; c >>= 28;
+ d += z0;
+ z0 = (uint)d & M28; d >>= 28;
+ z9 += (uint)c;
+ z1 += (uint)d;
+
+ z[0] = z0;
+ z[1] = z1;
+ z[2] = z2;
+ z[3] = z3;
+ z[4] = z4;
+ z[5] = z5;
+ z[6] = z6;
+ z[7] = z7;
+ z[8] = z8;
+ z[9] = z9;
+ z[10] = z10;
+ z[11] = z11;
+ z[12] = z12;
+ z[13] = z13;
+ z[14] = z14;
+ z[15] = z15;
+ }
+
+ public static void Sqr(uint[] x, int n, uint[] z)
+ {
+ Debug.Assert(n > 0);
+
+ Sqr(x, z);
+
+ while (--n > 0)
+ {
+ Sqr(z, z);
+ }
+ }
+
+ public static void Sub(uint[] x, uint[] y, uint[] z)
+ {
+ uint x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3], x4 = x[4], x5 = x[5], x6 = x[6], x7 = x[7];
+ uint x8 = x[8], x9 = x[9], x10 = x[10], x11 = x[11], x12 = x[12], x13 = x[13], x14 = x[14], x15 = x[15];
+ uint y0 = y[0], y1 = y[1], y2 = y[2], y3 = y[3], y4 = y[4], y5 = y[5], y6 = y[6], y7 = y[7];
+ uint y8 = y[8], y9 = y[9], y10 = y[10], y11 = y[11], y12 = y[12], y13 = y[13], y14 = y[14], y15 = y[15];
+
+ uint z0 = x0 + 0x1FFFFFFEU - y0;
+ uint z1 = x1 + 0x1FFFFFFEU - y1;
+ uint z2 = x2 + 0x1FFFFFFEU - y2;
+ uint z3 = x3 + 0x1FFFFFFEU - y3;
+ uint z4 = x4 + 0x1FFFFFFEU - y4;
+ uint z5 = x5 + 0x1FFFFFFEU - y5;
+ uint z6 = x6 + 0x1FFFFFFEU - y6;
+ uint z7 = x7 + 0x1FFFFFFEU - y7;
+ uint z8 = x8 + 0x1FFFFFFCU - y8;
+ uint z9 = x9 + 0x1FFFFFFEU - y9;
+ uint z10 = x10 + 0x1FFFFFFEU - y10;
+ uint z11 = x11 + 0x1FFFFFFEU - y11;
+ uint z12 = x12 + 0x1FFFFFFEU - y12;
+ uint z13 = x13 + 0x1FFFFFFEU - y13;
+ uint z14 = x14 + 0x1FFFFFFEU - y14;
+ uint z15 = x15 + 0x1FFFFFFEU - y15;
+
+ z2 += z1 >> 28; z1 &= M28;
+ z6 += z5 >> 28; z5 &= M28;
+ z10 += z9 >> 28; z9 &= M28;
+ z14 += z13 >> 28; z13 &= M28;
+
+ z3 += z2 >> 28; z2 &= M28;
+ z7 += z6 >> 28; z6 &= M28;
+ z11 += z10 >> 28; z10 &= M28;
+ z15 += z14 >> 28; z14 &= M28;
+
+ uint t = z15 >> 28; z15 &= M28;
+ z0 += t;
+ z8 += t;
+
+ z4 += z3 >> 28; z3 &= M28;
+ z8 += z7 >> 28; z7 &= M28;
+ z12 += z11 >> 28; z11 &= M28;
+
+ z1 += z0 >> 28; z0 &= M28;
+ z5 += z4 >> 28; z4 &= M28;
+ z9 += z8 >> 28; z8 &= M28;
+ z13 += z12 >> 28; z12 &= M28;
+
+ z[0] = z0;
+ z[1] = z1;
+ z[2] = z2;
+ z[3] = z3;
+ z[4] = z4;
+ z[5] = z5;
+ z[6] = z6;
+ z[7] = z7;
+ z[8] = z8;
+ z[9] = z9;
+ z[10] = z10;
+ z[11] = z11;
+ z[12] = z12;
+ z[13] = z13;
+ z[14] = z14;
+ z[15] = z15;
+ }
+ }
+}
|