summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--crypto/src/math/ec/custom/sec/SecT131Field.cs2
-rw-r--r--crypto/src/math/raw/Interleave.cs40
-rw-r--r--crypto/src/pqc/crypto/cmce/CmceEngine.cs288
-rw-r--r--crypto/src/pqc/crypto/cmce/GF.cs306
4 files changed, 269 insertions, 367 deletions
diff --git a/crypto/src/math/ec/custom/sec/SecT131Field.cs b/crypto/src/math/ec/custom/sec/SecT131Field.cs
index 6088b264c..f2c878d6a 100644
--- a/crypto/src/math/ec/custom/sec/SecT131Field.cs
+++ b/crypto/src/math/ec/custom/sec/SecT131Field.cs
@@ -370,7 +370,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
         protected static void ImplSquare(ulong[] x, ulong[] zz)
         {
             Interleave.Expand64To128(x, 0, 2, zz, 0);
-            zz[4] = Interleave.Expand8to16((uint)x[2]);
+            zz[4] = Interleave.Expand8to16((byte)x[2]);
         }
     }
 }
diff --git a/crypto/src/math/raw/Interleave.cs b/crypto/src/math/raw/Interleave.cs
index 02aa79551..3e994a43c 100644
--- a/crypto/src/math/raw/Interleave.cs
+++ b/crypto/src/math/raw/Interleave.cs
@@ -12,23 +12,37 @@ namespace Org.BouncyCastle.Math.Raw
         private const ulong M64 = 0x5555555555555555UL;
         private const ulong M64R = 0xAAAAAAAAAAAAAAAAUL;
 
-        internal static uint Expand8to16(uint x)
+        internal static uint Expand8to16(byte x)
         {
-            x &= 0xFFU;
-            x = (x | (x << 4)) & 0x0F0FU;
-            x = (x | (x << 2)) & 0x3333U;
-            x = (x | (x << 1)) & 0x5555U;
-            return x;
+            uint t = x;
+
+#if NETCOREAPP3_0_OR_GREATER
+            if (Bmi2.IsSupported)
+            {
+                return Bmi2.ParallelBitDeposit(t, 0x55555555U);
+            }
+#endif
+            t = (t | (t << 4)) & 0x0F0FU;
+            t = (t | (t << 2)) & 0x3333U;
+            t = (t | (t << 1)) & 0x5555U;
+            return t;
         }
 
-        internal static uint Expand16to32(uint x)
+        internal static uint Expand16to32(ushort x)
         {
-            x &= 0xFFFFU;
-            x = (x | (x << 8)) & 0x00FF00FFU;
-            x = (x | (x << 4)) & 0x0F0F0F0FU;
-            x = (x | (x << 2)) & 0x33333333U;
-            x = (x | (x << 1)) & 0x55555555U;
-            return x;
+            uint t = x;
+
+#if NETCOREAPP3_0_OR_GREATER
+            if (Bmi2.IsSupported)
+            {
+                return Bmi2.ParallelBitDeposit(t, 0x55555555U);
+            }
+#endif
+            t = (t | (t << 8)) & 0x00FF00FFU;
+            t = (t | (t << 4)) & 0x0F0F0F0FU;
+            t = (t | (t << 2)) & 0x33333333U;
+            t = (t | (t << 1)) & 0x55555555U;
+            return t;
         }
 
         internal static ulong Expand32to64(uint x)
diff --git a/crypto/src/pqc/crypto/cmce/CmceEngine.cs b/crypto/src/pqc/crypto/cmce/CmceEngine.cs
index 64a0fbce3..0d62b57ca 100644
--- a/crypto/src/pqc/crypto/cmce/CmceEngine.cs
+++ b/crypto/src/pqc/crypto/cmce/CmceEngine.cs
@@ -52,7 +52,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Cmce
 
         public int DefaultSessionKeySize => defaultKeySize;
 
-        public CmceEngine(int m, int n, int t, int[] p, bool usePivots, int defaultKeySize)
+        internal CmceEngine(int m, int n, int t, int[] p, bool usePivots, int defaultKeySize)
         {
             this.usePivots = usePivots;
             this.SYS_N = n;
@@ -71,21 +71,21 @@ namespace Org.BouncyCastle.Pqc.Crypto.Cmce
             SYND_BYTES = (PK_NROWS + 7) / 8;
             GFMASK = (1 << GFBITS) - 1;
 
-
             if (GFBITS == 12)
             {
-                gf = new GF12(GFBITS);
+                gf = new GF12();
                 benes = new Benes12(SYS_N, SYS_T, GFBITS);
             }
             else
             {
-                gf = new GF13(GFBITS);
+                gf = new GF13();
                 benes = new Benes13(SYS_N, SYS_T, GFBITS);
 
             }
             usePadding = SYS_T % 8 != 0;
             countErrorIndices = (1 << GFBITS) > SYS_N;
         }
+
         public byte[] GeneratePublicKeyFromPrivateKey(byte[] sk)
         {
             byte[] pk = new byte[PublicKeySize];
@@ -123,20 +123,18 @@ namespace Org.BouncyCastle.Pqc.Crypto.Cmce
             // generate hash using the seed given in the sk (64 || first 32 bytes)
             byte[] hash = new byte[(SYS_N / 8) + ((1 << GFBITS) * 4) + IRR_BYTES + 32];
 
-            int hash_idx = 0;
             IDigest digest = DigestUtilities.GetDigest(NistObjectIdentifiers.IdShake256);
             digest.Update((byte)64);
             digest.BlockUpdate(sk, 0, 32); // input
             ((IXof)digest).OutputFinal(hash, 0, hash.Length);
 
-
             // generate g
             if (sk.Length <= 40)
             {
                 ushort[] field = new ushort[SYS_T];
 
                 byte[] reg_g = new byte[IRR_BYTES];
-                hash_idx = hash.Length - 32 - IRR_BYTES;
+                int hash_idx = hash.Length - 32 - IRR_BYTES;
                 for (int i = 0; i < SYS_T; i++)
                 {
                     field[i] = Utils.LoadGF(hash, hash_idx + i * 2, GFMASK);
@@ -156,7 +154,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Cmce
                 uint[] perm = new uint[1 << GFBITS];
                 ushort[] pi = new ushort[1 << GFBITS];
 
-                hash_idx = hash.Length - 32 - IRR_BYTES - ((1 << GFBITS) * 4);
+                int hash_idx = hash.Length - 32 - IRR_BYTES - ((1 << GFBITS) * 4);
                 for (int i = 0; i < (1 << GFBITS); i++)
                 {
                     perm[i] = Utils.Load4(hash, hash_idx + i * 4);
@@ -172,10 +170,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Cmce
                     long[] buf = new long[1 << GFBITS];
                     for (int i = 0; i < (1 << GFBITS); i++)
                     {
-                        buf[i] = perm[i];
-                        buf[i] <<= 31;
-                        buf[i] |= (uint)i;
-                        buf[i] &= 0x7fffffffffffffffL; // getting rid of signed longs
+                        buf[i] = ((long)perm[i] << 31) | (uint)i;
                     }
                     Sort64(buf, 0, buf.Length);
                     for (int i = 0; i < (1 << GFBITS); i++)
@@ -184,7 +179,6 @@ namespace Org.BouncyCastle.Pqc.Crypto.Cmce
                     }
                 }
 
-
                 byte[] output = new byte[COND_BYTES];
                 ControlBitsFromPermutation(output, pi, GFBITS, 1 << GFBITS);
                 //copy the controlbits from the permutation to the private key
@@ -1348,10 +1342,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Cmce
             long[] buf = new long[1 << GFBITS];
             for (i = 0; i < (1 << GFBITS); i++)
             {
-                buf[i] = perm[i];
-                buf[i] <<= 31;
-                buf[i] |= (uint)i;
-                // buf[i] &= 0x7fffffffffffffffL; // getting rid of signed longs
+                buf[i] = ((long)perm[i] << 31) | (uint)i;
             }
             // Sort32 the buffer
 
@@ -1431,34 +1422,84 @@ namespace Org.BouncyCastle.Pqc.Crypto.Cmce
             // gaussian elimination
             int row, c;
             byte mask;
-            for (i = 0; i < (PK_NROWS + 7) / 8; i++)
+            for (row = 0; row < PK_NROWS; row++)
             {
-                for (j = 0; j < 8; j++)
+                i = row >> 3;
+                j = row & 7;
+
+                if (usePivots)
                 {
-                    row = i * 8 + j;
+                    if (row == PK_NROWS - 32)
+                    {
+                        if (MovColumns(mat, pi, pivots) != 0)
+                        {
+                            //System.out.println("failed mov column!");
+                            return -1;
+                        }
+                    }
+                }
+
+                byte[] mat_row = mat[row];
 
-                    if (row >= PK_NROWS)
-                        break;
+                for (k = row + 1; k < PK_NROWS; k++)
+                {
+                    byte[] mat_k = mat[k];
+                    mask = (byte)(mat_row[i] ^ mat_k[i]);
+                    mask >>= j;
+                    mask &= 1;
 
-                    byte[] mat_row = mat[row];
+                    c = 0;
 
-                    if (usePivots)
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+                    if (Vector.IsHardwareAccelerated)
                     {
-                        if (row == PK_NROWS - 32)
+                        var vm = new Vector<byte>((byte)-mask);
+                        int limit = (SYS_N / 8) - Vector<byte>.Count;
+                        while (c <= limit)
                         {
-                            if (MovColumns(mat, pi, pivots) != 0)
-                            {
-                                //System.out.println("failed mov column!");
-                                return -1;
-                            }
+                            var vk = new Vector<byte>(mat_k, c);
+                            var vr = new Vector<byte>(mat_row, c);
+                            ((vk & vm) ^ vr).CopyTo(mat_row, c);
+                            c += Vector<byte>.Count;
+                        }
+                    }
+                    {
+                        ulong mask64 = 0UL - mask;
+                        int limit = (SYS_N / 8) - 8;
+                        while (c <= limit)
+                        {
+                            ulong t0 = MemoryMarshal.Read<ulong>(mat_k.AsSpan(c));
+                            ulong t1 = MemoryMarshal.Read<ulong>(mat_row.AsSpan(c));
+                            t1 ^= t0 & mask64;
+                            MemoryMarshal.Write(mat_row.AsSpan(c), ref t1);
+                            c += 8;
                         }
                     }
+#endif
+                    {
+                        byte maskByte = (byte)-mask;
+                        while (c < SYS_N / 8)
+                        {
+                            mat_row[c] ^= (byte)(mat_k[c] & maskByte);
+                            ++c;
+                        }
+                    }
+                }
+
+                // 7. Compute (T,cn−k−μ+1,...,cn−k,Γ′) ← MatGen(Γ). If this fails, set δ ← δ′ and
+                // restart the algorithm.
+                if (((mat_row[i] >> j) & 1) == 0) // return if not systematic
+                {
+                    //System.out.println("FAIL 2\n");
+                    return -1;
+                }
 
-                    for (k = row + 1; k < PK_NROWS; k++)
+                for (k = 0; k < PK_NROWS; k++)
+                {
+                    if (k != row)
                     {
                         byte[] mat_k = mat[k];
-                        mask = (byte)(mat_row[i] ^ mat_k[i]);
-                        mask >>= j;
+                        mask = (byte)(mat_k[i] >> j);
                         mask &= 1;
 
                         c = 0;
@@ -1470,9 +1511,9 @@ namespace Org.BouncyCastle.Pqc.Crypto.Cmce
                             int limit = (SYS_N / 8) - Vector<byte>.Count;
                             while (c <= limit)
                             {
-                                var vk = new Vector<byte>(mat_k, c);
                                 var vr = new Vector<byte>(mat_row, c);
-                                ((vk & vm) ^ vr).CopyTo(mat_row, c);
+                                var vk = new Vector<byte>(mat_k, c);
+                                ((vr & vm) ^ vk).CopyTo(mat_k, c);
                                 c += Vector<byte>.Count;
                             }
                         }
@@ -1481,10 +1522,10 @@ namespace Org.BouncyCastle.Pqc.Crypto.Cmce
                             int limit = (SYS_N / 8) - 8;
                             while (c <= limit)
                             {
-                                ulong t0 = MemoryMarshal.Read<ulong>(mat_k.AsSpan(c));
-                                ulong t1 = MemoryMarshal.Read<ulong>(mat_row.AsSpan(c));
+                                ulong t0 = MemoryMarshal.Read<ulong>(mat_row.AsSpan(c));
+                                ulong t1 = MemoryMarshal.Read<ulong>(mat_k.AsSpan(c));
                                 t1 ^= t0 & mask64;
-                                MemoryMarshal.Write(mat_row.AsSpan(c), ref t1);
+                                MemoryMarshal.Write(mat_k.AsSpan(c), ref t1);
                                 c += 8;
                             }
                         }
@@ -1493,73 +1534,11 @@ namespace Org.BouncyCastle.Pqc.Crypto.Cmce
                             byte maskByte = (byte)-mask;
                             while (c < SYS_N / 8)
                             {
-                                mat_row[c] ^= (byte)(mat_k[c] & maskByte);
+                                mat_k[c] ^= (byte)(mat_row[c] & maskByte);
                                 ++c;
                             }
                         }
                     }
-
-                    // 7. Compute (T,cn−k−μ+1,...,cn−k,Γ′) ← MatGen(Γ). If this fails, set δ ← δ′ and
-                    // restart the algorithm.
-                    if (((mat_row[i] >> j) & 1) == 0) // return if not systematic
-                    {
-                        //System.out.println("FAIL 2\n");
-                        return -1;
-                    }
-
-                    for (k = 0; k < PK_NROWS; k++)
-                    {
-                        if (k != row)
-                        {
-                            byte[] mat_k = mat[k];
-                            mask = (byte)(mat_k[i] >> j);
-                            mask &= 1;
-
-                            //mask = (byte)-mask;
-
-                            //for (c = 0; c < SYS_N / 8; c++)
-                            //{
-                            //    mat_k[c] ^= (byte)(mat_row[c] & mask);
-                            //}
-
-                            c = 0;
-
-#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
-                            if (Vector.IsHardwareAccelerated)
-                            {
-                                var vm = new Vector<byte>((byte)-mask);
-                                int limit = (SYS_N / 8) - Vector<byte>.Count;
-                                while (c <= limit)
-                                {
-                                    var vr = new Vector<byte>(mat_row, c);
-                                    var vk = new Vector<byte>(mat_k, c);
-                                    ((vr & vm) ^ vk).CopyTo(mat_k, c);
-                                    c += Vector<byte>.Count;
-                                }
-                            }
-                            {
-                                ulong mask64 = 0UL - mask;
-                                int limit = (SYS_N / 8) - 8;
-                                while (c <= limit)
-                                {
-                                    ulong t0 = MemoryMarshal.Read<ulong>(mat_row.AsSpan(c));
-                                    ulong t1 = MemoryMarshal.Read<ulong>(mat_k.AsSpan(c));
-                                    t1 ^= t0 & mask64;
-                                    MemoryMarshal.Write(mat_k.AsSpan(c), ref t1);
-                                    c += 8;
-                                }
-                            }
-#endif
-                            {
-                                byte maskByte = (byte)-mask;
-                                while (c < SYS_N / 8)
-                                {
-                                    mat_k[c] ^= (byte)(mat_row[c] & maskByte);
-                                    ++c;
-                                }
-                            }
-                        }
-                    }
                 }
             }
 
@@ -1568,27 +1547,23 @@ namespace Org.BouncyCastle.Pqc.Crypto.Cmce
             {
                 if (usePadding)
                 {
-                    int tail, pk_index = 0;
-                    tail = PK_NROWS % 8;
+                    int pk_index = 0, tail = PK_NROWS % 8;
                     for (i = 0; i < PK_NROWS; i++)
                     {
+                        byte[] mat_i = mat[i];
                         for (j = (PK_NROWS - 1) / 8; j < SYS_N / 8 - 1; j++)
                         {
-                            pk[pk_index++] = (byte)(((mat[i][j] & 0xff) >> tail) | (mat[i][j + 1] << (8 - tail)));
+                            pk[pk_index++] = (byte)(((mat_i[j] & 0xff) >> tail) | (mat_i[j + 1] << (8 - tail)));
                         }
-                        pk[pk_index++] = (byte)((mat[i][j] & 0xff) >> tail);
+                        pk[pk_index++] = (byte)((mat_i[j] & 0xff) >> tail);
                     }
                 }
                 else
                 {
+                    int count = (SYS_N - PK_NROWS + 7) / 8;
                     for (i = 0; i < PK_NROWS; i++)
                     {
-                        k = 0;
-                        for (j = 0; j < (((SYS_N - PK_NROWS) + 7) / 8); j++)
-                        {
-                            pk[i * (((SYS_N - PK_NROWS) + 7) / 8) + k] = mat[i][j + PK_NROWS / 8];
-                            k++;
-                        }
+                        Array.Copy(mat[i], PK_NROWS / 8, pk, count * i, count);
                     }
                 }
             }
@@ -1597,9 +1572,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Cmce
 
         private ushort Eval(ushort[] f, ushort a)
         {
-            ushort r;
-
-            r = f[SYS_T];
+            ushort r = f[SYS_T];
 
             for (int i = SYS_T - 1; i >= 0; i--)
             {
@@ -1696,19 +1669,19 @@ namespace Org.BouncyCastle.Pqc.Crypto.Cmce
 
         private void GFMul(ushort[] output, ushort[] left, ushort[] right)
         {
-
             ushort[] prod = new ushort[SYS_T * 2 - 1];
-            for (int i = 0; i < SYS_T * 2 - 1; i++)
-            {
-                prod[i] = 0;
-            }
+
             for (int i = 0; i < SYS_T; i++)
             {
-                for (int j = 0; j < SYS_T; j++)
+                ushort left_i = left[i];
+                ushort right_i = right[i];
+
+                for (int j = 0; j < i; j++)
                 {
-                    ushort temp = gf.GFMul(left[i], right[j]);
-                    prod[i + j] ^= temp;
+                    prod[i + j] ^= (ushort)(gf.GFMul(left_i, right[j]) ^ gf.GFMul(left[j], right_i));
                 }
+
+                prod[i + i] ^= gf.GFMul(left_i, right_i);
             }
 
             for (int i = (SYS_T - 1) * 2; i >= SYS_T; i--)
@@ -1717,7 +1690,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Cmce
                 {
                     if (polyIndex == 0 && GFBITS == 12)
                     {
-                        prod[i - SYS_T] ^= (gf.GFMul(prod[i], (ushort)2));
+                        prod[i - SYS_T] ^= gf.GFMul(prod[i], 2);
                     }
                     else
                     {
@@ -1727,20 +1700,13 @@ namespace Org.BouncyCastle.Pqc.Crypto.Cmce
             }
 
             Array.Copy(prod, 0, output, 0, SYS_T);
-            for (int i = 0; i < SYS_T; i++)
-            {
-                output[i] = prod[i];
-            }
         }
 
         /* check if the padding bits of pk are all zero */
         int CheckPKPadding(byte[] pk)
         {
-            byte b;
-            int i, ret;
-
-            b = 0;
-            for (i = 0; i < PK_NROWS; i++)
+            byte b = 0;
+            for (int i = 0; i < PK_NROWS; i++)
             {
                 b |= pk[i * PK_ROW_BYTES + PK_ROW_BYTES - 1];
             }
@@ -1748,7 +1714,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.Cmce
             b = (byte)((b & 0xff) >> (PK_NCOLS % 8));
             b -= 1;
             b = (byte)((b & 0xff) >> 7);
-            ret = b;
+            int ret = b;
 
             return ret - 1;
         }
@@ -1756,31 +1722,29 @@ namespace Org.BouncyCastle.Pqc.Crypto.Cmce
         /* check if the padding bits of c are all zero */
         int CheckCPadding(byte[] c)
         {
-            byte b;
-            int ret;
-
-            b = (byte)((c[SYND_BYTES - 1] & 0xff) >> (PK_NROWS % 8));
+            byte b = (byte)((c[SYND_BYTES - 1] & 0xff) >> (PK_NROWS % 8));
             b -= 1;
             b = (byte)((b & 0xff) >> 7);
-            ret = b;
+            int ret = b;
 
             return ret - 1;
         }
 
-
-
         private static void Sort32(int[] temp, int from, int to)
         {
-            int top, p, q, r, i;
             int n = to - from;
+            if (n < 2)
+                return;
 
-            if (n < 2) return;
-            top = 1;
-            while (top < n - top) top += top;
+            int top = 1;
+            while (top < n - top)
+            {
+                top += top;
+            }
 
-            for (p = top; p > 0; p >>= 1)
+            for (int p = top; p > 0; p >>= 1)
             {
-                for (i = 0; i < n - p; ++i)
+                for (int i = 0; i < n - p; ++i)
                 {
                     if ((i & p) == 0)
                     {
@@ -1793,15 +1757,14 @@ namespace Org.BouncyCastle.Pqc.Crypto.Cmce
                         temp[from + i + p] ^= c;
                     }
                 }
-                i = 0;
-                for (q = top; q > p; q >>= 1)
+                for (int q = top; q > p; q >>= 1)
                 {
-                    for (; i < n - q; ++i)
+                    for (int i = 0; i < n - q; ++i)
                     {
                         if ((i & p) == 0)
                         {
                             int a = temp[from + i + p];
-                            for (r = q; r > p; r >>= 1)
+                            for (int r = q; r > p; r >>= 1)
                             {
                                 int ab = temp[from + i + r] ^ a;
                                 int c = temp[from + i + r] - a;
@@ -1820,40 +1783,40 @@ namespace Org.BouncyCastle.Pqc.Crypto.Cmce
 
         private static void Sort64(long[] temp, int from, int to)
         {
-            int top, p, q, r, i;
             int n = to - from;
+            if (n < 2)
+                return;
 
-            if (n < 2) return;
-            top = 1;
-            while (top < n - top) top += top;
+            int top = 1;
+            while (top < n - top)
+            {
+                top += top;
+            }
 
-            for (p = top; p > 0; p >>= 1)
+            for (int p = top; p > 0; p >>= 1)
             {
-                for (i = 0; i < n - p; ++i)
+                for (int i = 0; i < n - p; ++i)
                 {
                     if ((i & p) == 0)
                     {
                         long c = temp[from + i + p] - temp[from + i];
                         c >>= 63;
-                        // c = -c;
                         c &= temp[from + i] ^ temp[from + i + p];
                         temp[from + i] ^= c;
                         temp[from + i + p] ^= c;
                     }
                 }
-                i = 0;
-                for (q = top; q > p; q >>= 1)
+                for (int q = top; q > p; q >>= 1)
                 {
-                    for (; i < n - q; ++i)
+                    for (int i = 0; i < n - q; ++i)
                     {
                         if ((i & p) == 0)
                         {
                             long a = temp[from + i + p];
-                            for (r = q; r > p; r >>= 1)
+                            for (int r = q; r > p; r >>= 1)
                             {
                                 long c = temp[from + i + r] - a;
                                 c >>= 63;
-                                // c = -c;
                                 c &= a ^ temp[from + i + r];
                                 a ^= c;
                                 temp[from + i + r] ^= c;
@@ -1863,7 +1826,6 @@ namespace Org.BouncyCastle.Pqc.Crypto.Cmce
                     }
                 }
             }
-
         }
     }
 }
diff --git a/crypto/src/pqc/crypto/cmce/GF.cs b/crypto/src/pqc/crypto/cmce/GF.cs
index b0e9e7c5e..02c14f971 100644
--- a/crypto/src/pqc/crypto/cmce/GF.cs
+++ b/crypto/src/pqc/crypto/cmce/GF.cs
@@ -1,99 +1,53 @@
 using System;
+using System.Diagnostics;
+
+using Org.BouncyCastle.Math.Raw;
 
 namespace Org.BouncyCastle.Pqc.Crypto.Cmce
 {
-    abstract class GF
+    internal abstract class GF
     {
-        protected int GFBITS;
-        protected int GFMASK; //  = ((1 << GFBITS) - 1);
-
-        public GF(int gfbits)
+        internal GF()
         {
-            GFBITS = gfbits;
-            GFMASK = ((1 << GFBITS) - 1);
-
         }
 
         internal ushort GFIsZero(ushort a)
         {
-            int t = a;
-
-            t -= 1;
-            t >>= 19;
-
-            return (ushort) t;
+            return (ushort)((a - 1) >> 31);
         }
 
         internal ushort GFAdd(ushort left, ushort right)
         {
-            return (ushort) (left ^ right);
+            return (ushort)(left ^ right);
         }
 
-        public abstract ushort GFMul(ushort left, ushort right);
-        internal abstract protected ushort GFFrac(ushort den, ushort num);
-        internal abstract protected ushort GFInv(ushort input);
-
+        internal abstract ushort GFMul(ushort left, ushort right);
+        internal abstract ushort GFFrac(ushort den, ushort num);
+        internal abstract ushort GFInv(ushort input);
     }
 
-    class GF12
+    internal class GF12
         : GF
     {
-        
-        public GF12(int gfbits)
-            :base(gfbits)
+        internal GF12()
         {
         }
 
-        public override ushort GFMul(ushort left, ushort right)
+        internal override ushort GFMul(ushort left, ushort right)
         {
-            int temp, temp_left, temp_right, t;
-            temp_left = left;
-            temp_right = right;
-            temp = temp_left * (temp_right & 1);
+            int x = left;
+            int y = right;
 
-            for (int i = 1; i < GFBITS; i++)
+            int z = x * (y & 1);
+            for (int i = 1; i < 12; i++)
             {
-                temp ^= (temp_left * (temp_right & (1<<i)));
+                z ^= x * (y & (1 << i));
             }
 
-            t = (temp & 0x7FC000);
-            temp ^= t >> 9;
-            temp ^= t >> 12;
-
-            t = (temp & 0x3000);
-            temp ^= t >> 9;
-            temp ^= t >> 12;
-
-            ushort res = (ushort) ( temp & ((1 << GFBITS)-1));
-            return res;
-
-        }
-
-        protected ushort GFSq(ushort input)
-        {
-            int[] B = {0x55555555, 0x33333333, 0x0F0F0F0F, 0x00FF00FF};
-            int x = input;
-            int t;
-
-            x = (x | (x << 8)) & B[3];
-            x = (x | (x << 4)) & B[2];
-            x = (x | (x << 2)) & B[1];
-            x = (x | (x << 1)) & B[0];
-
-            t = x & 0x7FC000;
-
-            x ^= t >> 9;
-            x ^= t >> 12;
-
-            t = x & 0x3000;
-
-            x ^= t >> 9;
-            x ^= t >> 12;
-
-            return (ushort) (x & ((1 << GFBITS)-1));
+            return Reduce((uint)z);
         }
 
-        protected internal override ushort GFInv(ushort input)
+        internal override ushort GFInv(ushort input)
         {
             ushort tmp_11;
             ushort tmp_1111;
@@ -123,180 +77,152 @@ namespace Org.BouncyCastle.Pqc.Crypto.Cmce
             return GFSq(output); // 111111111110
         }
 
-        protected internal override ushort GFFrac(ushort den, ushort num)
+        internal override ushort GFFrac(ushort den, ushort num)
         {
             return GFMul(GFInv(den), num);
         }
 
+        private static ushort GFSq(ushort input)
+        {
+            uint z = Interleave.Expand16to32(input);
+            return Reduce(z);
+        }
+
+        private static ushort Reduce(uint x)
+        {
+            Debug.Assert((x >> 24) == 0);
+
+            uint u0 = x & 0x00000FFFU;
+            uint u1 = x >> 12;
+            uint u2 = (x & 0x001FF000U) >> 9;
+            uint u3 = (x & 0x00E00000U) >> 18;
+            uint u4 = x >> 21;
+
+            return (ushort)(u0 ^ u1 ^ u2 ^ u3 ^ u4);
+        }
     }
 
-    class GF13
+    internal class GF13
         : GF
     {
-        public GF13(int gfbits)
-    	    :base(gfbits)
+        private const int GFMASK = (1 << 13) - 1;
+
+        internal GF13()
         {
         }
 
-        public override ushort GFMul(ushort in0, ushort in1)
+        internal override ushort GFMul(ushort in0, ushort in1)
         {
-            int i;
-
-            long tmp;
-            long t0;
-            long t1;
-            long t;
+            int x = in0;
+            int y = in1;
 
-            t0 = in0;
-            t1 = in1;
-
-            tmp = t0 * (t1 & 1);
+            int z = x * (y & 1);
+            for (int i = 1; i < 13; i++)
+            {
+                z ^= x * (y & (1 << i));
+            }
 
-            for (i = 1; i < GFBITS; i++)
-                tmp ^= (t0 * (t1 & (1 << i)));
+            return Reduce((uint)z);
+        }
 
-            //
+        /* input: field element den, num */
+        /* return: (num/den) */
+        internal override ushort GFFrac(ushort den, ushort num)
+        {
+            ushort tmp_11, tmp_1111, output;
 
-            t = tmp & 0x1FF0000L;
-            tmp ^= (t >> 9) ^ (t >> 10) ^ (t >> 12) ^ (t >> 13);
+            tmp_11 = GFSqMul(den, den); // ^11
+            tmp_1111 = GFSq2Mul(tmp_11, tmp_11); // ^1111
+            output = GFSq2(tmp_1111);
+            output = GFSq2Mul(output, tmp_1111); // ^11111111
+            output = GFSq2(output);
+            output = GFSq2Mul(output, tmp_1111); // ^111111111111
 
-            t = tmp & 0x000E000L;
-            tmp ^= (t >> 9) ^ (t >> 10) ^ (t >> 12) ^ (t >> 13);
+            return GFSqMul(output, num); // ^1111111111110 = ^-1
+        }
 
-            return (ushort) (tmp & GFMASK);
+        internal override ushort GFInv(ushort den)
+        {
+            return GFFrac(den, 1);
         }
 
         /* input: field element in */
         /* return: (in^2)^2 */
-        protected ushort GFSq2(ushort input)
+        private static ushort GFSq2(ushort input)
         {
-            int i;
-
-            long[] B = {0x1111111111111111L,
-                        0x0303030303030303L,
-                        0x000F000F000F000FL,
-                        0x000000FF000000FFL};
-
-            long[] M = {0x0001FF0000000000L,
-                        0x000000FF80000000L,
-                        0x000000007FC00000L,
-                        0x00000000003FE000L};
-
-            long x = input;
-            long t;
-
-            x = (x | (x << 24)) & B[3];
-            x = (x | (x << 12)) & B[2];
-            x = (x | (x << 6)) & B[1];
-            x = (x | (x << 3)) & B[0];
-
-            for (i = 0; i < 4; i++)
-            {
-                t = x & M[i];
-                x ^= (t >> 9) ^ (t >> 10) ^ (t >> 12) ^ (t >> 13);
-            }
-
-            return (ushort) (x & GFMASK);
+            uint z1 = Interleave.Expand16to32(input);
+            input = Reduce(z1);
+            uint z2 = Interleave.Expand16to32(input);
+            return Reduce(z2);
         }
 
         /* input: field element in, m */
         /* return: (in^2)*m */
-        private ushort GFSqMul(ushort input, ushort m)
+        private static ushort GFSqMul(ushort input, ushort m)
         {
-            int i;
-
-            long x;
-            long t0;
-            long t1;
-            long t;
-
-            long[] M = {0x0000001FF0000000L,
-                        0x000000000FF80000L,
-                        0x000000000007E000L};
-
-            t0 = input;
-            t1 = m;
+            long t0 = input;
+            long t1 = m;
 
-            x = (t1 << 6) * (t0 & (1 << 6));
+            long x = (t1 << 6) * (t0 & (1 << 6));
 
-            t0 ^= (t0 << 7);
+            t0 ^= t0 << 7;
 
-            x ^= (t1 * (t0 & (0x04001)));
-            x ^= (t1 * (t0 & (0x08002))) << 1;
-            x ^= (t1 * (t0 & (0x10004))) << 2;
-            x ^= (t1 * (t0 & (0x20008))) << 3;
-            x ^= (t1 * (t0 & (0x40010))) << 4;
-            x ^= (t1 * (t0 & (0x80020))) << 5;
+            x ^= (t1 << 0) * (t0 & 0x04001);
+            x ^= (t1 << 1) * (t0 & 0x08002);
+            x ^= (t1 << 2) * (t0 & 0x10004);
+            x ^= (t1 << 3) * (t0 & 0x20008);
+            x ^= (t1 << 4) * (t0 & 0x40010);
+            x ^= (t1 << 5) * (t0 & 0x80020);
 
-            for (i = 0; i < 3; i++)
-            {
-                t = x & M[i];
-                x ^= (t >> 9) ^ (t >> 10) ^ (t >> 12) ^ (t >> 13);
-            }
+            long t;
+            t  = x & 0x0000001FFC000000L;
+            x ^= (t >> 18) ^ (t >> 20) ^ (t >> 24) ^ (t >> 26);
 
-            return (ushort) (x & GFMASK);
+            return Reduce((uint)(x & 0x03FFFFFFU));
         }
 
         /* input: field element in, m */
         /* return: ((in^2)^2)*m */
-        private ushort GFSq2Mul(ushort input, ushort m)
+        private static ushort GFSq2Mul(ushort input, ushort m)
         {
-            int i;
-
-            long x;
-            long t0;
-            long t1;
-            long t;
+            long t0 = input;
+            long t1 = m;
 
-            long[] M = {0x1FF0000000000000L,
-                        0x000FF80000000000L,
-                        0x000007FC00000000L,
-                        0x00000003FE000000L,
-                        0x0000000001FE0000L,
-                        0x000000000001E000L};
+            long x = (t1 << 18) * (t0 & (1 << 6));
 
-            t0 = input;
-            t1 = m;
+            t0 ^= t0 << 21;
 
-            x = (t1 << 18) * (t0 & (1 << 6));
+            x ^= (t1 <<  0) * (t0 & (0x010000001L));
+            x ^= (t1 <<  3) * (t0 & (0x020000002L));
+            x ^= (t1 <<  6) * (t0 & (0x040000004L));
+            x ^= (t1 <<  9) * (t0 & (0x080000008L));
+            x ^= (t1 << 12) * (t0 & (0x100000010L));
+            x ^= (t1 << 15) * (t0 & (0x200000020L));
 
-            t0 ^= (t0 << 21);
-
-            x ^= (t1 * (t0 & (0x010000001L)));
-            x ^= (t1 * (t0 & (0x020000002L))) << 3;
-            x ^= (t1 * (t0 & (0x040000004L))) << 6;
-            x ^= (t1 * (t0 & (0x080000008L))) << 9;
-            x ^= (t1 * (t0 & (0x100000010L))) << 12;
-            x ^= (t1 * (t0 & (0x200000020L))) << 15;
+            long t;
+            t  = x & 0x1FFFF80000000000L;
+            x ^= (t >> 18) ^ (t >> 20) ^ (t >> 24) ^ (t >> 26);
 
-            for (i = 0; i < 6; i++)
-            {
-                t = x & M[i];
-                x ^= (t >> 9) ^ (t >> 10) ^ (t >> 12) ^ (t >> 13);
-            }
+            t  = x & 0x000007FFFC000000L;
+            x ^= (t >> 18) ^ (t >> 20) ^ (t >> 24) ^ (t >> 26);
 
-            return (ushort) (x & GFMASK);
+            return Reduce((uint)x & 0x03FFFFFFU);
         }
 
-        /* input: field element den, num */
-        /* return: (num/den) */
-        protected internal override ushort GFFrac(ushort den, ushort num)
+        private static ushort Reduce(uint x)
         {
-            ushort tmp_11, tmp_1111, output;
+            Debug.Assert((x >> 26) == 0);
 
-            tmp_11 = GFSqMul(den, den); // ^11
-            tmp_1111 = GFSq2Mul(tmp_11, tmp_11); // ^1111
-            output = GFSq2(tmp_1111);
-            output = GFSq2Mul(output, tmp_1111); // ^11111111
-            output = GFSq2(output);
-            output = GFSq2Mul(output, tmp_1111); // ^111111111111
+            uint u0 = x & 0x00001FFFU;
+            uint u1 = x >> 13;
 
-            return GFSqMul(output, num); // ^1111111111110 = ^-1
-        }
+            uint t2 = (u1 << 4) ^ (u1 << 3) ^ (u1 << 1);
 
-        protected internal override ushort GFInv(ushort den)
-        {
-            return GFFrac(den, ((ushort) 1));
+            uint u2 = t2 >> 13;
+            uint u3 = t2 & 0x00001FFFU;
+            uint u4 = (u2 << 4) ^ (u2 << 3) ^ (u2 << 1);
+
+            return (ushort)(u0 ^ u1 ^ u2 ^ u3 ^ u4);
         }
     }
 }