summary refs log tree commit diff
diff options
context:
space:
mode:
author Peter Dettman <peter.dettman@bouncycastle.org> 2022-10-09 19:23:18 +0700
committer Peter Dettman <peter.dettman@bouncycastle.org> 2022-10-09 19:23:18 +0700
commit f162d89ee52649716317f096847b89cb356effb7 (patch)
tree 94bb4a836f4f0a69bef2211986063608325fc521
parent Round out Bits methods (diff)
download BouncyCastle.NET-ed25519-f162d89ee52649716317f096847b89cb356effb7.tar.xz
Haraka refactoring (performance)
-rw-r--r-- crypto/src/pqc/crypto/sphincsplus/HarakaSBase.cs 359
1 file changed, 122 insertions, 237 deletions
diff --git a/crypto/src/pqc/crypto/sphincsplus/HarakaSBase.cs b/crypto/src/pqc/crypto/sphincsplus/HarakaSBase.cs
index bad948a22..b7ee8e2aa 100644
--- a/crypto/src/pqc/crypto/sphincsplus/HarakaSBase.cs
+++ b/crypto/src/pqc/crypto/sphincsplus/HarakaSBase.cs
@@ -1,5 +1,7 @@
 using System;
 
+using Org.BouncyCastle.Crypto.Utilities;
+using Org.BouncyCastle.Math.Raw;
 using Org.BouncyCastle.Utilities;
 
 namespace Org.BouncyCastle.Pqc.Crypto.SphincsPlus
@@ -42,20 +44,10 @@ namespace Org.BouncyCastle.Pqc.Crypto.SphincsPlus
             Arrays.Clear(buffer);
         }
 
-        private void BrRangeDec32Le(byte[] input, uint[] output, int inputPos)
-        {
-            int tmp;
-            for (int i = 0; i < output.Length; ++i)
-            {
-                tmp = inputPos + (i << 2);
-                output[i] = (uint)(input[tmp] & 0xFF) | (uint)((input[tmp + 1] << 8) & 0xFF00) | (uint)((input[tmp + 2] << 16) & 0xFF0000) | (uint)(input[tmp + 3] << 24);
-            }
-        }
-
-        protected void InterleaveConstant(ulong[] output, byte[] input, int startPos)
+        protected static void InterleaveConstant(ulong[] output, byte[] input, int startPos)
         {
             uint[] tmp_32_constant = new uint[16];
-            BrRangeDec32Le(input, tmp_32_constant, startPos);
+            Pack.LE_To_UInt32(input, startPos, tmp_32_constant);
             for (int i = 0; i < 4; ++i)
             {
                 BrAesCt64InterleaveIn(output, i, tmp_32_constant, i << 2);
@@ -63,71 +55,63 @@ namespace Org.BouncyCastle.Pqc.Crypto.SphincsPlus
             BrAesCt64Ortho(output);
         }
 
-        protected void InterleaveConstant32(uint[] output, byte[] input, int startPos)
+        protected static void InterleaveConstant32(uint[] output, byte[] input, int startPos)
         {
             for (int i = 0; i < 4; ++i)
             {
-                output[i << 1] = BrDec32Le(input, startPos + (i << 2));
-                output[(i << 1) + 1] = BrDec32Le(input, startPos + (i << 2) + 16);
+                output[i << 1] = Pack.LE_To_UInt32(input, startPos + (i << 2));
+                output[(i << 1) + 1] = Pack.LE_To_UInt32(input, startPos + (i << 2) + 16);
             }
             BrAesCtOrtho(output);
         }
 
-        private uint BrDec32Le(byte[] input, int startPos)
-        {
-            return (uint)(input[startPos] & 0xFF) | (uint)((input[startPos + 1] << 8) & 0xFF00)
-                | (((uint)input[startPos + 2] << 16) & 0xFF0000) | ((uint)input[startPos + 3] << 24);
-        }
-
         protected void Haraka512Perm(byte[] output)
         {
             uint[] w = new uint[16];
             ulong[] q = new ulong[8];
-            ulong tmp_q;
-            int i, j;
-            BrRangeDec32Le(buffer, w, 0);
-            for (i = 0; i < 4; ++i)
+            Pack.LE_To_UInt32(buffer, 0, w);
+            for (int i = 0; i < 4; ++i)
             {
                 BrAesCt64InterleaveIn(q, i, w, i << 2);
             }
             BrAesCt64Ortho(q);
-            for (i = 0; i < 5; ++i)
+            for (int i = 0; i < 5; ++i)
             {
-                for (j = 0; j < 2; ++j)
+                for (int j = 0; j < 2; ++j)
                 {
                     BrAesCt64BitsliceSbox(q);
                     ShiftRows(q);
                     MixColumns(q);
                     AddRoundKey(q, haraka512_rc[(i << 1) + j]);
                 }
-                for (j = 0; j < 8; j++)
+                for (int j = 0; j < 8; j++)
                 {
-                    tmp_q = q[j];
-                    q[j] = (tmp_q & 0x0001000100010001L) << 5 |
-                        (tmp_q & 0x0002000200020002L) << 12 |
-                        (tmp_q & 0x0004000400040004L) >> 1 |
-                        (tmp_q & 0x0008000800080008L) << 6 |
-                        (tmp_q & 0x0020002000200020L) << 9 |
-                        (tmp_q & 0x0040004000400040L) >> 4 |
-                        (tmp_q & 0x0080008000800080L) << 3 |
-                        (tmp_q & 0x2100210021002100L) >> 5 |
-                        (tmp_q & 0x0210021002100210L) << 2 |
-                        (tmp_q & 0x0800080008000800L) << 4 |
-                        (tmp_q & 0x1000100010001000L) >> 12 |
-                        (tmp_q & 0x4000400040004000L) >> 10 |
-                        (tmp_q & 0x8400840084008400L) >> 3;
+                    ulong t = q[j];
+                    q[j] = (t & 0x0001_0001_0001_0001L) <<  5 |
+                           (t & 0x0002_0002_0002_0002L) << 12 |
+                           (t & 0x0004_0004_0004_0004L) >>  1 |
+                           (t & 0x0008_0008_0008_0008L) <<  6 |
+                           (t & 0x0020_0020_0020_0020L) <<  9 |
+                           (t & 0x0040_0040_0040_0040L) >>  4 |
+                           (t & 0x0080_0080_0080_0080L) <<  3 |
+                           (t & 0x2100_2100_2100_2100L) >>  5 |
+                           (t & 0x0210_0210_0210_0210L) <<  2 |
+                           (t & 0x0800_0800_0800_0800L) <<  4 |
+                           (t & 0x1000_1000_1000_1000L) >> 12 |
+                           (t & 0x4000_4000_4000_4000L) >> 10 |
+                           (t & 0x8400_8400_8400_8400L) >>  3;
                 }
             }
             BrAesCt64Ortho(q);
-            for (i = 0; i < 4; i++)
+            for (int i = 0; i < 4; i++)
             {
                 BrAesCt64InterleaveOut(w, q, i);
             }
-            for (i = 0; i < 16; ++i)
+            for (int i = 0; i < 16; ++i)
             {
-                for (j = 0; j < 4; ++j)
+                for (int j = 0; j < 4; ++j)
                 {
-                    output[(i << 2) + j] = (byte)((w[i] >> (j << 3)) & 0xFF);
+                    output[(i << 2) + j] = (byte)(w[i] >> (j << 3));
                 }
             }
         }
@@ -135,52 +119,36 @@ namespace Org.BouncyCastle.Pqc.Crypto.SphincsPlus
         protected void Haraka256Perm(byte[] output)
         {
             uint[] q = new uint[8];
-            int i, j;
-            uint tmp_q;
             InterleaveConstant32(q, buffer, 0);
-            for (i = 0; i < 5; ++i)
+            for (int i = 0; i < 5; ++i)
             {
-                for (j = 0; j < 2; ++j)
+                for (int j = 0; j < 2; ++j)
                 {
                     BrAesCtBitsliceSbox(q);
                     ShiftRows32(q);
                     MixColumns32(q);
                     AddRoundKey32(q, haraka256_rc[(i << 1) + j]);
                 }
-                for (j = 0; j < 8; j++)
+                for (int j = 0; j < 8; j++)
                 {
-                    tmp_q = q[j];
-                    q[j] = (tmp_q & 0x81818181) |
-                        (tmp_q & 0x02020202) << 1 |
-                        (tmp_q & 0x04040404) << 2 |
-                        (tmp_q & 0x08080808) << 3 |
-                        (tmp_q & 0x10101010) >> 3 |
-                        (tmp_q & 0x20202020) >> 2 |
-                        (tmp_q & 0x40404040) >> 1;
+                    uint t = Bits.BitPermuteStep(q[j], 0x0C_0C_0C_0CU, 2);
+                    q[j]   = Bits.BitPermuteStep(t   , 0x22_22_22_22U, 1);
                 }
             }
             BrAesCtOrtho(q);
-            for (i = 0; i < 4; i++)
+            for (int i = 0; i < 4; i++)
             {
-                BrEnc32Le(output, q[i << 1], i << 2);
-                BrEnc32Le(output, q[(i << 1) + 1], (i << 2) + 16);
+                Pack.UInt32_To_LE(q[i << 1], output, i << 2);
+                Pack.UInt32_To_LE(q[(i << 1) + 1], output, (i << 2) + 16);
             }
 
-            for (i = 0; i < 32; i++)
+            for (int i = 0; i < 32; i++)
             {
                 output[i] ^= buffer[i];
             }
         }
 
-        private void BrEnc32Le(byte[] dst, uint x, int startPos)
-        {
-            for (int i = 0; i < 4; ++i)
-            {
-                dst[startPos + i] = (byte)(x >> (i << 3));
-            }
-        }
-
-        private void BrAesCt64InterleaveIn(ulong[] q, int qPos, uint[] w, int startPos)
+        private static void BrAesCt64InterleaveIn(ulong[] q, int qPos, uint[] w, int startPos)
         {
             ulong x0, x1, x2, x3;
             x0 = (ulong)w[startPos] & 0x00000000FFFFFFFFL;
@@ -382,52 +350,37 @@ namespace Org.BouncyCastle.Pqc.Crypto.SphincsPlus
             q[0] = s7;
         }
 
-        private void ShiftRows32(uint[] q)
+        private static void ShiftRows32(uint[] q)
         {
-            uint x;
             for (int i = 0; i < 8; i++)
             {
-                x = q[i];
-                q[i] = (x & 0x000000FF)
-                    | ((x & 0x0000FC00) >> 2) | ((x & 0x00000300) << 6)
-                    | ((x & 0x00F00000) >> 4) | ((x & 0x000F0000) << 4)
-                    | ((x & 0xC0000000) >> 6) | ((x & 0x3F000000) << 2);
+                uint t = Bits.BitPermuteStep(q[i], 0x0C_0F_03_00U, 4);
+                q[i]   = Bits.BitPermuteStep(t   , 0x33_00_33_00U, 2);
             }
         }
 
-        private void MixColumns32(uint[] q)
+        private static void MixColumns32(uint[] q)
         {
-            uint q0, q1, q2, q3, q4, q5, q6, q7;
-            uint r0, r1, r2, r3, r4, r5, r6, r7;
-
-            q0 = q[0];
-            q1 = q[1];
-            q2 = q[2];
-            q3 = q[3];
-            q4 = q[4];
-            q5 = q[5];
-            q6 = q[6];
-            q7 = q[7];
-            r0 = (q0 >> 8) | (q0 << 24);
-            r1 = (q1 >> 8) | (q1 << 24);
-            r2 = (q2 >> 8) | (q2 << 24);
-            r3 = (q3 >> 8) | (q3 << 24);
-            r4 = (q4 >> 8) | (q4 << 24);
-            r5 = (q5 >> 8) | (q5 << 24);
-            r6 = (q6 >> 8) | (q6 << 24);
-            r7 = (q7 >> 8) | (q7 << 24);
-
-            q[0] = q7 ^ r7 ^ r0 ^ Rotr16(q0 ^ r0);
-            q[1] = q0 ^ r0 ^ q7 ^ r7 ^ r1 ^ Rotr16(q1 ^ r1);
-            q[2] = q1 ^ r1 ^ r2 ^ Rotr16(q2 ^ r2);
-            q[3] = q2 ^ r2 ^ q7 ^ r7 ^ r3 ^ Rotr16(q3 ^ r3);
-            q[4] = q3 ^ r3 ^ q7 ^ r7 ^ r4 ^ Rotr16(q4 ^ r4);
-            q[5] = q4 ^ r4 ^ r5 ^ Rotr16(q5 ^ r5);
-            q[6] = q5 ^ r5 ^ r6 ^ Rotr16(q6 ^ r6);
-            q[7] = q6 ^ r6 ^ r7 ^ Rotr16(q7 ^ r7);
+            uint q0 = q[0], r0 = Integers.RotateRight(q0, 8), s0 = q0 ^ r0;
+            uint q1 = q[1], r1 = Integers.RotateRight(q1, 8), s1 = q1 ^ r1;
+            uint q2 = q[2], r2 = Integers.RotateRight(q2, 8), s2 = q2 ^ r2;
+            uint q3 = q[3], r3 = Integers.RotateRight(q3, 8), s3 = q3 ^ r3;
+            uint q4 = q[4], r4 = Integers.RotateRight(q4, 8), s4 = q4 ^ r4;
+            uint q5 = q[5], r5 = Integers.RotateRight(q5, 8), s5 = q5 ^ r5;
+            uint q6 = q[6], r6 = Integers.RotateRight(q6, 8), s6 = q6 ^ r6;
+            uint q7 = q[7], r7 = Integers.RotateRight(q7, 8), s7 = q7 ^ r7;
+
+            q[0] = r0       ^ s7 ^ Integers.RotateRight(s0, 16); 
+            q[1] = r1 ^ s0  ^ s7 ^ Integers.RotateRight(s1, 16); 
+            q[2] = r2 ^ s1       ^ Integers.RotateRight(s2, 16);
+            q[3] = r3 ^ s2  ^ s7 ^ Integers.RotateRight(s3, 16);
+            q[4] = r4 ^ s3  ^ s7 ^ Integers.RotateRight(s4, 16);
+            q[5] = r5 ^ s4       ^ Integers.RotateRight(s5, 16);
+            q[6] = r6 ^ s5       ^ Integers.RotateRight(s6, 16);
+            q[7] = r7 ^ s6       ^ Integers.RotateRight(s7, 16);
         }
 
-        private void AddRoundKey32(uint[] q, uint[] sk)
+        private static void AddRoundKey32(uint[] q, uint[] sk)
         {
             q[0] ^= sk[0];
             q[1] ^= sk[1];
@@ -439,96 +392,51 @@ namespace Org.BouncyCastle.Pqc.Crypto.SphincsPlus
             q[7] ^= sk[7];
         }
 
-        private uint Rotr16(uint x)
+        private static void BrAesCt64Ortho(ulong[] q)
         {
-            return (x << 16) | (x >> 16);
-        }
+            ulong q0 = q[0], q1 = q[1], q2 = q[2], q3 = q[3], q4 = q[4], q5 = q[5], q6 = q[6], q7 = q[7];
 
-        private void BrAesCt64Ortho(ulong[] q)
-        {
-            Swapn(q, 1, 0, 1);
-            Swapn(q, 1, 2, 3);
-            Swapn(q, 1, 4, 5);
-            Swapn(q, 1, 6, 7);
-
-            Swapn(q, 2, 0, 2);
-            Swapn(q, 2, 1, 3);
-            Swapn(q, 2, 4, 6);
-            Swapn(q, 2, 5, 7);
-
-            Swapn(q, 4, 0, 4);
-            Swapn(q, 4, 1, 5);
-            Swapn(q, 4, 2, 6);
-            Swapn(q, 4, 3, 7);
-        }
+            Bits.BitPermuteStep2(ref q1, ref q0, 0x5555555555555555UL, 1);
+            Bits.BitPermuteStep2(ref q3, ref q2, 0x5555555555555555UL, 1);
+            Bits.BitPermuteStep2(ref q5, ref q4, 0x5555555555555555UL, 1);
+            Bits.BitPermuteStep2(ref q7, ref q6, 0x5555555555555555UL, 1);
 
-        private void BrAesCtOrtho(uint[] q)
-        {
-            Swapn32(q, 1, 0, 1);
-            Swapn32(q, 1, 2, 3);
-            Swapn32(q, 1, 4, 5);
-            Swapn32(q, 1, 6, 7);
-
-            Swapn32(q, 2, 0, 2);
-            Swapn32(q, 2, 1, 3);
-            Swapn32(q, 2, 4, 6);
-            Swapn32(q, 2, 5, 7);
-
-            Swapn32(q, 4, 0, 4);
-            Swapn32(q, 4, 1, 5);
-            Swapn32(q, 4, 2, 6);
-            Swapn32(q, 4, 3, 7);
-        }
+            Bits.BitPermuteStep2(ref q2, ref q0, 0x3333333333333333UL, 2);
+            Bits.BitPermuteStep2(ref q3, ref q1, 0x3333333333333333UL, 2);
+            Bits.BitPermuteStep2(ref q6, ref q4, 0x3333333333333333UL, 2);
+            Bits.BitPermuteStep2(ref q7, ref q5, 0x3333333333333333UL, 2);
 
-        private void Swapn32(uint[] q, int s, int pos1, int pos2)
-        {
-            uint cl = 0, ch = 0;
-            switch (s)
-            {
-                case 1:
-                    cl = 0x55555555;
-                    ch = 0xAAAAAAAA;
-                    break;
-                case 2:
-                    cl = 0x33333333;
-                    ch = 0xCCCCCCCC;
-                    break;
-                case 4:
-                    cl = 0x0F0F0F0F;
-                    ch = 0xF0F0F0F0;
-                    break;
-            }
-            uint a = q[pos1], b = q[pos2];
-            q[pos1] = (a & cl) | ((b & cl) << s);
-            q[pos2] = ((a & ch) >> s) | (b & ch);
+            Bits.BitPermuteStep2(ref q4, ref q0, 0x0F0F0F0F0F0F0F0FUL, 4);
+            Bits.BitPermuteStep2(ref q5, ref q1, 0x0F0F0F0F0F0F0F0FUL, 4);
+            Bits.BitPermuteStep2(ref q6, ref q2, 0x0F0F0F0F0F0F0F0FUL, 4);
+            Bits.BitPermuteStep2(ref q7, ref q3, 0x0F0F0F0F0F0F0F0FUL, 4);
+
+            q[0] = q0; q[1] = q1; q[2] = q2; q[3] = q3; q[4] = q4; q[5] = q5; q[6] = q6; q[7] = q7;
         }
 
-        private void Swapn(ulong[] q, int s, int pos1, int pos2)
+        private static void BrAesCtOrtho(uint[] q)
         {
-            ulong cl = 0, ch = 0;
-            switch (s)
-            {
-                case 1:
-                    cl = 0x5555555555555555L;
-                    ch = 0xAAAAAAAAAAAAAAAAL;
-                    break;
-                case 2:
-                    cl = 0x3333333333333333L;
-                    ch = 0xCCCCCCCCCCCCCCCCL;
-                    break;
-                case 4:
-                    cl = 0x0F0F0F0F0F0F0F0FL;
-                    ch = 0xF0F0F0F0F0F0F0F0L;
-                    break;
-                default:
-                    return;
-            }
-            ulong a = q[pos1], b = q[pos2];
-            q[pos1] = (a & cl) | ((b & cl) << s);
-            q[pos2] = ((a & ch) >> s) | (b & ch);
+            uint q0 = q[0], q1 = q[1], q2 = q[2], q3 = q[3], q4 = q[4], q5 = q[5], q6 = q[6], q7 = q[7];
+
+            Bits.BitPermuteStep2(ref q1, ref q0, 0x55555555U, 1);
+            Bits.BitPermuteStep2(ref q3, ref q2, 0x55555555U, 1);
+            Bits.BitPermuteStep2(ref q5, ref q4, 0x55555555U, 1);
+            Bits.BitPermuteStep2(ref q7, ref q6, 0x55555555U, 1);
+
+            Bits.BitPermuteStep2(ref q2, ref q0, 0x33333333U, 2);
+            Bits.BitPermuteStep2(ref q3, ref q1, 0x33333333U, 2);
+            Bits.BitPermuteStep2(ref q6, ref q4, 0x33333333U, 2);
+            Bits.BitPermuteStep2(ref q7, ref q5, 0x33333333U, 2);
+
+            Bits.BitPermuteStep2(ref q4, ref q0, 0x0F0F0F0FU, 4);
+            Bits.BitPermuteStep2(ref q5, ref q1, 0x0F0F0F0FU, 4);
+            Bits.BitPermuteStep2(ref q6, ref q2, 0x0F0F0F0FU, 4);
+            Bits.BitPermuteStep2(ref q7, ref q3, 0x0F0F0F0FU, 4);
+
+            q[0] = q0; q[1] = q1; q[2] = q2; q[3] = q3; q[4] = q4; q[5] = q5; q[6] = q6; q[7] = q7;
         }
 
-        private void BrAesCt64BitsliceSbox(ulong[] q)
+        private static void BrAesCt64BitsliceSbox(ulong[] q)
         {
             /*
              * This S-box implementation is a straightforward translation of
@@ -703,60 +611,37 @@ namespace Org.BouncyCastle.Pqc.Crypto.SphincsPlus
             q[0] = s7;
         }
 
-        private void ShiftRows(ulong[] q)
+        private static void ShiftRows(ulong[] q)
         {
-            ulong x;
-            for (int i = 0; i < q.Length; i++)
+            for (int i = 0; i < 8; i++)
             {
-                x = q[i];
-                q[i] = (x & 0x000000000000FFFFL)
-                    | ((x & 0x00000000FFF00000L) >> 4)
-                    | ((x & 0x00000000000F0000L) << 12)
-                    | ((x & 0x0000FF0000000000L) >> 8)
-                    | ((x & 0x000000FF00000000L) << 8)
-                    | ((x & 0xF000000000000000L) >> 12)
-                    | ((x & 0x0FFF000000000000L) << 4);
+                ulong x = Bits.BitPermuteStep(q[i], 0x00F0_00FF_000F_0000UL, 8);
+                q[i]    = Bits.BitPermuteStep(x   , 0x0F0F_0000_0F0F_0000UL, 4);
             }
         }
 
-        private void MixColumns(ulong[] q)
-        {
-            ulong q0, q1, q2, q3, q4, q5, q6, q7;
-            ulong r0, r1, r2, r3, r4, r5, r6, r7;
-
-            q0 = q[0];
-            q1 = q[1];
-            q2 = q[2];
-            q3 = q[3];
-            q4 = q[4];
-            q5 = q[5];
-            q6 = q[6];
-            q7 = q[7];
-            r0 = (q0 >> 16) | (q0 << 48);
-            r1 = (q1 >> 16) | (q1 << 48);
-            r2 = (q2 >> 16) | (q2 << 48);
-            r3 = (q3 >> 16) | (q3 << 48);
-            r4 = (q4 >> 16) | (q4 << 48);
-            r5 = (q5 >> 16) | (q5 << 48);
-            r6 = (q6 >> 16) | (q6 << 48);
-            r7 = (q7 >> 16) | (q7 << 48);
-
-            q[0] = q7 ^ r7 ^ r0 ^ Rotr32(q0 ^ r0);
-            q[1] = q0 ^ r0 ^ q7 ^ r7 ^ r1 ^ Rotr32(q1 ^ r1);
-            q[2] = q1 ^ r1 ^ r2 ^ Rotr32(q2 ^ r2);
-            q[3] = q2 ^ r2 ^ q7 ^ r7 ^ r3 ^ Rotr32(q3 ^ r3);
-            q[4] = q3 ^ r3 ^ q7 ^ r7 ^ r4 ^ Rotr32(q4 ^ r4);
-            q[5] = q4 ^ r4 ^ r5 ^ Rotr32(q5 ^ r5);
-            q[6] = q5 ^ r5 ^ r6 ^ Rotr32(q6 ^ r6);
-            q[7] = q6 ^ r6 ^ r7 ^ Rotr32(q7 ^ r7);
-        }
-
-        private ulong Rotr32(ulong x)
+        private static void MixColumns(ulong[] q)
         {
-            return (x << 32) | (x >> 32);
+            ulong q0 = q[0], r0 = Longs.RotateRight(q0, 16), s0 = q0 ^ r0;
+            ulong q1 = q[1], r1 = Longs.RotateRight(q1, 16), s1 = q1 ^ r1;
+            ulong q2 = q[2], r2 = Longs.RotateRight(q2, 16), s2 = q2 ^ r2;
+            ulong q3 = q[3], r3 = Longs.RotateRight(q3, 16), s3 = q3 ^ r3;
+            ulong q4 = q[4], r4 = Longs.RotateRight(q4, 16), s4 = q4 ^ r4;
+            ulong q5 = q[5], r5 = Longs.RotateRight(q5, 16), s5 = q5 ^ r5;
+            ulong q6 = q[6], r6 = Longs.RotateRight(q6, 16), s6 = q6 ^ r6;
+            ulong q7 = q[7], r7 = Longs.RotateRight(q7, 16), s7 = q7 ^ r7;
+
+            q[0] = r0       ^ s7 ^ Longs.RotateRight(s0, 32); 
+            q[1] = r1 ^ s0  ^ s7 ^ Longs.RotateRight(s1, 32); 
+            q[2] = r2 ^ s1       ^ Longs.RotateRight(s2, 32);
+            q[3] = r3 ^ s2  ^ s7 ^ Longs.RotateRight(s3, 32);
+            q[4] = r4 ^ s3  ^ s7 ^ Longs.RotateRight(s4, 32);
+            q[5] = r5 ^ s4       ^ Longs.RotateRight(s5, 32);
+            q[6] = r6 ^ s5       ^ Longs.RotateRight(s6, 32);
+            q[7] = r7 ^ s6       ^ Longs.RotateRight(s7, 32);
         }
 
-        private void AddRoundKey(ulong[] q, ulong[] sk)
+        private static void AddRoundKey(ulong[] q, ulong[] sk)
         {
             q[0] ^= sk[0];
             q[1] ^= sk[1];
@@ -768,7 +653,7 @@ namespace Org.BouncyCastle.Pqc.Crypto.SphincsPlus
             q[7] ^= sk[7];
         }
 
-        private void BrAesCt64InterleaveOut(uint[] w, ulong[] q, int pos)
+        private static void BrAesCt64InterleaveOut(uint[] w, ulong[] q, int pos)
         {
             ulong x0, x1, x2, x3;