summary refs log tree commit diff
diff options
context:
space:
mode:
author Peter Dettman <peter.dettman@bouncycastle.org> 2022-12-01 15:29:51 +0700
committer Peter Dettman <peter.dettman@bouncycastle.org> 2022-12-01 15:29:51 +0700
commit c7f1dcd67152e6d7b9d2cfa907e188af0f7ab940 (patch)
tree c559923dda8e4669ccb5d9116dbc7282aa9943b7
parent Binary curve perf. opts. (diff)
download BouncyCastle.NET-ed25519-c7f1dcd67152e6d7b9d2cfa907e188af0f7ab940.tar.xz
SecT233Field perf. opts.
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT233Field.cs 252
1 file changed, 219 insertions, 33 deletions
diff --git a/crypto/src/math/ec/custom/sec/SecT233Field.cs b/crypto/src/math/ec/custom/sec/SecT233Field.cs
index 70cd92215..1e65b6112 100644
--- a/crypto/src/math/ec/custom/sec/SecT233Field.cs
+++ b/crypto/src/math/ec/custom/sec/SecT233Field.cs
@@ -1,6 +1,7 @@
 using System;
 using System.Diagnostics;
 #if NETCOREAPP3_0_OR_GREATER
+using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
 #endif
@@ -9,12 +10,16 @@ using Org.BouncyCastle.Math.Raw;
 
 namespace Org.BouncyCastle.Math.EC.Custom.Sec
 {
-    internal class SecT233Field
+    internal static class SecT233Field
     {
         private const ulong M41 = ulong.MaxValue >> 23;
         private const ulong M59 = ulong.MaxValue >> 5;
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void Add(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z)
+#else
         public static void Add(ulong[] x, ulong[] y, ulong[] z)
+#endif
         {
             z[0] = x[0] ^ y[0];
             z[1] = x[1] ^ y[1];
@@ -22,7 +27,11 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             z[3] = x[3] ^ y[3];
         }
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void AddBothTo(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z)
+#else
         public static void AddBothTo(ulong[] x, ulong[] y, ulong[] z)
+#endif
         {
             z[0] ^= x[0] ^ y[0];
             z[1] ^= x[1] ^ y[1];
@@ -30,7 +39,11 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             z[3] ^= x[3] ^ y[3];
         }
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void AddExt(ReadOnlySpan<ulong> xx, ReadOnlySpan<ulong> yy, Span<ulong> zz)
+#else
         public static void AddExt(ulong[] xx, ulong[] yy, ulong[] zz)
+#endif
         {
             zz[0] = xx[0] ^ yy[0];
             zz[1] = xx[1] ^ yy[1];
@@ -42,7 +55,11 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             zz[7] = xx[7] ^ yy[7];
         }
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void AddOne(ReadOnlySpan<ulong> x, Span<ulong> z)
+#else
         public static void AddOne(ulong[] x, ulong[] z)
+#endif
         {
             z[0] = x[0] ^ 1UL;
             z[1] = x[1];
@@ -50,7 +67,11 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             z[3] = x[3];
         }
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void AddTo(ReadOnlySpan<ulong> x, Span<ulong> z)
+#else
         public static void AddTo(ulong[] x, ulong[] z)
+#endif
         {
             z[0] ^= x[0];
             z[1] ^= x[1];
@@ -63,9 +84,17 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             return Nat.FromBigInteger64(233, x);
         }
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void HalfTrace(ReadOnlySpan<ulong> x, Span<ulong> z)
+#else
         public static void HalfTrace(ulong[] x, ulong[] z)
+#endif
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Span<ulong> tt = stackalloc ulong[8];
+#else
             ulong[] tt = Nat256.CreateExt64();
+#endif
 
             Nat256.Copy64(x, z);
             for (int i = 1; i < 233; i += 2)
@@ -78,15 +107,24 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             }
         }
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void Invert(ReadOnlySpan<ulong> x, Span<ulong> z)
+#else
         public static void Invert(ulong[] x, ulong[] z)
+#endif
         {
             if (Nat256.IsZero64(x))
                 throw new InvalidOperationException();
 
             // Itoh-Tsujii inversion
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Span<ulong> t0 = stackalloc ulong[8];
+            Span<ulong> t1 = stackalloc ulong[8];
+#else
             ulong[] t0 = Nat256.Create64();
             ulong[] t1 = Nat256.Create64();
+#endif
 
             Square(x, t0);
             Multiply(t0, x, t0);
@@ -111,46 +149,74 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             Square(t0, z);
         }
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void Multiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> z)
+#else
         public static void Multiply(ulong[] x, ulong[] y, ulong[] z)
+#endif
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Span<ulong> tt = stackalloc ulong[8];
+#else
             ulong[] tt = Nat256.CreateExt64();
+#endif
             ImplMultiply(x, y, tt);
             Reduce(tt, z);
         }
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void MultiplyAddToExt(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz)
+#else
         public static void MultiplyAddToExt(ulong[] x, ulong[] y, ulong[] zz)
+#endif
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Span<ulong> tt = stackalloc ulong[8];
+#else
             ulong[] tt = Nat256.CreateExt64();
+#endif
             ImplMultiply(x, y, tt);
             AddExt(zz, tt, zz);
         }
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void MultiplyExt(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz)
+#else
         public static void MultiplyExt(ulong[] x, ulong[] y, ulong[] zz)
+#endif
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            zz[..8].Fill(0UL);
+#else
             Array.Clear(zz, 0, 8);
+#endif
             ImplMultiply(x, y, zz);
         }
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void Reduce(ReadOnlySpan<ulong> xx, Span<ulong> z)
+#else
         public static void Reduce(ulong[] xx, ulong[] z)
+#endif
         {
             ulong x0 = xx[0], x1 = xx[1], x2 = xx[2], x3 = xx[3];
             ulong x4 = xx[4], x5 = xx[5], x6 = xx[6], x7 = xx[7];
 
             x3 ^= (x7 << 23);
             x4 ^= (x7 >> 41) ^ (x7 << 33);
-            x5 ^= (x7 >> 31);
+            x5 ^=              (x7 >> 31);
 
             x2 ^= (x6 << 23);
             x3 ^= (x6 >> 41) ^ (x6 << 33);
-            x4 ^= (x6 >> 31);
+            x4 ^=              (x6 >> 31);
 
             x1 ^= (x5 << 23);
             x2 ^= (x5 >> 41) ^ (x5 << 33);
-            x3 ^= (x5 >> 31);
+            x3 ^=              (x5 >> 31);
 
             x0 ^= (x4 << 23);
             x1 ^= (x4 >> 41) ^ (x4 << 33);
-            x2 ^= (x4 >> 31);
+            x2 ^=              (x4 >> 31);
 
             ulong t = x3 >> 41;
             z[0]    = x0 ^ t;
@@ -167,7 +233,11 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             z[zOff + 3]  = z3 & M41;
         }
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void Sqrt(ReadOnlySpan<ulong> x, Span<ulong> z)
+#else
         public static void Sqrt(ulong[] x, ulong[] z)
+#endif
         {
             ulong c0 = Interleave.Unshuffle(x[0], x[1], out ulong e0);
             ulong c1 = Interleave.Unshuffle(x[2], x[3], out ulong e1);
@@ -177,9 +247,14 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             c1 ^= (c0 >> 27) | (c1 << 37);
             c0 ^=              (c0 << 37);
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Span<ulong> tt = stackalloc ulong[8];
+            Span<int> shifts = stackalloc int[]{ 32, 117, 191 };
+#else
             ulong[] tt = Nat256.CreateExt64();
-
             int[] shifts = { 32, 117, 191 };
+#endif
+
             for (int i = 0; i < shifts.Length; ++i)
             {
                 int w = shifts[i] >> 6, s = shifts[i] & 63;
@@ -196,30 +271,58 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             z[1] ^= e1;
         }
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void Square(ReadOnlySpan<ulong> x, Span<ulong> z)
+#else
         public static void Square(ulong[] x, ulong[] z)
+#endif
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Span<ulong> tt = stackalloc ulong[8];
+#else
             ulong[] tt = Nat256.CreateExt64();
+#endif
             ImplSquare(x, tt);
             Reduce(tt, z);
         }
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void SquareAddToExt(ReadOnlySpan<ulong> x, Span<ulong> zz)
+#else
         public static void SquareAddToExt(ulong[] x, ulong[] zz)
+#endif
         {
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Span<ulong> tt = stackalloc ulong[8];
+#else
             ulong[] tt = Nat256.CreateExt64();
+#endif
             ImplSquare(x, tt);
             AddExt(zz, tt, zz);
         }
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void SquareExt(ReadOnlySpan<ulong> x, Span<ulong> zz)
+#else
         public static void SquareExt(ulong[] x, ulong[] zz)
+#endif
         {
             ImplSquare(x, zz);
         }
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static void SquareN(ReadOnlySpan<ulong> x, int n, Span<ulong> z)
+#else
         public static void SquareN(ulong[] x, int n, ulong[] z)
+#endif
         {
             Debug.Assert(n > 0);
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Span<ulong> tt = stackalloc ulong[8];
+#else
             ulong[] tt = Nat256.CreateExt64();
+#endif
             ImplSquare(x, tt);
             Reduce(tt, z);
 
@@ -230,13 +333,21 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             }
         }
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        public static uint Trace(ReadOnlySpan<ulong> x)
+#else
         public static uint Trace(ulong[] x)
+#endif
         {
             // Non-zero-trace bits: 0, 159
             return (uint)(x[0] ^ (x[2] >> 31)) & 1U;
         }
 
-        protected static void ImplCompactExt(ulong[] zz)
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        private static void ImplCompactExt(Span<ulong> zz)
+#else
+        private static void ImplCompactExt(ulong[] zz)
+#endif
         {
             ulong z0 = zz[0], z1 = zz[1], z2 = zz[2], z3 = zz[3], z4 = zz[4], z5 = zz[5], z6 = zz[6], z7 = zz[7];
             zz[0] =  z0         ^ (z1 << 59);
@@ -249,7 +360,11 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             zz[7] = (z7 >> 35);
         }
 
-        protected static void ImplExpand(ulong[] x, ulong[] z)
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        private static void ImplExpand(ReadOnlySpan<ulong> x, Span<ulong> z)
+#else
+        private static void ImplExpand(ulong[] x, ulong[] z)
+#endif
         {
             ulong x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3];
             z[0] = x0 & M59;
@@ -258,7 +373,8 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             z[3] = ((x2 >> 49) ^ (x3 << 15));
         }
 
-        protected static void ImplMultiply(ulong[] x, ulong[] y, ulong[] zz)
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        private static void ImplMultiply(ReadOnlySpan<ulong> x, ReadOnlySpan<ulong> y, Span<ulong> zz)
         {
 #if NETCOREAPP3_0_OR_GREATER
             if (Pclmulqdq.IsSupported)
@@ -275,35 +391,37 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
                                    Pclmulqdq.CarrylessMultiply(X01, Y01, 0x10));
                 var Z23 =          Pclmulqdq.CarrylessMultiply(X01, Y01, 0x11);
 
+                Z01 = Sse2.Xor(Z01, Sse2.ShiftLeftLogical128BitLane (Z12, 8));
+                Z23 = Sse2.Xor(Z23, Sse2.ShiftRightLogical128BitLane(Z12, 8));
+
                 var Z45 =          Pclmulqdq.CarrylessMultiply(X23, Y23, 0x00);
                 var Z56 = Sse2.Xor(Pclmulqdq.CarrylessMultiply(X23, Y23, 0x01),
                                    Pclmulqdq.CarrylessMultiply(X23, Y23, 0x10));
                 var Z67 =          Pclmulqdq.CarrylessMultiply(X23, Y23, 0x11);
 
+                Z45 = Sse2.Xor(Z45, Sse2.ShiftLeftLogical128BitLane (Z56, 8));
+                Z67 = Sse2.Xor(Z67, Sse2.ShiftRightLogical128BitLane(Z56, 8));
+
                 var K01 =          Pclmulqdq.CarrylessMultiply(X03, Y03, 0x00);
                 var K12 = Sse2.Xor(Pclmulqdq.CarrylessMultiply(X03, Y03, 0x01),
                                    Pclmulqdq.CarrylessMultiply(X03, Y03, 0x10));
                 var K23 =          Pclmulqdq.CarrylessMultiply(X03, Y03, 0x11);
 
-                K01 = Sse2.Xor(K01, Z01);
-                K12 = Sse2.Xor(K12, Z12);
-                K23 = Sse2.Xor(K23, Z23);
+                var T = Sse2.Xor(Z23, Z45);
 
-                K01 = Sse2.Xor(K01, Z45);
-                K12 = Sse2.Xor(K12, Z56);
-                K23 = Sse2.Xor(K23, Z67);
+                Z23 = Sse2.Xor(T, Z01);
+                Z45 = Sse2.Xor(T, Z67);
 
                 Z23 = Sse2.Xor(Z23, K01);
+                Z23 = Sse2.Xor(Z23, Sse2.ShiftLeftLogical128BitLane (K12, 8));
+                Z45 = Sse2.Xor(Z45, Sse2.ShiftRightLogical128BitLane(K12, 8));
                 Z45 = Sse2.Xor(Z45, K23);
 
-                zz[0] = Z01.GetElement(0);
-                zz[1] = Z01.GetElement(1) ^ Z12.GetElement(0);
-                zz[2] = Z23.GetElement(0) ^ Z12.GetElement(1);
-                zz[3] = Z23.GetElement(1) ^ K12.GetElement(0);
-                zz[4] = Z45.GetElement(0) ^ K12.GetElement(1);
-                zz[5] = Z45.GetElement(1) ^ Z56.GetElement(0);
-                zz[6] = Z67.GetElement(0) ^ Z56.GetElement(1);
-                zz[7] = Z67.GetElement(1);
+                Span<byte> zzBytes = MemoryMarshal.AsBytes(zz);
+                MemoryMarshal.Write(zzBytes[0x00..0x10], ref Z01);
+                MemoryMarshal.Write(zzBytes[0x10..0x20], ref Z23);
+                MemoryMarshal.Write(zzBytes[0x20..0x30], ref Z45);
+                MemoryMarshal.Write(zzBytes[0x30..0x40], ref Z67);
                 return;
             }
 #endif
@@ -312,6 +430,56 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
              * "Two-level seven-way recursion" as described in "Batch binary Edwards", Daniel J. Bernstein.
              */
 
+            Span<ulong> f = stackalloc ulong[4], g = stackalloc ulong[4];
+            ImplExpand(x, f);
+            ImplExpand(y, g);
+
+            Span<ulong> u = stackalloc ulong[8];
+
+            ImplMulwAcc(u, f[0], g[0], zz[0..]);
+            ImplMulwAcc(u, f[1], g[1], zz[1..]);
+            ImplMulwAcc(u, f[2], g[2], zz[2..]);
+            ImplMulwAcc(u, f[3], g[3], zz[3..]);
+
+            // U *= (1 - t^n)
+            for (int i = 5; i > 0; --i)
+            {
+                zz[i] ^= zz[i - 1];
+            }
+
+            ImplMulwAcc(u, f[0] ^ f[1], g[0] ^ g[1], zz[1..]);
+            ImplMulwAcc(u, f[2] ^ f[3], g[2] ^ g[3], zz[3..]);
+
+            // V *= (1 - t^2n)
+            for (int i = 7; i > 1; --i)
+            {
+                zz[i] ^= zz[i - 2];
+            }
+
+            // Double-length recursion
+            {
+                ulong c0 = f[0] ^ f[2], c1 = f[1] ^ f[3];
+                ulong d0 = g[0] ^ g[2], d1 = g[1] ^ g[3];
+                ImplMulwAcc(u, c0 ^ c1, d0 ^ d1, zz[3..]);
+                Span<ulong> t = stackalloc ulong[3]; // must be a Span: t[n..] on a ulong[] copies, losing ImplMulwAcc's writes
+                ImplMulwAcc(u, c0, d0, t[0..]);
+                ImplMulwAcc(u, c1, d1, t[1..]);
+                ulong t0 = t[0], t1 = t[1], t2 = t[2];
+                zz[2] ^= t0;
+                zz[3] ^= t0 ^ t1;
+                zz[4] ^= t2 ^ t1;
+                zz[5] ^= t2;
+            }
+
+            ImplCompactExt(zz);
+        }
+#else
+        private static void ImplMultiply(ulong[] x, ulong[] y, ulong[] zz)
+        {
+            /*
+             * "Two-level seven-way recursion" as described in "Batch binary Edwards", Daniel J. Bernstein.
+             */
+
             ulong[] f = new ulong[4], g = new ulong[4];
             ImplExpand(x, f);
             ImplExpand(y, g);
@@ -355,8 +523,13 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 
             ImplCompactExt(zz);
         }
+#endif
 
-        protected static void ImplMulwAcc(ulong[] u, ulong x, ulong y, ulong[] z, int zOff)
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        private static void ImplMulwAcc(Span<ulong> u, ulong x, ulong y, Span<ulong> z)
+#else
+        private static void ImplMulwAcc(ulong[] u, ulong x, ulong y, ulong[] z, int zOff)
+#endif
         {
             Debug.Assert(x >> 59 == 0);
             Debug.Assert(y >> 59 == 0);
@@ -371,14 +544,14 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             u[7] = u[6] ^  y;
 
             uint j = (uint)x;
-            ulong g, h = 0, l = u[j & 7]
-                              ^ (u[(j >> 3) & 7] << 3);
+            ulong g, h = 0, l = u[(int)j & 7]
+                              ^ (u[(int)(j >> 3) & 7] << 3);
             int k = 54;
             do
             {
                 j  = (uint)(x >> k);
-                g  = u[j & 7]
-                   ^ u[(j >> 3) & 7] << 3;
+                g  = u[(int)j & 7]
+                   ^ u[(int)(j >> 3) & 7] << 3;
                 l ^= (g <<  k);
                 h ^= (g >> -k);
             }
@@ -386,28 +559,41 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 
             Debug.Assert(h >> 53 == 0);
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            z[0] ^= l & M59;
+            z[1] ^= (l >> 59) ^ (h << 5);
+#else
             z[zOff    ] ^= l & M59;
             z[zOff + 1] ^= (l >> 59) ^ (h << 5);
+#endif
         }
 
-        protected static void ImplSquare(ulong[] x, ulong[] zz)
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+        private static void ImplSquare(ReadOnlySpan<ulong> x, Span<ulong> zz)
+#else
+        private static void ImplSquare(ulong[] x, ulong[] zz)
+#endif
         {
 #if NETCOREAPP3_0_OR_GREATER
             if (Bmi2.X64.IsSupported)
             {
                 zz[7] = Bmi2.X64.ParallelBitDeposit(x[3] >> 32, 0x5555555555555555UL);
-                zz[6] = Bmi2.X64.ParallelBitDeposit(x[3], 0x5555555555555555UL);
+                zz[6] = Bmi2.X64.ParallelBitDeposit(x[3]      , 0x5555555555555555UL);
                 zz[5] = Bmi2.X64.ParallelBitDeposit(x[2] >> 32, 0x5555555555555555UL);
-                zz[4] = Bmi2.X64.ParallelBitDeposit(x[2], 0x5555555555555555UL);
+                zz[4] = Bmi2.X64.ParallelBitDeposit(x[2]      , 0x5555555555555555UL);
                 zz[3] = Bmi2.X64.ParallelBitDeposit(x[1] >> 32, 0x5555555555555555UL);
-                zz[2] = Bmi2.X64.ParallelBitDeposit(x[1], 0x5555555555555555UL);
+                zz[2] = Bmi2.X64.ParallelBitDeposit(x[1]      , 0x5555555555555555UL);
                 zz[1] = Bmi2.X64.ParallelBitDeposit(x[0] >> 32, 0x5555555555555555UL);
-                zz[0] = Bmi2.X64.ParallelBitDeposit(x[0], 0x5555555555555555UL);
+                zz[0] = Bmi2.X64.ParallelBitDeposit(x[0]      , 0x5555555555555555UL);
                 return;
             }
 #endif
 
+#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
+            Interleave.Expand64To128(x[..4], zz[..8]);
+#else
             Interleave.Expand64To128(x, 0, 4, zz, 0);
+#endif
         }
     }
 }