summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT113Field.cs 18
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT113FieldElement.cs 2
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT131Field.cs 21
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT131FieldElement.cs 2
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT163Field.cs 20
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT163FieldElement.cs 2
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT163K1Point.cs 107
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT193Field.cs 23
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT193FieldElement.cs 2
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT233Field.cs 15
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT233FieldElement.cs 2
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT239Field.cs 24
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT239FieldElement.cs 2
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT239K1Point.cs 107
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT283Field.cs 25
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT283FieldElement.cs 2
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT283K1Point.cs 107
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT409Field.cs 29
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT409FieldElement.cs 2
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT409K1Point.cs 107
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT571Field.cs 32
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT571FieldElement.cs 2
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT571K1Point.cs 3
-rw-r--r-- crypto/src/math/ec/custom/sec/SecT571R1Point.cs 2
24 files changed, 454 insertions, 204 deletions
diff --git a/crypto/src/math/ec/custom/sec/SecT113Field.cs b/crypto/src/math/ec/custom/sec/SecT113Field.cs
index 65249562a..2e99bdc4c 100644
--- a/crypto/src/math/ec/custom/sec/SecT113Field.cs
+++ b/crypto/src/math/ec/custom/sec/SecT113Field.cs
@@ -34,7 +34,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             z[1] = x[1];
         }
 
-        private static void AddTo(ulong[] x, ulong[] z)
+        public static void AddTo(ulong[] x, ulong[] z)
         {
             z[0] ^= x[0];
             z[1] ^= x[1];
@@ -147,6 +147,11 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             AddExt(zz, tt, zz);
         }
 
+        public static void SquareExt(ulong[] x, ulong[] zz)
+        {
+            ImplSquare(x, zz);
+        }
+
         public static void SquareN(ulong[] x, int n, ulong[] z)
         {
             Debug.Assert(n > 0);
@@ -258,6 +263,17 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 
         protected static void ImplSquare(ulong[] x, ulong[] zz)
         {
+#if NETCOREAPP3_0_OR_GREATER
+            if (Bmi2.X64.IsSupported)
+            {
+                zz[3] = Bmi2.X64.ParallelBitDeposit(x[1] >> 32, 0x5555555555555555UL);
+                zz[2] = Bmi2.X64.ParallelBitDeposit(x[1]      , 0x5555555555555555UL);
+                zz[1] = Bmi2.X64.ParallelBitDeposit(x[0] >> 32, 0x5555555555555555UL);
+                zz[0] = Bmi2.X64.ParallelBitDeposit(x[0]      , 0x5555555555555555UL);
+                return;
+            }
+#endif
+
             Interleave.Expand64To128(x, 0, 2, zz, 0);
         }
     }
diff --git a/crypto/src/math/ec/custom/sec/SecT113FieldElement.cs b/crypto/src/math/ec/custom/sec/SecT113FieldElement.cs
index 63de2b88c..1f08af0db 100644
--- a/crypto/src/math/ec/custom/sec/SecT113FieldElement.cs
+++ b/crypto/src/math/ec/custom/sec/SecT113FieldElement.cs
@@ -132,7 +132,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             ulong[] xx = ((SecT113FieldElement)x).x, yx = ((SecT113FieldElement)y).x;
 
             ulong[] tt = Nat128.CreateExt64();
-            SecT113Field.SquareAddToExt(ax, tt);
+            SecT113Field.SquareExt(ax, tt);
             SecT113Field.MultiplyAddToExt(xx, yx, tt);
 
             ulong[] z = Nat128.Create64();
diff --git a/crypto/src/math/ec/custom/sec/SecT131Field.cs b/crypto/src/math/ec/custom/sec/SecT131Field.cs
index f2c878d6a..f84fdd212 100644
--- a/crypto/src/math/ec/custom/sec/SecT131Field.cs
+++ b/crypto/src/math/ec/custom/sec/SecT131Field.cs
@@ -39,7 +39,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             z[2] = x[2];
         }
 
-        private static void AddTo(ulong[] x, ulong[] z)
+        public static void AddTo(ulong[] x, ulong[] z)
         {
             z[0] ^= x[0];
             z[1] ^= x[1];
@@ -162,6 +162,11 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             AddExt(zz, tt, zz);
         }
 
+        public static void SquareExt(ulong[] x, ulong[] zz)
+        {
+            ImplSquare(x, zz);
+        }
+
         public static void SquareN(ulong[] x, int n, ulong[] z)
         {
             Debug.Assert(n > 0);
@@ -369,8 +374,20 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 
         protected static void ImplSquare(ulong[] x, ulong[] zz)
         {
-            Interleave.Expand64To128(x, 0, 2, zz, 0);
             zz[4] = Interleave.Expand8to16((byte)x[2]);
+
+#if NETCOREAPP3_0_OR_GREATER
+            if (Bmi2.X64.IsSupported)
+            {
+                zz[3] = Bmi2.X64.ParallelBitDeposit(x[1] >> 32, 0x5555555555555555UL);
+                zz[2] = Bmi2.X64.ParallelBitDeposit(x[1]      , 0x5555555555555555UL);
+                zz[1] = Bmi2.X64.ParallelBitDeposit(x[0] >> 32, 0x5555555555555555UL);
+                zz[0] = Bmi2.X64.ParallelBitDeposit(x[0]      , 0x5555555555555555UL);
+                return;
+            }
+#endif
+
+            Interleave.Expand64To128(x, 0, 2, zz, 0);
         }
     }
 }
diff --git a/crypto/src/math/ec/custom/sec/SecT131FieldElement.cs b/crypto/src/math/ec/custom/sec/SecT131FieldElement.cs
index 4884e7152..06d31614f 100644
--- a/crypto/src/math/ec/custom/sec/SecT131FieldElement.cs
+++ b/crypto/src/math/ec/custom/sec/SecT131FieldElement.cs
@@ -132,7 +132,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             ulong[] xx = ((SecT131FieldElement)x).x, yx = ((SecT131FieldElement)y).x;
 
             ulong[] tt = Nat.Create64(5);
-            SecT131Field.SquareAddToExt(ax, tt);
+            SecT131Field.SquareExt(ax, tt);
             SecT131Field.MultiplyAddToExt(xx, yx, tt);
 
             ulong[] z = Nat192.Create64();
diff --git a/crypto/src/math/ec/custom/sec/SecT163Field.cs b/crypto/src/math/ec/custom/sec/SecT163Field.cs
index 0c616600a..1ba747681 100644
--- a/crypto/src/math/ec/custom/sec/SecT163Field.cs
+++ b/crypto/src/math/ec/custom/sec/SecT163Field.cs
@@ -40,7 +40,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             z[2] = x[2];
         }
 
-        private static void AddTo(ulong[] x, ulong[] z)
+        public static void AddTo(ulong[] x, ulong[] z)
         {
             z[0] ^= x[0];
             z[1] ^= x[1];
@@ -175,6 +175,11 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             AddExt(zz, tt, zz);
         }
 
+        public static void SquareExt(ulong[] x, ulong[] zz)
+        {
+            ImplSquare(x, zz);
+        }
+
         public static void SquareN(ulong[] x, int n, ulong[] z)
         {
             Debug.Assert(n > 0);
@@ -375,6 +380,19 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 
         protected static void ImplSquare(ulong[] x, ulong[] zz)
         {
+#if NETCOREAPP3_0_OR_GREATER
+            if (Bmi2.X64.IsSupported)
+            {
+                zz[5] = Bmi2.X64.ParallelBitDeposit(x[2] >> 32, 0x5555555555555555UL);
+                zz[4] = Bmi2.X64.ParallelBitDeposit(x[2]      , 0x5555555555555555UL);
+                zz[3] = Bmi2.X64.ParallelBitDeposit(x[1] >> 32, 0x5555555555555555UL);
+                zz[2] = Bmi2.X64.ParallelBitDeposit(x[1]      , 0x5555555555555555UL);
+                zz[1] = Bmi2.X64.ParallelBitDeposit(x[0] >> 32, 0x5555555555555555UL);
+                zz[0] = Bmi2.X64.ParallelBitDeposit(x[0]      , 0x5555555555555555UL);
+                return;
+            }
+#endif
+
             Interleave.Expand64To128(x, 0, 3, zz, 0);
         }
     }
diff --git a/crypto/src/math/ec/custom/sec/SecT163FieldElement.cs b/crypto/src/math/ec/custom/sec/SecT163FieldElement.cs
index 214a56343..e2dbaac19 100644
--- a/crypto/src/math/ec/custom/sec/SecT163FieldElement.cs
+++ b/crypto/src/math/ec/custom/sec/SecT163FieldElement.cs
@@ -132,7 +132,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             ulong[] xx = ((SecT163FieldElement)x).x, yx = ((SecT163FieldElement)y).x;
 
             ulong[] tt = Nat192.CreateExt64();
-            SecT163Field.SquareAddToExt(ax, tt);
+            SecT163Field.SquareExt(ax, tt);
             SecT163Field.MultiplyAddToExt(xx, yx, tt);
 
             ulong[] z = Nat192.Create64();
diff --git a/crypto/src/math/ec/custom/sec/SecT163K1Point.cs b/crypto/src/math/ec/custom/sec/SecT163K1Point.cs
index 0a96f04eb..06855c50c 100644
--- a/crypto/src/math/ec/custom/sec/SecT163K1Point.cs
+++ b/crypto/src/math/ec/custom/sec/SecT163K1Point.cs
@@ -1,5 +1,7 @@
 using System;
 
+using Org.BouncyCastle.Math.Raw;
+
 namespace Org.BouncyCastle.Math.EC.Custom.Sec
 {
     internal class SecT163K1Point
@@ -66,8 +68,8 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 
             ECCurve curve = this.Curve;
 
-            ECFieldElement X1 = this.RawXCoord;
-            ECFieldElement X2 = b.RawXCoord;
+            SecT163FieldElement X1 = (SecT163FieldElement)this.RawXCoord;
+            SecT163FieldElement X2 = (SecT163FieldElement)b.RawXCoord;
 
             if (X1.IsZero)
             {
@@ -77,86 +79,101 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
                 return b.Add(this);
             }
 
-            ECFieldElement L1 = this.RawYCoord, Z1 = this.RawZCoords[0];
-            ECFieldElement L2 = b.RawYCoord, Z2 = b.RawZCoords[0];
+            SecT163FieldElement L1 = (SecT163FieldElement)this.RawYCoord, Z1 = (SecT163FieldElement)this.RawZCoords[0];
+            SecT163FieldElement L2 = (SecT163FieldElement)b.RawYCoord, Z2 = (SecT163FieldElement)b.RawZCoords[0];
+
+            ulong[] tt0 = Nat192.CreateExt64();
+            ulong[] t1 = Nat192.Create64();
+            ulong[] t2 = Nat192.Create64();
+            ulong[] t3 = Nat192.Create64();
 
             bool Z1IsOne = Z1.IsOne;
-            ECFieldElement U2 = X2, S2 = L2;
-            if (!Z1IsOne)
+            if (Z1IsOne)
             {
-                U2 = U2.Multiply(Z1);
-                S2 = S2.Multiply(Z1);
+                Nat192.Copy64(X2.x, t1);                    // U2
+                Nat192.Copy64(L2.x, t2);                    // S2
+            }
+            else
+            {
+                SecT163Field.Multiply(X2.x, Z1.x, t1);      // U2
+                SecT163Field.Multiply(L2.x, Z1.x, t2);      // S2
             }
 
             bool Z2IsOne = Z2.IsOne;
-            ECFieldElement U1 = X1, S1 = L1;
-            if (!Z2IsOne)
+            if (Z2IsOne)
             {
-                U1 = U1.Multiply(Z2);
-                S1 = S1.Multiply(Z2);
+                Nat192.Copy64(X1.x, t3);                    // U1
+                Nat192.Copy64(L1.x, tt0);                   // S1
+            }
+            else
+            {
+                SecT163Field.Multiply(X1.x, Z2.x, t3);      // U1
+                SecT163Field.Multiply(L1.x, Z2.x, tt0);     // S1
             }
 
-            ECFieldElement A = S1.Add(S2);
-            ECFieldElement B = U1.Add(U2);
+            SecT163Field.AddTo(tt0, t2);                    // A
+            SecT163Field.Add(t3, t1, tt0);                  // B
 
-            if (B.IsZero)
+            if (Nat192.IsZero64(tt0))
             {
-                if (A.IsZero)
+                if (Nat192.IsZero64(t2))
                     return Twice();
 
                 return curve.Infinity;
             }
 
-            ECFieldElement X3, L3, Z3;
             if (X2.IsZero)
             {
                 // TODO This can probably be optimized quite a bit
                 ECPoint p = this.Normalize();
-                X1 = p.XCoord;
+                X1 = (SecT163FieldElement)p.XCoord;
                 ECFieldElement Y1 = p.YCoord;
 
                 ECFieldElement Y2 = L2;
                 ECFieldElement L = Y1.Add(Y2).Divide(X1);
 
-                X3 = L.Square().Add(L).Add(X1).AddOne();
+                ECFieldElement X3 = L.Square().Add(L).Add(X1).AddOne();   // retain the AddOne() term of the replaced line; dropping it changes X3
                 if (X3.IsZero)
-                {
                     return new SecT163K1Point(curve, X3, curve.B);
-                }
 
                 ECFieldElement Y3 = L.Multiply(X1.Add(X3)).Add(X3).Add(Y1);
-                L3 = Y3.Divide(X3).Add(X3);
-                Z3 = curve.FromBigInteger(BigInteger.One);
+                ECFieldElement L3 = Y3.Divide(X3).Add(X3);
+                ECFieldElement Z3 = curve.FromBigInteger(BigInteger.One);
+
+                return new SecT163K1Point(curve, X3, L3, new ECFieldElement[]{ Z3 });
             }
-            else
-            {
-                B = B.Square();
 
-                ECFieldElement AU1 = A.Multiply(U1);
-                ECFieldElement AU2 = A.Multiply(U2);
+            SecT163Field.Square(tt0, tt0);
 
-                X3 = AU1.Multiply(AU2);
-                if (X3.IsZero)
-                {
-                    return new SecT163K1Point(curve, X3, curve.B);
-                }
+            SecT163Field.Multiply(t3, t2, t3);      // AU1
+            SecT163Field.Multiply(t1, t2, t1);      // AU2
 
-                ECFieldElement ABZ2 = A.Multiply(B);
-                if (!Z2IsOne)
-                {
-                    ABZ2 = ABZ2.Multiply(Z2);
-                }
+            ulong[] _X3 = t3;
+            SecT163Field.Multiply(_X3, t1, _X3);
+            if (Nat192.IsZero64(_X3))
+                return new SecT163K1Point(curve, new SecT163FieldElement(_X3), curve.B);
 
-                L3 = AU2.Add(B).SquarePlusProduct(ABZ2, L1.Add(Z1));
+            ulong[] _Z3 = t2;
+            SecT163Field.Multiply(_Z3, tt0, _Z3);   // ABZ2
+            if (!Z2IsOne)
+            {
+                SecT163Field.Multiply(_Z3, Z2.x, _Z3);
+            }
 
-                Z3 = ABZ2;
-                if (!Z1IsOne)
-                {
-                    Z3 = Z3.Multiply(Z1);
-                }
+            ulong[] _L3 = t1;
+            SecT163Field.AddTo(tt0, _L3);
+            SecT163Field.SquareExt(_L3, tt0);
+            SecT163Field.Add(L1.x, Z1.x, _L3);
+            SecT163Field.MultiplyAddToExt(_Z3, _L3, tt0);
+            SecT163Field.Reduce(tt0, _L3);
+
+            if (!Z1IsOne)
+            {
+                SecT163Field.Multiply(_Z3, Z1.x, _Z3);
             }
 
-            return new SecT163K1Point(curve, X3, L3, new ECFieldElement[] { Z3 });
+            return new SecT163K1Point(curve, new SecT163FieldElement(_X3), new SecT163FieldElement(_L3),
+                new ECFieldElement[]{ new SecT163FieldElement(_Z3) });
         }
 
         public override ECPoint Twice()
diff --git a/crypto/src/math/ec/custom/sec/SecT193Field.cs b/crypto/src/math/ec/custom/sec/SecT193Field.cs
index 4aa3ad5c2..3d9937f75 100644
--- a/crypto/src/math/ec/custom/sec/SecT193Field.cs
+++ b/crypto/src/math/ec/custom/sec/SecT193Field.cs
@@ -41,7 +41,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             z[3] = x[3];
         }
 
-        private static void AddTo(ulong[] x, ulong[] z)
+        public static void AddTo(ulong[] x, ulong[] z)
         {
             z[0] ^= x[0];
             z[1] ^= x[1];
@@ -183,6 +183,11 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             AddExt(zz, tt, zz);
         }
 
+        public static void SquareExt(ulong[] x, ulong[] zz)
+        {
+            ImplSquare(x, zz);
+        }
+
         public static void SquareN(ulong[] x, int n, ulong[] z)
         {
             Debug.Assert(n > 0);
@@ -349,8 +354,22 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 
         protected static void ImplSquare(ulong[] x, ulong[] zz)
         {
-            Interleave.Expand64To128(x, 0, 3, zz, 0);
             zz[6] = (x[3] & M01);
+
+#if NETCOREAPP3_0_OR_GREATER
+            if (Bmi2.X64.IsSupported)
+            {
+                zz[5] = Bmi2.X64.ParallelBitDeposit(x[2] >> 32, 0x5555555555555555UL);
+                zz[4] = Bmi2.X64.ParallelBitDeposit(x[2]      , 0x5555555555555555UL);
+                zz[3] = Bmi2.X64.ParallelBitDeposit(x[1] >> 32, 0x5555555555555555UL);
+                zz[2] = Bmi2.X64.ParallelBitDeposit(x[1]      , 0x5555555555555555UL);
+                zz[1] = Bmi2.X64.ParallelBitDeposit(x[0] >> 32, 0x5555555555555555UL);
+                zz[0] = Bmi2.X64.ParallelBitDeposit(x[0]      , 0x5555555555555555UL);
+                return;
+            }
+#endif
+
+            Interleave.Expand64To128(x, 0, 3, zz, 0);
         }
     }
 }
diff --git a/crypto/src/math/ec/custom/sec/SecT193FieldElement.cs b/crypto/src/math/ec/custom/sec/SecT193FieldElement.cs
index 3a3ed09ce..1ae6b7a28 100644
--- a/crypto/src/math/ec/custom/sec/SecT193FieldElement.cs
+++ b/crypto/src/math/ec/custom/sec/SecT193FieldElement.cs
@@ -132,7 +132,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             ulong[] xx = ((SecT193FieldElement)x).x, yx = ((SecT193FieldElement)y).x;
 
             ulong[] tt = Nat256.CreateExt64();
-            SecT193Field.SquareAddToExt(ax, tt);
+            SecT193Field.SquareExt(ax, tt);
             SecT193Field.MultiplyAddToExt(xx, yx, tt);
 
             ulong[] z = Nat256.Create64();
diff --git a/crypto/src/math/ec/custom/sec/SecT233Field.cs b/crypto/src/math/ec/custom/sec/SecT233Field.cs
index f2519b369..70cd92215 100644
--- a/crypto/src/math/ec/custom/sec/SecT233Field.cs
+++ b/crypto/src/math/ec/custom/sec/SecT233Field.cs
@@ -392,6 +392,21 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 
         protected static void ImplSquare(ulong[] x, ulong[] zz)
         {
+#if NETCOREAPP3_0_OR_GREATER
+            if (Bmi2.X64.IsSupported)
+            {
+                zz[7] = Bmi2.X64.ParallelBitDeposit(x[3] >> 32, 0x5555555555555555UL);
+                zz[6] = Bmi2.X64.ParallelBitDeposit(x[3], 0x5555555555555555UL);
+                zz[5] = Bmi2.X64.ParallelBitDeposit(x[2] >> 32, 0x5555555555555555UL);
+                zz[4] = Bmi2.X64.ParallelBitDeposit(x[2], 0x5555555555555555UL);
+                zz[3] = Bmi2.X64.ParallelBitDeposit(x[1] >> 32, 0x5555555555555555UL);
+                zz[2] = Bmi2.X64.ParallelBitDeposit(x[1], 0x5555555555555555UL);
+                zz[1] = Bmi2.X64.ParallelBitDeposit(x[0] >> 32, 0x5555555555555555UL);
+                zz[0] = Bmi2.X64.ParallelBitDeposit(x[0], 0x5555555555555555UL);
+                return;
+            }
+#endif
+
             Interleave.Expand64To128(x, 0, 4, zz, 0);
         }
     }
diff --git a/crypto/src/math/ec/custom/sec/SecT233FieldElement.cs b/crypto/src/math/ec/custom/sec/SecT233FieldElement.cs
index 8aff8c87a..6625b63d7 100644
--- a/crypto/src/math/ec/custom/sec/SecT233FieldElement.cs
+++ b/crypto/src/math/ec/custom/sec/SecT233FieldElement.cs
@@ -132,7 +132,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             ulong[] xx = ((SecT233FieldElement)x).x, yx = ((SecT233FieldElement)y).x;
 
             ulong[] tt = Nat256.CreateExt64();
-            SecT233Field.SquareAddToExt(ax, tt);
+            SecT233Field.SquareExt(ax, tt);
             SecT233Field.MultiplyAddToExt(xx, yx, tt);
 
             ulong[] z = Nat256.Create64();
diff --git a/crypto/src/math/ec/custom/sec/SecT239Field.cs b/crypto/src/math/ec/custom/sec/SecT239Field.cs
index a3851de16..d48e1ec2b 100644
--- a/crypto/src/math/ec/custom/sec/SecT239Field.cs
+++ b/crypto/src/math/ec/custom/sec/SecT239Field.cs
@@ -1,6 +1,8 @@
 using System;
 using System.Diagnostics;
 #if NETCOREAPP3_0_OR_GREATER
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
 #endif
@@ -42,7 +44,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             z[3] = x[3];
         }
 
-        private static void AddTo(ulong[] x, ulong[] z)
+        public static void AddTo(ulong[] x, ulong[] z)
         {
             z[0] ^= x[0];
             z[1] ^= x[1];
@@ -205,6 +207,11 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             AddExt(zz, tt, zz);
         }
 
+        public static void SquareExt(ulong[] x, ulong[] zz)
+        {
+            ImplSquare(x, zz);
+        }
+
         public static void SquareN(ulong[] x, int n, ulong[] z)
         {
             Debug.Assert(n > 0);
@@ -384,6 +391,21 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 
         protected static void ImplSquare(ulong[] x, ulong[] zz)
         {
+#if NETCOREAPP3_0_OR_GREATER
+            if (Bmi2.X64.IsSupported)
+            {
+                zz[7] = Bmi2.X64.ParallelBitDeposit(x[3] >> 32, 0x5555555555555555UL);
+                zz[6] = Bmi2.X64.ParallelBitDeposit(x[3]      , 0x5555555555555555UL);
+                zz[5] = Bmi2.X64.ParallelBitDeposit(x[2] >> 32, 0x5555555555555555UL);
+                zz[4] = Bmi2.X64.ParallelBitDeposit(x[2]      , 0x5555555555555555UL);
+                zz[3] = Bmi2.X64.ParallelBitDeposit(x[1] >> 32, 0x5555555555555555UL);
+                zz[2] = Bmi2.X64.ParallelBitDeposit(x[1]      , 0x5555555555555555UL);
+                zz[1] = Bmi2.X64.ParallelBitDeposit(x[0] >> 32, 0x5555555555555555UL);
+                zz[0] = Bmi2.X64.ParallelBitDeposit(x[0]      , 0x5555555555555555UL);
+                return;
+            }
+#endif
+
             Interleave.Expand64To128(x, 0, 4, zz, 0);
         }
     }
diff --git a/crypto/src/math/ec/custom/sec/SecT239FieldElement.cs b/crypto/src/math/ec/custom/sec/SecT239FieldElement.cs
index 9f1bf671c..773eeea2a 100644
--- a/crypto/src/math/ec/custom/sec/SecT239FieldElement.cs
+++ b/crypto/src/math/ec/custom/sec/SecT239FieldElement.cs
@@ -132,7 +132,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             ulong[] xx = ((SecT239FieldElement)x).x, yx = ((SecT239FieldElement)y).x;
 
             ulong[] tt = Nat256.CreateExt64();
-            SecT239Field.SquareAddToExt(ax, tt);
+            SecT239Field.SquareExt(ax, tt);
             SecT239Field.MultiplyAddToExt(xx, yx, tt);
 
             ulong[] z = Nat256.Create64();
diff --git a/crypto/src/math/ec/custom/sec/SecT239K1Point.cs b/crypto/src/math/ec/custom/sec/SecT239K1Point.cs
index b49563a98..a57519d2b 100644
--- a/crypto/src/math/ec/custom/sec/SecT239K1Point.cs
+++ b/crypto/src/math/ec/custom/sec/SecT239K1Point.cs
@@ -1,5 +1,7 @@
 using System;
 
+using Org.BouncyCastle.Math.Raw;
+
 namespace Org.BouncyCastle.Math.EC.Custom.Sec
 {
     internal class SecT239K1Point
@@ -66,8 +68,8 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 
             ECCurve curve = this.Curve;
 
-            ECFieldElement X1 = this.RawXCoord;
-            ECFieldElement X2 = b.RawXCoord;
+            SecT239FieldElement X1 = (SecT239FieldElement)this.RawXCoord;
+            SecT239FieldElement X2 = (SecT239FieldElement)b.RawXCoord;
 
             if (X1.IsZero)
             {
@@ -77,86 +79,101 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
                 return b.Add(this);
             }
 
-            ECFieldElement L1 = this.RawYCoord, Z1 = this.RawZCoords[0];
-            ECFieldElement L2 = b.RawYCoord, Z2 = b.RawZCoords[0];
+            SecT239FieldElement L1 = (SecT239FieldElement)this.RawYCoord, Z1 = (SecT239FieldElement)this.RawZCoords[0];
+            SecT239FieldElement L2 = (SecT239FieldElement)b.RawYCoord, Z2 = (SecT239FieldElement)b.RawZCoords[0];
+
+            ulong[] tt0 = Nat256.CreateExt64();
+            ulong[] t1 = Nat256.Create64();
+            ulong[] t2 = Nat256.Create64();
+            ulong[] t3 = Nat256.Create64();
 
             bool Z1IsOne = Z1.IsOne;
-            ECFieldElement U2 = X2, S2 = L2;
-            if (!Z1IsOne)
+            if (Z1IsOne)
+            {
+                Nat256.Copy64(X2.x, t1);                    // U2
+                Nat256.Copy64(L2.x, t2);                    // S2
+            }
+            else
             {
-                U2 = U2.Multiply(Z1);
-                S2 = S2.Multiply(Z1);
+                SecT239Field.Multiply(X2.x, Z1.x, t1);      // U2
+                SecT239Field.Multiply(L2.x, Z1.x, t2);      // S2
             }
 
             bool Z2IsOne = Z2.IsOne;
-            ECFieldElement U1 = X1, S1 = L1;
-            if (!Z2IsOne)
+            if (Z2IsOne)
             {
-                U1 = U1.Multiply(Z2);
-                S1 = S1.Multiply(Z2);
+                Nat256.Copy64(X1.x, t3);                    // U1
+                Nat256.Copy64(L1.x, tt0);                   // S1
+            }
+            else
+            {
+                SecT239Field.Multiply(X1.x, Z2.x, t3);      // U1
+                SecT239Field.Multiply(L1.x, Z2.x, tt0);     // S1
             }
 
-            ECFieldElement A = S1.Add(S2);
-            ECFieldElement B = U1.Add(U2);
+            SecT239Field.AddTo(tt0, t2);                    // A
+            SecT239Field.Add(t3, t1, tt0);                  // B
 
-            if (B.IsZero)
+            if (Nat256.IsZero64(tt0))
             {
-                if (A.IsZero)
+                if (Nat256.IsZero64(t2))
                     return Twice();
 
                 return curve.Infinity;
             }
 
-            ECFieldElement X3, L3, Z3;
             if (X2.IsZero)
             {
                 // TODO This can probably be optimized quite a bit
                 ECPoint p = this.Normalize();
-                X1 = p.XCoord;
+                X1 = (SecT239FieldElement)p.XCoord;
                 ECFieldElement Y1 = p.YCoord;
 
                 ECFieldElement Y2 = L2;
                 ECFieldElement L = Y1.Add(Y2).Divide(X1);
 
-                X3 = L.Square().Add(L).Add(X1);
+                ECFieldElement X3 = L.Square().Add(L).Add(X1);
                 if (X3.IsZero)
-                {
                     return new SecT239K1Point(curve, X3, curve.B);
-                }
 
                 ECFieldElement Y3 = L.Multiply(X1.Add(X3)).Add(X3).Add(Y1);
-                L3 = Y3.Divide(X3).Add(X3);
-                Z3 = curve.FromBigInteger(BigInteger.One);
+                ECFieldElement L3 = Y3.Divide(X3).Add(X3);
+                ECFieldElement Z3 = curve.FromBigInteger(BigInteger.One);
+
+                return new SecT239K1Point(curve, X3, L3, new ECFieldElement[]{ Z3 });
             }
-            else
-            {
-                B = B.Square();
 
-                ECFieldElement AU1 = A.Multiply(U1);
-                ECFieldElement AU2 = A.Multiply(U2);
+            SecT239Field.Square(tt0, tt0);
 
-                X3 = AU1.Multiply(AU2);
-                if (X3.IsZero)
-                {
-                    return new SecT239K1Point(curve, X3, curve.B);
-                }
+            SecT239Field.Multiply(t3, t2, t3);      // AU1
+            SecT239Field.Multiply(t1, t2, t1);      // AU2
 
-                ECFieldElement ABZ2 = A.Multiply(B);
-                if (!Z2IsOne)
-                {
-                    ABZ2 = ABZ2.Multiply(Z2);
-                }
+            ulong[] _X3 = t3;
+            SecT239Field.Multiply(_X3, t1, _X3);
+            if (Nat256.IsZero64(_X3))
+                return new SecT239K1Point(curve, new SecT239FieldElement(_X3), curve.B);
 
-                L3 = AU2.Add(B).SquarePlusProduct(ABZ2, L1.Add(Z1));
+            ulong[] _Z3 = t2;
+            SecT239Field.Multiply(_Z3, tt0, _Z3);   // ABZ2
+            if (!Z2IsOne)
+            {
+                SecT239Field.Multiply(_Z3, Z2.x, _Z3);
+            }
 
-                Z3 = ABZ2;
-                if (!Z1IsOne)
-                {
-                    Z3 = Z3.Multiply(Z1);
-                }
+            ulong[] _L3 = t1;
+            SecT239Field.AddTo(tt0, _L3);
+            SecT239Field.SquareExt(_L3, tt0);
+            SecT239Field.Add(L1.x, Z1.x, _L3);
+            SecT239Field.MultiplyAddToExt(_Z3, _L3, tt0);
+            SecT239Field.Reduce(tt0, _L3);
+
+            if (!Z1IsOne)
+            {
+                SecT239Field.Multiply(_Z3, Z1.x, _Z3);
             }
 
-            return new SecT239K1Point(curve, X3, L3, new ECFieldElement[] { Z3 });
+            return new SecT239K1Point(curve, new SecT239FieldElement(_X3), new SecT239FieldElement(_L3),
+                new ECFieldElement[]{ new SecT239FieldElement(_Z3) });
         }
 
         public override ECPoint Twice()
diff --git a/crypto/src/math/ec/custom/sec/SecT283Field.cs b/crypto/src/math/ec/custom/sec/SecT283Field.cs
index 334986452..332e47afe 100644
--- a/crypto/src/math/ec/custom/sec/SecT283Field.cs
+++ b/crypto/src/math/ec/custom/sec/SecT283Field.cs
@@ -48,7 +48,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             z[4] = x[4];
         }
 
-        private static void AddTo(ulong[] x, ulong[] z)
+        public static void AddTo(ulong[] x, ulong[] z)
         {
             z[0] ^= x[0];
             z[1] ^= x[1];
@@ -187,6 +187,11 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             AddExt(zz, tt, zz);
         }
 
+        public static void SquareExt(ulong[] x, ulong[] zz)
+        {
+            ImplSquare(x, zz);
+        }
+
         public static void SquareN(ulong[] x, int n, ulong[] z)
         {
             Debug.Assert(n > 0);
@@ -460,8 +465,24 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 
         protected static void ImplSquare(ulong[] x, ulong[] zz)
         {
-            Interleave.Expand64To128(x, 0, 4, zz, 0);
             zz[8] = Interleave.Expand32to64((uint)x[4]);
+
+#if NETCOREAPP3_0_OR_GREATER
+            if (Bmi2.X64.IsSupported)
+            {
+                zz[7] = Bmi2.X64.ParallelBitDeposit(x[3] >> 32, 0x5555555555555555UL);
+                zz[6] = Bmi2.X64.ParallelBitDeposit(x[3]      , 0x5555555555555555UL);
+                zz[5] = Bmi2.X64.ParallelBitDeposit(x[2] >> 32, 0x5555555555555555UL);
+                zz[4] = Bmi2.X64.ParallelBitDeposit(x[2]      , 0x5555555555555555UL);
+                zz[3] = Bmi2.X64.ParallelBitDeposit(x[1] >> 32, 0x5555555555555555UL);
+                zz[2] = Bmi2.X64.ParallelBitDeposit(x[1]      , 0x5555555555555555UL);
+                zz[1] = Bmi2.X64.ParallelBitDeposit(x[0] >> 32, 0x5555555555555555UL);
+                zz[0] = Bmi2.X64.ParallelBitDeposit(x[0]      , 0x5555555555555555UL);
+                return;
+            }
+#endif
+
+            Interleave.Expand64To128(x, 0, 4, zz, 0);
         }
     }
 }
diff --git a/crypto/src/math/ec/custom/sec/SecT283FieldElement.cs b/crypto/src/math/ec/custom/sec/SecT283FieldElement.cs
index 6bd720acd..2eee80fb7 100644
--- a/crypto/src/math/ec/custom/sec/SecT283FieldElement.cs
+++ b/crypto/src/math/ec/custom/sec/SecT283FieldElement.cs
@@ -132,7 +132,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             ulong[] xx = ((SecT283FieldElement)x).x, yx = ((SecT283FieldElement)y).x;
 
             ulong[] tt = Nat.Create64(9);
-            SecT283Field.SquareAddToExt(ax, tt);
+            SecT283Field.SquareExt(ax, tt);
             SecT283Field.MultiplyAddToExt(xx, yx, tt);
 
             ulong[] z = Nat320.Create64();
diff --git a/crypto/src/math/ec/custom/sec/SecT283K1Point.cs b/crypto/src/math/ec/custom/sec/SecT283K1Point.cs
index e5888daca..440c5d348 100644
--- a/crypto/src/math/ec/custom/sec/SecT283K1Point.cs
+++ b/crypto/src/math/ec/custom/sec/SecT283K1Point.cs
@@ -1,5 +1,7 @@
 using System;
 
+using Org.BouncyCastle.Math.Raw;
+
 namespace Org.BouncyCastle.Math.EC.Custom.Sec
 {
     internal class SecT283K1Point
@@ -66,8 +68,8 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 
             ECCurve curve = this.Curve;
 
-            ECFieldElement X1 = this.RawXCoord;
-            ECFieldElement X2 = b.RawXCoord;
+            SecT283FieldElement X1 = (SecT283FieldElement)this.RawXCoord;
+            SecT283FieldElement X2 = (SecT283FieldElement)b.RawXCoord;
 
             if (X1.IsZero)
             {
@@ -77,86 +79,101 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
                 return b.Add(this);
             }
 
-            ECFieldElement L1 = this.RawYCoord, Z1 = this.RawZCoords[0];
-            ECFieldElement L2 = b.RawYCoord, Z2 = b.RawZCoords[0];
+            SecT283FieldElement L1 = (SecT283FieldElement)this.RawYCoord, Z1 = (SecT283FieldElement)this.RawZCoords[0];
+            SecT283FieldElement L2 = (SecT283FieldElement)b.RawYCoord, Z2 = (SecT283FieldElement)b.RawZCoords[0];
+
+            ulong[] tt0 = Nat.Create64(9);
+            ulong[] t1 = Nat320.Create64();
+            ulong[] t2 = Nat320.Create64();
+            ulong[] t3 = Nat320.Create64();
 
             bool Z1IsOne = Z1.IsOne;
-            ECFieldElement U2 = X2, S2 = L2;
-            if (!Z1IsOne)
+            if (Z1IsOne)
+            {
+                Nat320.Copy64(X2.x, t1);                    // U2
+                Nat320.Copy64(L2.x, t2);                    // S2
+            }
+            else
             {
-                U2 = U2.Multiply(Z1);
-                S2 = S2.Multiply(Z1);
+                SecT283Field.Multiply(X2.x, Z1.x, t1);      // U2
+                SecT283Field.Multiply(L2.x, Z1.x, t2);      // S2
             }
 
             bool Z2IsOne = Z2.IsOne;
-            ECFieldElement U1 = X1, S1 = L1;
-            if (!Z2IsOne)
+            if (Z2IsOne)
             {
-                U1 = U1.Multiply(Z2);
-                S1 = S1.Multiply(Z2);
+                Nat320.Copy64(X1.x, t3);                    // U1
+                Nat320.Copy64(L1.x, tt0);                   // S1
+            }
+            else
+            {
+                SecT283Field.Multiply(X1.x, Z2.x, t3);      // U1
+                SecT283Field.Multiply(L1.x, Z2.x, tt0);     // S1
             }
 
-            ECFieldElement A = S1.Add(S2);
-            ECFieldElement B = U1.Add(U2);
+            SecT283Field.AddTo(tt0, t2);                    // A
+            SecT283Field.Add(t3, t1, tt0);                  // B
 
-            if (B.IsZero)
+            if (Nat320.IsZero64(tt0))
             {
-                if (A.IsZero)
+                if (Nat320.IsZero64(t2))
                     return Twice();
 
                 return curve.Infinity;
             }
 
-            ECFieldElement X3, L3, Z3;
             if (X2.IsZero)
             {
                 // TODO This can probably be optimized quite a bit
                 ECPoint p = this.Normalize();
-                X1 = p.XCoord;
+                X1 = (SecT283FieldElement)p.XCoord;
                 ECFieldElement Y1 = p.YCoord;
 
                 ECFieldElement Y2 = L2;
                 ECFieldElement L = Y1.Add(Y2).Divide(X1);
 
-                X3 = L.Square().Add(L).Add(X1);
+                ECFieldElement X3 = L.Square().Add(L).Add(X1);
                 if (X3.IsZero)
-                {
                     return new SecT283K1Point(curve, X3, curve.B);
-                }
 
                 ECFieldElement Y3 = L.Multiply(X1.Add(X3)).Add(X3).Add(Y1);
-                L3 = Y3.Divide(X3).Add(X3);
-                Z3 = curve.FromBigInteger(BigInteger.One);
+                ECFieldElement L3 = Y3.Divide(X3).Add(X3);
+                ECFieldElement Z3 = curve.FromBigInteger(BigInteger.One);
+
+                return new SecT283K1Point(curve, X3, L3, new ECFieldElement[]{ Z3 });
             }
-            else
-            {
-                B = B.Square();
 
-                ECFieldElement AU1 = A.Multiply(U1);
-                ECFieldElement AU2 = A.Multiply(U2);
+            SecT283Field.Square(tt0, tt0);
 
-                X3 = AU1.Multiply(AU2);
-                if (X3.IsZero)
-                {
-                    return new SecT283K1Point(curve, X3, curve.B);
-                }
+            SecT283Field.Multiply(t3, t2, t3);      // AU1
+            SecT283Field.Multiply(t1, t2, t1);      // AU2
 
-                ECFieldElement ABZ2 = A.Multiply(B);
-                if (!Z2IsOne)
-                {
-                    ABZ2 = ABZ2.Multiply(Z2);
-                }
+            ulong[] _X3 = t3;
+            SecT283Field.Multiply(_X3, t1, _X3);
+            if (Nat320.IsZero64(_X3))
+                return new SecT283K1Point(curve, new SecT283FieldElement(_X3), curve.B);
 
-                L3 = AU2.Add(B).SquarePlusProduct(ABZ2, L1.Add(Z1));
+            ulong[] _Z3 = t2;
+            SecT283Field.Multiply(_Z3, tt0, _Z3);   // ABZ2
+            if (!Z2IsOne)
+            {
+                SecT283Field.Multiply(_Z3, Z2.x, _Z3);
+            }
 
-                Z3 = ABZ2;
-                if (!Z1IsOne)
-                {
-                    Z3 = Z3.Multiply(Z1);
-                }
+            ulong[] _L3 = t1;
+            SecT283Field.AddTo(tt0, _L3);
+            SecT283Field.SquareExt(_L3, tt0);
+            SecT283Field.Add(L1.x, Z1.x, _L3);
+            SecT283Field.MultiplyAddToExt(_Z3, _L3, tt0);
+            SecT283Field.Reduce(tt0, _L3);
+
+            if (!Z1IsOne)
+            {
+                SecT283Field.Multiply(_Z3, Z1.x, _Z3);
             }
 
-            return new SecT283K1Point(curve, X3, L3, new ECFieldElement[] { Z3 });
+            return new SecT283K1Point(curve, new SecT283FieldElement(_X3), new SecT283FieldElement(_L3),
+                new ECFieldElement[]{ new SecT283FieldElement(_Z3) });
         }
 
         public override ECPoint Twice()
diff --git a/crypto/src/math/ec/custom/sec/SecT409Field.cs b/crypto/src/math/ec/custom/sec/SecT409Field.cs
index 414a094a8..c497d0223 100644
--- a/crypto/src/math/ec/custom/sec/SecT409Field.cs
+++ b/crypto/src/math/ec/custom/sec/SecT409Field.cs
@@ -44,7 +44,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             z[6] = x[6];
         }
 
-        private static void AddTo(ulong[] x, ulong[] z)
+        public static void AddTo(ulong[] x, ulong[] z)
         {
             z[0] ^= x[0];
             z[1] ^= x[1];
@@ -225,6 +225,11 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             AddExt(zz, tt, zz);
         }
 
+        public static void SquareExt(ulong[] x, ulong[] zz)
+        {
+            ImplSquare(x, zz);  // forwards to ImplSquare only; no Reduce call here, so zz is left in extended (unreduced) form
+        }
+
         public static void SquareN(ulong[] x, int n, ulong[] z)
         {
             Debug.Assert(n > 0);
@@ -393,8 +398,28 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 
         protected static void ImplSquare(ulong[] x, ulong[] zz)
         {
-            Interleave.Expand64To128(x, 0, 6, zz, 0);
             zz[12] = Interleave.Expand32to64((uint)x[6]);
+
+#if NETCOREAPP3_0_OR_GREATER
+            if (Bmi2.X64.IsSupported)   // PDEP with mask 0x5555... deposits the low 32 source bits onto the even bit positions
+            {
+                zz[11] = Bmi2.X64.ParallelBitDeposit(x[5] >> 32, 0x5555555555555555UL);
+                zz[10] = Bmi2.X64.ParallelBitDeposit(x[5]      , 0x5555555555555555UL);
+                zz[ 9] = Bmi2.X64.ParallelBitDeposit(x[4] >> 32, 0x5555555555555555UL);
+                zz[ 8] = Bmi2.X64.ParallelBitDeposit(x[4]      , 0x5555555555555555UL);
+                zz[ 7] = Bmi2.X64.ParallelBitDeposit(x[3] >> 32, 0x5555555555555555UL);
+                zz[ 6] = Bmi2.X64.ParallelBitDeposit(x[3]      , 0x5555555555555555UL);
+                zz[ 5] = Bmi2.X64.ParallelBitDeposit(x[2] >> 32, 0x5555555555555555UL);
+                zz[ 4] = Bmi2.X64.ParallelBitDeposit(x[2]      , 0x5555555555555555UL);
+                zz[ 3] = Bmi2.X64.ParallelBitDeposit(x[1] >> 32, 0x5555555555555555UL);
+                zz[ 2] = Bmi2.X64.ParallelBitDeposit(x[1]      , 0x5555555555555555UL);
+                zz[ 1] = Bmi2.X64.ParallelBitDeposit(x[0] >> 32, 0x5555555555555555UL);
+                zz[ 0] = Bmi2.X64.ParallelBitDeposit(x[0]      , 0x5555555555555555UL);
+                return;                 // zz[0..11] are written above (zz[12] before the #if), so the generic expansion is skipped
+            }
+#endif
+
+            Interleave.Expand64To128(x, 0, 6, zz, 0);   // path taken when the BMI2 branch above is not compiled in or not supported
         }
     }
 }
diff --git a/crypto/src/math/ec/custom/sec/SecT409FieldElement.cs b/crypto/src/math/ec/custom/sec/SecT409FieldElement.cs
index a9b08526a..d6cbd2591 100644
--- a/crypto/src/math/ec/custom/sec/SecT409FieldElement.cs
+++ b/crypto/src/math/ec/custom/sec/SecT409FieldElement.cs
@@ -132,7 +132,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             ulong[] xx = ((SecT409FieldElement)x).x, yx = ((SecT409FieldElement)y).x;
 
             ulong[] tt = Nat.Create64(13);
-            SecT409Field.SquareAddToExt(ax, tt);
+            SecT409Field.SquareExt(ax, tt);
             SecT409Field.MultiplyAddToExt(xx, yx, tt);
 
             ulong[] z = Nat448.Create64();
diff --git a/crypto/src/math/ec/custom/sec/SecT409K1Point.cs b/crypto/src/math/ec/custom/sec/SecT409K1Point.cs
index e98fc77f0..86a02896c 100644
--- a/crypto/src/math/ec/custom/sec/SecT409K1Point.cs
+++ b/crypto/src/math/ec/custom/sec/SecT409K1Point.cs
@@ -1,5 +1,7 @@
 using System;
 
+using Org.BouncyCastle.Math.Raw;
+
 namespace Org.BouncyCastle.Math.EC.Custom.Sec
 {
     internal class SecT409K1Point
@@ -66,8 +68,8 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 
             ECCurve curve = this.Curve;
 
-            ECFieldElement X1 = this.RawXCoord;
-            ECFieldElement X2 = b.RawXCoord;
+            SecT409FieldElement X1 = (SecT409FieldElement)this.RawXCoord;
+            SecT409FieldElement X2 = (SecT409FieldElement)b.RawXCoord;
 
             if (X1.IsZero)
             {
@@ -77,86 +79,101 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
                 return b.Add(this);
             }
 
-            ECFieldElement L1 = this.RawYCoord, Z1 = this.RawZCoords[0];
-            ECFieldElement L2 = b.RawYCoord, Z2 = b.RawZCoords[0];
+            SecT409FieldElement L1 = (SecT409FieldElement)this.RawYCoord, Z1 = (SecT409FieldElement)this.RawZCoords[0];
+            SecT409FieldElement L2 = (SecT409FieldElement)b.RawYCoord, Z2 = (SecT409FieldElement)b.RawZCoords[0];
+
+            ulong[] tt0 = Nat.Create64(13);
+            ulong[] t1 = Nat448.Create64();
+            ulong[] t2 = Nat448.Create64();
+            ulong[] t3 = Nat448.Create64();
 
             bool Z1IsOne = Z1.IsOne;
-            ECFieldElement U2 = X2, S2 = L2;
-            if (!Z1IsOne)
+            if (Z1IsOne)
+            {
+                Nat448.Copy64(X2.x, t1);                    // U2
+                Nat448.Copy64(L2.x, t2);                    // S2
+            }
+            else
             {
-                U2 = U2.Multiply(Z1);
-                S2 = S2.Multiply(Z1);
+                SecT409Field.Multiply(X2.x, Z1.x, t1);      // U2
+                SecT409Field.Multiply(L2.x, Z1.x, t2);      // S2
             }
 
             bool Z2IsOne = Z2.IsOne;
-            ECFieldElement U1 = X1, S1 = L1;
-            if (!Z2IsOne)
+            if (Z2IsOne)
             {
-                U1 = U1.Multiply(Z2);
-                S1 = S1.Multiply(Z2);
+                Nat448.Copy64(X1.x, t3);                    // U1
+                Nat448.Copy64(L1.x, tt0);                   // S1
+            }
+            else
+            {
+                SecT409Field.Multiply(X1.x, Z2.x, t3);      // U1
+                SecT409Field.Multiply(L1.x, Z2.x, tt0);     // S1
             }
 
-            ECFieldElement A = S1.Add(S2);
-            ECFieldElement B = U1.Add(U2);
+            SecT409Field.AddTo(tt0, t2);                    // A
+            SecT409Field.Add(t3, t1, tt0);                  // B
 
-            if (B.IsZero)
+            if (Nat448.IsZero64(tt0))
             {
-                if (A.IsZero)
+                if (Nat448.IsZero64(t2))
                     return Twice();
 
                 return curve.Infinity;
             }
 
-            ECFieldElement X3, L3, Z3;
             if (X2.IsZero)
             {
                 // TODO This can probably be optimized quite a bit
                 ECPoint p = this.Normalize();
-                X1 = p.XCoord;
+                X1 = (SecT409FieldElement)p.XCoord;
                 ECFieldElement Y1 = p.YCoord;
 
                 ECFieldElement Y2 = L2;
                 ECFieldElement L = Y1.Add(Y2).Divide(X1);
 
-                X3 = L.Square().Add(L).Add(X1);
+                ECFieldElement X3 = L.Square().Add(L).Add(X1);
                 if (X3.IsZero)
-                {
                     return new SecT409K1Point(curve, X3, curve.B);
-                }
 
                 ECFieldElement Y3 = L.Multiply(X1.Add(X3)).Add(X3).Add(Y1);
-                L3 = Y3.Divide(X3).Add(X3);
-                Z3 = curve.FromBigInteger(BigInteger.One);
+                ECFieldElement L3 = Y3.Divide(X3).Add(X3);
+                ECFieldElement Z3 = curve.FromBigInteger(BigInteger.One);
+
+                return new SecT409K1Point(curve, X3, L3, new ECFieldElement[]{ Z3 });
             }
-            else
-            {
-                B = B.Square();
 
-                ECFieldElement AU1 = A.Multiply(U1);
-                ECFieldElement AU2 = A.Multiply(U2);
+            SecT409Field.Square(tt0, tt0);
 
-                X3 = AU1.Multiply(AU2);
-                if (X3.IsZero)
-                {
-                    return new SecT409K1Point(curve, X3, curve.B);
-                }
+            SecT409Field.Multiply(t3, t2, t3);      // AU1
+            SecT409Field.Multiply(t1, t2, t1);      // AU2
 
-                ECFieldElement ABZ2 = A.Multiply(B);
-                if (!Z2IsOne)
-                {
-                    ABZ2 = ABZ2.Multiply(Z2);
-                }
+            ulong[] _X3 = t3;
+            SecT409Field.Multiply(_X3, t1, _X3);
+            if (Nat448.IsZero64(_X3))
+                return new SecT409K1Point(curve, new SecT409FieldElement(_X3), curve.B);
 
-                L3 = AU2.Add(B).SquarePlusProduct(ABZ2, L1.Add(Z1));
+            ulong[] _Z3 = t2;
+            SecT409Field.Multiply(_Z3, tt0, _Z3);   // ABZ2
+            if (!Z2IsOne)
+            {
+                SecT409Field.Multiply(_Z3, Z2.x, _Z3);
+            }
 
-                Z3 = ABZ2;
-                if (!Z1IsOne)
-                {
-                    Z3 = Z3.Multiply(Z1);
-                }
+            ulong[] _L3 = t1;
+            SecT409Field.AddTo(tt0, _L3);
+            SecT409Field.SquareExt(_L3, tt0);
+            SecT409Field.Add(L1.x, Z1.x, _L3);
+            SecT409Field.MultiplyAddToExt(_Z3, _L3, tt0);
+            SecT409Field.Reduce(tt0, _L3);
+
+            if (!Z1IsOne)
+            {
+                SecT409Field.Multiply(_Z3, Z1.x, _Z3);
             }
 
-            return new SecT409K1Point(curve, X3, L3, new ECFieldElement[] { Z3 });
+            return new SecT409K1Point(curve, new SecT409FieldElement(_X3), new SecT409FieldElement(_L3),
+                new ECFieldElement[]{ new SecT409FieldElement(_Z3) });
         }
 
         public override ECPoint Twice()
diff --git a/crypto/src/math/ec/custom/sec/SecT571Field.cs b/crypto/src/math/ec/custom/sec/SecT571Field.cs
index 49eaae2d4..b09a86ac3 100644
--- a/crypto/src/math/ec/custom/sec/SecT571Field.cs
+++ b/crypto/src/math/ec/custom/sec/SecT571Field.cs
@@ -57,7 +57,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             }
         }
 
-        private static void AddTo(ulong[] x, ulong[] z)
+        public static void AddTo(ulong[] x, ulong[] z)
         {
             Nat.XorTo64(9, x, z);
         }
@@ -253,6 +253,11 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             AddExt(zz, tt, zz);
         }
 
+        public static void SquareExt(ulong[] x, ulong[] zz)
+        {
+            ImplSquare(x, zz);  // forwards to ImplSquare only; no Reduce call here, so zz is left in extended (unreduced) form
+        }
+
         public static void SquareN(ulong[] x, int n, ulong[] z)
         {
             Debug.Assert(n > 0);
@@ -449,6 +454,31 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
 
         protected static void ImplSquare(ulong[] x, ulong[] zz)
         {
+#if NETCOREAPP3_0_OR_GREATER
+            if (Bmi2.X64.IsSupported)   // PDEP with mask 0x5555... deposits the low 32 source bits onto the even bit positions
+            {
+                zz[17] = Bmi2.X64.ParallelBitDeposit(x[8] >> 32, 0x5555555555555555UL);
+                zz[16] = Bmi2.X64.ParallelBitDeposit(x[8]      , 0x5555555555555555UL);
+                zz[15] = Bmi2.X64.ParallelBitDeposit(x[7] >> 32, 0x5555555555555555UL);
+                zz[14] = Bmi2.X64.ParallelBitDeposit(x[7]      , 0x5555555555555555UL);
+                zz[13] = Bmi2.X64.ParallelBitDeposit(x[6] >> 32, 0x5555555555555555UL);
+                zz[12] = Bmi2.X64.ParallelBitDeposit(x[6]      , 0x5555555555555555UL);
+                zz[11] = Bmi2.X64.ParallelBitDeposit(x[5] >> 32, 0x5555555555555555UL);
+                zz[10] = Bmi2.X64.ParallelBitDeposit(x[5]      , 0x5555555555555555UL);
+                zz[ 9] = Bmi2.X64.ParallelBitDeposit(x[4] >> 32, 0x5555555555555555UL);
+                zz[ 8] = Bmi2.X64.ParallelBitDeposit(x[4]      , 0x5555555555555555UL);
+                zz[ 7] = Bmi2.X64.ParallelBitDeposit(x[3] >> 32, 0x5555555555555555UL);
+                zz[ 6] = Bmi2.X64.ParallelBitDeposit(x[3]      , 0x5555555555555555UL);
+                zz[ 5] = Bmi2.X64.ParallelBitDeposit(x[2] >> 32, 0x5555555555555555UL);
+                zz[ 4] = Bmi2.X64.ParallelBitDeposit(x[2]      , 0x5555555555555555UL);
+                zz[ 3] = Bmi2.X64.ParallelBitDeposit(x[1] >> 32, 0x5555555555555555UL);
+                zz[ 2] = Bmi2.X64.ParallelBitDeposit(x[1]      , 0x5555555555555555UL);
+                zz[ 1] = Bmi2.X64.ParallelBitDeposit(x[0] >> 32, 0x5555555555555555UL);
+                zz[ 0] = Bmi2.X64.ParallelBitDeposit(x[0]      , 0x5555555555555555UL);
+                return;                 // zz[0..17] are written above, so the generic expansion below is skipped
+            }
+#endif
+
             Interleave.Expand64To128(x, 0, 9, zz, 0);
         }
     }
diff --git a/crypto/src/math/ec/custom/sec/SecT571FieldElement.cs b/crypto/src/math/ec/custom/sec/SecT571FieldElement.cs
index b9c581860..773df3939 100644
--- a/crypto/src/math/ec/custom/sec/SecT571FieldElement.cs
+++ b/crypto/src/math/ec/custom/sec/SecT571FieldElement.cs
@@ -132,7 +132,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
             ulong[] xx = ((SecT571FieldElement)x).x, yx = ((SecT571FieldElement)y).x;
 
             ulong[] tt = Nat576.CreateExt64();
-            SecT571Field.SquareAddToExt(ax, tt);
+            SecT571Field.SquareExt(ax, tt);
             SecT571Field.MultiplyAddToExt(xx, yx, tt);
 
             ulong[] z = Nat576.Create64();
diff --git a/crypto/src/math/ec/custom/sec/SecT571K1Point.cs b/crypto/src/math/ec/custom/sec/SecT571K1Point.cs
index e5c3cc62f..95af0629f 100644
--- a/crypto/src/math/ec/custom/sec/SecT571K1Point.cs
+++ b/crypto/src/math/ec/custom/sec/SecT571K1Point.cs
@@ -176,11 +176,10 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
                     SecT571Field.MultiplyPrecomp(Z3.x, Z2Precomp, Z3.x);
                 }
 
-                //L3 = AU2.Add(B).SquarePlusProduct(ABZ2, L1.Add(Z1));
                 ulong[] tt = Nat576.CreateExt64();
 
                 SecT571Field.Add(AU2, B, t4);
-                SecT571Field.SquareAddToExt(t4, tt);
+                SecT571Field.SquareExt(t4, tt);
 
                 SecT571Field.Add(L1.x, Z1.x, t4);
                 SecT571Field.MultiplyAddToExt(t4, Z3.x, tt);
diff --git a/crypto/src/math/ec/custom/sec/SecT571R1Point.cs b/crypto/src/math/ec/custom/sec/SecT571R1Point.cs
index 1ba7909f8..986885db6 100644
--- a/crypto/src/math/ec/custom/sec/SecT571R1Point.cs
+++ b/crypto/src/math/ec/custom/sec/SecT571R1Point.cs
@@ -179,7 +179,7 @@ namespace Org.BouncyCastle.Math.EC.Custom.Sec
                 ulong[] tt = Nat576.CreateExt64();
 
                 SecT571Field.Add(AU2, B, t4);
-                SecT571Field.SquareAddToExt(t4, tt);
+                SecT571Field.SquareExt(t4, tt);
 
                 SecT571Field.Add(L1.x, Z1.x, t4);
                 SecT571Field.MultiplyAddToExt(t4, Z3.x, tt);