summary refs log tree commit diff
path: root/crypto/src/math/raw/Nat512.cs
blob: 37cf0b6ffcb869a2acbcfbee42518c13b38fec5e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
using System;
#if NETCOREAPP3_0_OR_GREATER
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif

namespace Org.BouncyCastle.Math.Raw
{
    internal abstract class Nat512
    {
        /// <summary>
        /// Multiplies <paramref name="x"/> by <paramref name="y"/> and stores the result in
        /// <paramref name="zz"/>.
        /// </summary>
        /// <remarks>
        /// The work is split into three half-size <c>Nat256.Mul</c> calls: one on the halves at offset 0,
        /// one on the halves at offset 8, and one on the half differences produced by <c>Nat256.Diff</c>.
        /// NOTE(review): this looks like a subtractive Karatsuba scheme over 16-word operands with a
        /// 32-word result; the exact contracts of the Nat256/Nat helpers are not visible here - confirm.
        /// </remarks>
        /// <param name="x">First operand; words 0..15 are read.</param>
        /// <param name="y">Second operand; words 0..15 are read.</param>
        /// <param name="zz">Output buffer; accessed at offsets 0, 8, 16 and 24.</param>
        public static void Mul(uint[] x, uint[] y, uint[] zz)
        {
            // Two half-size products: offset-0 halves first, then offset-8 halves into zz at offset 16.
            Nat256.Mul(x, y, zz);
            Nat256.Mul(x, 8, y, 8, zz, 16);

            // Combine the partial products across the middle of zz, tracking the carries
            // that belong at word 16 and word 24.
            uint carryHi = Nat256.AddToEachOther(zz, 8, zz, 16);
            uint carryMid = carryHi + Nat256.AddTo(zz, 0, zz, 8, 0);
            carryHi += Nat256.AddTo(zz, 24, zz, 16, carryMid);

            // Half differences of each operand; the two Diff results decide the sign of the cross term.
            uint[] deltaX = Nat256.Create();
            uint[] deltaY = Nat256.Create();
            var flagX = Nat256.Diff(x, 8, x, 0, deltaX, 0);
            var flagY = Nat256.Diff(y, 8, y, 0, deltaY, 0);
            bool neg = flagX != flagY;

            uint[] cross = Nat256.CreateExt();
            Nat256.Mul(deltaX, deltaY, cross);

            // Apply the cross term at word 8: added when the Diff results disagree, subtracted otherwise.
            if (neg)
            {
                carryHi += Nat.AddTo(16, cross, 0, zz, 8);
            }
            else
            {
                carryHi += (uint)Nat.SubFrom(16, cross, 0, zz, 8);
            }

            // Push the accumulated carry into the top of the result.
            Nat.AddWordAt(32, carryHi, zz, 24);
        }

        /// <summary>
        /// Squares <paramref name="x"/> and stores the result in <paramref name="zz"/>.
        /// </summary>
        /// <remarks>
        /// Uses three half-size <c>Nat256.Square</c> calls: the half at offset 0, the half at offset 8,
        /// and the difference of the two halves produced by <c>Nat256.Diff</c>.
        /// NOTE(review): this mirrors the structure of <c>Mul</c> with both operands equal; the exact
        /// contracts of the Nat256/Nat helpers are not visible here - confirm.
        /// </remarks>
        /// <param name="x">Operand; words 0..15 are read.</param>
        /// <param name="zz">Output buffer; accessed at offsets 0, 8, 16 and 24.</param>
        public static void Square(uint[] x, uint[] zz)
        {
            // Squares of the two halves: offset-0 half first, then offset-8 half into zz at offset 16.
            Nat256.Square(x, zz);
            Nat256.Square(x, 8, zz, 16);

            // Combine the partial squares across the middle of zz, tracking the carries
            // that belong at word 16 and word 24.
            uint carryHi = Nat256.AddToEachOther(zz, 8, zz, 16);
            uint carryMid = carryHi + Nat256.AddTo(zz, 0, zz, 8, 0);
            carryHi += Nat256.AddTo(zz, 24, zz, 16, carryMid);

            // Difference of the halves; the value returned by Diff is not needed here
            // because the square is always subtracted below.
            uint[] delta = Nat256.Create();
            Nat256.Diff(x, 8, x, 0, delta, 0);

            uint[] deltaSquared = Nat256.CreateExt();
            Nat256.Square(delta, deltaSquared);

            // Subtract the squared difference at word 8, then push the carry into the top of the result.
            carryHi += (uint)Nat.SubFrom(16, deltaSquared, 0, zz, 8);
            Nat.AddWordAt(32, carryHi, zz, 24);
        }

        /// <summary>
        /// XORs 16 words of <paramref name="x"/> (from <paramref name="xOff"/>) with 16 words of
        /// <paramref name="y"/> (from <paramref name="yOff"/>) and stores the result in
        /// <paramref name="z"/> starting at <paramref name="zOff"/>.
        /// </summary>
        /// <param name="x">First input array.</param>
        /// <param name="xOff">Index of the first word read from <paramref name="x"/>.</param>
        /// <param name="y">Second input array.</param>
        /// <param name="yOff">Index of the first word read from <paramref name="y"/>.</param>
        /// <param name="z">Output array.</param>
        /// <param name="zOff">Index of the first word written to <paramref name="z"/>.</param>
        public static void Xor(uint[] x, int xOff, uint[] y, int yOff, uint[] z, int zOff)
        {
#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
            // Where spans are available, hand the offset views to the span-based overload.
            Span<uint> xView = x.AsSpan(xOff);
            Span<uint> yView = y.AsSpan(yOff);
            Span<uint> zView = z.AsSpan(zOff);
            Xor(xView, yView, zView);
#else
            // Plain word-by-word fallback, in ascending index order.
            for (int word = 0; word < 16; ++word)
            {
                z[zOff + word] = x[xOff + word] ^ y[yOff + word];
            }
#endif
        }

#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER
        /// <summary>
        /// XORs the first 16 words of <paramref name="x"/> with the first 16 words of
        /// <paramref name="y"/> and stores the result in the first 16 words of <paramref name="z"/>.
        /// </summary>
        /// <remarks>
        /// When available, the 64 bytes are processed as two 256-bit vectors (AVX2) or four 128-bit
        /// vectors (SSE2); otherwise a word-by-word loop is used. In the vector paths every input is
        /// loaded before any output is stored.
        /// </remarks>
        /// <param name="x">First input; at least 16 words.</param>
        /// <param name="y">Second input; at least 16 words.</param>
        /// <param name="z">Output; at least 16 words.</param>
        public static void Xor(ReadOnlySpan<uint> x, ReadOnlySpan<uint> y, Span<uint> z)
        {
#if NETCOREAPP3_0_OR_GREATER
            if (Avx2.IsSupported && Unsafe.SizeOf<Vector256<byte>>() == 32)
            {
                // View exactly 16 words of each buffer as 64 bytes.
                ReadOnlySpan<byte> xBytes = MemoryMarshal.Cast<uint, byte>(x.Slice(0, 16));
                ReadOnlySpan<byte> yBytes = MemoryMarshal.Cast<uint, byte>(y.Slice(0, 16));
                Span<byte> zBytes = MemoryMarshal.Cast<uint, byte>(z.Slice(0, 16));

                // Load both 32-byte halves of each input before storing anything.
                var xLo = MemoryMarshal.Read<Vector256<byte>>(xBytes.Slice(0, 32));
                var xHi = MemoryMarshal.Read<Vector256<byte>>(xBytes.Slice(32, 32));

                var yLo = MemoryMarshal.Read<Vector256<byte>>(yBytes.Slice(0, 32));
                var yHi = MemoryMarshal.Read<Vector256<byte>>(yBytes.Slice(32, 32));

                var zLo = Avx2.Xor(xLo, yLo);
                var zHi = Avx2.Xor(xHi, yHi);

                MemoryMarshal.Write(zBytes.Slice(0, 32), ref zLo);
                MemoryMarshal.Write(zBytes.Slice(32, 32), ref zHi);
                return;
            }

            if (Sse2.IsSupported && Unsafe.SizeOf<Vector128<byte>>() == 16)
            {
                // View exactly 16 words of each buffer as 64 bytes.
                ReadOnlySpan<byte> xBytes = MemoryMarshal.Cast<uint, byte>(x.Slice(0, 16));
                ReadOnlySpan<byte> yBytes = MemoryMarshal.Cast<uint, byte>(y.Slice(0, 16));
                Span<byte> zBytes = MemoryMarshal.Cast<uint, byte>(z.Slice(0, 16));

                // Load all four 16-byte quarters of each input before storing anything.
                var xQ0 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(0, 16));
                var xQ1 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(16, 16));
                var xQ2 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(32, 16));
                var xQ3 = MemoryMarshal.Read<Vector128<byte>>(xBytes.Slice(48, 16));

                var yQ0 = MemoryMarshal.Read<Vector128<byte>>(yBytes.Slice(0, 16));
                var yQ1 = MemoryMarshal.Read<Vector128<byte>>(yBytes.Slice(16, 16));
                var yQ2 = MemoryMarshal.Read<Vector128<byte>>(yBytes.Slice(32, 16));
                var yQ3 = MemoryMarshal.Read<Vector128<byte>>(yBytes.Slice(48, 16));

                var zQ0 = Sse2.Xor(xQ0, yQ0);
                var zQ1 = Sse2.Xor(xQ1, yQ1);
                var zQ2 = Sse2.Xor(xQ2, yQ2);
                var zQ3 = Sse2.Xor(xQ3, yQ3);

                MemoryMarshal.Write(zBytes.Slice(0, 16), ref zQ0);
                MemoryMarshal.Write(zBytes.Slice(16, 16), ref zQ1);
                MemoryMarshal.Write(zBytes.Slice(32, 16), ref zQ2);
                MemoryMarshal.Write(zBytes.Slice(48, 16), ref zQ3);
                return;
            }
#endif

            // Scalar fallback: one word at a time, in ascending index order.
            for (int word = 0; word < 16; ++word)
            {
                z[word] = x[word] ^ y[word];
            }
        }
#endif
    }
}