diff options
author | Peter Dettman <peter.dettman@bouncycastle.org> | 2015-08-14 17:51:55 +0700 |
---|---|---|
committer | Peter Dettman <peter.dettman@bouncycastle.org> | 2015-08-14 17:51:55 +0700 |
commit | 4d3b04a09a74783f6800e3425c16dc01d091642b (patch) | |
tree | 96400c965c6adb676162dd7ab0d1b22a5bb47b57 | |
parent | Fix 64-bit multiply (not used) (diff) | |
download | BouncyCastle.NET-ed25519-4d3b04a09a74783f6800e3425c16dc01d091642b.tar.xz |
Optimize (Inv_)Mcol methods in AES engines
-rw-r--r-- | crypto/src/crypto/engines/AesEngine.cs | 23 | ||||
-rw-r--r-- | crypto/src/crypto/engines/AesFastEngine.cs | 23 | ||||
-rw-r--r-- | crypto/src/crypto/engines/AesLightEngine.cs | 29 |
3 files changed, 55 insertions, 20 deletions
diff --git a/crypto/src/crypto/engines/AesEngine.cs b/crypto/src/crypto/engines/AesEngine.cs index 9d7f76c05..164c43ee9 100644 --- a/crypto/src/crypto/engines/AesEngine.cs +++ b/crypto/src/crypto/engines/AesEngine.cs @@ -237,12 +237,22 @@ namespace Org.BouncyCastle.Crypto.Engines private const uint m1 = 0x80808080; private const uint m2 = 0x7f7f7f7f; private const uint m3 = 0x0000001b; + private const uint m4 = 0xC0C0C0C0; + private const uint m5 = 0x3f3f3f3f; private static uint FFmulX(uint x) { return ((x & m2) << 1) ^ (((x & m1) >> 7) * m3); } + private static uint FFmulX2(uint x) + { + uint t0 = (x & m5) << 2; + uint t1 = (x & m4); + t1 ^= (t1 >> 1); + return t0 ^ (t1 >> 2) ^ (t1 >> 5); + } + /* The following defines provide alternative definitions of FFmulX that might give improved performance if a fast 32-bit multiply is not available. @@ -255,12 +265,13 @@ namespace Org.BouncyCastle.Crypto.Engines private static uint Inv_Mcol(uint x) { - uint f2 = FFmulX(x); - uint f4 = FFmulX(f2); - uint f8 = FFmulX(f4); - uint f9 = x ^ f8; - - return f2 ^ f4 ^ f8 ^ Shift(f2 ^ f9, 8) ^ Shift(f4 ^ f9, 16) ^ Shift(f9, 24); + uint t0, t1; + t0 = x; + t1 = t0 ^ Shift(t0, 8); + t0 ^= FFmulX(t1); + t1 ^= FFmulX2(t0); + t0 ^= t1 ^ Shift(t1, 16); + return t0; } private static uint SubWord(uint x) diff --git a/crypto/src/crypto/engines/AesFastEngine.cs b/crypto/src/crypto/engines/AesFastEngine.cs index a1b544568..38ce1a946 100644 --- a/crypto/src/crypto/engines/AesFastEngine.cs +++ b/crypto/src/crypto/engines/AesFastEngine.cs @@ -573,12 +573,22 @@ namespace Org.BouncyCastle.Crypto.Engines private const uint m1 = 0x80808080; private const uint m2 = 0x7f7f7f7f; private const uint m3 = 0x0000001b; + private const uint m4 = 0xC0C0C0C0; + private const uint m5 = 0x3f3f3f3f; private static uint FFmulX(uint x) { return ((x & m2) << 1) ^ (((x & m1) >> 7) * m3); } + private static uint FFmulX2(uint x) + { + uint t0 = (x & m5) << 2; + uint t1 = (x & m4); + t1 ^= (t1 >> 1); + return t0 ^ (t1 >> 2) ^ (t1 >> 5); + } + /* The following defines provide alternative definitions of FFmulX that might give improved performance if a fast 32-bit multiply is not available. @@ -591,12 +601,13 @@ namespace Org.BouncyCastle.Crypto.Engines private static uint Inv_Mcol(uint x) { - uint f2 = FFmulX(x); - uint f4 = FFmulX(f2); - uint f8 = FFmulX(f4); - uint f9 = x ^ f8; - - return f2 ^ f4 ^ f8 ^ Shift(f2 ^ f9, 8) ^ Shift(f4 ^ f9, 16) ^ Shift(f9, 24); + uint t0, t1; + t0 = x; + t1 = t0 ^ Shift(t0, 8); + t0 ^= FFmulX(t1); + t1 ^= FFmulX2(t0); + t0 ^= t1 ^ Shift(t1, 16); + return t0; } private static uint SubWord(uint x) diff --git a/crypto/src/crypto/engines/AesLightEngine.cs b/crypto/src/crypto/engines/AesLightEngine.cs index a6b9e3bd4..a42b34971 100644 --- a/crypto/src/crypto/engines/AesLightEngine.cs +++ b/crypto/src/crypto/engines/AesLightEngine.cs @@ -126,12 +126,22 @@ namespace Org.BouncyCastle.Crypto.Engines private const uint m1 = 0x80808080; private const uint m2 = 0x7f7f7f7f; private const uint m3 = 0x0000001b; + private const uint m4 = 0xC0C0C0C0; + private const uint m5 = 0x3f3f3f3f; private static uint FFmulX(uint x) { return ((x & m2) << 1) ^ (((x & m1) >> 7) * m3); } + private static uint FFmulX2(uint x) + { + uint t0 = (x & m5) << 2; + uint t1 = (x & m4); + t1 ^= (t1 >> 1); + return t0 ^ (t1 >> 2) ^ (t1 >> 5); + } + /* The following defines provide alternative definitions of FFmulX that might give improved performance if a fast 32-bit multiply is not available. @@ -144,18 +154,21 @@ namespace Org.BouncyCastle.Crypto.Engines private static uint Mcol(uint x) { - uint f2 = FFmulX(x); - return f2 ^ Shift(x ^ f2, 8) ^ Shift(x, 16) ^ Shift(x, 24); + uint t0, t1; + t0 = Shift(x, 8); + t1 = x ^ t0; + return Shift(t1, 16) ^ t0 ^ FFmulX(t1); } private static uint Inv_Mcol(uint x) { - uint f2 = FFmulX(x); - uint f4 = FFmulX(f2); - uint f8 = FFmulX(f4); - uint f9 = x ^ f8; - - return f2 ^ f4 ^ f8 ^ Shift(f2 ^ f9, 8) ^ Shift(f4 ^ f9, 16) ^ Shift(f9, 24); + uint t0, t1; + t0 = x; + t1 = t0 ^ Shift(t0, 8); + t0 ^= FFmulX(t1); + t1 ^= FFmulX2(t0); + t0 ^= t1 ^ Shift(t1, 16); + return t0; } private static uint SubWord(uint x) |