20
|
using System;
|
2 |
|
namespace UCIS.NaCl.crypto_core {
    /// <summary>
    /// Salsa20 core function: maps a 16-byte input block, a 32-byte key and a
    /// 16-byte constant to a 64-byte output block. A managed implementation is
    /// always available; a native one can be switched on with
    /// <see cref="EnableNativeImplementation"/>.
    /// </summary>
    static unsafe class salsa20 {
        // When true, crypto_core forwards to Native.crypto_core_salsa20 instead of
        // running the managed rounds below.
        static Boolean UseNativeFunctions = false;

        /// <summary>
        /// Probes the native Salsa20 core and, if the probe succeeds, routes all
        /// subsequent <c>crypto_core</c> calls to it.
        /// </summary>
        /// <returns>
        /// <c>true</c> if the native call completed and returned 0 (native mode is
        /// now enabled); <c>false</c> if it returned non-zero or threw (the managed
        /// implementation stays active).
        /// </returns>
        internal static unsafe Boolean EnableNativeImplementation() {
            // Fall back to managed code for the duration of the probe.
            UseNativeFunctions = false;

            // One 64-byte scratch buffer serves as output, input, key and constant;
            // it is large enough for the biggest of them (OUTPUTBYTES).
            Byte* scratch = stackalloc Byte[64];
            try {
                if (Native.crypto_core_salsa20(scratch, scratch, scratch, scratch) != 0) {
                    return false;
                }
            } catch (Exception) {
                // Deliberate best-effort: any failure of the probe (presumably a
                // missing or incompatible native library) keeps the managed path.
                return false;
            }

            UseNativeFunctions = true;
            return true;
        }

        /// <summary>Number of bytes written to the output block.</summary>
        public const int OUTPUTBYTES = 64;
        /// <summary>Number of bytes read from the input block.</summary>
        public const int INPUTBYTES = 16;
        /// <summary>Number of bytes read from the key.</summary>
        public const int KEYBYTES = 32;
        /// <summary>Number of bytes read from the constant.</summary>
        public const int CONSTBYTES = 16;

        /// <summary>Total number of rounds; the managed loop performs two per iteration.</summary>
        public const int ROUNDS = 20;

        /// <summary>Reads four bytes at <paramref name="x"/> as a little-endian 32-bit word.</summary>
        static UInt32 load_littleendian(Byte* x) {
            // Widen every byte to UInt32 before shifting so the result never passes
            // through a negative Int32, which would throw when cast in a checked build.
            return ((UInt32)x[0])
                | (((UInt32)x[1]) << 8)
                | (((UInt32)x[2]) << 16)
                | (((UInt32)x[3]) << 24);
        }

        /// <summary>Writes <paramref name="u"/> to four bytes at <paramref name="x"/>, least significant byte first.</summary>
        static void store_littleendian(Byte* x, UInt32 u) {
            // The narrowing casts intentionally discard the upper bits; without
            // 'unchecked' they throw OverflowException in a checked build.
            unchecked {
                x[0] = (Byte)u;
                x[1] = (Byte)(u >> 8);
                x[2] = (Byte)(u >> 16);
                x[3] = (Byte)(u >> 24);
            }
        }

        /// <summary>
        /// Computes the Salsa20 core with the constant supplied as a managed array.
        /// </summary>
        /// <param name="outv">Destination for 64 output bytes.</param>
        /// <param name="inv">Source of 16 input bytes.</param>
        /// <param name="k">Source of 32 key bytes.</param>
        /// <param name="c">Constant; the first 16 bytes are used.</param>
        /// <exception cref="ArgumentNullException"><paramref name="c"/> is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="c"/> holds fewer than 16 bytes.</exception>
        public static void crypto_core(Byte* outv, Byte* inv, Byte* k, Byte[] c) {
            // 'fixed' yields a null pointer for a null or empty array, and a short
            // array would be read past its end, so reject both up front.
            if (c == null) {
                throw new ArgumentNullException("c");
            }
            if (c.Length < CONSTBYTES) {
                throw new ArgumentException("c must contain at least " + CONSTBYTES + " bytes", "c");
            }

            fixed (Byte* cp = c) {
                crypto_core(outv, inv, k, cp);
            }
        }

        /// <summary>
        /// Computes the Salsa20 core. All inputs are fully read before the first
        /// output byte is written.
        /// </summary>
        /// <param name="outv">Destination for 64 output bytes.</param>
        /// <param name="inv">Source of 16 input bytes.</param>
        /// <param name="k">Source of 32 key bytes.</param>
        /// <param name="c">Source of 16 constant bytes.</param>
        public static void crypto_core(Byte* outv, Byte* inv, Byte* k, Byte* c) {
            if (UseNativeFunctions) {
                // The native status code is not inspected here.
                Native.crypto_core_salsa20(outv, inv, k, c);
                return;
            }

            // Salsa20 arithmetic is defined modulo 2^32; 'unchecked' keeps the
            // additions wrapping regardless of the compiler's overflow-checking option.
            unchecked {
                UInt32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
                UInt32 j0, j1, j2, j3, j4, j5, j6, j7, j8, j9, j10, j11, j12, j13, j14, j15;

                // Initial 4x4 word state: constant words at positions 0, 5, 10 and 15,
                // key words at 1-4 and 11-14, input words at 6-9. The j-copies are
                // kept for the feed-forward addition after the rounds.
                j0 = x0 = load_littleendian(c + 0);
                j1 = x1 = load_littleendian(k + 0);
                j2 = x2 = load_littleendian(k + 4);
                j3 = x3 = load_littleendian(k + 8);
                j4 = x4 = load_littleendian(k + 12);
                j5 = x5 = load_littleendian(c + 4);
                j6 = x6 = load_littleendian(inv + 0);
                j7 = x7 = load_littleendian(inv + 4);
                j8 = x8 = load_littleendian(inv + 8);
                j9 = x9 = load_littleendian(inv + 12);
                j10 = x10 = load_littleendian(c + 8);
                j11 = x11 = load_littleendian(k + 16);
                j12 = x12 = load_littleendian(k + 20);
                j13 = x13 = load_littleendian(k + 24);
                j14 = x14 = load_littleendian(k + 28);
                j15 = x15 = load_littleendian(c + 12);

                // Each iteration is one double round, so ROUNDS / 2 iterations in total.
                for (int i = ROUNDS; i > 0; i -= 2) {
                    UInt32 tsum;

                    // Column round.
                    tsum = x0 + x12; x4 ^= (tsum << 7) | (tsum >> (32 - 7));
                    tsum = x4 + x0; x8 ^= (tsum << 9) | (tsum >> (32 - 9));
                    tsum = x8 + x4; x12 ^= (tsum << 13) | (tsum >> (32 - 13));
                    tsum = x12 + x8; x0 ^= (tsum << 18) | (tsum >> (32 - 18));
                    tsum = x5 + x1; x9 ^= (tsum << 7) | (tsum >> (32 - 7));
                    tsum = x9 + x5; x13 ^= (tsum << 9) | (tsum >> (32 - 9));
                    tsum = x13 + x9; x1 ^= (tsum << 13) | (tsum >> (32 - 13));
                    tsum = x1 + x13; x5 ^= (tsum << 18) | (tsum >> (32 - 18));
                    tsum = x10 + x6; x14 ^= (tsum << 7) | (tsum >> (32 - 7));
                    tsum = x14 + x10; x2 ^= (tsum << 9) | (tsum >> (32 - 9));
                    tsum = x2 + x14; x6 ^= (tsum << 13) | (tsum >> (32 - 13));
                    tsum = x6 + x2; x10 ^= (tsum << 18) | (tsum >> (32 - 18));
                    tsum = x15 + x11; x3 ^= (tsum << 7) | (tsum >> (32 - 7));
                    tsum = x3 + x15; x7 ^= (tsum << 9) | (tsum >> (32 - 9));
                    tsum = x7 + x3; x11 ^= (tsum << 13) | (tsum >> (32 - 13));
                    tsum = x11 + x7; x15 ^= (tsum << 18) | (tsum >> (32 - 18));

                    // Row round.
                    tsum = x0 + x3; x1 ^= (tsum << 7) | (tsum >> (32 - 7));
                    tsum = x1 + x0; x2 ^= (tsum << 9) | (tsum >> (32 - 9));
                    tsum = x2 + x1; x3 ^= (tsum << 13) | (tsum >> (32 - 13));
                    tsum = x3 + x2; x0 ^= (tsum << 18) | (tsum >> (32 - 18));
                    tsum = x5 + x4; x6 ^= (tsum << 7) | (tsum >> (32 - 7));
                    tsum = x6 + x5; x7 ^= (tsum << 9) | (tsum >> (32 - 9));
                    tsum = x7 + x6; x4 ^= (tsum << 13) | (tsum >> (32 - 13));
                    tsum = x4 + x7; x5 ^= (tsum << 18) | (tsum >> (32 - 18));
                    tsum = x10 + x9; x11 ^= (tsum << 7) | (tsum >> (32 - 7));
                    tsum = x11 + x10; x8 ^= (tsum << 9) | (tsum >> (32 - 9));
                    tsum = x8 + x11; x9 ^= (tsum << 13) | (tsum >> (32 - 13));
                    tsum = x9 + x8; x10 ^= (tsum << 18) | (tsum >> (32 - 18));
                    tsum = x15 + x14; x12 ^= (tsum << 7) | (tsum >> (32 - 7));
                    tsum = x12 + x15; x13 ^= (tsum << 9) | (tsum >> (32 - 9));
                    tsum = x13 + x12; x14 ^= (tsum << 13) | (tsum >> (32 - 13));
                    tsum = x14 + x13; x15 ^= (tsum << 18) | (tsum >> (32 - 18));
                }

                // Feed-forward: add the original state back in, word by word.
                x0 += j0;
                x1 += j1;
                x2 += j2;
                x3 += j3;
                x4 += j4;
                x5 += j5;
                x6 += j6;
                x7 += j7;
                x8 += j8;
                x9 += j9;
                x10 += j10;
                x11 += j11;
                x12 += j12;
                x13 += j13;
                x14 += j14;
                x15 += j15;

                // Serialize the 16 state words as little-endian bytes.
                store_littleendian(outv + 0, x0);
                store_littleendian(outv + 4, x1);
                store_littleendian(outv + 8, x2);
                store_littleendian(outv + 12, x3);
                store_littleendian(outv + 16, x4);
                store_littleendian(outv + 20, x5);
                store_littleendian(outv + 24, x6);
                store_littleendian(outv + 28, x7);
                store_littleendian(outv + 32, x8);
                store_littleendian(outv + 36, x9);
                store_littleendian(outv + 40, x10);
                store_littleendian(outv + 44, x11);
                store_littleendian(outv + 48, x12);
                store_littleendian(outv + 52, x13);
                store_littleendian(outv + 56, x14);
                store_littleendian(outv + 60, x15);
            }
        }
    }
}