20
|
1 ???using System; |
|
2 |
|
3 namespace UCIS.NaCl.crypto_scalarmult { |
|
4 unsafe public static class curve25519 { |
|
// Size constants carried over from the NaCl API. Neither is referenced by the
// code visible in this file; presumably the result and scalar lengths in bytes
// (TODO confirm against callers elsewhere in the project).
const int CRYPTO_BYTES = 32;
const int CRYPTO_SCALARBYTES = 32;

// Both tables below are never written to, so they are declared readonly to
// prevent the references from being reassigned by accident.

// 32-byte little-endian encoding of the value 9; passed as the point argument
// by the crypto_scalarmult_base overloads.
static readonly Byte[] basev = new Byte[32] {
    9, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0
}; //[32] = {9};

// One byte per UInt32 limb, least significant first: 19 + 128 * 2^248 = 2^255 + 19,
// i.e. -(2^255 - 19) modulo 2^256. freeze() adds this to perform a trial subtraction
// of the prime.
static readonly UInt32[] minusp = new UInt32[32] {
    19, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 128
};
|
11 |
|
// Scalar multiplication with the fixed point stored in basev.
//   q - destination; crypto_scalarmult writes 32 bytes here
//   n - scalar; crypto_scalarmult reads 32 bytes from here
// basev is pinned for the duration of the call so that it can be handed to the
// pointer-based crypto_scalarmult.
public static void crypto_scalarmult_base(Byte* q, Byte* n) {
    fixed (Byte* basePoint = basev) {
        crypto_scalarmult(q, n, basePoint);
    }
}
|
// Array overload of crypto_scalarmult_base: multiplies the fixed point in basev
// by the scalar n and stores the 32-byte result in q.
//   q - destination, at least 32 bytes (crypto_scalarmult writes q[0..31])
//   n - scalar, at least 32 bytes (crypto_scalarmult reads n[0..31])
// Throws ArgumentNullException for a null array and ArgumentException for an
// array shorter than 32 bytes. Without these checks a null or short array would
// be pinned and then read/written out of bounds by the unsafe code.
public static void crypto_scalarmult_base(Byte[] q, Byte[] n) {
    if (q == null) throw new ArgumentNullException("q");
    if (n == null) throw new ArgumentNullException("n");
    if (q.Length < 32) throw new ArgumentException("Output buffer must hold at least 32 bytes.", "q");
    if (n.Length < 32) throw new ArgumentException("Scalar must be at least 32 bytes.", "n");

    fixed (Byte* basevp = basev, qp = q, np = n) {
        crypto_scalarmult(qp, np, basevp);
    }
}
|
18 |
|
// Array convenience overload: pins all three 32-limb arrays and forwards to the
// pointer-based add, which stores the limb-wise sum of a and b in outv.
static void add(UInt32[] outv, UInt32[] a, UInt32[] b) { //outv[32],a[32],b[32]
    fixed (UInt32* sum = outv)
    fixed (UInt32* lhs = a)
    fixed (UInt32* rhs = b) {
        add(sum, lhs, rhs);
    }
}
|
// Mixed overload: outv and a are managed arrays that get pinned here, while b is
// already a raw pointer (callers use it to address the upper half of a 64-limb
// array). Forwards to the pointer-based add.
static void add(UInt32[] outv, UInt32[] a, UInt32* b) {
    fixed (UInt32* sum = outv)
    fixed (UInt32* lhs = a) {
        add(sum, lhs, b);
    }
}
|
// outv = a + b over 32 limbs, least significant limb first.
// Limbs 0..30 are reduced to 8 bits with the excess carried upward; limb 31
// receives the final carry and is stored without masking.
// Each a[i]/b[i] is read before outv[i] is written, so outv may alias a (freeze
// relies on this).
static void add(UInt32* outv, UInt32* a, UInt32* b) {
    UInt32 carry = 0;
    int limb = 0;
    while (limb < 31) {
        carry += a[limb] + b[limb];
        outv[limb] = carry & 0xFF;
        carry >>= 8;
        limb++;
    }
    carry += a[31] + b[31];
    outv[31] = carry;
}
|
30 |
|
// outv = a - b + 2 * (2^255 - 19) over 32 limbs, least significant limb first.
// The initial 218 and the per-limb 65280 (= 255 * 256) together add
// 218 + 255 * (256 + ... + 256^31) = 2^256 - 38, twice the prime, so the
// intermediate sums of limbs 0..30 stay non-negative when b[j] is byte-sized.
// Limbs 0..30 are reduced to 8 bits; limb 31 is stored without masking.
//
// The whole body is unchecked on purpose: "a[31] - b[31]" wraps below zero
// whenever b[31] > a[31] and is brought back by the following addition. In a
// build with overflow checking enabled that would otherwise throw.
static void sub(UInt32* outv, UInt32[] a, UInt32* b) {//outv[32], a[32], b[32]
    unchecked {
        UInt32 u = 218;
        for (int j = 0; j < 31; ++j) {
            u += a[j] + 65280 - b[j];
            outv[j] = u & 255;
            u >>= 8;
        }
        u += a[31] - b[31];
        outv[31] = u;
    }
}
|
41 |
|
// Carry-propagates the 32 limbs of a in place.
// Pass 1 reduces limbs 0..30 to 8 bits and limb 31 to 7 bits. Whatever
// overflowed above bit 255 is multiplied by 19 and fed back in at limb 0
// (2^255 is congruent to 19 modulo 2^255 - 19).
// Pass 2 propagates that feedback; limb 31 then takes the last carry unmasked.
static void squeeze(UInt32* a) { //a[32]
    UInt32 acc = 0;
    int k;

    for (k = 0; k < 31; k++) {
        acc += a[k];
        a[k] = acc & 0xFF;
        acc >>= 8;
    }
    acc += a[31];
    a[31] = acc & 0x7F;

    acc = 19 * (acc >> 7);
    for (k = 0; k < 31; k++) {
        acc += a[k];
        a[k] = acc & 0xFF;
        acc >>= 8;
    }
    acc += a[31];
    a[31] = acc;
}
|
50 |
|
// Conditionally replaces a with a + minusp (a trial subtraction of the prime).
// After the addition, bit 7 of limb 31 decides the outcome:
//   set   -> the original limbs are restored,
//   clear -> the sum is kept.
// The choice is applied through an all-ones/all-zeros mask instead of a branch.
// add() leaves limb 31 unmasked, so a kept sum can carry bits above the low 8 in
// that limb.
static void freeze(UInt32* a) { //a[32]
    UInt32[] aorig = new UInt32[32];
    for (int j = 0; j < 32; ++j) aorig[j] = a[j];

    fixed (UInt32* minuspp = minusp) add(a, a, minuspp);

    // Negating a UInt32 yields a long; narrowing -1 back to UInt32 must wrap to
    // 0xFFFFFFFF. Marked unchecked so it does not throw in a build with overflow
    // checking enabled.
    UInt32 negative = unchecked((UInt32)(-((a[31] >> 7) & 1)));

    for (int j = 0; j < 32; ++j) a[j] ^= negative & (aorig[j] ^ a[j]);
}
|
58 |
|
// Array convenience overload: pins the three 32-limb arrays and forwards to the
// pointer-based mult.
static void mult(UInt32[] outv, UInt32[] a, UInt32[] b) { //outv[32], a[32], b[32]
    fixed (UInt32* product = outv)
    fixed (UInt32* lhs = a)
    fixed (UInt32* rhs = b) {
        mult(product, lhs, rhs);
    }
}
|
// outv = a * b over 32 limbs, followed by squeeze(outv).
// For output limb i the products a[j] * b[i - j] (j <= i) are summed directly.
// The products that would land in limb i + 32 (j > i) are folded back with a
// factor of 38 (2^256 is congruent to 38 modulo 2^255 - 19).
// outv[i] is written while a and b are still being read for later limbs, so
// outv must not overlap either input.
static void mult(UInt32* outv, UInt32* a, UInt32* b) {
    for (uint i = 0; i < 32; ++i) {
        UInt32 acc = 0;
        UInt32 j = 0;
        for (; j <= i; ++j) {
            acc += a[j] * b[i - j];
        }
        // j == i + 1 here; continue with the wrapped-around products.
        for (; j < 32; ++j) {
            acc += 38 * a[j] * b[i + 32 - j];
        }
        outv[i] = acc;
    }
    squeeze(outv);
}
|
72 |
|
// outv = 121665 * a over 32 limbs, with the same two-pass carry handling that
// squeeze uses: limbs 0..30 are reduced to 8 bits and limb 31 to 7 bits, the
// overflow above bit 255 is multiplied by 19 and added back at limb 0, and
// limb 31 finally absorbs the last carry unmasked.
static void mult121665(UInt32[] outv, UInt32[] a) { //outv[32], a[32]
    UInt32 carry = 0;

    for (int k = 0; k < 31; k++) {
        carry += 121665 * a[k];
        outv[k] = carry & 0xFF;
        carry >>= 8;
    }
    carry += 121665 * a[31];
    outv[31] = carry & 0x7F;

    carry = 19 * (carry >> 7);
    for (int k = 0; k < 31; k++) {
        carry += outv[k];
        outv[k] = carry & 0xFF;
        carry >>= 8;
    }
    carry += outv[31];
    outv[31] = carry;
}
|
82 |
|
// Array convenience overload: pins both 32-limb arrays and forwards to the
// pointer-based square.
static void square(UInt32[] outv, UInt32[] a) { //outv[32], a[32]
    fixed (UInt32* result = outv)
    fixed (UInt32* operand = a) {
        square(result, operand);
    }
}
|
// outv = a * a over 32 limbs, followed by squeeze(outv).
// Same result as mult(outv, a, a), but each pair of symmetric cross products is
// computed once and doubled. The wrapped-around products carry the factor 38.
// For even i the two "diagonal" products (index i/2 and i/2 + 16) have no
// partner and are added after the doubling.
// outv must not overlap a.
static void square(UInt32* outv, UInt32* a) {
    for (uint i = 0; i < 32; ++i) {
        UInt32 u = 0;
        UInt32 j = 0;

        // Cross products landing directly in limb i.
        while (j < i - j) {
            u += a[j] * a[i - j];
            ++j;
        }

        // Cross products landing in limb i + 32, folded back with factor 38.
        j = i + 1;
        while (j < i + 32 - j) {
            u += 38 * a[j] * a[i + 32 - j];
            ++j;
        }

        u *= 2;

        if ((i & 1) == 0) {
            uint half = i / 2;
            u += a[half] * a[half];
            u += 38 * a[half + 16] * a[half + 16];
        }

        outv[i] = u;
    }
    squeeze(outv);
}
|
101 |
|
// Mask-based conditional swap over 64 elements, with no branch on b:
//   b == 1 -> p = s, q = r
//   b == 0 -> p = r, q = s
// b is expected to be 0 or 1 (mainloop masks it with "& 1").
// b - 1 is 0 for b == 1 and must wrap to 0xFFFFFFFF for b == 0. It is marked
// unchecked so the wrap does not throw in a build with overflow checking enabled.
static void select(UInt32[] p, UInt32[] q, UInt32[] r, UInt32[] s, UInt32 b) { //p[64], q[64], r[64], s[64]
    UInt32 bminus1 = unchecked(b - 1);
    for (int j = 0; j < 64; ++j) {
        UInt32 t = bminus1 & (r[j] ^ s[j]);
        p[j] = s[j] ^ t;
        q[j] = r[j] ^ t;
    }
}
|
110 |
|
// Core loop of the scalar multiplication.
//   work - on entry work[0..31] holds the input limbs; on exit work[0..63] holds
//          the final xzm pair (two 32-limb halves)
//   e    - 32-byte scalar; bits 254 down to 0 are consumed, most significant first
// Every 64-limb array is treated as a pair of 32-limb values (lower half, upper
// half). Each iteration swaps the two running pairs according to the current
// scalar bit, performs a fixed sequence of add/sub/square/mult operations, and
// swaps back. The operation sequence is the same for either bit value.
// NOTE(review): this looks like the Montgomery ladder of the NaCl reference code;
// confirm before documenting it as such elsewhere.
static void mainloop(UInt32[] work, Byte[] e) { //work[64], e[32]
    UInt32[] xzm1 = new UInt32[64], xzm = new UInt32[64];
    UInt32[] xzmb = new UInt32[64], xzm1b = new UInt32[64];
    UInt32[] xznb = new UInt32[64], xzn1b = new UInt32[64];
    UInt32[] a0 = new UInt32[64], a1 = new UInt32[64];
    UInt32[] b0 = new UInt32[64], b1 = new UInt32[64];
    UInt32[] c1 = new UInt32[64];
    UInt32[] r = new UInt32[32], s = new UInt32[32];
    UInt32[] t = new UInt32[32], u = new UInt32[32];

    // xzm1 = (input, 1). Freshly allocated arrays are already zero-filled, so
    // only the non-zero limbs need to be written.
    for (int k = 0; k < 32; k++) {
        xzm1[k] = work[k];
    }
    xzm1[32] = 1;

    // xzm = (1, 0)
    xzm[0] = 1;

    fixed (UInt32* pXzmb = xzmb, pXzm1b = xzm1b, pXznb = xznb, pXzn1b = xzn1b,
                   pA0 = a0, pA1 = a1, pB0 = b0, pB1 = b1, pC1 = c1,
                   pR = r, pS = s, pU = u, pWork = work) {
        int pos = 254;
        while (pos >= 0) {
            // Current scalar bit (0 or 1).
            UInt32 b = (UInt32)(e[pos / 8] >> (pos & 7));
            b &= 1;

            select(xzmb, xzm1b, xzm, xzm1, b);

            add(a0, xzmb, pXzmb + 32);
            sub(pA0 + 32, xzmb, pXzmb + 32);
            add(a1, xzm1b, pXzm1b + 32);
            sub(pA1 + 32, xzm1b, pXzm1b + 32);

            square(pB0, pA0);
            square(pB0 + 32, pA0 + 32);
            mult(pB1, pA1, pA0 + 32);
            mult(pB1 + 32, pA1 + 32, pA0);

            add(c1, b1, pB1 + 32);
            sub(pC1 + 32, b1, pB1 + 32);

            square(pR, pC1 + 32);
            sub(pS, b0, pB0 + 32);
            mult121665(t, s);
            add(u, t, pB0);

            mult(pXznb, pB0, pB0 + 32);
            mult(pXznb + 32, pS, pU);
            square(pXzn1b, pC1);
            mult(pXzn1b + 32, pR, pWork);

            select(xzm, xzm1, xznb, xzn1b, b);

            pos--;
        }
    }

    for (int k = 0; k < 64; k++) {
        work[k] = xzm[k];
    }
}
|
164 |
|
// outv = z raised to the power 2^255 - 21, built from a fixed chain of squarings
// and multiplications. For the prime 2^255 - 19 that exponent is p - 2, which
// gives the multiplicative inverse of z by Fermat's little theorem.
// The comment before each step names the exponent of z held by that step's
// result. z is only read and outv is written by the very last multiplication,
// so outv may be the same buffer as z (crypto_scalarmult calls it that way).
static void recip(UInt32* outv, UInt32* z) { //outv[32], z[32]
    UInt32[] z2 = new UInt32[32];
    UInt32[] z9 = new UInt32[32];
    UInt32[] z11 = new UInt32[32];
    UInt32[] z2_5_0 = new UInt32[32];
    UInt32[] z2_10_0 = new UInt32[32];
    UInt32[] z2_20_0 = new UInt32[32];
    UInt32[] z2_50_0 = new UInt32[32];
    UInt32[] z2_100_0 = new UInt32[32];
    UInt32[] t0 = new UInt32[32];
    UInt32[] t1 = new UInt32[32];
    int k;

    /* 2 */
    fixed (UInt32* pZ2 = z2) {
        square(pZ2, z);
    }
    /* 4 */
    square(t1, z2);
    /* 8 */
    square(t0, t1);
    /* 9 */
    fixed (UInt32* pZ9 = z9, pT0 = t0) {
        mult(pZ9, pT0, z);
    }
    /* 11 */
    mult(z11, z9, z2);
    /* 22 */
    square(t0, z11);
    /* 2^5 - 2^0 = 31 */
    mult(z2_5_0, t0, z9);

    /* 2^6 - 2^1 */
    square(t0, z2_5_0);
    /* 2^7 - 2^2 */
    square(t1, t0);
    /* 2^8 - 2^3 */
    square(t0, t1);
    /* 2^9 - 2^4 */
    square(t1, t0);
    /* 2^10 - 2^5 */
    square(t0, t1);
    /* 2^10 - 2^0 */
    mult(z2_10_0, t0, z2_5_0);

    /* 2^11 - 2^1 */
    square(t0, z2_10_0);
    /* 2^12 - 2^2 */
    square(t1, t0);
    /* 2^20 - 2^10 : 4 double-squarings */
    for (k = 0; k < 4; k++) {
        square(t0, t1);
        square(t1, t0);
    }
    /* 2^20 - 2^0 */
    mult(z2_20_0, t1, z2_10_0);

    /* 2^21 - 2^1 */
    square(t0, z2_20_0);
    /* 2^22 - 2^2 */
    square(t1, t0);
    /* 2^40 - 2^20 : 9 double-squarings */
    for (k = 0; k < 9; k++) {
        square(t0, t1);
        square(t1, t0);
    }
    /* 2^40 - 2^0 */
    mult(t0, t1, z2_20_0);

    /* 2^41 - 2^1 */
    square(t1, t0);
    /* 2^42 - 2^2 */
    square(t0, t1);
    /* 2^50 - 2^10 : 4 double-squarings */
    for (k = 0; k < 4; k++) {
        square(t1, t0);
        square(t0, t1);
    }
    /* 2^50 - 2^0 */
    mult(z2_50_0, t0, z2_10_0);

    /* 2^51 - 2^1 */
    square(t0, z2_50_0);
    /* 2^52 - 2^2 */
    square(t1, t0);
    /* 2^100 - 2^50 : 24 double-squarings */
    for (k = 0; k < 24; k++) {
        square(t0, t1);
        square(t1, t0);
    }
    /* 2^100 - 2^0 */
    mult(z2_100_0, t1, z2_50_0);

    /* 2^101 - 2^1 */
    square(t1, z2_100_0);
    /* 2^102 - 2^2 */
    square(t0, t1);
    /* 2^200 - 2^100 : 49 double-squarings */
    for (k = 0; k < 49; k++) {
        square(t1, t0);
        square(t0, t1);
    }
    /* 2^200 - 2^0 */
    mult(t1, t0, z2_100_0);

    /* 2^201 - 2^1 */
    square(t0, t1);
    /* 2^202 - 2^2 */
    square(t1, t0);
    /* 2^250 - 2^50 : 24 double-squarings */
    for (k = 0; k < 24; k++) {
        square(t0, t1);
        square(t1, t0);
    }
    /* 2^250 - 2^0 */
    mult(t0, t1, z2_50_0);

    /* 2^251 - 2^1 */
    square(t1, t0);
    /* 2^252 - 2^2 */
    square(t0, t1);
    /* 2^253 - 2^3 */
    square(t1, t0);
    /* 2^254 - 2^4 */
    square(t0, t1);
    /* 2^255 - 2^5 */
    square(t1, t0);
    /* 2^255 - 21 */
    fixed (UInt32* pT1 = t1, pZ11 = z11) {
        mult(outv, pT1, pZ11);
    }
}
|
272 |
|
// Scalar multiplication: q = n * p.
//   q - destination; 32 bytes are written
//   n - scalar; 32 bytes are read. A private copy is clamped before use: the low
//       3 bits are cleared, bit 255 is cleared and bit 254 is set.
//   p - input point; 32 bytes are read
// Throws ArgumentNullException if any pointer is null.
//
// Layout of the 96-limb work buffer (one byte value per UInt32 limb):
//   [0..31]  first half of the mainloop result
//   [32..63] second half of the mainloop result, replaced in place by its reciprocal
//   [64..95] product of the two, reduced by freeze and copied out to q
public static void crypto_scalarmult(Byte* q, Byte* n, Byte* p) {
    if (q == null) throw new ArgumentNullException("q");
    if (n == null) throw new ArgumentNullException("n");
    if (p == null) throw new ArgumentNullException("p");

    UInt32[] work = new UInt32[96];
    Byte[] e = new Byte[32];

    for (int i = 0; i < 32; ++i) e[i] = n[i];
    e[0] &= 248;
    e[31] &= 127;
    e[31] |= 64;

    for (int i = 0; i < 32; ++i) work[i] = p[i];

    mainloop(work, e);

    fixed (UInt32* workp = work) {
        recip(workp + 32, workp + 32);
        mult(workp + 64, workp, workp + 32);
        freeze(workp + 64);
    }

    // freeze can leave limb 95 above 255 (add does not mask the top limb), so
    // this narrowing must truncate. Marked unchecked so it does not throw in a
    // build with overflow checking enabled.
    for (int i = 0; i < 32; ++i) q[i] = unchecked((Byte)work[64 + i]);

    // Best-effort wipe of the clamped scalar and the intermediate values.
    Array.Clear(e, 0, e.Length);
    Array.Clear(work, 0, work.Length);
}
|
289 } |
|
290 } |