changeset 73:6aca18ee4ec6

NaCl: improved ed25519 implementation, added simple API for ed25519 and sha512
author Ivo Smits <Ivo@UCIS.nl>
date Sat, 02 Nov 2013 16:01:09 +0100
parents b7d981ccd434
children 8f31b164ce7e 50d4aed66c67
files NaCl/APIv2.cs NaCl/crypto_sign/ed25519.cs
diffstat 2 files changed, 1088 insertions(+), 1200 deletions(-) [+]
line wrap: on
line diff
--- a/NaCl/APIv2.cs	Sat Nov 02 15:59:51 2013 +0100
+++ b/NaCl/APIv2.cs	Sat Nov 02 16:01:09 2013 +0100
@@ -4,6 +4,8 @@
 using curve25519xsalsa20poly1305impl = UCIS.NaCl.crypto_box.curve25519xsalsa20poly1305;
 using edwards25519sha512batchimpl = UCIS.NaCl.crypto_sign.edwards25519sha512batch;
 using xsalsa20poly1305impl = UCIS.NaCl.crypto_secretbox.xsalsa20poly1305;
+using sha512impl = UCIS.NaCl.crypto_hash.sha512;
+using ed25519impl = UCIS.NaCl.crypto_sign.ed25519;
 
 namespace UCIS.NaCl.v2 {
 	public class curve25519keypair {
@@ -208,29 +210,147 @@
 		}
 	}
 	public class edwards25519sha512batch {
-		public Byte[] Sign(Byte[] message, Byte[] secretkey) {
+		public static Byte[] Sign(Byte[] message, Byte[] secretkey) { // Returns the result of the underlying crypto_sign(message, secretkey).
 			return edwards25519sha512batchimpl.crypto_sign(message, secretkey);
 		}
-		public int GetSignedSize(int size) {
+		public static int GetSignedSize(int size) { // Signed size for a payload of `size` bytes: 64 bytes are added.
 			return size + 64;
 		}
-		public Byte[] Open(Byte[] signed, Byte[] publickey) {
+		public static Byte[] Open(Byte[] signed, Byte[] publickey) { // Returns the result of the underlying crypto_sign_open(signed, publickey).
 			return edwards25519sha512batchimpl.crypto_sign_open(signed, publickey);
 		}
-		public unsafe Boolean Verify(Byte[] signed, Byte[] publickey) {
+		public static unsafe Boolean Verify(Byte[] signed, Byte[] publickey) { // True when crypto_sign_open (called with a null output buffer) returns 0; throws ArgumentException if publickey is not PUBLICKEYBYTES long.
 			if (publickey.Length != edwards25519sha512batchimpl.PUBLICKEYBYTES) throw new ArgumentException("publickey.Length != PUBLICKEYBYTES");
 			UInt64 mlen;
 			fixed (Byte* smp = signed, pkp = publickey) return edwards25519sha512batchimpl.crypto_sign_open(null, out mlen, smp, (ulong)signed.Length, pkp) == 0;
 		}
-		public Byte[] Extract(Byte[] signed) {
+		public static Byte[] Extract(Byte[] signed) { // Copies signed.Length - 64 bytes starting at offset 32, without verifying anything; returns null for inputs shorter than 64 bytes.
 			if (signed.Length < 64) return null;
 			Byte[] ret = new Byte[signed.Length - 64];
 			Buffer.BlockCopy(signed, 32, ret, 0, ret.Length);
 			return ret;
 		}
-		public int GetExtractedSize(int size) {
+		public static int GetExtractedSize(int size) { // Payload size for a signed message of `size` bytes, or -1 when size < 64.
 			if (size < 64) return -1;
 			return size - 64;
 		}
 	}
+	public class sha512 { // SHA-512 front end over sha512impl.sha512state: incremental (instance) and one-shot (static) use.
+		sha512impl.sha512state state = new sha512impl.sha512state();
+		public sha512() { // Starts a new hash computation.
+			state.init();
+		}
+		public unsafe void Process(Byte[] buffer, int offset, int count) { // Feeds buffer[offset .. offset+count) into the running hash; throws ArgumentException on a bad range.
+			if (offset < 0 || count < 0 || count > buffer.Length - offset) throw new ArgumentException("buffer"); // subtraction form cannot overflow, unlike offset + count
+			fixed (Byte* p = buffer) state.process(p + offset, count);
+		}
+		public unsafe void GetHash(Byte[] hash, int offset) { // Writes the 64-byte result of state.finish to hash[offset .. offset+64).
+			if (offset < 0 || offset > hash.Length - 64) throw new ArgumentException("hash");
+			fixed (Byte* p = hash) state.finish(p + offset);
+		}
+		public unsafe Byte[] GetHash() { // Same as above, into a newly allocated 64-byte array.
+			Byte[] hash = new Byte[64];
+			GetHash(hash, 0);
+			return hash;
+		}
+		public static unsafe void GetHash(Byte[] buffer, int offset, int count, Byte[] hash, int hashoffset) { // One-shot: hashes buffer[offset .. offset+count) into hash[hashoffset .. hashoffset+64).
+			if (offset < 0 || count < 0 || count > buffer.Length - offset) throw new ArgumentException("buffer");
+			if (hashoffset < 0 || hashoffset > hash.Length - 64) throw new ArgumentException("hash"); // validate the output position, not the input offset
+			sha512impl.sha512state state = new sha512impl.sha512state();
+			state.init();
+			fixed (Byte* p = buffer) state.process(p + offset, count);
+			fixed (Byte* p = hash) state.finish(p + hashoffset); // the digest goes where the caller asked for it
+		}
+		public static unsafe Byte[] GetHash(Byte[] buffer, int offset, int count) { // One-shot into a newly allocated 64-byte array.
+			Byte[] hash = new Byte[64];
+			GetHash(buffer,offset,count,hash,0);
+			return hash;
+		}
+	}
+	public class ed25519keypair { // Wraps one Ed25519 key blob (kept in `key`) and signs with it via the static ed25519 helpers.
+		internal Byte[] key;
+
+		public ed25519keypair() { // Generates a new key pair; only the secret-key output is retained.
+			Byte[] ignoredPublicKey;
+			ed25519impl.crypto_sign_keypair(out ignoredPublicKey, out this.key);
+		}
+		public ed25519keypair(Byte[] key) { // A 64-byte input goes through ArrayUtil.ToArray; any other length is handed to crypto_sign_seed_keypair as the seed.
+			if (key.Length != 64) {
+				Byte[] ignoredPublicKey;
+				ed25519impl.crypto_sign_seed_keypair(out ignoredPublicKey, out this.key, key);
+			} else {
+				this.key = ArrayUtil.ToArray(key);
+			}
+		}
+		public ed25519keypair(String key) : this(curve25519keypair.DecodeHexString(key, key.Length)) { } // Hex-string variant of the Byte[] constructor.
+		public Byte[] PublicKey { get { return ArrayUtil.Slice(this.key, 32, 32); } } // slice (32, 32) of the stored blob
+		public Byte[] SecretKey { get { return ArrayUtil.Slice(this.key, 0, 32); } } // slice (0, 32) of the stored blob
+		public Byte[] ExpandedKey { get { return ArrayUtil.ToArray(this.key); } } // the whole stored blob via ArrayUtil.ToArray
+
+		public Byte[] GetSignature(Byte[] message) { // Signature over the whole message.
+			return ed25519.GetSignature(message, this.key);
+		}
+		public Byte[] GetSignature(Byte[] message, int offset, int count) { // Signature over message[offset .. offset+count).
+			return ed25519.GetSignature(new ArraySegment<Byte>(message, offset, count), this.key);
+		}
+		public Byte[] SignMessage(Byte[] message) { // Result of ed25519.SignMessage for the stored key.
+			return ed25519.SignMessage(message, this.key);
+		}
+	}
+	public class ed25519 { // Static helpers over ed25519impl: 64-byte signatures and signature-prefixed ("signed") messages.
+		public static unsafe Byte[] GetSignature(Byte[] message, Byte[] key) { // Returns the 64-byte buffer filled by crypto_getsignature for the whole message; key must be 64 bytes.
+			if (message == null) throw new ArgumentNullException("message");
+			if (key.Length != 64) throw new ArgumentException("key");
+			Byte[] sig = new Byte[64];
+			fixed (Byte* sigp = sig, msgp = message, kp = key) ed25519impl.crypto_getsignature(sigp, msgp, message.Length, kp);
+			return sig;
+		}
+		public static unsafe Byte[] GetSignature(ArraySegment<Byte> message, Byte[] key) { // Same, for the bytes covered by the segment.
+			if (message.Array == null) throw new ArgumentNullException("message"); // ArraySegment is a struct; only its Array can be null (default segment)
+			if (key.Length != 64) throw new ArgumentException("key");
+			if (message.Offset < 0 || message.Count < 0 || message.Count > message.Array.Length - message.Offset) throw new ArgumentException("message");
+			Byte[] sig = new Byte[64];
+			fixed (Byte* sigp = sig, msgp = message.Array, kp = key) ed25519impl.crypto_getsignature(sigp, msgp + message.Offset, message.Count, kp);
+			return sig;
+		}
+		public static unsafe Byte[] SignMessage(Byte[] message, Byte[] key) { // Returns a new array of message.Length + 64 bytes written by crypto_sign; key must be 64 bytes.
+			if (key.Length != 64) throw new ArgumentException("key");
+			Byte[] ret = new Byte[message.Length + 64];
+			int smlen;
+			fixed (Byte* sm = ret, msgp = message, kp = key) ed25519impl.crypto_sign(sm, out smlen, msgp, message.Length, kp);
+			return ret;
+		}
+		public static unsafe Boolean VerifySignature(Byte[] message, Byte[] signature, Byte[] pk) { // Result of crypto_sign_verify for a separate signature (>= 64 bytes) and public key (>= 32 bytes).
+			if (signature.Length < 64) throw new ArgumentException("signature");
+			if (pk.Length < 32) throw new ArgumentException("pk");
+			fixed (Byte* sp = signature, mp = message, kp = pk) return ed25519impl.crypto_sign_verify(sp, mp, message.Length, kp);
+		}
+		public static unsafe Boolean VerifySignature(ArraySegment<Byte> message, ArraySegment<Byte> signature, Byte[] pk) { // Segment variant; throws ArgumentException for segments that are null-backed or overrun their array.
+			if (signature.Array == null || signature.Offset < 0 || signature.Count < 64 || signature.Count > signature.Array.Length - signature.Offset) throw new ArgumentException("signature"); // reject overruns only; a segment may end before the array does
+			if (message.Array == null || message.Offset < 0 || message.Count < 0 || message.Count > message.Array.Length - message.Offset) throw new ArgumentException("message");
+			if (pk.Length < 32) throw new ArgumentException("pk");
+			fixed (Byte* sp = signature.Array, mp = message.Array, kp = pk) return ed25519impl.crypto_sign_verify(sp + signature.Offset, mp + message.Offset, message.Count, kp);
+		}
+		public static unsafe Boolean VerifySignedMessage(Byte[] signedmessage, Byte[] pk) { // Treats the first 64 bytes as the signature and the remainder as the message.
+			if (signedmessage.Length < 64) throw new ArgumentException("signedmessage");
+			if (pk.Length < 32) throw new ArgumentException("pk");
+			fixed (Byte* mp = signedmessage, kp = pk) return ed25519impl.crypto_sign_verify(mp, mp + 64, signedmessage.Length - 64, kp);
+		}
+		public static Byte[] ExtractSignedMessage(Byte[] signedmessage) { // Returns the bytes after the 64-byte prefix; performs no verification.
+			return ArrayUtil.Slice(signedmessage, 64);
+		}
+		public static Byte[] ExtractSignedMessage(ArraySegment<Byte> signedmessage) { // Segment variant of the above; performs no verification.
+			return ArrayUtil.Slice(signedmessage.Array, signedmessage.Offset + 64, signedmessage.Count - 64);
+		}
+		public static ArraySegment<Byte> ExtractSignedMessageFast(Byte[] signedmessage) { // Returns a segment over the same array (no copy) that skips the 64-byte prefix.
+			return new ArraySegment<Byte>(signedmessage, 64, signedmessage.Length - 64);
+		}
+		public static ArraySegment<Byte> ExtractSignedMessageFast(ArraySegment<Byte> signedmessage) { // Segment variant of the above.
+			return new ArraySegment<Byte>(signedmessage.Array, signedmessage.Offset + 64, signedmessage.Count - 64);
+		}
+		public static Byte[] OpenSignedMessage(Byte[] signedmessage, Byte[] pk) { // Returns the extracted message when verification succeeds, otherwise null.
+			if (!VerifySignedMessage(signedmessage, pk)) return null;
+			return ExtractSignedMessage(signedmessage);
+		}
+	}
 }
--- a/NaCl/crypto_sign/ed25519.cs	Sat Nov 02 15:59:51 2013 +0100
+++ b/NaCl/crypto_sign/ed25519.cs	Sat Nov 02 16:01:09 2013 +0100
@@ -1,5 +1,6 @@
 using System;
 using System.Text;
+using UCIS.NaCl.crypto_hash;
 
 namespace UCIS.NaCl.crypto_sign {
 	public static class ed25519 {
@@ -9,13 +10,728 @@
 		public const int BYTES = 64;
 
 		unsafe struct fe {
-			fixed Int32 v[10];
-			public int this[int index] {
-				get { fixed (Int32* vp = v) return vp[index]; }
-				set { fixed (Int32* vp = v) vp[index] = value; }
+			Int32 v0, v1, v2, v3, v4, v5, v6, v7, v8, v9;
+			public override string ToString() { // Renders the ten limbs v0..v9 as a comma-separated list.
+				return String.Format("{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}", v0, v1, v2, v3, v4, v5, v6, v7, v8, v9);
+			}
+			public fe(int offset, Int32[] data) { // Loads the ten limbs from data[offset] .. data[offset + 9], in order.
+				v0 = data[offset + 0];
+				v1 = data[offset + 1];
+				v2 = data[offset + 2];
+				v3 = data[offset + 3];
+				v4 = data[offset + 4];
+				v5 = data[offset + 5];
+				v6 = data[offset + 6];
+				v7 = data[offset + 7];
+				v8 = data[offset + 8];
+				v9 = data[offset + 9];
+			}
+			public void set_zero() { // Sets every limb to 0 (the value 0).
+				v0 = v1 = v2 = v3 = v4 = v5 = v6 = v7 = v8 = v9 = 0;
+			}
+			public void set_one() { // Sets the lowest limb to 1 and all others to 0 (the value 1).
+				v0 = 1;
+				v1 = v2 = v3 = v4 = v5 = v6 = v7 = v8 = v9 = 0;
+			}
+			public void cmov(ref fe g, Int32 b) { // Branch-free conditional move: this becomes g when b == 1 and is unchanged when b == 0.
+				b = -b; // 1 -> all-ones mask, 0 -> zero mask
+				v0 ^= (v0 ^ g.v0) & b; // v ^ (v ^ g) == g when the mask is all ones, v when it is zero
+				v1 ^= (v1 ^ g.v1) & b;
+				v2 ^= (v2 ^ g.v2) & b;
+				v3 ^= (v3 ^ g.v3) & b;
+				v4 ^= (v4 ^ g.v4) & b;
+				v5 ^= (v5 ^ g.v5) & b;
+				v6 ^= (v6 ^ g.v6) & b;
+				v7 ^= (v7 ^ g.v7) & b;
+				v8 ^= (v8 ^ g.v8) & b;
+				v9 ^= (v9 ^ g.v9) & b;
+			}
+			public void neg() { // Negates in place, limb by limb; no carry propagation is performed.
+				v0 = -v0;
+				v1 = -v1;
+				v2 = -v2;
+				v3 = -v3;
+				v4 = -v4;
+				v5 = -v5;
+				v6 = -v6;
+				v7 = -v7;
+				v8 = -v8;
+				v9 = -v9;
+			}
+			public void add(ref fe g) { // this += g, limb by limb; no carry propagation is performed.
+				v0 += g.v0;
+				v1 += g.v1;
+				v2 += g.v2;
+				v3 += g.v3;
+				v4 += g.v4;
+				v5 += g.v5;
+				v6 += g.v6;
+				v7 += g.v7;
+				v8 += g.v8;
+				v9 += g.v9;
+			}
+			public void sub(ref fe g) { // this -= g, limb by limb; no carry propagation is performed.
+				v0 -= g.v0;
+				v1 -= g.v1;
+				v2 -= g.v2;
+				v3 -= g.v3;
+				v4 -= g.v4;
+				v5 -= g.v5;
+				v6 -= g.v6;
+				v7 -= g.v7;
+				v8 -= g.v8;
+				v9 -= g.v9;
+			}
+			public void mul(ref fe g) { // this = this * g. All limbs of both operands are read into locals first, so g may alias this.
+				Int32 f0 = v0;
+				Int32 f1 = v1;
+				Int32 f2 = v2;
+				Int32 f3 = v3;
+				Int32 f4 = v4;
+				Int32 f5 = v5;
+				Int32 f6 = v6;
+				Int32 f7 = v7;
+				Int32 f8 = v8;
+				Int32 f9 = v9;
+				Int32 g0 = g.v0;
+				Int32 g1 = g.v1;
+				Int32 g2 = g.v2;
+				Int32 g3 = g.v3;
+				Int32 g4 = g.v4;
+				Int32 g5 = g.v5;
+				Int32 g6 = g.v6;
+				Int32 g7 = g.v7;
+				Int32 g8 = g.v8;
+				Int32 g9 = g.v9;
+				Int32 g1_19 = 19 * g1; /* 1.959375*2^29 */
+				Int32 g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
+				Int32 g3_19 = 19 * g3;
+				Int32 g4_19 = 19 * g4;
+				Int32 g5_19 = 19 * g5;
+				Int32 g6_19 = 19 * g6;
+				Int32 g7_19 = 19 * g7;
+				Int32 g8_19 = 19 * g8;
+				Int32 g9_19 = 19 * g9;
+				Int32 f1_2 = 2 * f1;
+				Int32 f3_2 = 2 * f3;
+				Int32 f5_2 = 2 * f5;
+				Int32 f7_2 = 2 * f7;
+				Int32 f9_2 = 2 * f9;
+				Int64 f0g0 = f0 * (Int64)g0; // every partial product is widened to 64 bits before multiplying
+				Int64 f0g1 = f0 * (Int64)g1;
+				Int64 f0g2 = f0 * (Int64)g2;
+				Int64 f0g3 = f0 * (Int64)g3;
+				Int64 f0g4 = f0 * (Int64)g4;
+				Int64 f0g5 = f0 * (Int64)g5;
+				Int64 f0g6 = f0 * (Int64)g6;
+				Int64 f0g7 = f0 * (Int64)g7;
+				Int64 f0g8 = f0 * (Int64)g8;
+				Int64 f0g9 = f0 * (Int64)g9;
+				Int64 f1g0 = f1 * (Int64)g0;
+				Int64 f1g1_2 = f1_2 * (Int64)g1;
+				Int64 f1g2 = f1 * (Int64)g2;
+				Int64 f1g3_2 = f1_2 * (Int64)g3;
+				Int64 f1g4 = f1 * (Int64)g4;
+				Int64 f1g5_2 = f1_2 * (Int64)g5;
+				Int64 f1g6 = f1 * (Int64)g6;
+				Int64 f1g7_2 = f1_2 * (Int64)g7;
+				Int64 f1g8 = f1 * (Int64)g8;
+				Int64 f1g9_38 = f1_2 * (Int64)g9_19;
+				Int64 f2g0 = f2 * (Int64)g0;
+				Int64 f2g1 = f2 * (Int64)g1;
+				Int64 f2g2 = f2 * (Int64)g2;
+				Int64 f2g3 = f2 * (Int64)g3;
+				Int64 f2g4 = f2 * (Int64)g4;
+				Int64 f2g5 = f2 * (Int64)g5;
+				Int64 f2g6 = f2 * (Int64)g6;
+				Int64 f2g7 = f2 * (Int64)g7;
+				Int64 f2g8_19 = f2 * (Int64)g8_19;
+				Int64 f2g9_19 = f2 * (Int64)g9_19;
+				Int64 f3g0 = f3 * (Int64)g0;
+				Int64 f3g1_2 = f3_2 * (Int64)g1;
+				Int64 f3g2 = f3 * (Int64)g2;
+				Int64 f3g3_2 = f3_2 * (Int64)g3;
+				Int64 f3g4 = f3 * (Int64)g4;
+				Int64 f3g5_2 = f3_2 * (Int64)g5;
+				Int64 f3g6 = f3 * (Int64)g6;
+				Int64 f3g7_38 = f3_2 * (Int64)g7_19;
+				Int64 f3g8_19 = f3 * (Int64)g8_19;
+				Int64 f3g9_38 = f3_2 * (Int64)g9_19;
+				Int64 f4g0 = f4 * (Int64)g0;
+				Int64 f4g1 = f4 * (Int64)g1;
+				Int64 f4g2 = f4 * (Int64)g2;
+				Int64 f4g3 = f4 * (Int64)g3;
+				Int64 f4g4 = f4 * (Int64)g4;
+				Int64 f4g5 = f4 * (Int64)g5;
+				Int64 f4g6_19 = f4 * (Int64)g6_19;
+				Int64 f4g7_19 = f4 * (Int64)g7_19;
+				Int64 f4g8_19 = f4 * (Int64)g8_19;
+				Int64 f4g9_19 = f4 * (Int64)g9_19;
+				Int64 f5g0 = f5 * (Int64)g0;
+				Int64 f5g1_2 = f5_2 * (Int64)g1;
+				Int64 f5g2 = f5 * (Int64)g2;
+				Int64 f5g3_2 = f5_2 * (Int64)g3;
+				Int64 f5g4 = f5 * (Int64)g4;
+				Int64 f5g5_38 = f5_2 * (Int64)g5_19;
+				Int64 f5g6_19 = f5 * (Int64)g6_19;
+				Int64 f5g7_38 = f5_2 * (Int64)g7_19;
+				Int64 f5g8_19 = f5 * (Int64)g8_19;
+				Int64 f5g9_38 = f5_2 * (Int64)g9_19;
+				Int64 f6g0 = f6 * (Int64)g0;
+				Int64 f6g1 = f6 * (Int64)g1;
+				Int64 f6g2 = f6 * (Int64)g2;
+				Int64 f6g3 = f6 * (Int64)g3;
+				Int64 f6g4_19 = f6 * (Int64)g4_19;
+				Int64 f6g5_19 = f6 * (Int64)g5_19;
+				Int64 f6g6_19 = f6 * (Int64)g6_19;
+				Int64 f6g7_19 = f6 * (Int64)g7_19;
+				Int64 f6g8_19 = f6 * (Int64)g8_19;
+				Int64 f6g9_19 = f6 * (Int64)g9_19;
+				Int64 f7g0 = f7 * (Int64)g0;
+				Int64 f7g1_2 = f7_2 * (Int64)g1;
+				Int64 f7g2 = f7 * (Int64)g2;
+				Int64 f7g3_38 = f7_2 * (Int64)g3_19;
+				Int64 f7g4_19 = f7 * (Int64)g4_19;
+				Int64 f7g5_38 = f7_2 * (Int64)g5_19;
+				Int64 f7g6_19 = f7 * (Int64)g6_19;
+				Int64 f7g7_38 = f7_2 * (Int64)g7_19;
+				Int64 f7g8_19 = f7 * (Int64)g8_19;
+				Int64 f7g9_38 = f7_2 * (Int64)g9_19;
+				Int64 f8g0 = f8 * (Int64)g0;
+				Int64 f8g1 = f8 * (Int64)g1;
+				Int64 f8g2_19 = f8 * (Int64)g2_19;
+				Int64 f8g3_19 = f8 * (Int64)g3_19;
+				Int64 f8g4_19 = f8 * (Int64)g4_19;
+				Int64 f8g5_19 = f8 * (Int64)g5_19;
+				Int64 f8g6_19 = f8 * (Int64)g6_19;
+				Int64 f8g7_19 = f8 * (Int64)g7_19;
+				Int64 f8g8_19 = f8 * (Int64)g8_19;
+				Int64 f8g9_19 = f8 * (Int64)g9_19;
+				Int64 f9g0 = f9 * (Int64)g0;
+				Int64 f9g1_38 = f9_2 * (Int64)g1_19;
+				Int64 f9g2_19 = f9 * (Int64)g2_19;
+				Int64 f9g3_38 = f9_2 * (Int64)g3_19;
+				Int64 f9g4_19 = f9 * (Int64)g4_19;
+				Int64 f9g5_38 = f9_2 * (Int64)g5_19;
+				Int64 f9g6_19 = f9 * (Int64)g6_19;
+				Int64 f9g7_38 = f9_2 * (Int64)g7_19;
+				Int64 f9g8_19 = f9 * (Int64)g8_19;
+				Int64 f9g9_38 = f9_2 * (Int64)g9_19;
+				Int64 h0 = f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38; // products whose limb index sum wraps past 9 carry the factor 19
+				Int64 h1 = f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19;
+				Int64 h2 = f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38;
+				Int64 h3 = f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19;
+				Int64 h4 = f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38;
+				Int64 h5 = f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19;
+				Int64 h6 = f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38;
+				Int64 h7 = f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19;
+				Int64 h8 = f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38;
+				Int64 h9 = f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0;
+				Int64 carry0;
+				Int64 carry1;
+				Int64 carry2;
+				Int64 carry3;
+				Int64 carry4;
+				Int64 carry5;
+				Int64 carry6;
+				Int64 carry7;
+				Int64 carry8;
+				Int64 carry9;
+				carry0 = (h0 + (Int64)(1 << 25)) >> 26; h1 += carry0; h0 -= carry0 << 26; // rounded carry: even limbs keep 26 bits, odd limbs 25 bits
+				carry4 = (h4 + (Int64)(1 << 25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+				carry1 = (h1 + (Int64)(1 << 24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
+				carry5 = (h5 + (Int64)(1 << 24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
+				carry2 = (h2 + (Int64)(1 << 25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
+				carry6 = (h6 + (Int64)(1 << 25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
+				carry3 = (h3 + (Int64)(1 << 24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
+				carry7 = (h7 + (Int64)(1 << 24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
+				carry4 = (h4 + (Int64)(1 << 25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+				carry8 = (h8 + (Int64)(1 << 25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
+				carry9 = (h9 + (Int64)(1 << 24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; // overflow of the top limb wraps into limb 0 multiplied by 19
+				carry0 = (h0 + (Int64)(1 << 25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
+				v0 = (Int32)h0;
+				v1 = (Int32)h1;
+				v2 = (Int32)h2;
+				v3 = (Int32)h3;
+				v4 = (Int32)h4;
+				v5 = (Int32)h5;
+				v6 = (Int32)h6;
+				v7 = (Int32)h7;
+				v8 = (Int32)h8;
+				v9 = (Int32)h9;
 			}
-			public override string ToString() {
-				return String.Format("{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}", this[0], this[1], this[2], this[3], this[4], this[5], this[6], this[7], this[8], this[9]);
+			public void sq() { // this = this^2, using the symmetry of the product to compute each cross term once (doubled).
+				Int32 f0 = v0;
+				Int32 f1 = v1;
+				Int32 f2 = v2;
+				Int32 f3 = v3;
+				Int32 f4 = v4;
+				Int32 f5 = v5;
+				Int32 f6 = v6;
+				Int32 f7 = v7;
+				Int32 f8 = v8;
+				Int32 f9 = v9;
+				Int32 f0_2 = 2 * f0;
+				Int32 f1_2 = 2 * f1;
+				Int32 f2_2 = 2 * f2;
+				Int32 f3_2 = 2 * f3;
+				Int32 f4_2 = 2 * f4;
+				Int32 f5_2 = 2 * f5;
+				Int32 f6_2 = 2 * f6;
+				Int32 f7_2 = 2 * f7;
+				Int32 f5_38 = 38 * f5; /* 1.959375*2^30 */
+				Int32 f6_19 = 19 * f6; /* 1.959375*2^30 */
+				Int32 f7_38 = 38 * f7; /* 1.959375*2^30 */
+				Int32 f8_19 = 19 * f8; /* 1.959375*2^30 */
+				Int32 f9_38 = 38 * f9; /* 1.959375*2^30 */
+				Int64 f0f0 = f0 * (Int64)f0; // partial products are widened to 64 bits before multiplying
+				Int64 f0f1_2 = f0_2 * (Int64)f1;
+				Int64 f0f2_2 = f0_2 * (Int64)f2;
+				Int64 f0f3_2 = f0_2 * (Int64)f3;
+				Int64 f0f4_2 = f0_2 * (Int64)f4;
+				Int64 f0f5_2 = f0_2 * (Int64)f5;
+				Int64 f0f6_2 = f0_2 * (Int64)f6;
+				Int64 f0f7_2 = f0_2 * (Int64)f7;
+				Int64 f0f8_2 = f0_2 * (Int64)f8;
+				Int64 f0f9_2 = f0_2 * (Int64)f9;
+				Int64 f1f1_2 = f1_2 * (Int64)f1;
+				Int64 f1f2_2 = f1_2 * (Int64)f2;
+				Int64 f1f3_4 = f1_2 * (Int64)f3_2;
+				Int64 f1f4_2 = f1_2 * (Int64)f4;
+				Int64 f1f5_4 = f1_2 * (Int64)f5_2;
+				Int64 f1f6_2 = f1_2 * (Int64)f6;
+				Int64 f1f7_4 = f1_2 * (Int64)f7_2;
+				Int64 f1f8_2 = f1_2 * (Int64)f8;
+				Int64 f1f9_76 = f1_2 * (Int64)f9_38;
+				Int64 f2f2 = f2 * (Int64)f2;
+				Int64 f2f3_2 = f2_2 * (Int64)f3;
+				Int64 f2f4_2 = f2_2 * (Int64)f4;
+				Int64 f2f5_2 = f2_2 * (Int64)f5;
+				Int64 f2f6_2 = f2_2 * (Int64)f6;
+				Int64 f2f7_2 = f2_2 * (Int64)f7;
+				Int64 f2f8_38 = f2_2 * (Int64)f8_19;
+				Int64 f2f9_38 = f2 * (Int64)f9_38;
+				Int64 f3f3_2 = f3_2 * (Int64)f3;
+				Int64 f3f4_2 = f3_2 * (Int64)f4;
+				Int64 f3f5_4 = f3_2 * (Int64)f5_2;
+				Int64 f3f6_2 = f3_2 * (Int64)f6;
+				Int64 f3f7_76 = f3_2 * (Int64)f7_38;
+				Int64 f3f8_38 = f3_2 * (Int64)f8_19;
+				Int64 f3f9_76 = f3_2 * (Int64)f9_38;
+				Int64 f4f4 = f4 * (Int64)f4;
+				Int64 f4f5_2 = f4_2 * (Int64)f5;
+				Int64 f4f6_38 = f4_2 * (Int64)f6_19;
+				Int64 f4f7_38 = f4 * (Int64)f7_38;
+				Int64 f4f8_38 = f4_2 * (Int64)f8_19;
+				Int64 f4f9_38 = f4 * (Int64)f9_38;
+				Int64 f5f5_38 = f5 * (Int64)f5_38;
+				Int64 f5f6_38 = f5_2 * (Int64)f6_19;
+				Int64 f5f7_76 = f5_2 * (Int64)f7_38;
+				Int64 f5f8_38 = f5_2 * (Int64)f8_19;
+				Int64 f5f9_76 = f5_2 * (Int64)f9_38;
+				Int64 f6f6_19 = f6 * (Int64)f6_19;
+				Int64 f6f7_38 = f6 * (Int64)f7_38;
+				Int64 f6f8_38 = f6_2 * (Int64)f8_19;
+				Int64 f6f9_38 = f6 * (Int64)f9_38;
+				Int64 f7f7_38 = f7 * (Int64)f7_38;
+				Int64 f7f8_38 = f7_2 * (Int64)f8_19;
+				Int64 f7f9_76 = f7_2 * (Int64)f9_38;
+				Int64 f8f8_19 = f8 * (Int64)f8_19;
+				Int64 f8f9_38 = f8 * (Int64)f9_38;
+				Int64 f9f9_38 = f9 * (Int64)f9_38;
+				Int64 h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
+				Int64 h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
+				Int64 h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
+				Int64 h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
+				Int64 h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38;
+				Int64 h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
+				Int64 h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
+				Int64 h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
+				Int64 h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
+				Int64 h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
+				Int64 carry0;
+				Int64 carry1;
+				Int64 carry2;
+				Int64 carry3;
+				Int64 carry4;
+				Int64 carry5;
+				Int64 carry6;
+				Int64 carry7;
+				Int64 carry8;
+				Int64 carry9;
+
+				carry0 = (h0 + (Int64)(1 << 25)) >> 26; h1 += carry0; h0 -= carry0 << 26; // rounded carry: even limbs keep 26 bits, odd limbs 25 bits
+				carry4 = (h4 + (Int64)(1 << 25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+
+				carry1 = (h1 + (Int64)(1 << 24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
+				carry5 = (h5 + (Int64)(1 << 24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
+
+				carry2 = (h2 + (Int64)(1 << 25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
+				carry6 = (h6 + (Int64)(1 << 25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
+
+				carry3 = (h3 + (Int64)(1 << 24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
+				carry7 = (h7 + (Int64)(1 << 24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
+
+				carry4 = (h4 + (Int64)(1 << 25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+				carry8 = (h8 + (Int64)(1 << 25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
+
+				carry9 = (h9 + (Int64)(1 << 24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; // overflow of the top limb wraps into limb 0 multiplied by 19
+
+				carry0 = (h0 + (Int64)(1 << 25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
+
+				v0 = (Int32)h0;
+				v1 = (Int32)h1;
+				v2 = (Int32)h2;
+				v3 = (Int32)h3;
+				v4 = (Int32)h4;
+				v5 = (Int32)h5;
+				v6 = (Int32)h6;
+				v7 = (Int32)h7;
+				v8 = (Int32)h8;
+				v9 = (Int32)h9;
+			}
+			public void sq2() { // this = 2 * this^2: same products as sq(), with every h limb doubled before the carry chain.
+				Int32 f0 = v0;
+				Int32 f1 = v1;
+				Int32 f2 = v2;
+				Int32 f3 = v3;
+				Int32 f4 = v4;
+				Int32 f5 = v5;
+				Int32 f6 = v6;
+				Int32 f7 = v7;
+				Int32 f8 = v8;
+				Int32 f9 = v9;
+				Int32 f0_2 = 2 * f0;
+				Int32 f1_2 = 2 * f1;
+				Int32 f2_2 = 2 * f2;
+				Int32 f3_2 = 2 * f3;
+				Int32 f4_2 = 2 * f4;
+				Int32 f5_2 = 2 * f5;
+				Int32 f6_2 = 2 * f6;
+				Int32 f7_2 = 2 * f7;
+				Int32 f5_38 = 38 * f5; /* 1.959375*2^30 */
+				Int32 f6_19 = 19 * f6; /* 1.959375*2^30 */
+				Int32 f7_38 = 38 * f7; /* 1.959375*2^30 */
+				Int32 f8_19 = 19 * f8; /* 1.959375*2^30 */
+				Int32 f9_38 = 38 * f9; /* 1.959375*2^30 */
+				Int64 f0f0 = f0 * (Int64)f0;
+				Int64 f0f1_2 = f0_2 * (Int64)f1;
+				Int64 f0f2_2 = f0_2 * (Int64)f2;
+				Int64 f0f3_2 = f0_2 * (Int64)f3;
+				Int64 f0f4_2 = f0_2 * (Int64)f4;
+				Int64 f0f5_2 = f0_2 * (Int64)f5;
+				Int64 f0f6_2 = f0_2 * (Int64)f6;
+				Int64 f0f7_2 = f0_2 * (Int64)f7;
+				Int64 f0f8_2 = f0_2 * (Int64)f8;
+				Int64 f0f9_2 = f0_2 * (Int64)f9;
+				Int64 f1f1_2 = f1_2 * (Int64)f1;
+				Int64 f1f2_2 = f1_2 * (Int64)f2;
+				Int64 f1f3_4 = f1_2 * (Int64)f3_2;
+				Int64 f1f4_2 = f1_2 * (Int64)f4;
+				Int64 f1f5_4 = f1_2 * (Int64)f5_2;
+				Int64 f1f6_2 = f1_2 * (Int64)f6;
+				Int64 f1f7_4 = f1_2 * (Int64)f7_2;
+				Int64 f1f8_2 = f1_2 * (Int64)f8;
+				Int64 f1f9_76 = f1_2 * (Int64)f9_38;
+				Int64 f2f2 = f2 * (Int64)f2;
+				Int64 f2f3_2 = f2_2 * (Int64)f3;
+				Int64 f2f4_2 = f2_2 * (Int64)f4;
+				Int64 f2f5_2 = f2_2 * (Int64)f5;
+				Int64 f2f6_2 = f2_2 * (Int64)f6;
+				Int64 f2f7_2 = f2_2 * (Int64)f7;
+				Int64 f2f8_38 = f2_2 * (Int64)f8_19;
+				Int64 f2f9_38 = f2 * (Int64)f9_38;
+				Int64 f3f3_2 = f3_2 * (Int64)f3;
+				Int64 f3f4_2 = f3_2 * (Int64)f4;
+				Int64 f3f5_4 = f3_2 * (Int64)f5_2;
+				Int64 f3f6_2 = f3_2 * (Int64)f6;
+				Int64 f3f7_76 = f3_2 * (Int64)f7_38;
+				Int64 f3f8_38 = f3_2 * (Int64)f8_19;
+				Int64 f3f9_76 = f3_2 * (Int64)f9_38;
+				Int64 f4f4 = f4 * (Int64)f4;
+				Int64 f4f5_2 = f4_2 * (Int64)f5;
+				Int64 f4f6_38 = f4_2 * (Int64)f6_19;
+				Int64 f4f7_38 = f4 * (Int64)f7_38;
+				Int64 f4f8_38 = f4_2 * (Int64)f8_19;
+				Int64 f4f9_38 = f4 * (Int64)f9_38;
+				Int64 f5f5_38 = f5 * (Int64)f5_38;
+				Int64 f5f6_38 = f5_2 * (Int64)f6_19;
+				Int64 f5f7_76 = f5_2 * (Int64)f7_38;
+				Int64 f5f8_38 = f5_2 * (Int64)f8_19;
+				Int64 f5f9_76 = f5_2 * (Int64)f9_38;
+				Int64 f6f6_19 = f6 * (Int64)f6_19;
+				Int64 f6f7_38 = f6 * (Int64)f7_38;
+				Int64 f6f8_38 = f6_2 * (Int64)f8_19;
+				Int64 f6f9_38 = f6 * (Int64)f9_38;
+				Int64 f7f7_38 = f7 * (Int64)f7_38;
+				Int64 f7f8_38 = f7_2 * (Int64)f8_19;
+				Int64 f7f9_76 = f7_2 * (Int64)f9_38;
+				Int64 f8f8_19 = f8 * (Int64)f8_19;
+				Int64 f8f9_38 = f8 * (Int64)f9_38;
+				Int64 f9f9_38 = f9 * (Int64)f9_38;
+				Int64 h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
+				Int64 h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
+				Int64 h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
+				Int64 h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
+				Int64 h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38;
+				Int64 h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
+				Int64 h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
+				Int64 h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
+				Int64 h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
+				Int64 h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
+				Int64 carry0;
+				Int64 carry1;
+				Int64 carry2;
+				Int64 carry3;
+				Int64 carry4;
+				Int64 carry5;
+				Int64 carry6;
+				Int64 carry7;
+				Int64 carry8;
+				Int64 carry9;
+
+				h0 += h0; // double every limb: this is the only difference from sq()
+				h1 += h1;
+				h2 += h2;
+				h3 += h3;
+				h4 += h4;
+				h5 += h5;
+				h6 += h6;
+				h7 += h7;
+				h8 += h8;
+				h9 += h9;
+
+				carry0 = (h0 + (Int64)(1 << 25)) >> 26; h1 += carry0; h0 -= carry0 << 26; // rounded carry: even limbs keep 26 bits, odd limbs 25 bits
+				carry4 = (h4 + (Int64)(1 << 25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+
+				carry1 = (h1 + (Int64)(1 << 24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
+				carry5 = (h5 + (Int64)(1 << 24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
+
+				carry2 = (h2 + (Int64)(1 << 25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
+				carry6 = (h6 + (Int64)(1 << 25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
+
+				carry3 = (h3 + (Int64)(1 << 24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
+				carry7 = (h7 + (Int64)(1 << 24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
+
+				carry4 = (h4 + (Int64)(1 << 25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+				carry8 = (h8 + (Int64)(1 << 25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
+
+				carry9 = (h9 + (Int64)(1 << 24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; // overflow of the top limb wraps into limb 0 multiplied by 19
+
+				carry0 = (h0 + (Int64)(1 << 25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
+
+				v0 = (Int32)h0;
+				v1 = (Int32)h1;
+				v2 = (Int32)h2;
+				v3 = (Int32)h3;
+				v4 = (Int32)h4;
+				v5 = (Int32)h5;
+				v6 = (Int32)h6;
+				v7 = (Int32)h7;
+				v8 = (Int32)h8;
+				v9 = (Int32)h9;
+			}
+			public void invert() { // Replaces this (call it z) with z^(2^255 - 21) through a fixed square-and-multiply chain.
+				fe t0 = this;
+				t0.sq(); // t0 = z^2
+				fe t1 = t0;
+				t1.sq();
+				t1.sq(); // t1 = z^8
+				mul(ref t1); // this = z^9
+				t0.mul(ref this); // t0 = z^11, kept for the final multiplication
+				fe t2 = t0;
+				t2.sq(); // t2 = z^22
+				mul(ref t2); // this = z^31 = z^(2^5 - 1)
+				t1 = this;
+				for (int i = 1; i < 6; i++) sq(); // 5 squarings
+				mul(ref t1); // this = z^(2^10 - 1)
+				t1 = this; // t1 = z^(2^10 - 1), reused below
+				for (int i = 1; i < 11; i++) sq(); // 10 squarings
+				mul(ref t1); // this = z^(2^20 - 1)
+				t2 = this;
+				for (int i = 1; i < 21; i++) sq(); // 20 squarings
+				mul(ref t2); // this = z^(2^40 - 1)
+				for (int i = 1; i < 11; ++i) sq(); // 10 squarings
+				this.mul(ref t1); // this = z^(2^50 - 1)
+				t1 = this; // t1 = z^(2^50 - 1), reused below
+				for (int i = 1; i < 51; i++) sq(); // 50 squarings
+				mul(ref t1); // this = z^(2^100 - 1)
+				t2 = this;
+				for (int i = 1; i < 101; i++) sq(); // 100 squarings
+				mul(ref t2); // this = z^(2^200 - 1)
+				for (int i = 1; i < 51; i++) sq(); // 50 squarings
+				mul(ref t1); // this = z^(2^250 - 1)
+				for (int i = 1; i < 6; i++) sq(); // 5 squarings
+				mul(ref t0); // this = z^(2^255 - 32 + 11) = z^(2^255 - 21)
+			}
+			public unsafe void tobytes(Byte* s) { // Packs the ten limbs into the 32 bytes s[0..31], lowest limb first, after a final reduction; this itself is not modified.
+				Int32 h0 = v0, h1 = v1, h2 = v2, h3 = v3, h4 = v4, h5 = v5, h6 = v6, h7 = v7, h8 = v8, h9 = v9;
+				Int32 q = (19 * h9 + (((Int32)1) << 24)) >> 25; // q: carry that would leave the top limb, computed by running the chain once without storing limbs
+				q = (h0 + q) >> 26;
+				q = (h1 + q) >> 25;
+				q = (h2 + q) >> 26;
+				q = (h3 + q) >> 25;
+				q = (h4 + q) >> 26;
+				q = (h5 + q) >> 25;
+				q = (h6 + q) >> 26;
+				q = (h7 + q) >> 25;
+				q = (h8 + q) >> 26;
+				q = (h9 + q) >> 25;
+				h0 += 19 * q; // fold q back into limb 0 with the factor 19
+				Int32 carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 << 26; // plain (unrounded) carries: each limb ends up within its 26/25-bit width
+				Int32 carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 << 25;
+				Int32 carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 << 26;
+				Int32 carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 << 25;
+				Int32 carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 << 26;
+				Int32 carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 << 25;
+				Int32 carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 << 26;
+				Int32 carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 << 25;
+				Int32 carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 << 26;
+				Int32 carry9 = h9 >> 25; h9 -= carry9 << 25; // the carry out of the top limb is discarded
+				s[0] = (Byte)(h0 >> 0);
+				s[1] = (Byte)(h0 >> 8);
+				s[2] = (Byte)(h0 >> 16);
+				s[3] = (Byte)((h0 >> 24) | (h1 << 2)); // bytes that straddle two limbs OR the top bits of one with the low bits of the next
+				s[4] = (Byte)(h1 >> 6);
+				s[5] = (Byte)(h1 >> 14);
+				s[6] = (Byte)((h1 >> 22) | (h2 << 3));
+				s[7] = (Byte)(h2 >> 5);
+				s[8] = (Byte)(h2 >> 13);
+				s[9] = (Byte)((h2 >> 21) | (h3 << 5));
+				s[10] = (Byte)(h3 >> 3);
+				s[11] = (Byte)(h3 >> 11);
+				s[12] = (Byte)((h3 >> 19) | (h4 << 6));
+				s[13] = (Byte)(h4 >> 2);
+				s[14] = (Byte)(h4 >> 10);
+				s[15] = (Byte)(h4 >> 18);
+				s[16] = (Byte)(h5 >> 0);
+				s[17] = (Byte)(h5 >> 8);
+				s[18] = (Byte)(h5 >> 16);
+				s[19] = (Byte)((h5 >> 24) | (h6 << 1));
+				s[20] = (Byte)(h6 >> 7);
+				s[21] = (Byte)(h6 >> 15);
+				s[22] = (Byte)((h6 >> 23) | (h7 << 3));
+				s[23] = (Byte)(h7 >> 5);
+				s[24] = (Byte)(h7 >> 13);
+				s[25] = (Byte)((h7 >> 21) | (h8 << 4));
+				s[26] = (Byte)(h8 >> 4);
+				s[27] = (Byte)(h8 >> 12);
+				s[28] = (Byte)((h8 >> 20) | (h9 << 6));
+				s[29] = (Byte)(h9 >> 2);
+				s[30] = (Byte)(h9 >> 10);
+				s[31] = (Byte)(h9 >> 18);
+			}
+			public int isnegative() { // Returns the lowest bit (0 or 1) of limb 0 after the same q-reduction that tobytes() applies; this is not modified.
+				Int32 h0 = v0, h9 = v9;
+				Int32 q = (19 * h9 + (1 << 24)) >> 25; // q: carry that would leave the top limb
+				q = (h0 + q) >> 26;
+				q = (v1 + q) >> 25;
+				q = (v2 + q) >> 26;
+				q = (v3 + q) >> 25;
+				q = (v4 + q) >> 26;
+				q = (v5 + q) >> 25;
+				q = (v6 + q) >> 26;
+				q = (v7 + q) >> 25;
+				q = (v8 + q) >> 26;
+				q = (h9 + q) >> 25;
+				h0 += 19 * q;
+				h0 -= (h0 >> 26) << 26; // keep only the low 26 bits of limb 0
+				return (h0 >> 0) & 1;
+			}
+			public unsafe void frombytes(Byte* s) { // Unpacks the bytes at s into the ten limbs; load_3/load_4 are helpers defined elsewhere (presumably 3- and 4-byte little-endian loads, verify).
+				Int64 h0 = load_4(s);
+				Int64 h1 = load_3(s + 4) << 6; // the shifts align each chunk with its limb's bit position
+				Int64 h2 = load_3(s + 7) << 5;
+				Int64 h3 = load_3(s + 10) << 3;
+				Int64 h4 = load_3(s + 13) << 2;
+				Int64 h5 = load_4(s + 16);
+				Int64 h6 = load_3(s + 20) << 7;
+				Int64 h7 = load_3(s + 23) << 5;
+				Int64 h8 = load_3(s + 26) << 4;
+				Int64 h9 = (load_3(s + 29) & 8388607) << 2; // 8388607 = 2^23 - 1: the highest bit of the last chunk is dropped
+
+				Int64 carry9 = (h9 + (Int64)(1 << 24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; // odd limbs first (25 bits each); the top carry wraps into limb 0 multiplied by 19
+				Int64 carry1 = (h1 + (Int64)(1 << 24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
+				Int64 carry3 = (h3 + (Int64)(1 << 24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
+				Int64 carry5 = (h5 + (Int64)(1 << 24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
+				Int64 carry7 = (h7 + (Int64)(1 << 24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
+
+				Int64 carry0 = (h0 + (Int64)(1 << 25)) >> 26; h1 += carry0; h0 -= carry0 << 26; // then even limbs (26 bits each)
+				Int64 carry2 = (h2 + (Int64)(1 << 25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
+				Int64 carry4 = (h4 + (Int64)(1 << 25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
+				Int64 carry6 = (h6 + (Int64)(1 << 25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
+				Int64 carry8 = (h8 + (Int64)(1 << 25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
+
+				v0 = (Int32)h0;
+				v1 = (Int32)h1;
+				v2 = (Int32)h2;
+				v3 = (Int32)h3;
+				v4 = (Int32)h4;
+				v5 = (Int32)h5;
+				v6 = (Int32)h6;
+				v7 = (Int32)h7;
+				v8 = (Int32)h8;
+				v9 = (Int32)h9;
+			}
+			public int isnonzero() { // Returns 0 when every limb is zero after the tobytes()-style reduction, and -1 otherwise; branch-free, this is not modified.
+				Int32 h0 = v0, h1 = v1, h2 = v2, h3 = v3, h4 = v4, h5 = v5, h6 = v6, h7 = v7, h8 = v8, h9 = v9;
+				Int32 q = (19 * h9 + (((Int32)1) << 24)) >> 25; // q: carry that would leave the top limb
+				q = (h0 + q) >> 26;
+				q = (h1 + q) >> 25;
+				q = (h2 + q) >> 26;
+				q = (h3 + q) >> 25;
+				q = (h4 + q) >> 26;
+				q = (h5 + q) >> 25;
+				q = (h6 + q) >> 26;
+				q = (h7 + q) >> 25;
+				q = (h8 + q) >> 26;
+				q = (h9 + q) >> 25;
+				h0 += 19 * q;
+				Int32 carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 << 26;
+				Int32 carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 << 25;
+				Int32 carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 << 26;
+				Int32 carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 << 25;
+				Int32 carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 << 26;
+				Int32 carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 << 25;
+				Int32 carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 << 26;
+				Int32 carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 << 25;
+				Int32 carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 << 26;
+				Int32 carry9 = h9 >> 25; h9 -= carry9 << 25;
+				Int32 b = h0 | h1 | h2 | h3 | h4 | h5 | h6 | h7 | h8 | h9; // any set bit in any limb survives the OR
+				b = (b | (b >> 8) | (b >> 16) | (b >> 24)) & 0xff; // fold all four bytes into the low byte
+				return (1 & ((b - 1) >> 8)) - 1; // b == 0 gives 0; b in 1..255 gives -1
+			}
+			public void pow22523() { // Replaces this (call it z) with z^(2^252 - 3) through a fixed square-and-multiply chain.
+				fe t0 = this; 
+				t0.sq(); // t0 = z^2
+				fe t1 = t0; 
+				for (int i = 1; i < 3; i++) t1.sq(); // 2 squarings: t1 = z^8
+				fe t2 = this; // t2 = z, kept for the final multiplication
+				mul(ref t1); // this = z^9
+				t0.mul(ref this); // t0 = z^11
+				t0.sq(); // t0 = z^22
+				mul(ref t0); // this = z^31 = z^(2^5 - 1)
+				t0 = this;
+				for (int i = 1; i < 6; i++) sq(); // 5 squarings
+				mul(ref t0); // this = z^(2^10 - 1)
+				t0 = this; // t0 = z^(2^10 - 1), reused below
+				for (int i = 1; i < 11; i++) sq(); // 10 squarings
+				mul(ref t0); // this = z^(2^20 - 1)
+				t1 = this;
+				for (int i = 1; i < 21; i++) sq(); // 20 squarings
+				mul(ref t1); // this = z^(2^40 - 1)
+				for (int i = 1; i < 11; i++) sq(); // 10 squarings
+				mul(ref t0); // this = z^(2^50 - 1)
+				t0 = this; // t0 = z^(2^50 - 1), reused below
+				for (int i = 1; i < 51; i++) sq(); // 50 squarings
+				mul(ref t0); // this = z^(2^100 - 1)
+				t1 = this;
+				for (int i = 1; i < 101; i++) sq(); // 100 squarings
+				mul(ref t1); // this = z^(2^200 - 1)
+				for (int i = 1; i < 51; i++) sq(); // 50 squarings
+				mul(ref t0); // this = z^(2^250 - 1)
+				for (int i = 1; i < 3; i++) sq(); // 2 squarings: this = z^(2^252 - 4)
+				mul(ref t2); // this = z^(2^252 - 3)
 			}
 		}
 		struct ge_precomp {
@@ -24,9 +740,19 @@
 			public fe xy2d;
 			public ge_precomp(Int32[] data, int offset)
 				: this() {
-				yplusx = fe_unpack(offset + 0 * 10, data);
-				yminusx = fe_unpack(offset + 1 * 10, data);
-				xy2d = fe_unpack(offset + 2 * 10, data);
+				yplusx = new fe(offset + 0 * 10, data); // the three field elements sit 10 Int32 slots apart, starting at data[offset]
+				yminusx = new fe(offset + 1 * 10, data);
+				xy2d = new fe(offset + 2 * 10, data);
+			}
+			public void set_zero() { // resets to yplusx = 1, yminusx = 1, xy2d = 0 (going by the fe setter names)
+				yplusx.set_one();
+				yminusx.set_one();
+				xy2d.set_zero();
+			}
+			public void cmov(ref ge_precomp u, Byte b) { // forwards b to fe.cmov for each of the three fields; select() only passes 0 or 1
+				yplusx.cmov(ref u.yplusx, b);
+				yminusx.cmov(ref u.yminusx, b);
+				xy2d.cmov(ref u.xy2d, b);
 			}
 		}
 		struct ge_p1p1 {
@@ -39,30 +765,37 @@
 			public fe X;
 			public fe Y;
 			public fe Z;
+			public void set_zero() { // X = 0, Y = 1, Z = 1 (going by the fe setter names)
+				X.set_zero();
+				Y.set_one();
+				Z.set_one();
+			}
 		}
 		struct ge_p3 {
 			public fe X;
 			public fe Y;
 			public fe Z;
 			public fe T;
+			public void set_zero() { // X = 0, Y = 1, Z = 1, T = 0 (going by the fe setter names)
+				X.set_zero();
+				Y.set_one();
+				Z.set_one();
+				T.set_zero();
+			}
+		}
+		struct ge_cached { // NOTE(review): field names suggest Y+X, Y-X, Z and 2*d*T of a point; confirm against the code that fills this struct
+			public fe YplusX;
+			public fe YminusX;
+			public fe Z;
+			public fe T2d;
 		}
 
-		static fe fe_unpack(int offset, Int32[] data) {
-			fe ret = new fe();
-			for (int i = 0; i < 10; i++) ret[i] = data[offset + i];
+		static ge_precomp[] base_unpack(int n, Int32[] data) { // builds n table entries from a flat Int32 array; entry i starts at data[i * 30]
+			ge_precomp[] ret = new ge_precomp[n];
+			for (int i = 0; i < n; i++) ret[i] = new ge_precomp(data, i * 3 * 10); // 3 field elements of 10 Int32 slots per entry
 			return ret;
 		}
-		static ge_precomp[] base_unpack(int n, int offset, Int32[] data) {
-			ge_precomp[] ret = new ge_precomp[n];
-			for (int i = 0; i < n; i++) ret[i] = new ge_precomp(data, offset + i * 3 * 10);
-			return ret;
-		}
-		static ge_precomp[][] base_unpack_a(int a, int b, Int32[] data) {
-			ge_precomp[][] ret = new ge_precomp[a][];
-			for (int i = 0; i < a; i++) ret[i] = base_unpack(b, i * b * 3 * 10, data);
-			return ret;
-		}
-		static ge_precomp[][] basev = base_unpack_a(32, 8,
+		static ge_precomp[] basev = base_unpack(32 * 8, // flat table of 32 * 8 entries; select() reads entries pos * 8 + 0 .. pos * 8 + 7
 			new Int32[32 * 8 * 3 * 10] {
 #region base point data
 			25967493,-14356035,29566456,3660896,-12694345,4014787,27544626,-11754271,-6079156,2047605,
@@ -836,453 +1569,51 @@
 #endregion
 		});
 
-		static void fe_0(out fe h) {
-			h = new fe();
-			h[0] = 0;
-			h[1] = 0;
-			h[2] = 0;
-			h[3] = 0;
-			h[4] = 0;
-			h[5] = 0;
-			h[6] = 0;
-			h[7] = 0;
-			h[8] = 0;
-			h[9] = 0;
-		}
-		static void fe_1(out fe h) {
-			h = new fe();
-			h[0] = 1;
-			h[1] = 0;
-			h[2] = 0;
-			h[3] = 0;
-			h[4] = 0;
-			h[5] = 0;
-			h[6] = 0;
-			h[7] = 0;
-			h[8] = 0;
-			h[9] = 0;
-		}
-		static void ge_p3_0(out ge_p3 h) {
-			fe_0(out h.X);
-			fe_1(out h.Y);
-			fe_1(out h.Z);
-			fe_0(out h.T);
-		}
 		static Byte negative(SByte b) {
 			UInt64 x = (UInt64)(Int64)b; /* 18446744073709551361..18446744073709551615: yes; 0..255: no */
 			x >>= 63; /* 1: yes; 0: no */
 			return (Byte)x;
 		}
-		static void ge_precomp_0(out ge_precomp h) {
-			fe_1(out h.yplusx);
-			fe_1(out h.yminusx);
-			fe_0(out h.xy2d);
-		}
-		static void fe_cmov(ref fe f, ref fe g, Int32 b) {
-			Int32 f0 = f[0];
-			Int32 f1 = f[1];
-			Int32 f2 = f[2];
-			Int32 f3 = f[3];
-			Int32 f4 = f[4];
-			Int32 f5 = f[5];
-			Int32 f6 = f[6];
-			Int32 f7 = f[7];
-			Int32 f8 = f[8];
-			Int32 f9 = f[9];
-			Int32 g0 = g[0];
-			Int32 g1 = g[1];
-			Int32 g2 = g[2];
-			Int32 g3 = g[3];
-			Int32 g4 = g[4];
-			Int32 g5 = g[5];
-			Int32 g6 = g[6];
-			Int32 g7 = g[7];
-			Int32 g8 = g[8];
-			Int32 g9 = g[9];
-			Int32 x0 = f0 ^ g0;
-			Int32 x1 = f1 ^ g1;
-			Int32 x2 = f2 ^ g2;
-			Int32 x3 = f3 ^ g3;
-			Int32 x4 = f4 ^ g4;
-			Int32 x5 = f5 ^ g5;
-			Int32 x6 = f6 ^ g6;
-			Int32 x7 = f7 ^ g7;
-			Int32 x8 = f8 ^ g8;
-			Int32 x9 = f9 ^ g9;
-			b = -b;
-			x0 &= b;
-			x1 &= b;
-			x2 &= b;
-			x3 &= b;
-			x4 &= b;
-			x5 &= b;
-			x6 &= b;
-			x7 &= b;
-			x8 &= b;
-			x9 &= b;
-			f[0] = f0 ^ x0;
-			f[1] = f1 ^ x1;
-			f[2] = f2 ^ x2;
-			f[3] = f3 ^ x3;
-			f[4] = f4 ^ x4;
-			f[5] = f5 ^ x5;
-			f[6] = f6 ^ x6;
-			f[7] = f7 ^ x7;
-			f[8] = f8 ^ x8;
-			f[9] = f9 ^ x9;
-		}
-		static void cmov(ref ge_precomp t, ref ge_precomp u, Byte b) {
-			fe_cmov(ref t.yplusx, ref u.yplusx, b);
-			fe_cmov(ref t.yminusx, ref u.yminusx, b);
-			fe_cmov(ref t.xy2d, ref u.xy2d, b);
-		}
-		static Byte equal(Byte b, SByte c) {
-			Byte ub = (Byte)b;
-			Byte uc = (Byte)c;
-			Byte x = (Byte)(ub ^ uc); /* 0: yes; 1..255: no */
-			UInt32 y = x; /* 0: yes; 1..255: no */
+		static Byte equal(Byte b, Byte c) { // returns 1 when b == c and 0 otherwise, computed without a branch
+			UInt32 y = (UInt32)(b ^ c); /* 0: yes; 1..255: no */
 			y -= 1; /* 4294967295: yes; 0..254: no */
 			y >>= 31; /* 1: yes; 0: no */
 			return (Byte)y;
 		}
-		static void fe_copy(out fe h, ref fe f) {
-			Int32 f0 = f[0];
-			Int32 f1 = f[1];
-			Int32 f2 = f[2];
-			Int32 f3 = f[3];
-			Int32 f4 = f[4];
-			Int32 f5 = f[5];
-			Int32 f6 = f[6];
-			Int32 f7 = f[7];
-			Int32 f8 = f[8];
-			Int32 f9 = f[9];
-			h = new fe();
-			h[0] = f0;
-			h[1] = f1;
-			h[2] = f2;
-			h[3] = f3;
-			h[4] = f4;
-			h[5] = f5;
-			h[6] = f6;
-			h[7] = f7;
-			h[8] = f8;
-			h[9] = f9;
-		}
-		static void fe_neg(out fe h, ref fe f) {
-			Int32 f0 = f[0];
-			Int32 f1 = f[1];
-			Int32 f2 = f[2];
-			Int32 f3 = f[3];
-			Int32 f4 = f[4];
-			Int32 f5 = f[5];
-			Int32 f6 = f[6];
-			Int32 f7 = f[7];
-			Int32 f8 = f[8];
-			Int32 f9 = f[9];
-			Int32 h0 = -f0;
-			Int32 h1 = -f1;
-			Int32 h2 = -f2;
-			Int32 h3 = -f3;
-			Int32 h4 = -f4;
-			Int32 h5 = -f5;
-			Int32 h6 = -f6;
-			Int32 h7 = -f7;
-			Int32 h8 = -f8;
-			Int32 h9 = -f9;
-			h = new fe();
-			h[0] = h0;
-			h[1] = h1;
-			h[2] = h2;
-			h[3] = h3;
-			h[4] = h4;
-			h[5] = h5;
-			h[6] = h6;
-			h[7] = h7;
-			h[8] = h8;
-			h[9] = h9;
-		}
 		static void select(out ge_precomp t, int pos, SByte b) {
 			ge_precomp minust;
 			Byte bnegative = negative(b);
 			Byte babs = (Byte)(b - (((-bnegative) & b) << 1));
-			ge_precomp_0(out t);
-			cmov(ref t, ref basev[pos][0], equal(babs, 1));
-			cmov(ref t, ref basev[pos][1], equal(babs, 2));
-			cmov(ref t, ref basev[pos][2], equal(babs, 3));
-			cmov(ref t, ref basev[pos][3], equal(babs, 4));
-			cmov(ref t, ref basev[pos][4], equal(babs, 5));
-			cmov(ref t, ref basev[pos][5], equal(babs, 6));
-			cmov(ref t, ref basev[pos][6], equal(babs, 7));
-			cmov(ref t, ref basev[pos][7], equal(babs, 8));
-			fe_copy(out minust.yplusx, ref t.yminusx);
-			fe_copy(out minust.yminusx, ref t.yplusx);
-			fe_neg(out minust.xy2d, ref t.xy2d);
-			cmov(ref t, ref minust, bnegative);
+			t = new ge_precomp();
+			t.set_zero(); // value that remains when babs matches none of 1..8
+			int basei = pos * 8; // index of the first of the 8 table entries for this position
+			t.cmov(ref basev[basei + 0], equal(babs, 1)); // all 8 entries are visited; equal() yields 1 only for entry babs - 1
+			t.cmov(ref basev[basei + 1], equal(babs, 2));
+			t.cmov(ref basev[basei + 2], equal(babs, 3));
+			t.cmov(ref basev[basei + 3], equal(babs, 4));
+			t.cmov(ref basev[basei + 4], equal(babs, 5));
+			t.cmov(ref basev[basei + 5], equal(babs, 6));
+			t.cmov(ref basev[basei + 6], equal(babs, 7));
+			t.cmov(ref basev[basei + 7], equal(babs, 8));
+			minust.yplusx = t.yminusx; // minust is t with yplusx and yminusx swapped ...
+			minust.yminusx = t.yplusx;
+			minust.xy2d = t.xy2d; minust.xy2d.neg(); // ... and xy2d negated
+			t.cmov(ref minust, bnegative); // bnegative is 1 when b < 0, so minust is passed with flag 1 exactly then
 		}
 		static void fe_add(out fe h, ref fe f, ref fe g) {
-			Int32 f0 = f[0];
-			Int32 f1 = f[1];
-			Int32 f2 = f[2];
-			Int32 f3 = f[3];
-			Int32 f4 = f[4];
-			Int32 f5 = f[5];
-			Int32 f6 = f[6];
-			Int32 f7 = f[7];
-			Int32 f8 = f[8];
-			Int32 f9 = f[9];
-			Int32 g0 = g[0];
-			Int32 g1 = g[1];
-			Int32 g2 = g[2];
-			Int32 g3 = g[3];
-			Int32 g4 = g[4];
-			Int32 g5 = g[5];
-			Int32 g6 = g[6];
-			Int32 g7 = g[7];
-			Int32 g8 = g[8];
-			Int32 g9 = g[9];
-			Int32 h0 = f0 + g0;
-			Int32 h1 = f1 + g1;
-			Int32 h2 = f2 + g2;
-			Int32 h3 = f3 + g3;
-			Int32 h4 = f4 + g4;
-			Int32 h5 = f5 + g5;
-			Int32 h6 = f6 + g6;
-			Int32 h7 = f7 + g7;
-			Int32 h8 = f8 + g8;
-			Int32 h9 = f9 + g9;
-			h = new fe();
-			h[0] = h0;
-			h[1] = h1;
-			h[2] = h2;
-			h[3] = h3;
-			h[4] = h4;
-			h[5] = h5;
-			h[6] = h6;
-			h[7] = h7;
-			h[8] = h8;
-			h[9] = h9;
+			fe r = f; // compute on a local copy of f
+			r.add(ref g); // r = f + g
+			h = r; // h is written last, after f and g have been read
 		}
 		static void fe_sub(out fe h, ref fe f, ref fe g) {
-			Int32 f0 = f[0];
-			Int32 f1 = f[1];
-			Int32 f2 = f[2];
-			Int32 f3 = f[3];
-			Int32 f4 = f[4];
-			Int32 f5 = f[5];
-			Int32 f6 = f[6];
-			Int32 f7 = f[7];
-			Int32 f8 = f[8];
-			Int32 f9 = f[9];
-			Int32 g0 = g[0];
-			Int32 g1 = g[1];
-			Int32 g2 = g[2];
-			Int32 g3 = g[3];
-			Int32 g4 = g[4];
-			Int32 g5 = g[5];
-			Int32 g6 = g[6];
-			Int32 g7 = g[7];
-			Int32 g8 = g[8];
-			Int32 g9 = g[9];
-			Int32 h0 = f0 - g0;
-			Int32 h1 = f1 - g1;
-			Int32 h2 = f2 - g2;
-			Int32 h3 = f3 - g3;
-			Int32 h4 = f4 - g4;
-			Int32 h5 = f5 - g5;
-			Int32 h6 = f6 - g6;
-			Int32 h7 = f7 - g7;
-			Int32 h8 = f8 - g8;
-			Int32 h9 = f9 - g9;
-			h = new fe();
-			h[0] = h0;
-			h[1] = h1;
-			h[2] = h2;
-			h[3] = h3;
-			h[4] = h4;
-			h[5] = h5;
-			h[6] = h6;
-			h[7] = h7;
-			h[8] = h8;
-			h[9] = h9;
+			fe r = f; // compute on a local copy of f
+			r.sub(ref g); // r = f - g
+			h = r; // h is written last, after f and g have been read
 		}
 		static void fe_mul(out fe h, ref fe f, ref fe g) {
-			Int32 f0 = f[0];
-			Int32 f1 = f[1];
-			Int32 f2 = f[2];
-			Int32 f3 = f[3];
-			Int32 f4 = f[4];
-			Int32 f5 = f[5];
-			Int32 f6 = f[6];
-			Int32 f7 = f[7];
-			Int32 f8 = f[8];
-			Int32 f9 = f[9];
-			Int32 g0 = g[0];
-			Int32 g1 = g[1];
-			Int32 g2 = g[2];
-			Int32 g3 = g[3];
-			Int32 g4 = g[4];
-			Int32 g5 = g[5];
-			Int32 g6 = g[6];
-			Int32 g7 = g[7];
-			Int32 g8 = g[8];
-			Int32 g9 = g[9];
-			Int32 g1_19 = 19 * g1; /* 1.959375*2^29 */
-			Int32 g2_19 = 19 * g2; /* 1.959375*2^30; still ok */
-			Int32 g3_19 = 19 * g3;
-			Int32 g4_19 = 19 * g4;
-			Int32 g5_19 = 19 * g5;
-			Int32 g6_19 = 19 * g6;
-			Int32 g7_19 = 19 * g7;
-			Int32 g8_19 = 19 * g8;
-			Int32 g9_19 = 19 * g9;
-			Int32 f1_2 = 2 * f1;
-			Int32 f3_2 = 2 * f3;
-			Int32 f5_2 = 2 * f5;
-			Int32 f7_2 = 2 * f7;
-			Int32 f9_2 = 2 * f9;
-			Int64 f0g0 = f0 * (Int64)g0;
-			Int64 f0g1 = f0 * (Int64)g1;
-			Int64 f0g2 = f0 * (Int64)g2;
-			Int64 f0g3 = f0 * (Int64)g3;
-			Int64 f0g4 = f0 * (Int64)g4;
-			Int64 f0g5 = f0 * (Int64)g5;
-			Int64 f0g6 = f0 * (Int64)g6;
-			Int64 f0g7 = f0 * (Int64)g7;
-			Int64 f0g8 = f0 * (Int64)g8;
-			Int64 f0g9 = f0 * (Int64)g9;
-			Int64 f1g0 = f1 * (Int64)g0;
-			Int64 f1g1_2 = f1_2 * (Int64)g1;
-			Int64 f1g2 = f1 * (Int64)g2;
-			Int64 f1g3_2 = f1_2 * (Int64)g3;
-			Int64 f1g4 = f1 * (Int64)g4;
-			Int64 f1g5_2 = f1_2 * (Int64)g5;
-			Int64 f1g6 = f1 * (Int64)g6;
-			Int64 f1g7_2 = f1_2 * (Int64)g7;
-			Int64 f1g8 = f1 * (Int64)g8;
-			Int64 f1g9_38 = f1_2 * (Int64)g9_19;
-			Int64 f2g0 = f2 * (Int64)g0;
-			Int64 f2g1 = f2 * (Int64)g1;
-			Int64 f2g2 = f2 * (Int64)g2;
-			Int64 f2g3 = f2 * (Int64)g3;
-			Int64 f2g4 = f2 * (Int64)g4;
-			Int64 f2g5 = f2 * (Int64)g5;
-			Int64 f2g6 = f2 * (Int64)g6;
-			Int64 f2g7 = f2 * (Int64)g7;
-			Int64 f2g8_19 = f2 * (Int64)g8_19;
-			Int64 f2g9_19 = f2 * (Int64)g9_19;
-			Int64 f3g0 = f3 * (Int64)g0;
-			Int64 f3g1_2 = f3_2 * (Int64)g1;
-			Int64 f3g2 = f3 * (Int64)g2;
-			Int64 f3g3_2 = f3_2 * (Int64)g3;
-			Int64 f3g4 = f3 * (Int64)g4;
-			Int64 f3g5_2 = f3_2 * (Int64)g5;
-			Int64 f3g6 = f3 * (Int64)g6;
-			Int64 f3g7_38 = f3_2 * (Int64)g7_19;
-			Int64 f3g8_19 = f3 * (Int64)g8_19;
-			Int64 f3g9_38 = f3_2 * (Int64)g9_19;
-			Int64 f4g0 = f4 * (Int64)g0;
-			Int64 f4g1 = f4 * (Int64)g1;
-			Int64 f4g2 = f4 * (Int64)g2;
-			Int64 f4g3 = f4 * (Int64)g3;
-			Int64 f4g4 = f4 * (Int64)g4;
-			Int64 f4g5 = f4 * (Int64)g5;
-			Int64 f4g6_19 = f4 * (Int64)g6_19;
-			Int64 f4g7_19 = f4 * (Int64)g7_19;
-			Int64 f4g8_19 = f4 * (Int64)g8_19;
-			Int64 f4g9_19 = f4 * (Int64)g9_19;
-			Int64 f5g0 = f5 * (Int64)g0;
-			Int64 f5g1_2 = f5_2 * (Int64)g1;
-			Int64 f5g2 = f5 * (Int64)g2;
-			Int64 f5g3_2 = f5_2 * (Int64)g3;
-			Int64 f5g4 = f5 * (Int64)g4;
-			Int64 f5g5_38 = f5_2 * (Int64)g5_19;
-			Int64 f5g6_19 = f5 * (Int64)g6_19;
-			Int64 f5g7_38 = f5_2 * (Int64)g7_19;
-			Int64 f5g8_19 = f5 * (Int64)g8_19;
-			Int64 f5g9_38 = f5_2 * (Int64)g9_19;
-			Int64 f6g0 = f6 * (Int64)g0;
-			Int64 f6g1 = f6 * (Int64)g1;
-			Int64 f6g2 = f6 * (Int64)g2;
-			Int64 f6g3 = f6 * (Int64)g3;
-			Int64 f6g4_19 = f6 * (Int64)g4_19;
-			Int64 f6g5_19 = f6 * (Int64)g5_19;
-			Int64 f6g6_19 = f6 * (Int64)g6_19;
-			Int64 f6g7_19 = f6 * (Int64)g7_19;
-			Int64 f6g8_19 = f6 * (Int64)g8_19;
-			Int64 f6g9_19 = f6 * (Int64)g9_19;
-			Int64 f7g0 = f7 * (Int64)g0;
-			Int64 f7g1_2 = f7_2 * (Int64)g1;
-			Int64 f7g2 = f7 * (Int64)g2;
-			Int64 f7g3_38 = f7_2 * (Int64)g3_19;
-			Int64 f7g4_19 = f7 * (Int64)g4_19;
-			Int64 f7g5_38 = f7_2 * (Int64)g5_19;
-			Int64 f7g6_19 = f7 * (Int64)g6_19;
-			Int64 f7g7_38 = f7_2 * (Int64)g7_19;
-			Int64 f7g8_19 = f7 * (Int64)g8_19;
-			Int64 f7g9_38 = f7_2 * (Int64)g9_19;
-			Int64 f8g0 = f8 * (Int64)g0;
-			Int64 f8g1 = f8 * (Int64)g1;
-			Int64 f8g2_19 = f8 * (Int64)g2_19;
-			Int64 f8g3_19 = f8 * (Int64)g3_19;
-			Int64 f8g4_19 = f8 * (Int64)g4_19;
-			Int64 f8g5_19 = f8 * (Int64)g5_19;
-			Int64 f8g6_19 = f8 * (Int64)g6_19;
-			Int64 f8g7_19 = f8 * (Int64)g7_19;
-			Int64 f8g8_19 = f8 * (Int64)g8_19;
-			Int64 f8g9_19 = f8 * (Int64)g9_19;
-			Int64 f9g0 = f9 * (Int64)g0;
-			Int64 f9g1_38 = f9_2 * (Int64)g1_19;
-			Int64 f9g2_19 = f9 * (Int64)g2_19;
-			Int64 f9g3_38 = f9_2 * (Int64)g3_19;
-			Int64 f9g4_19 = f9 * (Int64)g4_19;
-			Int64 f9g5_38 = f9_2 * (Int64)g5_19;
-			Int64 f9g6_19 = f9 * (Int64)g6_19;
-			Int64 f9g7_38 = f9_2 * (Int64)g7_19;
-			Int64 f9g8_19 = f9 * (Int64)g8_19;
-			Int64 f9g9_38 = f9_2 * (Int64)g9_19;
-			Int64 h0 = f0g0 + f1g9_38 + f2g8_19 + f3g7_38 + f4g6_19 + f5g5_38 + f6g4_19 + f7g3_38 + f8g2_19 + f9g1_38;
-			Int64 h1 = f0g1 + f1g0 + f2g9_19 + f3g8_19 + f4g7_19 + f5g6_19 + f6g5_19 + f7g4_19 + f8g3_19 + f9g2_19;
-			Int64 h2 = f0g2 + f1g1_2 + f2g0 + f3g9_38 + f4g8_19 + f5g7_38 + f6g6_19 + f7g5_38 + f8g4_19 + f9g3_38;
-			Int64 h3 = f0g3 + f1g2 + f2g1 + f3g0 + f4g9_19 + f5g8_19 + f6g7_19 + f7g6_19 + f8g5_19 + f9g4_19;
-			Int64 h4 = f0g4 + f1g3_2 + f2g2 + f3g1_2 + f4g0 + f5g9_38 + f6g8_19 + f7g7_38 + f8g6_19 + f9g5_38;
-			Int64 h5 = f0g5 + f1g4 + f2g3 + f3g2 + f4g1 + f5g0 + f6g9_19 + f7g8_19 + f8g7_19 + f9g6_19;
-			Int64 h6 = f0g6 + f1g5_2 + f2g4 + f3g3_2 + f4g2 + f5g1_2 + f6g0 + f7g9_38 + f8g8_19 + f9g7_38;
-			Int64 h7 = f0g7 + f1g6 + f2g5 + f3g4 + f4g3 + f5g2 + f6g1 + f7g0 + f8g9_19 + f9g8_19;
-			Int64 h8 = f0g8 + f1g7_2 + f2g6 + f3g5_2 + f4g4 + f5g3_2 + f6g2 + f7g1_2 + f8g0 + f9g9_38;
-			Int64 h9 = f0g9 + f1g8 + f2g7 + f3g6 + f4g5 + f5g4 + f6g3 + f7g2 + f8g1 + f9g0;
-			Int64 carry0;
-			Int64 carry1;
-			Int64 carry2;
-			Int64 carry3;
-			Int64 carry4;
-			Int64 carry5;
-			Int64 carry6;
-			Int64 carry7;
-			Int64 carry8;
-			Int64 carry9;
-			carry0 = (h0 + (Int64)(1 << 25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
-			carry4 = (h4 + (Int64)(1 << 25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
-			carry1 = (h1 + (Int64)(1 << 24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
-			carry5 = (h5 + (Int64)(1 << 24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
-			carry2 = (h2 + (Int64)(1 << 25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
-			carry6 = (h6 + (Int64)(1 << 25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
-			carry3 = (h3 + (Int64)(1 << 24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
-			carry7 = (h7 + (Int64)(1 << 24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
-			carry4 = (h4 + (Int64)(1 << 25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
-			carry8 = (h8 + (Int64)(1 << 25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
-			carry9 = (h9 + (Int64)(1 << 24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
-			carry0 = (h0 + (Int64)(1 << 25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
-			h = new fe();
-			h[0] = (Int32)h0;
-			h[1] = (Int32)h1;
-			h[2] = (Int32)h2;
-			h[3] = (Int32)h3;
-			h[4] = (Int32)h4;
-			h[5] = (Int32)h5;
-			h[6] = (Int32)h6;
-			h[7] = (Int32)h7;
-			h[8] = (Int32)h8;
-			h[9] = (Int32)h9;
+			fe r = f; // compute on a local copy of f
+			r.mul(ref g); // r = f * g
+			h = r; // h is written last, after f and g have been read
 		}
 		static void ge_madd(out ge_p1p1 r, ref ge_p3 p, ref ge_precomp q) {
 			fe t0;
@@ -1304,294 +1635,24 @@
 			fe_mul(out r.T, ref p.X, ref p.Y);
 		}
 		static void ge_p3_to_p2(out ge_p2 r, ref ge_p3 p) {
-			fe_copy(out r.X, ref p.X);
-			fe_copy(out r.Y, ref p.Y);
-			fe_copy(out r.Z, ref p.Z);
+			r.X = p.X; // plain assignment stands in for fe_copy (fe is used as a value type in this file); p.T is not carried over
+			r.Y = p.Y;
+			r.Z = p.Z;
 		}
 		static void fe_sq(out fe h, ref fe f) {
-			Int32 f0 = f[0];
-			Int32 f1 = f[1];
-			Int32 f2 = f[2];
-			Int32 f3 = f[3];
-			Int32 f4 = f[4];
-			Int32 f5 = f[5];
-			Int32 f6 = f[6];
-			Int32 f7 = f[7];
-			Int32 f8 = f[8];
-			Int32 f9 = f[9];
-			Int32 f0_2 = 2 * f0;
-			Int32 f1_2 = 2 * f1;
-			Int32 f2_2 = 2 * f2;
-			Int32 f3_2 = 2 * f3;
-			Int32 f4_2 = 2 * f4;
-			Int32 f5_2 = 2 * f5;
-			Int32 f6_2 = 2 * f6;
-			Int32 f7_2 = 2 * f7;
-			Int32 f5_38 = 38 * f5; /* 1.959375*2^30 */
-			Int32 f6_19 = 19 * f6; /* 1.959375*2^30 */
-			Int32 f7_38 = 38 * f7; /* 1.959375*2^30 */
-			Int32 f8_19 = 19 * f8; /* 1.959375*2^30 */
-			Int32 f9_38 = 38 * f9; /* 1.959375*2^30 */
-			Int64 f0f0 = f0 * (Int64)f0;
-			Int64 f0f1_2 = f0_2 * (Int64)f1;
-			Int64 f0f2_2 = f0_2 * (Int64)f2;
-			Int64 f0f3_2 = f0_2 * (Int64)f3;
-			Int64 f0f4_2 = f0_2 * (Int64)f4;
-			Int64 f0f5_2 = f0_2 * (Int64)f5;
-			Int64 f0f6_2 = f0_2 * (Int64)f6;
-			Int64 f0f7_2 = f0_2 * (Int64)f7;
-			Int64 f0f8_2 = f0_2 * (Int64)f8;
-			Int64 f0f9_2 = f0_2 * (Int64)f9;
-			Int64 f1f1_2 = f1_2 * (Int64)f1;
-			Int64 f1f2_2 = f1_2 * (Int64)f2;
-			Int64 f1f3_4 = f1_2 * (Int64)f3_2;
-			Int64 f1f4_2 = f1_2 * (Int64)f4;
-			Int64 f1f5_4 = f1_2 * (Int64)f5_2;
-			Int64 f1f6_2 = f1_2 * (Int64)f6;
-			Int64 f1f7_4 = f1_2 * (Int64)f7_2;
-			Int64 f1f8_2 = f1_2 * (Int64)f8;
-			Int64 f1f9_76 = f1_2 * (Int64)f9_38;
-			Int64 f2f2 = f2 * (Int64)f2;
-			Int64 f2f3_2 = f2_2 * (Int64)f3;
-			Int64 f2f4_2 = f2_2 * (Int64)f4;
-			Int64 f2f5_2 = f2_2 * (Int64)f5;
-			Int64 f2f6_2 = f2_2 * (Int64)f6;
-			Int64 f2f7_2 = f2_2 * (Int64)f7;
-			Int64 f2f8_38 = f2_2 * (Int64)f8_19;
-			Int64 f2f9_38 = f2 * (Int64)f9_38;
-			Int64 f3f3_2 = f3_2 * (Int64)f3;
-			Int64 f3f4_2 = f3_2 * (Int64)f4;
-			Int64 f3f5_4 = f3_2 * (Int64)f5_2;
-			Int64 f3f6_2 = f3_2 * (Int64)f6;
-			Int64 f3f7_76 = f3_2 * (Int64)f7_38;
-			Int64 f3f8_38 = f3_2 * (Int64)f8_19;
-			Int64 f3f9_76 = f3_2 * (Int64)f9_38;
-			Int64 f4f4 = f4 * (Int64)f4;
-			Int64 f4f5_2 = f4_2 * (Int64)f5;
-			Int64 f4f6_38 = f4_2 * (Int64)f6_19;
-			Int64 f4f7_38 = f4 * (Int64)f7_38;
-			Int64 f4f8_38 = f4_2 * (Int64)f8_19;
-			Int64 f4f9_38 = f4 * (Int64)f9_38;
-			Int64 f5f5_38 = f5 * (Int64)f5_38;
-			Int64 f5f6_38 = f5_2 * (Int64)f6_19;
-			Int64 f5f7_76 = f5_2 * (Int64)f7_38;
-			Int64 f5f8_38 = f5_2 * (Int64)f8_19;
-			Int64 f5f9_76 = f5_2 * (Int64)f9_38;
-			Int64 f6f6_19 = f6 * (Int64)f6_19;
-			Int64 f6f7_38 = f6 * (Int64)f7_38;
-			Int64 f6f8_38 = f6_2 * (Int64)f8_19;
-			Int64 f6f9_38 = f6 * (Int64)f9_38;
-			Int64 f7f7_38 = f7 * (Int64)f7_38;
-			Int64 f7f8_38 = f7_2 * (Int64)f8_19;
-			Int64 f7f9_76 = f7_2 * (Int64)f9_38;
-			Int64 f8f8_19 = f8 * (Int64)f8_19;
-			Int64 f8f9_38 = f8 * (Int64)f9_38;
-			Int64 f9f9_38 = f9 * (Int64)f9_38;
-			Int64 h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
-			Int64 h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
-			Int64 h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
-			Int64 h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
-			Int64 h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38;
-			Int64 h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
-			Int64 h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
-			Int64 h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
-			Int64 h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
-			Int64 h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
-			Int64 carry0;
-			Int64 carry1;
-			Int64 carry2;
-			Int64 carry3;
-			Int64 carry4;
-			Int64 carry5;
-			Int64 carry6;
-			Int64 carry7;
-			Int64 carry8;
-			Int64 carry9;
-
-			carry0 = (h0 + (Int64)(1 << 25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
-			carry4 = (h4 + (Int64)(1 << 25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
-
-			carry1 = (h1 + (Int64)(1 << 24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
-			carry5 = (h5 + (Int64)(1 << 24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
-
-			carry2 = (h2 + (Int64)(1 << 25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
-			carry6 = (h6 + (Int64)(1 << 25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
-
-			carry3 = (h3 + (Int64)(1 << 24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
-			carry7 = (h7 + (Int64)(1 << 24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
-
-			carry4 = (h4 + (Int64)(1 << 25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
-			carry8 = (h8 + (Int64)(1 << 25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
-
-			carry9 = (h9 + (Int64)(1 << 24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
-
-			carry0 = (h0 + (Int64)(1 << 25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
-
-			h = new fe();
-			h[0] = (Int32)h0;
-			h[1] = (Int32)h1;
-			h[2] = (Int32)h2;
-			h[3] = (Int32)h3;
-			h[4] = (Int32)h4;
-			h[5] = (Int32)h5;
-			h[6] = (Int32)h6;
-			h[7] = (Int32)h7;
-			h[8] = (Int32)h8;
-			h[9] = (Int32)h9;
-		}
-		static void fe_sq2(out fe h, ref fe f) {
-			Int32 f0 = f[0];
-			Int32 f1 = f[1];
-			Int32 f2 = f[2];
-			Int32 f3 = f[3];
-			Int32 f4 = f[4];
-			Int32 f5 = f[5];
-			Int32 f6 = f[6];
-			Int32 f7 = f[7];
-			Int32 f8 = f[8];
-			Int32 f9 = f[9];
-			Int32 f0_2 = 2 * f0;
-			Int32 f1_2 = 2 * f1;
-			Int32 f2_2 = 2 * f2;
-			Int32 f3_2 = 2 * f3;
-			Int32 f4_2 = 2 * f4;
-			Int32 f5_2 = 2 * f5;
-			Int32 f6_2 = 2 * f6;
-			Int32 f7_2 = 2 * f7;
-			Int32 f5_38 = 38 * f5; /* 1.959375*2^30 */
-			Int32 f6_19 = 19 * f6; /* 1.959375*2^30 */
-			Int32 f7_38 = 38 * f7; /* 1.959375*2^30 */
-			Int32 f8_19 = 19 * f8; /* 1.959375*2^30 */
-			Int32 f9_38 = 38 * f9; /* 1.959375*2^30 */
-			Int64 f0f0 = f0 * (Int64)f0;
-			Int64 f0f1_2 = f0_2 * (Int64)f1;
-			Int64 f0f2_2 = f0_2 * (Int64)f2;
-			Int64 f0f3_2 = f0_2 * (Int64)f3;
-			Int64 f0f4_2 = f0_2 * (Int64)f4;
-			Int64 f0f5_2 = f0_2 * (Int64)f5;
-			Int64 f0f6_2 = f0_2 * (Int64)f6;
-			Int64 f0f7_2 = f0_2 * (Int64)f7;
-			Int64 f0f8_2 = f0_2 * (Int64)f8;
-			Int64 f0f9_2 = f0_2 * (Int64)f9;
-			Int64 f1f1_2 = f1_2 * (Int64)f1;
-			Int64 f1f2_2 = f1_2 * (Int64)f2;
-			Int64 f1f3_4 = f1_2 * (Int64)f3_2;
-			Int64 f1f4_2 = f1_2 * (Int64)f4;
-			Int64 f1f5_4 = f1_2 * (Int64)f5_2;
-			Int64 f1f6_2 = f1_2 * (Int64)f6;
-			Int64 f1f7_4 = f1_2 * (Int64)f7_2;
-			Int64 f1f8_2 = f1_2 * (Int64)f8;
-			Int64 f1f9_76 = f1_2 * (Int64)f9_38;
-			Int64 f2f2 = f2 * (Int64)f2;
-			Int64 f2f3_2 = f2_2 * (Int64)f3;
-			Int64 f2f4_2 = f2_2 * (Int64)f4;
-			Int64 f2f5_2 = f2_2 * (Int64)f5;
-			Int64 f2f6_2 = f2_2 * (Int64)f6;
-			Int64 f2f7_2 = f2_2 * (Int64)f7;
-			Int64 f2f8_38 = f2_2 * (Int64)f8_19;
-			Int64 f2f9_38 = f2 * (Int64)f9_38;
-			Int64 f3f3_2 = f3_2 * (Int64)f3;
-			Int64 f3f4_2 = f3_2 * (Int64)f4;
-			Int64 f3f5_4 = f3_2 * (Int64)f5_2;
-			Int64 f3f6_2 = f3_2 * (Int64)f6;
-			Int64 f3f7_76 = f3_2 * (Int64)f7_38;
-			Int64 f3f8_38 = f3_2 * (Int64)f8_19;
-			Int64 f3f9_76 = f3_2 * (Int64)f9_38;
-			Int64 f4f4 = f4 * (Int64)f4;
-			Int64 f4f5_2 = f4_2 * (Int64)f5;
-			Int64 f4f6_38 = f4_2 * (Int64)f6_19;
-			Int64 f4f7_38 = f4 * (Int64)f7_38;
-			Int64 f4f8_38 = f4_2 * (Int64)f8_19;
-			Int64 f4f9_38 = f4 * (Int64)f9_38;
-			Int64 f5f5_38 = f5 * (Int64)f5_38;
-			Int64 f5f6_38 = f5_2 * (Int64)f6_19;
-			Int64 f5f7_76 = f5_2 * (Int64)f7_38;
-			Int64 f5f8_38 = f5_2 * (Int64)f8_19;
-			Int64 f5f9_76 = f5_2 * (Int64)f9_38;
-			Int64 f6f6_19 = f6 * (Int64)f6_19;
-			Int64 f6f7_38 = f6 * (Int64)f7_38;
-			Int64 f6f8_38 = f6_2 * (Int64)f8_19;
-			Int64 f6f9_38 = f6 * (Int64)f9_38;
-			Int64 f7f7_38 = f7 * (Int64)f7_38;
-			Int64 f7f8_38 = f7_2 * (Int64)f8_19;
-			Int64 f7f9_76 = f7_2 * (Int64)f9_38;
-			Int64 f8f8_19 = f8 * (Int64)f8_19;
-			Int64 f8f9_38 = f8 * (Int64)f9_38;
-			Int64 f9f9_38 = f9 * (Int64)f9_38;
-			Int64 h0 = f0f0 + f1f9_76 + f2f8_38 + f3f7_76 + f4f6_38 + f5f5_38;
-			Int64 h1 = f0f1_2 + f2f9_38 + f3f8_38 + f4f7_38 + f5f6_38;
-			Int64 h2 = f0f2_2 + f1f1_2 + f3f9_76 + f4f8_38 + f5f7_76 + f6f6_19;
-			Int64 h3 = f0f3_2 + f1f2_2 + f4f9_38 + f5f8_38 + f6f7_38;
-			Int64 h4 = f0f4_2 + f1f3_4 + f2f2 + f5f9_76 + f6f8_38 + f7f7_38;
-			Int64 h5 = f0f5_2 + f1f4_2 + f2f3_2 + f6f9_38 + f7f8_38;
-			Int64 h6 = f0f6_2 + f1f5_4 + f2f4_2 + f3f3_2 + f7f9_76 + f8f8_19;
-			Int64 h7 = f0f7_2 + f1f6_2 + f2f5_2 + f3f4_2 + f8f9_38;
-			Int64 h8 = f0f8_2 + f1f7_4 + f2f6_2 + f3f5_4 + f4f4 + f9f9_38;
-			Int64 h9 = f0f9_2 + f1f8_2 + f2f7_2 + f3f6_2 + f4f5_2;
-			Int64 carry0;
-			Int64 carry1;
-			Int64 carry2;
-			Int64 carry3;
-			Int64 carry4;
-			Int64 carry5;
-			Int64 carry6;
-			Int64 carry7;
-			Int64 carry8;
-			Int64 carry9;
-
-			h0 += h0;
-			h1 += h1;
-			h2 += h2;
-			h3 += h3;
-			h4 += h4;
-			h5 += h5;
-			h6 += h6;
-			h7 += h7;
-			h8 += h8;
-			h9 += h9;
-
-			carry0 = (h0 + (Int64)(1 << 25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
-			carry4 = (h4 + (Int64)(1 << 25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
-
-			carry1 = (h1 + (Int64)(1 << 24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
-			carry5 = (h5 + (Int64)(1 << 24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
-
-			carry2 = (h2 + (Int64)(1 << 25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
-			carry6 = (h6 + (Int64)(1 << 25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
-
-			carry3 = (h3 + (Int64)(1 << 24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
-			carry7 = (h7 + (Int64)(1 << 24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
-
-			carry4 = (h4 + (Int64)(1 << 25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
-			carry8 = (h8 + (Int64)(1 << 25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
-
-			carry9 = (h9 + (Int64)(1 << 24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
-
-			carry0 = (h0 + (Int64)(1 << 25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
-
-			h = new fe();
-			h[0] = (Int32)h0;
-			h[1] = (Int32)h1;
-			h[2] = (Int32)h2;
-			h[3] = (Int32)h3;
-			h[4] = (Int32)h4;
-			h[5] = (Int32)h5;
-			h[6] = (Int32)h6;
-			h[7] = (Int32)h7;
-			h[8] = (Int32)h8;
-			h[9] = (Int32)h9;
+			h = f; // copy the input first ...
+			h.sq(); // ... then square the copy in place: h = f^2
 		}
 		static void ge_p2_dbl(out ge_p1p1 r, ref ge_p2 p) {
-			fe t0;
-			fe_sq(out r.X, ref p.X);
-			fe_sq(out r.Z, ref p.Y);
-			fe_sq2(out r.T, ref p.Z);
-			fe_add(out r.Y, ref p.X, ref p.Y);
-			fe_sq(out t0, ref r.Y);
-			fe_add(out r.Y, ref r.Z, ref r.X);
-			fe_sub(out r.Z, ref r.Z, ref r.X);
-			fe_sub(out r.X, ref t0, ref r.Y);
-			fe_sub(out r.T, ref r.T, ref r.Z);
+			r.X = p.X; r.X.sq(); // r.X = X^2
+			r.Z = p.Y; r.Z.sq(); // r.Z = Y^2
+			r.T = p.Z; r.T.sq2(); // r.T = 2 * Z^2
+			r.Y = p.X; r.Y.add(ref p.Y); // r.Y = X + Y (temporary)
+			fe t0 = r.Y; t0.sq(); // t0 = (X + Y)^2
+			r.Y = r.Z; r.Y.add(ref r.X); // r.Y = Y^2 + X^2
+			r.Z.sub(ref r.X); // r.Z = Y^2 - X^2
+			r.X = t0; r.X.sub(ref r.Y); // r.X = (X + Y)^2 - (Y^2 + X^2)
+			r.T.sub(ref r.Z); // r.T = 2 * Z^2 - (Y^2 - X^2)
 		}
 		static void ge_p3_dbl(out ge_p1p1 r, ref ge_p3 p) {
 			ge_p2 q;
@@ -1620,7 +1681,8 @@
 			}
 			e[63] += carry;
 
-			ge_p3_0(out h);
+			h = new ge_p3();
+			h.set_zero();
 			ge_precomp t;
 			ge_p1p1 r;
 			for (int i = 1; i < 64; i += 2) {
@@ -1639,130 +1701,12 @@
 				ge_madd(out r, ref h, ref t); ge_p1p1_to_p3(out h, ref r);
 			}
 		}
-		static void fe_invert(out fe outv, ref fe z) {
-			fe t0;
-			fe t1;
-			fe t2;
-			fe t3;
-			fe_sq(out t0, ref z); for (int i = 1; i < 1; ++i) fe_sq(out t0, ref t0);
-			fe_sq(out t1, ref t0); for (int i = 1; i < 2; ++i) fe_sq(out t1, ref t1);
-			fe_mul(out t1, ref z, ref t1);
-			fe_mul(out t0, ref t0, ref t1);
-			fe_sq(out t2, ref t0); for (int i = 1; i < 1; ++i) fe_sq(out t2, ref t2);
-			fe_mul(out t1, ref t1, ref t2);
-			fe_sq(out t2, ref t1); for (int i = 1; i < 5; ++i) fe_sq(out t2, ref t2);
-			fe_mul(out t1, ref t2, ref t1);
-			fe_sq(out t2, ref t1); for (int i = 1; i < 10; ++i) fe_sq(out t2, ref t2);
-			fe_mul(out t2, ref t2, ref t1);
-			fe_sq(out t3, ref t2); for (int i = 1; i < 20; ++i) fe_sq(out t3, ref t3);
-			fe_mul(out t2, ref t3, ref t2);
-			fe_sq(out t2, ref t2); for (int i = 1; i < 10; ++i) fe_sq(out t2, ref t2);
-			fe_mul(out t1, ref t2, ref t1);
-			fe_sq(out t2, ref t1); for (int i = 1; i < 50; ++i) fe_sq(out t2, ref t2);
-			fe_mul(out t2, ref t2, ref t1);
-			fe_sq(out t3, ref t2); for (int i = 1; i < 100; ++i) fe_sq(out t3, ref t3);
-			fe_mul(out t2, ref t3, ref t2);
-			fe_sq(out t2, ref t2); for (int i = 1; i < 50; ++i) fe_sq(out t2, ref t2);
-			fe_mul(out t1, ref t2, ref t1);
-			fe_sq(out t1, ref t1); for (int i = 1; i < 5; ++i) fe_sq(out t1, ref t1);
-			fe_mul(out outv, ref t1, ref t0);
-		}
-		static unsafe void fe_tobytes(Byte* s, ref fe h) {
-			Int32 h0 = h[0];
-			Int32 h1 = h[1];
-			Int32 h2 = h[2];
-			Int32 h3 = h[3];
-			Int32 h4 = h[4];
-			Int32 h5 = h[5];
-			Int32 h6 = h[6];
-			Int32 h7 = h[7];
-			Int32 h8 = h[8];
-			Int32 h9 = h[9];
-			Int32 q;
-			Int32 carry0;
-			Int32 carry1;
-			Int32 carry2;
-			Int32 carry3;
-			Int32 carry4;
-			Int32 carry5;
-			Int32 carry6;
-			Int32 carry7;
-			Int32 carry8;
-			Int32 carry9;
-
-			q = (19 * h9 + (((Int32)1) << 24)) >> 25;
-			q = (h0 + q) >> 26;
-			q = (h1 + q) >> 25;
-			q = (h2 + q) >> 26;
-			q = (h3 + q) >> 25;
-			q = (h4 + q) >> 26;
-			q = (h5 + q) >> 25;
-			q = (h6 + q) >> 26;
-			q = (h7 + q) >> 25;
-			q = (h8 + q) >> 26;
-			q = (h9 + q) >> 25;
-
-			h0 += 19 * q;
-
-			carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 << 26;
-			carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 << 25;
-			carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 << 26;
-			carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 << 25;
-			carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 << 26;
-			carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 << 25;
-			carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 << 26;
-			carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 << 25;
-			carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 << 26;
-			carry9 = h9 >> 25; h9 -= carry9 << 25;
-			/* h10 = carry9 */
-
-			s[0] = (Byte)(h0 >> 0);
-			s[1] = (Byte)(h0 >> 8);
-			s[2] = (Byte)(h0 >> 16);
-			s[3] = (Byte)((h0 >> 24) | (h1 << 2));
-			s[4] = (Byte)(h1 >> 6);
-			s[5] = (Byte)(h1 >> 14);
-			s[6] = (Byte)((h1 >> 22) | (h2 << 3));
-			s[7] = (Byte)(h2 >> 5);
-			s[8] = (Byte)(h2 >> 13);
-			s[9] = (Byte)((h2 >> 21) | (h3 << 5));
-			s[10] = (Byte)(h3 >> 3);
-			s[11] = (Byte)(h3 >> 11);
-			s[12] = (Byte)((h3 >> 19) | (h4 << 6));
-			s[13] = (Byte)(h4 >> 2);
-			s[14] = (Byte)(h4 >> 10);
-			s[15] = (Byte)(h4 >> 18);
-			s[16] = (Byte)(h5 >> 0);
-			s[17] = (Byte)(h5 >> 8);
-			s[18] = (Byte)(h5 >> 16);
-			s[19] = (Byte)((h5 >> 24) | (h6 << 1));
-			s[20] = (Byte)(h6 >> 7);
-			s[21] = (Byte)(h6 >> 15);
-			s[22] = (Byte)((h6 >> 23) | (h7 << 3));
-			s[23] = (Byte)(h7 >> 5);
-			s[24] = (Byte)(h7 >> 13);
-			s[25] = (Byte)((h7 >> 21) | (h8 << 4));
-			s[26] = (Byte)(h8 >> 4);
-			s[27] = (Byte)(h8 >> 12);
-			s[28] = (Byte)((h8 >> 20) | (h9 << 6));
-			s[29] = (Byte)(h9 >> 2);
-			s[30] = (Byte)(h9 >> 10);
-			s[31] = (Byte)(h9 >> 18);
-		}
-		static unsafe int fe_isnegative(ref fe f) {
-			Byte* s = stackalloc Byte[32];
-			fe_tobytes(s, ref f);
-			return s[0] & 1;
-		}
 		static unsafe void ge_p3_tobytes(Byte* s, ref ge_p3 h) {
-			fe recip;
-			fe x;
-			fe y;
-			fe_invert(out recip, ref h.Z);
-			fe_mul(out x, ref h.X, ref recip);
-			fe_mul(out y, ref h.Y, ref recip);
-			fe_tobytes(s, ref y);
-			s[31] ^= (Byte)(fe_isnegative(ref x) << 7);
+			fe recip = h.Z; recip.invert(); // recip = 1 / Z
+			fe x = h.X; x.mul(ref recip); // x = X / Z
+			fe y = h.Y; y.mul(ref recip); // y = Y / Z
+			y.tobytes(s); // serialize y into s; s must have room for s[31], which is modified next
+			s[31] ^= (Byte)(x.isnegative() << 7); // fold the sign flag of x into the top bit of byte 31
 		}
 
 		public static unsafe void crypto_sign_seed_keypair(out Byte[] pk, out Byte[] sk, Byte[] seed) {
@@ -1791,20 +1735,11 @@
 			pk = UCIS.Util.ArrayUtil.Slice(sk, 32, 32);
 		}
 
-		static unsafe Int64 load_3(Byte* inv) {
-			UInt64 result;
-			result = (UInt64)inv[0];
-			result |= ((UInt64)inv[1]) << 8;
-			result |= ((UInt64)inv[2]) << 16;
-			return (Int64)result;
+		static unsafe UInt32 load_3(Byte* inv) { // little-endian 24-bit load from inv[0..2]
+			return (UInt32)inv[0] | ((UInt32)inv[1] << 8) | ((UInt32)inv[2] << 16);
 		}
-		static unsafe Int64 load_4(Byte* inv) {
-			UInt64 result;
-			result = (UInt64)inv[0];
-			result |= ((UInt64)inv[1]) << 8;
-			result |= ((UInt64)inv[2]) << 16;
-			result |= ((UInt64)inv[3]) << 24;
-			return (Int64)result;
+		static unsafe UInt32 load_4(Byte* inv) { // little-endian 32-bit load from inv[0..3]
+			return (UInt32)inv[0] | ((UInt32)inv[1] << 8) | ((UInt32)inv[2] << 16) | ((UInt32)inv[3] << 24);
 		}
 		static unsafe void sc_reduce(Byte* s) {
 			Int64 s0 = 2097151 & load_3(s);
@@ -1831,23 +1766,6 @@
 			Int64 s21 = 2097151 & (load_3(s + 55) >> 1);
 			Int64 s22 = 2097151 & (load_4(s + 57) >> 6);
 			Int64 s23 = (load_4(s + 60) >> 3);
-			Int64 carry0;
-			Int64 carry1;
-			Int64 carry2;
-			Int64 carry3;
-			Int64 carry4;
-			Int64 carry5;
-			Int64 carry6;
-			Int64 carry7;
-			Int64 carry8;
-			Int64 carry9;
-			Int64 carry10;
-			Int64 carry11;
-			Int64 carry12;
-			Int64 carry13;
-			Int64 carry14;
-			Int64 carry15;
-			Int64 carry16;
 
 			s11 += s23 * 666643;
 			s12 += s23 * 470296;
@@ -1864,23 +1782,20 @@
 			s14 += s22 * 136657;
 			s15 -= s22 * 683901;
 
-
 			s9 += s21 * 666643;
 			s10 += s21 * 470296;
 			s11 += s21 * 654183;
 			s12 -= s21 * 997805;
 			s13 += s21 * 136657;
 			s14 -= s21 * 683901;
-
-
+			
 			s8 += s20 * 666643;
 			s9 += s20 * 470296;
 			s10 += s20 * 654183;
 			s11 -= s20 * 997805;
 			s12 += s20 * 136657;
 			s13 -= s20 * 683901;
-
-
+			
 			s7 += s19 * 666643;
 			s8 += s19 * 470296;
 			s9 += s19 * 654183;
@@ -1895,18 +1810,18 @@
 			s10 += s18 * 136657;
 			s11 -= s18 * 683901;
 
-			carry6 = (s6 + (1 << 20)) >> 21; s7 += carry6; s6 -= carry6 << 21;
-			carry8 = (s8 + (1 << 20)) >> 21; s9 += carry8; s8 -= carry8 << 21;
-			carry10 = (s10 + (1 << 20)) >> 21; s11 += carry10; s10 -= carry10 << 21;
-			carry12 = (s12 + (1 << 20)) >> 21; s13 += carry12; s12 -= carry12 << 21;
-			carry14 = (s14 + (1 << 20)) >> 21; s15 += carry14; s14 -= carry14 << 21;
-			carry16 = (s16 + (1 << 20)) >> 21; s17 += carry16; s16 -= carry16 << 21;
+			Int64 carry6 = (s6 + (1 << 20)) >> 21; s7 += carry6; s6 -= carry6 << 21;
+			Int64 carry8 = (s8 + (1 << 20)) >> 21; s9 += carry8; s8 -= carry8 << 21;
+			Int64 carry10 = (s10 + (1 << 20)) >> 21; s11 += carry10; s10 -= carry10 << 21;
+			Int64 carry12 = (s12 + (1 << 20)) >> 21; s13 += carry12; s12 -= carry12 << 21;
+			Int64 carry14 = (s14 + (1 << 20)) >> 21; s15 += carry14; s14 -= carry14 << 21;
+			Int64 carry16 = (s16 + (1 << 20)) >> 21; s17 += carry16; s16 -= carry16 << 21;
 
-			carry7 = (s7 + (1 << 20)) >> 21; s8 += carry7; s7 -= carry7 << 21;
-			carry9 = (s9 + (1 << 20)) >> 21; s10 += carry9; s9 -= carry9 << 21;
-			carry11 = (s11 + (1 << 20)) >> 21; s12 += carry11; s11 -= carry11 << 21;
-			carry13 = (s13 + (1 << 20)) >> 21; s14 += carry13; s13 -= carry13 << 21;
-			carry15 = (s15 + (1 << 20)) >> 21; s16 += carry15; s15 -= carry15 << 21;
+			Int64 carry7 = (s7 + (1 << 20)) >> 21; s8 += carry7; s7 -= carry7 << 21;
+			Int64 carry9 = (s9 + (1 << 20)) >> 21; s10 += carry9; s9 -= carry9 << 21;
+			Int64 carry11 = (s11 + (1 << 20)) >> 21; s12 += carry11; s11 -= carry11 << 21;
+			Int64 carry13 = (s13 + (1 << 20)) >> 21; s14 += carry13; s13 -= carry13 << 21;
+			Int64 carry15 = (s15 + (1 << 20)) >> 21; s16 += carry15; s15 -= carry15 << 21;
 
 			s5 += s17 * 666643;
 			s6 += s17 * 470296;
@@ -1951,16 +1866,16 @@
 			s5 -= s12 * 683901;
 			s12 = 0;
 
-			carry0 = (s0 + (1 << 20)) >> 21; s1 += carry0; s0 -= carry0 << 21;
-			carry2 = (s2 + (1 << 20)) >> 21; s3 += carry2; s2 -= carry2 << 21;
-			carry4 = (s4 + (1 << 20)) >> 21; s5 += carry4; s4 -= carry4 << 21;
+			Int64 carry0 = (s0 + (1 << 20)) >> 21; s1 += carry0; s0 -= carry0 << 21;
+			Int64 carry2 = (s2 + (1 << 20)) >> 21; s3 += carry2; s2 -= carry2 << 21;
+			Int64 carry4 = (s4 + (1 << 20)) >> 21; s5 += carry4; s4 -= carry4 << 21;
 			carry6 = (s6 + (1 << 20)) >> 21; s7 += carry6; s6 -= carry6 << 21;
 			carry8 = (s8 + (1 << 20)) >> 21; s9 += carry8; s8 -= carry8 << 21;
 			carry10 = (s10 + (1 << 20)) >> 21; s11 += carry10; s10 -= carry10 << 21;
 
-			carry1 = (s1 + (1 << 20)) >> 21; s2 += carry1; s1 -= carry1 << 21;
-			carry3 = (s3 + (1 << 20)) >> 21; s4 += carry3; s3 -= carry3 << 21;
-			carry5 = (s5 + (1 << 20)) >> 21; s6 += carry5; s5 -= carry5 << 21;
+			Int64 carry1 = (s1 + (1 << 20)) >> 21; s2 += carry1; s1 -= carry1 << 21;
+			Int64 carry3 = (s3 + (1 << 20)) >> 21; s4 += carry3; s3 -= carry3 << 21;
+			Int64 carry5 = (s5 + (1 << 20)) >> 21; s6 += carry5; s5 -= carry5 << 21;
 			carry7 = (s7 + (1 << 20)) >> 21; s8 += carry7; s7 -= carry7 << 21;
 			carry9 = (s9 + (1 << 20)) >> 21; s10 += carry9; s9 -= carry9 << 21;
 			carry11 = (s11 + (1 << 20)) >> 21; s12 += carry11; s11 -= carry11 << 21;
@@ -1992,8 +1907,7 @@
 			s3 -= s12 * 997805;
 			s4 += s12 * 136657;
 			s5 -= s12 * 683901;
-
-
+			
 			carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 << 21;
 			carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 << 21;
 			carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 << 21;
@@ -2076,103 +1990,56 @@
 			Int64 c9 = 2097151 & (load_4(c + 23) >> 5);
 			Int64 c10 = 2097151 & (load_3(c + 26) >> 2);
 			Int64 c11 = (load_4(c + 28) >> 7);
-			Int64 s0;
-			Int64 s1;
-			Int64 s2;
-			Int64 s3;
-			Int64 s4;
-			Int64 s5;
-			Int64 s6;
-			Int64 s7;
-			Int64 s8;
-			Int64 s9;
-			Int64 s10;
-			Int64 s11;
-			Int64 s12;
-			Int64 s13;
-			Int64 s14;
-			Int64 s15;
-			Int64 s16;
-			Int64 s17;
-			Int64 s18;
-			Int64 s19;
-			Int64 s20;
-			Int64 s21;
-			Int64 s22;
-			Int64 s23;
-			Int64 carry0;
-			Int64 carry1;
-			Int64 carry2;
-			Int64 carry3;
-			Int64 carry4;
-			Int64 carry5;
-			Int64 carry6;
-			Int64 carry7;
-			Int64 carry8;
-			Int64 carry9;
-			Int64 carry10;
-			Int64 carry11;
-			Int64 carry12;
-			Int64 carry13;
-			Int64 carry14;
-			Int64 carry15;
-			Int64 carry16;
-			Int64 carry17;
-			Int64 carry18;
-			Int64 carry19;
-			Int64 carry20;
-			Int64 carry21;
-			Int64 carry22;
 
-			s0 = c0 + a0 * b0;
-			s1 = c1 + a0 * b1 + a1 * b0;
-			s2 = c2 + a0 * b2 + a1 * b1 + a2 * b0;
-			s3 = c3 + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0;
-			s4 = c4 + a0 * b4 + a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0;
-			s5 = c5 + a0 * b5 + a1 * b4 + a2 * b3 + a3 * b2 + a4 * b1 + a5 * b0;
-			s6 = c6 + a0 * b6 + a1 * b5 + a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 + a6 * b0;
-			s7 = c7 + a0 * b7 + a1 * b6 + a2 * b5 + a3 * b4 + a4 * b3 + a5 * b2 + a6 * b1 + a7 * b0;
-			s8 = c8 + a0 * b8 + a1 * b7 + a2 * b6 + a3 * b5 + a4 * b4 + a5 * b3 + a6 * b2 + a7 * b1 + a8 * b0;
-			s9 = c9 + a0 * b9 + a1 * b8 + a2 * b7 + a3 * b6 + a4 * b5 + a5 * b4 + a6 * b3 + a7 * b2 + a8 * b1 + a9 * b0;
-			s10 = c10 + a0 * b10 + a1 * b9 + a2 * b8 + a3 * b7 + a4 * b6 + a5 * b5 + a6 * b4 + a7 * b3 + a8 * b2 + a9 * b1 + a10 * b0;
-			s11 = c11 + a0 * b11 + a1 * b10 + a2 * b9 + a3 * b8 + a4 * b7 + a5 * b6 + a6 * b5 + a7 * b4 + a8 * b3 + a9 * b2 + a10 * b1 + a11 * b0;
-			s12 = a1 * b11 + a2 * b10 + a3 * b9 + a4 * b8 + a5 * b7 + a6 * b6 + a7 * b5 + a8 * b4 + a9 * b3 + a10 * b2 + a11 * b1;
-			s13 = a2 * b11 + a3 * b10 + a4 * b9 + a5 * b8 + a6 * b7 + a7 * b6 + a8 * b5 + a9 * b4 + a10 * b3 + a11 * b2;
-			s14 = a3 * b11 + a4 * b10 + a5 * b9 + a6 * b8 + a7 * b7 + a8 * b6 + a9 * b5 + a10 * b4 + a11 * b3;
-			s15 = a4 * b11 + a5 * b10 + a6 * b9 + a7 * b8 + a8 * b7 + a9 * b6 + a10 * b5 + a11 * b4;
-			s16 = a5 * b11 + a6 * b10 + a7 * b9 + a8 * b8 + a9 * b7 + a10 * b6 + a11 * b5;
-			s17 = a6 * b11 + a7 * b10 + a8 * b9 + a9 * b8 + a10 * b7 + a11 * b6;
-			s18 = a7 * b11 + a8 * b10 + a9 * b9 + a10 * b8 + a11 * b7;
-			s19 = a8 * b11 + a9 * b10 + a10 * b9 + a11 * b8;
-			s20 = a9 * b11 + a10 * b10 + a11 * b9;
-			s21 = a10 * b11 + a11 * b10;
-			s22 = a11 * b11;
-			s23 = 0;
+			Int64 s0 = c0 + a0 * b0;
+			Int64 s1 = c1 + a0 * b1 + a1 * b0;
+			Int64 s2 = c2 + a0 * b2 + a1 * b1 + a2 * b0;
+			Int64 s3 = c3 + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0;
+			Int64 s4 = c4 + a0 * b4 + a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0;
+			Int64 s5 = c5 + a0 * b5 + a1 * b4 + a2 * b3 + a3 * b2 + a4 * b1 + a5 * b0;
+			Int64 s6 = c6 + a0 * b6 + a1 * b5 + a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 + a6 * b0;
+			Int64 s7 = c7 + a0 * b7 + a1 * b6 + a2 * b5 + a3 * b4 + a4 * b3 + a5 * b2 + a6 * b1 + a7 * b0;
+			Int64 s8 = c8 + a0 * b8 + a1 * b7 + a2 * b6 + a3 * b5 + a4 * b4 + a5 * b3 + a6 * b2 + a7 * b1 + a8 * b0;
+			Int64 s9 = c9 + a0 * b9 + a1 * b8 + a2 * b7 + a3 * b6 + a4 * b5 + a5 * b4 + a6 * b3 + a7 * b2 + a8 * b1 + a9 * b0;
+			Int64 s10 = c10 + a0 * b10 + a1 * b9 + a2 * b8 + a3 * b7 + a4 * b6 + a5 * b5 + a6 * b4 + a7 * b3 + a8 * b2 + a9 * b1 + a10 * b0;
+			Int64 s11 = c11 + a0 * b11 + a1 * b10 + a2 * b9 + a3 * b8 + a4 * b7 + a5 * b6 + a6 * b5 + a7 * b4 + a8 * b3 + a9 * b2 + a10 * b1 + a11 * b0;
+			Int64 s12 = a1 * b11 + a2 * b10 + a3 * b9 + a4 * b8 + a5 * b7 + a6 * b6 + a7 * b5 + a8 * b4 + a9 * b3 + a10 * b2 + a11 * b1;
+			Int64 s13 = a2 * b11 + a3 * b10 + a4 * b9 + a5 * b8 + a6 * b7 + a7 * b6 + a8 * b5 + a9 * b4 + a10 * b3 + a11 * b2;
+			Int64 s14 = a3 * b11 + a4 * b10 + a5 * b9 + a6 * b8 + a7 * b7 + a8 * b6 + a9 * b5 + a10 * b4 + a11 * b3;
+			Int64 s15 = a4 * b11 + a5 * b10 + a6 * b9 + a7 * b8 + a8 * b7 + a9 * b6 + a10 * b5 + a11 * b4;
+			Int64 s16 = a5 * b11 + a6 * b10 + a7 * b9 + a8 * b8 + a9 * b7 + a10 * b6 + a11 * b5;
+			Int64 s17 = a6 * b11 + a7 * b10 + a8 * b9 + a9 * b8 + a10 * b7 + a11 * b6;
+			Int64 s18 = a7 * b11 + a8 * b10 + a9 * b9 + a10 * b8 + a11 * b7;
+			Int64 s19 = a8 * b11 + a9 * b10 + a10 * b9 + a11 * b8;
+			Int64 s20 = a9 * b11 + a10 * b10 + a11 * b9;
+			Int64 s21 = a10 * b11 + a11 * b10;
+			Int64 s22 = a11 * b11;
+			Int64 s23 = 0;
 
-			carry0 = (s0 + (1 << 20)) >> 21; s1 += carry0; s0 -= carry0 << 21;
-			carry2 = (s2 + (1 << 20)) >> 21; s3 += carry2; s2 -= carry2 << 21;
-			carry4 = (s4 + (1 << 20)) >> 21; s5 += carry4; s4 -= carry4 << 21;
-			carry6 = (s6 + (1 << 20)) >> 21; s7 += carry6; s6 -= carry6 << 21;
-			carry8 = (s8 + (1 << 20)) >> 21; s9 += carry8; s8 -= carry8 << 21;
-			carry10 = (s10 + (1 << 20)) >> 21; s11 += carry10; s10 -= carry10 << 21;
-			carry12 = (s12 + (1 << 20)) >> 21; s13 += carry12; s12 -= carry12 << 21;
-			carry14 = (s14 + (1 << 20)) >> 21; s15 += carry14; s14 -= carry14 << 21;
-			carry16 = (s16 + (1 << 20)) >> 21; s17 += carry16; s16 -= carry16 << 21;
-			carry18 = (s18 + (1 << 20)) >> 21; s19 += carry18; s18 -= carry18 << 21;
-			carry20 = (s20 + (1 << 20)) >> 21; s21 += carry20; s20 -= carry20 << 21;
-			carry22 = (s22 + (1 << 20)) >> 21; s23 += carry22; s22 -= carry22 << 21;
+			Int64 carry0 = (s0 + (1 << 20)) >> 21; s1 += carry0; s0 -= carry0 << 21;
+			Int64 carry2 = (s2 + (1 << 20)) >> 21; s3 += carry2; s2 -= carry2 << 21;
+			Int64 carry4 = (s4 + (1 << 20)) >> 21; s5 += carry4; s4 -= carry4 << 21;
+			Int64 carry6 = (s6 + (1 << 20)) >> 21; s7 += carry6; s6 -= carry6 << 21;
+			Int64 carry8 = (s8 + (1 << 20)) >> 21; s9 += carry8; s8 -= carry8 << 21;
+			Int64 carry10 = (s10 + (1 << 20)) >> 21; s11 += carry10; s10 -= carry10 << 21;
+			Int64 carry12 = (s12 + (1 << 20)) >> 21; s13 += carry12; s12 -= carry12 << 21;
+			Int64 carry14 = (s14 + (1 << 20)) >> 21; s15 += carry14; s14 -= carry14 << 21;
+			Int64 carry16 = (s16 + (1 << 20)) >> 21; s17 += carry16; s16 -= carry16 << 21;
+			Int64 carry18 = (s18 + (1 << 20)) >> 21; s19 += carry18; s18 -= carry18 << 21;
+			Int64 carry20 = (s20 + (1 << 20)) >> 21; s21 += carry20; s20 -= carry20 << 21;
+			Int64 carry22 = (s22 + (1 << 20)) >> 21; s23 += carry22; s22 -= carry22 << 21;
 
-			carry1 = (s1 + (1 << 20)) >> 21; s2 += carry1; s1 -= carry1 << 21;
-			carry3 = (s3 + (1 << 20)) >> 21; s4 += carry3; s3 -= carry3 << 21;
-			carry5 = (s5 + (1 << 20)) >> 21; s6 += carry5; s5 -= carry5 << 21;
-			carry7 = (s7 + (1 << 20)) >> 21; s8 += carry7; s7 -= carry7 << 21;
-			carry9 = (s9 + (1 << 20)) >> 21; s10 += carry9; s9 -= carry9 << 21;
-			carry11 = (s11 + (1 << 20)) >> 21; s12 += carry11; s11 -= carry11 << 21;
-			carry13 = (s13 + (1 << 20)) >> 21; s14 += carry13; s13 -= carry13 << 21;
-			carry15 = (s15 + (1 << 20)) >> 21; s16 += carry15; s15 -= carry15 << 21;
-			carry17 = (s17 + (1 << 20)) >> 21; s18 += carry17; s17 -= carry17 << 21;
-			carry19 = (s19 + (1 << 20)) >> 21; s20 += carry19; s19 -= carry19 << 21;
-			carry21 = (s21 + (1 << 20)) >> 21; s22 += carry21; s21 -= carry21 << 21;
+			Int64 carry1 = (s1 + (1 << 20)) >> 21; s2 += carry1; s1 -= carry1 << 21;
+			Int64 carry3 = (s3 + (1 << 20)) >> 21; s4 += carry3; s3 -= carry3 << 21;
+			Int64 carry5 = (s5 + (1 << 20)) >> 21; s6 += carry5; s5 -= carry5 << 21;
+			Int64 carry7 = (s7 + (1 << 20)) >> 21; s8 += carry7; s7 -= carry7 << 21;
+			Int64 carry9 = (s9 + (1 << 20)) >> 21; s10 += carry9; s9 -= carry9 << 21;
+			Int64 carry11 = (s11 + (1 << 20)) >> 21; s12 += carry11; s11 -= carry11 << 21;
+			Int64 carry13 = (s13 + (1 << 20)) >> 21; s14 += carry13; s13 -= carry13 << 21;
+			Int64 carry15 = (s15 + (1 << 20)) >> 21; s16 += carry15; s15 -= carry15 << 21;
+			Int64 carry17 = (s17 + (1 << 20)) >> 21; s18 += carry17; s17 -= carry17 << 21;
+			Int64 carry19 = (s19 + (1 << 20)) >> 21; s20 += carry19; s19 -= carry19 << 21;
+			Int64 carry21 = (s21 + (1 << 20)) >> 21; s22 += carry21; s21 -= carry21 << 21;
 
 			s11 += s23 * 666643;
 			s12 += s23 * 470296;
@@ -2181,47 +2048,41 @@
 			s15 += s23 * 136657;
 			s16 -= s23 * 683901;
 
-
 			s10 += s22 * 666643;
 			s11 += s22 * 470296;
 			s12 += s22 * 654183;
 			s13 -= s22 * 997805;
 			s14 += s22 * 136657;
 			s15 -= s22 * 683901;
-
-
+			
 			s9 += s21 * 666643;
 			s10 += s21 * 470296;
 			s11 += s21 * 654183;
 			s12 -= s21 * 997805;
 			s13 += s21 * 136657;
 			s14 -= s21 * 683901;
-
-
+			
 			s8 += s20 * 666643;
 			s9 += s20 * 470296;
 			s10 += s20 * 654183;
 			s11 -= s20 * 997805;
 			s12 += s20 * 136657;
 			s13 -= s20 * 683901;
-
-
+			
 			s7 += s19 * 666643;
 			s8 += s19 * 470296;
 			s9 += s19 * 654183;
 			s10 -= s19 * 997805;
 			s11 += s19 * 136657;
 			s12 -= s19 * 683901;
-
-
+			
 			s6 += s18 * 666643;
 			s7 += s18 * 470296;
 			s8 += s18 * 654183;
 			s9 -= s18 * 997805;
 			s10 += s18 * 136657;
 			s11 -= s18 * 683901;
-
-
+			
 			carry6 = (s6 + (1 << 20)) >> 21; s7 += carry6; s6 -= carry6 << 21;
 			carry8 = (s8 + (1 << 20)) >> 21; s9 += carry8; s8 -= carry8 << 21;
 			carry10 = (s10 + (1 << 20)) >> 21; s11 += carry10; s10 -= carry10 << 21;
@@ -2241,24 +2102,21 @@
 			s8 -= s17 * 997805;
 			s9 += s17 * 136657;
 			s10 -= s17 * 683901;
-
-
+			
 			s4 += s16 * 666643;
 			s5 += s16 * 470296;
 			s6 += s16 * 654183;
 			s7 -= s16 * 997805;
 			s8 += s16 * 136657;
 			s9 -= s16 * 683901;
-
-
+			
 			s3 += s15 * 666643;
 			s4 += s15 * 470296;
 			s5 += s15 * 654183;
 			s6 -= s15 * 997805;
 			s7 += s15 * 136657;
 			s8 -= s15 * 683901;
-
-
+			
 			s2 += s14 * 666643;
 			s3 += s14 * 470296;
 			s4 += s14 * 654183;
@@ -2266,15 +2124,13 @@
 			s6 += s14 * 136657;
 			s7 -= s14 * 683901;
 
-
 			s1 += s13 * 666643;
 			s2 += s13 * 470296;
 			s3 += s13 * 654183;
 			s4 -= s13 * 997805;
 			s5 += s13 * 136657;
 			s6 -= s13 * 683901;
-
-
+			
 			s0 += s12 * 666643;
 			s1 += s12 * 470296;
 			s2 += s12 * 654183;
@@ -2324,8 +2180,7 @@
 			s3 -= s12 * 997805;
 			s4 += s12 * 136657;
 			s5 -= s12 * 683901;
-
-
+			
 			carry0 = s0 >> 21; s1 += carry0; s0 -= carry0 << 21;
 			carry1 = s1 >> 21; s2 += carry1; s1 -= carry1 << 21;
 			carry2 = s2 >> 21; s3 += carry2; s2 -= carry2 << 21;
@@ -2371,119 +2226,49 @@
 			s[30] = (Byte)(s11 >> 9);
 			s[31] = (Byte)(s11 >> 17);
 		}
-		public static unsafe void crypto_sign(Byte[] sm, int smoffset, out UInt64 smlen, Byte[] m, int moffset, UInt64 mlen, Byte[] sk) {
+		public static unsafe void crypto_sign(Byte[] sm, int smoffset, out int smlen, Byte[] m, int moffset, int mlen, Byte[] sk) { // Array wrapper: checks buffer sizes, pins sm/m/sk and forwards to the pointer overload; lengths are now int instead of UInt64.
 			if (smoffset + (int)mlen + 64 > sm.Length) throw new ArgumentException("signed message buffer is too small"); // sm must hold mlen + 64 bytes from smoffset. NOTE(review): negative smoffset/mlen and Int32 overflow of this sum are not rejected - confirm callers
 			if (moffset + (int)mlen > m.Length) throw new ArgumentException("message buffer is too small"); // the (int) casts are leftovers from the UInt64 signature; NOTE(review): negative moffset is not rejected
 			if (64 > sk.Length) throw new ArgumentException("key buffer is too small"); // sk must provide at least 64 bytes
 			fixed (Byte* smp = sm, mp = m, skp = sk) crypto_sign(smp + smoffset, out smlen, mp + moffset, mlen, skp); // arrays stay pinned only for the duration of the call
 		}
-		public static unsafe void crypto_sign(Byte* sm, out UInt64 smlen, Byte* m, UInt64 mlen, Byte* sk) {
+		public static unsafe void crypto_sign(Byte* sm, out int smlen, Byte* m, int mlen, Byte* sk) { // Builds the signed message at sm: a 64-byte signature slot followed by a copy of the mlen message bytes.
+			smlen = mlen + 64; // total size of the signed message
+			for (int i = 0; i < mlen; ++i) sm[64 + i] = m[i]; // message is copied behind the signature slot
+			crypto_getsignature(sm, m, mlen, sk); // signature is computed from m (not from the copy in sm) and written to the start of sm. NOTE(review): if m overlaps sm the message may be clobbered before it is hashed - confirm callers never sign in place
+		}
+		public static unsafe void crypto_getsignature(Byte* sig, Byte* m, int mlen, Byte* sk) { // Computes a detached signature over the mlen bytes at m with the 64-byte key sk and writes it to sig (sig[0..31] via ge_p3_tobytes, sig[32..63] via sc_muladd).
 			Byte* az = stackalloc Byte[64];
-			Byte* r = stackalloc Byte[64];
-			Byte* hram = stackalloc Byte[64];
-
-			crypto_hash.sha512.crypto_hash(az, sk, 32);
+			sha512.crypto_hash(az, sk, 32); // az = hash of the first 32 bytes of sk
 			az[0] &= 248;
 			az[31] &= 63;
 			az[31] |= 64;
 
-			smlen = mlen + 64;
-			for (UInt64 i = 0; i < mlen; ++i) sm[64 + i] = m[i];
-			for (UInt64 i = 0; i < 32; ++i) sm[32 + i] = az[32 + i];
-			crypto_hash.sha512.crypto_hash(r, sm + 32, mlen + 32);
-			for (UInt64 i = 0; i < 32; ++i) sm[32 + i] = sk[32 + i];
+			sha512.sha512state hashstate = new sha512.sha512state(); // incremental hashing replaces the old approach of staging the hash input inside the sm buffer
+			hashstate.init();
+			hashstate.process(az + 32, 32); // first hash input: az[32..63] followed by the message
+			hashstate.process(m, (int)mlen); // the (int) cast is a leftover from the UInt64 signature
+			Byte* r = stackalloc Byte[64];
+			hashstate.finish(r); // r receives the 64-byte digest
 
 			sc_reduce(r);
 			ge_p3 R;
 			ge_scalarmult_base(out R, r);
-			ge_p3_tobytes(sm, ref R);
+			ge_p3_tobytes(sig, ref R); // encoded R becomes the first half of the signature
 
-			crypto_hash.sha512.crypto_hash(hram, sm, mlen + 64);
+			hashstate.init(); // reuse the same state object for the second hash
+			hashstate.process(sig, 32); // second hash input: sig[0..31], then sk[32..63], then the message
+			hashstate.process(sk + 32, 32);
+			hashstate.process(m, (int)mlen);
+			Byte* hram = stackalloc Byte[64];
+			hashstate.finish(hram);
 			sc_reduce(hram);
-			sc_muladd(sm + 32, hram, az, r);
+			sc_muladd(sig + 32, hram, az, r); // result becomes the second half of the signature
 		}
 
-		static unsafe void fe_frombytes(out fe h, Byte* s) {
-			Int64 h0 = load_4(s);
-			Int64 h1 = load_3(s + 4) << 6;
-			Int64 h2 = load_3(s + 7) << 5;
-			Int64 h3 = load_3(s + 10) << 3;
-			Int64 h4 = load_3(s + 13) << 2;
-			Int64 h5 = load_4(s + 16);
-			Int64 h6 = load_3(s + 20) << 7;
-			Int64 h7 = load_3(s + 23) << 5;
-			Int64 h8 = load_3(s + 26) << 4;
-			Int64 h9 = (load_3(s + 29) & 8388607) << 2;
-			Int64 carry0;
-			Int64 carry1;
-			Int64 carry2;
-			Int64 carry3;
-			Int64 carry4;
-			Int64 carry5;
-			Int64 carry6;
-			Int64 carry7;
-			Int64 carry8;
-			Int64 carry9;
-
-			carry9 = (h9 + (Int64)(1 << 24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
-			carry1 = (h1 + (Int64)(1 << 24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
-			carry3 = (h3 + (Int64)(1 << 24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
-			carry5 = (h5 + (Int64)(1 << 24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
-			carry7 = (h7 + (Int64)(1 << 24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
-
-			carry0 = (h0 + (Int64)(1 << 25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
-			carry2 = (h2 + (Int64)(1 << 25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
-			carry4 = (h4 + (Int64)(1 << 25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
-			carry6 = (h6 + (Int64)(1 << 25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
-			carry8 = (h8 + (Int64)(1 << 25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
+		static fe d = new fe(0, new Int32[10] { -10913610, 13857413, -15372611, 6949391, 114729, -8787816, -6275908, -3247719, -18696448, -12055116 });
+		static fe sqrtm1 = new fe(0, new Int32[10] { -32595792, -7943725, 9377950, 3500415, 12389472, -272473, -25146209, -2005654, 326686, 11406482 });
 
-			h = new fe();
-			h[0] = (Int32)h0;
-			h[1] = (Int32)h1;
-			h[2] = (Int32)h2;
-			h[3] = (Int32)h3;
-			h[4] = (Int32)h4;
-			h[5] = (Int32)h5;
-			h[6] = (Int32)h6;
-			h[7] = (Int32)h7;
-			h[8] = (Int32)h8;
-			h[9] = (Int32)h9;
-		}
-		static Byte[] zero = new Byte[32];
-		static unsafe int fe_isnonzero(ref fe f) {
-			Byte* s = stackalloc Byte[32];
-			fe_tobytes(s, ref f);
-			fixed (Byte* zerop = zero) return crypto_verify._32.crypto_verify(s, zerop);
-		}
-		static fe d = fe_unpack(0, new Int32[10] { -10913610, 13857413, -15372611, 6949391, 114729, -8787816, -6275908, -3247719, -18696448, -12055116 });
-		static fe sqrtm1 = fe_unpack(0, new Int32[10] { -32595792, -7943725, 9377950, 3500415, 12389472, -272473, -25146209, -2005654, 326686, 11406482 });
-		static void fe_pow22523(out fe outv, ref fe z) {
-			fe t0;
-			fe t1;
-			fe t2;
-			fe_sq(out t0, ref z); for (int i = 1; i < 1; ++i) fe_sq(out t0, ref t0);
-			fe_sq(out t1, ref t0); for (int i = 1; i < 2; ++i) fe_sq(out t1, ref t1);
-			fe_mul(out t1, ref z, ref t1);
-			fe_mul(out t0, ref t0, ref t1);
-			fe_sq(out t0, ref t0); for (int i = 1; i < 1; ++i) fe_sq(out t0, ref t0);
-			fe_mul(out t0, ref t1, ref t0);
-			fe_sq(out t1, ref t0); for (int i = 1; i < 5; ++i) fe_sq(out t1, ref t1);
-			fe_mul(out t0, ref t1, ref t0);
-			fe_sq(out t1, ref t0); for (int i = 1; i < 10; ++i) fe_sq(out t1, ref t1);
-			fe_mul(out t1, ref t1, ref t0);
-			fe_sq(out t2, ref t1); for (int i = 1; i < 20; ++i) fe_sq(out t2, ref t2);
-			fe_mul(out t1, ref t2, ref t1);
-			fe_sq(out t1, ref t1); for (int i = 1; i < 10; ++i) fe_sq(out t1, ref t1);
-			fe_mul(out t0, ref t1, ref t0);
-			fe_sq(out t1, ref t0); for (int i = 1; i < 50; ++i) fe_sq(out t1, ref t1);
-			fe_mul(out t1, ref t1, ref t0);
-			fe_sq(out t2, ref t1); for (int i = 1; i < 100; ++i) fe_sq(out t2, ref t2);
-			fe_mul(out t1, ref t2, ref t1);
-			fe_sq(out t1, ref t1); for (int i = 1; i < 50; ++i) fe_sq(out t1, ref t1);
-			fe_mul(out t0, ref t1, ref t0);
-			fe_sq(out t0, ref t0); for (int i = 1; i < 2; ++i) fe_sq(out t0, ref t0);
-			fe_mul(out outv, ref t0, ref z);
-		}
 		static unsafe Boolean ge_frombytes_negate_vartime(out ge_p3 h, Byte* s) {
 			fe u;
 			fe v;
@@ -2491,44 +2276,39 @@
 			fe vxx;
 			fe check;
 
-			fe_frombytes(out h.Y, s);
-			fe_1(out h.Z);
+			h = new ge_p3();
+
+			h.Y.frombytes(s);
+			h.Z.set_one();
 			fe_sq(out u, ref h.Y);
 			fe_mul(out v, ref u, ref d);
-			fe_sub(out u, ref u, ref h.Z); /* u = y^2-1 */
-			fe_add(out v, ref v, ref h.Z); /* v = dy^2+1 */
+			u.sub(ref h.Z); /* u = y^2-1 */
+			v.add(ref h.Z); /* v = dy^2+1 */
 
 			fe_sq(out v3, ref v);
-			fe_mul(out v3, ref v3, ref v); /* v3 = v^3 */
+			v3.mul(ref v); /* v3 = v^3 */
 			fe_sq(out h.X, ref v3);
-			fe_mul(out h.X, ref h.X, ref v);
-			fe_mul(out h.X, ref h.X, ref u); /* x = uv^7 */
+			h.X.mul(ref v);
+			h.X.mul(ref u); /* x = uv^7 */
 
-			fe_pow22523(out h.X, ref h.X); /* x = (uv^7)^((q-5)/8) */
-			fe_mul(out h.X, ref h.X, ref v3);
-			fe_mul(out h.X, ref h.X, ref u); /* x = uv^3(uv^7)^((q-5)/8) */
+			h.X.pow22523(); /* x = (uv^7)^((q-5)/8) */
+			h.X.mul(ref v3);
+			h.X.mul(ref u); /* x = uv^3(uv^7)^((q-5)/8) */
 
 			fe_sq(out vxx, ref h.X);
-			fe_mul(out vxx, ref vxx, ref v);
+			vxx.mul(ref v);
 			fe_sub(out check, ref vxx, ref u); /* vx^2-u */
-			if (fe_isnonzero(ref check) != 0) {
+			if (check.isnonzero() != 0) {
 				fe_add(out check, ref vxx, ref u); /* vx^2+u */
-				if (fe_isnonzero(ref check) != 0) return false;
-				fe_mul(out h.X, ref h.X, ref sqrtm1);
+				if (check.isnonzero() != 0) return false;
+				h.X.mul(ref sqrtm1);
 			}
 
-			if (fe_isnegative(ref h.X) == (s[31] >> 7))
-				fe_neg(out h.X, ref h.X);
+			if (h.X.isnegative() == (s[31] >> 7)) h.X.neg();
 
 			fe_mul(out h.T, ref h.X, ref h.Y);
 			return true;
 		}
-		struct ge_cached {
-			public fe YplusX;
-			public fe YminusX;
-			public fe Z;
-			public fe T2d;
-		}
 		static unsafe void slide(SByte* r, Byte* a) {
 			for (int i = 0; i < 256; ++i) r[i] = (SByte)(1 & (a[i >> 3] >> (i & 7)));
 
@@ -2553,7 +2333,7 @@
 					}
 				}
 		}
-		static ge_precomp[] Bi = base_unpack(8, 0, new Int32[8 * 3 * 10] {
+		static ge_precomp[] Bi = base_unpack(8, new Int32[8 * 3 * 10] {
 #region Base number
 25967493,-14356035,29566456,3660896,-12694345,4014787,27544626,-11754271,-6079156,2047605,
 -12545711,934262,-2722910,3049990,-727428,9406986,12720692,5043384,19500929,-15469378,
@@ -2581,11 +2361,11 @@
 -3099351,10324967,-2241613,7453183,-5446979,-2735503,-13812022,-16236442,-32461234,-12290683,
 #endregion
 });
-		static fe d2 = fe_unpack(0, new Int32[10] { -21827239, -5839606, -30745221, 13898782, 229458, 15978800, -12551817, -6495438, 29715968, 9444199 });
+		static fe d2 = new fe(0, new Int32[10] { -21827239, -5839606, -30745221, 13898782, 229458, 15978800, -12551817, -6495438, 29715968, 9444199 });
 		static void ge_p3_to_cached(out ge_cached r, ref ge_p3 p) { // Fills r from p: YplusX = Y + X, YminusX = Y - X, Z = Z, T2d = T * d2.
 			fe_add(out r.YplusX, ref p.Y, ref p.X);
 			fe_sub(out r.YminusX, ref p.Y, ref p.X);
-			fe_copy(out r.Z, ref p.Z);
+			r.Z = p.Z; // plain assignment replaces fe_copy; presumably fe is a struct so this copies the value - confirm
 			fe_mul(out r.T2d, ref p.T, ref d2);
 		}
 		static void ge_add(out ge_p1p1 r, ref ge_p3 p, ref ge_cached q) {
@@ -2602,11 +2382,6 @@
 			fe_add(out r.Z, ref t0, ref r.T);
 			fe_sub(out r.T, ref t0, ref r.T);
 		}
-		static void ge_p2_0(out ge_p2 h) {
-			fe_0(out h.X);
-			fe_1(out h.Y);
-			fe_1(out h.Z);
-		}
 		static void ge_sub(out ge_p1p1 r, ref ge_p3 p, ref ge_cached q) {
 			fe t0;
 			fe_add(out r.X, ref p.Y, ref p.X);
@@ -2641,7 +2416,6 @@
 			ge_p1p1 t;
 			ge_p3 u;
 			ge_p3 A2;
-			int i;
 
 			slide(aslide, a);
 			slide(bslide, b);
@@ -2656,13 +2430,11 @@
 			ge_add(out t, ref A2, ref Ai[5]); ge_p1p1_to_p3(out u, ref t); ge_p3_to_cached(out Ai[6], ref u);
 			ge_add(out t, ref A2, ref Ai[6]); ge_p1p1_to_p3(out u, ref t); ge_p3_to_cached(out Ai[7], ref u);
 
-			ge_p2_0(out r);
+			r = new ge_p2(); r.set_zero();
 
-			for (i = 255; i >= 0; --i) {
-				if ((aslide[i] != 0) || (bslide[i] != 0)) break;
-			}
-
-			for (; i >= 0; --i) {
+			int i;
+			for (i = 255; i >= 0 && aslide[i] == 0 && bslide[i] == 0; i--);
+			for (; i >= 0; i--) {
 				ge_p2_dbl(out t, ref r);
 
 				if (aslide[i] > 0) {
@@ -2685,48 +2457,44 @@
 			}
 		}
 		static unsafe void ge_tobytes(Byte* s, ref ge_p2 h) { // Encodes h into the buffer at s: the bytes of Y*invert(Z), with bit 7 of s[31] XORed with the sign flag of X*invert(Z); same steps as ge_p3_tobytes but for ge_p2.
-			fe recip;
-			fe x;
-			fe y;
-			fe_invert(out recip, ref h.Z);
-			fe_mul(out x, ref h.X, ref recip);
-			fe_mul(out y, ref h.Y, ref recip);
-			fe_tobytes(s, ref y);
-			s[31] ^= (Byte)(fe_isnegative(ref x) << 7);
+			fe recip = h.Z; recip.invert(); // in-place form of fe_invert(out recip, ref h.Z); presumably fe is a value type so h.Z itself is not modified - confirm
+			fe x = h.X; x.mul(ref recip); // x = X * recip
+			fe y = h.Y; y.mul(ref recip); // y = Y * recip
+			y.tobytes(s); // serialize y into s
+			s[31] ^= (Byte)(x.isnegative() << 7); // fold the 0/1 result of isnegative(x) into the top bit of the last byte
 		}
-		public static unsafe Boolean crypto_sign_open(Byte[] m, int moffset, out UInt64 mlen, Byte[] sm, int smoffset, UInt64 smlen, Byte[] pk) {
+		public static unsafe Boolean crypto_sign_open(Byte[] m, int moffset, out int mlen, Byte[] sm, int smoffset, int smlen, Byte[] pk) { // Array wrapper: checks buffer sizes, pins m/sm/pk and forwards to the pointer overload; lengths are now int instead of UInt64.
 			if (smoffset + (int)smlen > sm.Length) throw new ArgumentException("signed message buffer is too small"); // NOTE(review): negative smoffset and Int32 overflow of this sum are not rejected - confirm callers
 			if (moffset + (int)smlen > m.Length) throw new ArgumentException("message buffer is too small"); // still demands smlen bytes in m, although the pointer overload now writes only smlen - 64
 			if (32 > pk.Length) throw new ArgumentException("key buffer is too small"); // pk must provide at least 32 bytes
 			fixed (Byte* mp = m, smp = sm, pkp = pk) return crypto_sign_open(mp + moffset, out mlen, smp + smoffset, smlen, pkp); // arrays stay pinned only for the duration of the call
 		}
-		public static unsafe Boolean crypto_sign_open(Byte* m, out UInt64 mlen, Byte* sm, UInt64 smlen, Byte* pk) {
-			Byte* h = stackalloc Byte[64];
-			Byte* checkr = stackalloc Byte[32];
-			ge_p3 A;
-			ge_p2 R;
-
-			mlen = unchecked((UInt64)(-1));
-			if (smlen < 64) return false;
-			if ((sm[63] & 224) != 0) return false;
-			if (!ge_frombytes_negate_vartime(out A, pk)) return false;
-
-			for (UInt64 i = 0; i < smlen; ++i) m[i] = sm[i];
-			for (UInt64 i = 0; i < 32; ++i) m[32 + i] = pk[i];
-			crypto_hash.sha512.crypto_hash(h, m, smlen);
-			sc_reduce(h);
-
-			ge_double_scalarmult_vartime(out R, h, ref A, sm + 32);
-			ge_tobytes(checkr, ref R);
-			if (crypto_verify._32.crypto_verify(checkr, sm) != 0) {
-				for (UInt64 i = 0; i < smlen; ++i) m[i] = 0;
-				return false;
-			}
-
-			for (UInt64 i = 0; i < smlen - 64; ++i) m[i] = sm[64 + i];
-			for (UInt64 i = smlen - 64; i < smlen; ++i) m[i] = 0;
+		public static unsafe Boolean crypto_sign_open(Byte* m, out int mlen, Byte* sm, int smlen, Byte* pk) { // Verifies sm (64 signature bytes followed by the message) against pk; on success copies the message to m, sets mlen = smlen - 64 and returns true.
+			mlen = 0; // value reported on failure; the removed code reported (UInt64)(-1) instead
+			if (!crypto_sign_verify(sm, sm + 64, (int)smlen - 64, pk)) return false; // smlen < 64 yields a negative length, which crypto_sign_verify rejects
+			for (int i = 0; i < smlen - 64; ++i) m[i] = sm[64 + i]; // m is written only after verification succeeded; the removed code used m as scratch space and zeroed it on failure
 			mlen = smlen - 64;
 			return true;
 		}
+		public static unsafe Boolean crypto_sign_verify(Byte* sig, Byte* m, int mlen, Byte* pk) { // Checks the 64-byte detached signature sig over the mlen bytes at m using the 32-byte public key pk; returns true when it matches.
+			if (mlen < 0) return false; // negative length, e.g. a signed message shorter than 64 bytes passed by crypto_sign_open
+			if ((sig[63] & 224) != 0) return false; // the top three bits of the last signature byte must be clear
+			ge_p3 A;
+			if (!ge_frombytes_negate_vartime(out A, pk)) return false; // pk could not be decoded into a point
+
+			sha512.sha512state hashstate = new sha512.sha512state(); // incremental hashing, so no scratch copy of the signed message is needed
+			hashstate.init();
+			hashstate.process(sig, 32); // hash input: sig[0..31], then pk, then the message
+			hashstate.process(pk, 32);
+			hashstate.process(m, mlen);
+			Byte* h = stackalloc Byte[64];
+			hashstate.finish(h);
+			sc_reduce(h);
+
+			ge_p2 R;
+			ge_double_scalarmult_vartime(out R, h, ref A, sig + 32); // R is computed from h, A and the second half of the signature
+			ge_tobytes(h, ref R); // h is reused as scratch for the encoding of R
+			return crypto_verify._32.crypto_verify(h, sig) == 0; // valid when the encoding of R equals the first 32 bytes of the signature
+		}
 	}
 }