From c22c15a2e856d5c6a5d14de477256fb706e72deb Mon Sep 17 00:00:00 2001 From: Evgeny Poberezkin <2769109+epoberezkin@users.noreply.github.com> Date: Tue, 31 Oct 2023 22:40:15 +0000 Subject: [PATCH] sntrup761 (#865) * add sntrup761 source * it compiles * Wrap bindings in non-FFI types Test passes with a dummy RNG. * pass ChaChaDRG via FunPtr * Add iOS smoke test at createAgentStore * style * add "ssl" library dep Attempt to fix missing _SHA512 symbol on macos. * remove sha512 wrapper and use openssl directly * restore names, remove dummy RNG * Revert "remove sha512 wrapper and use openssl directly" This reverts commit f9f7781f097dcac6f4896656ef16a6c4af11dcdb. * restore code from RFC * shorter names * enable all tests * remove run test --------- Co-authored-by: IC Rainbow --- cbits/README.md | 3 + cbits/sha512.c | 9 + cbits/sha512.h | 3 + cbits/sntrup761.c | 1035 +++++++++++++++++ cbits/sntrup761.h | 33 + package.yaml | 7 + simplexmq.cabal | 13 + .../Messaging/Crypto/SNTRUP761/Bindings.hs | 38 + .../Crypto/SNTRUP761/Bindings/Defines.hsc | 17 + .../Crypto/SNTRUP761/Bindings/FFI.hs | 23 + .../Crypto/SNTRUP761/Bindings/RNG.hs | 48 + tests/CoreTests/CryptoTests.hs | 10 + 12 files changed, 1239 insertions(+) create mode 100644 cbits/README.md create mode 100644 cbits/sha512.c create mode 100644 cbits/sha512.h create mode 100644 cbits/sntrup761.c create mode 100644 cbits/sntrup761.h create mode 100644 src/Simplex/Messaging/Crypto/SNTRUP761/Bindings.hs create mode 100644 src/Simplex/Messaging/Crypto/SNTRUP761/Bindings/Defines.hsc create mode 100644 src/Simplex/Messaging/Crypto/SNTRUP761/Bindings/FFI.hs create mode 100644 src/Simplex/Messaging/Crypto/SNTRUP761/Bindings/RNG.hs diff --git a/cbits/README.md b/cbits/README.md new file mode 100644 index 000000000..88e72d008 --- /dev/null +++ b/cbits/README.md @@ -0,0 +1,3 @@ +# Streamlined NTRU Prime: sntrup761 + +The implementation of sntrup761 is the _exact_ copy from this [Internet draft](https://www.ietf.org/archive/id/draft-josefsson-ntruprime-streamlined-00.html). diff --git a/cbits/sha512.c b/cbits/sha512.c new file mode 100644 index 000000000..54b838674 --- /dev/null +++ b/cbits/sha512.c @@ -0,0 +1,9 @@ +#include +#include "sha512.h" + +void crypto_hash_sha512 (unsigned char *out, + const unsigned char *in, + unsigned long long inlen) +{ + SHA512(in, inlen, out); +} diff --git a/cbits/sha512.h b/cbits/sha512.h new file mode 100644 index 000000000..af16f5888 --- /dev/null +++ b/cbits/sha512.h @@ -0,0 +1,3 @@ +void crypto_hash_sha512 (unsigned char *out, + const unsigned char *in, + unsigned long long inlen); diff --git a/cbits/sntrup761.c b/cbits/sntrup761.c new file mode 100644 index 000000000..7d64eeb08 --- /dev/null +++ b/cbits/sntrup761.c @@ -0,0 +1,1035 @@ +/* + * Derived from public domain source, written by (in alphabetical order): + * - Daniel J. Bernstein + * - Chitchanok Chuengsatiansup + * - Tanja Lange + * - Christine van Vredendaal + */ + +#include "sntrup761.h" + +#include "sha512.h" + +/* from supercop-20201130/crypto_sort/int32/portable4/int32_minmax.inc */ +#define int32_MINMAX(a,b) \ +do { \ + int64_t ab = (int64_t)b ^ (int64_t)a; \ + int64_t c = (int64_t)b - (int64_t)a; \ + c ^= ab & (c ^ b); \ + c >>= 31; \ + c &= ab; \ + a ^= c; \ + b ^= c; \ +} while(0) + +/* from supercop-20201130/crypto_sort/int32/portable4/sort.c */ +static void +crypto_sort_int32 (void *array, long long n) +{ + long long top, p, q, r, i, j; + int32_t *x = array; + + if (n < 2) + return; + top = 1; + while (top < n - top) + top += top; + + for (p = top; p >= 1; p >>= 1) + { + i = 0; + while (i + 2 * p <= n) + { + for (j = i; j < i + p; ++j) + int32_MINMAX (x[j], x[j + p]); + i += 2 * p; + } + for (j = i; j < n - p; ++j) + int32_MINMAX (x[j], x[j + p]); + + i = 0; + j = 0; + for (q = top; q > p; q >>= 1) + { + if (j != i) + for (;;) + { + if (j == n - q) + goto done; + int32_t a = x[j + p]; + for (r = q; r > p; r >>= 1) + int32_MINMAX (a, x[j + r]); + x[j + p] = a; + ++j; + if (j == i + p) + { + i += 2 * p; + break; + } + } + while (i + p <= n - q) + { + for (j = i; j < i + p; ++j) + { + int32_t a = x[j + p]; + for (r = q; r > p; r >>= 1) + int32_MINMAX (a, x[j + r]); + x[j + p] = a; + } + i += 2 * p; + } + /* now i + p > n - q */ + j = i; + while (j < n - q) + { + int32_t a = x[j + p]; + for (r = q; r > p; r >>= 1) + int32_MINMAX (a, x[j + r]); + x[j + p] = a; + ++j; + } + + done:; + } + } +} + +/* from supercop-20201130/crypto_sort/uint32/useint32/sort.c */ + +/* can save time by vectorizing xor loops */ +/* can save time by integrating xor loops with int32_sort */ + +static void +crypto_sort_uint32 (void *array, long long n) +{ + uint32_t *x = array; + long long j; + for (j = 0; j < n; ++j) + x[j] ^= 0x80000000; + crypto_sort_int32 (array, n); + for (j = 0; j < n; ++j) + x[j] ^= 0x80000000; +} + +/* from supercop-20201130/crypto_kem/sntrup761/ref/uint32.c */ + +/* +CPU division instruction typically takes time depending on x. +This software is designed to take time independent of x. +Time still varies depending on m; user must ensure that m is constant. +Time also varies on CPUs where multiplication is variable-time. +There could be more CPU issues. +There could also be compiler issues. +*/ + +static void +uint32_divmod_uint14 (uint32_t * q, uint16_t * r, uint32_t x, uint16_t m) +{ + uint32_t v = 0x80000000; + uint32_t qpart; + uint32_t mask; + + v /= m; + + /* caller guarantees m > 0 */ + /* caller guarantees m < 16384 */ + /* vm <= 2^31 <= vm+m-1 */ + /* xvm <= 2^31 x <= xvm+x(m-1) */ + + *q = 0; + + qpart = (x * (uint64_t) v) >> 31; + /* 2^31 qpart <= xv <= 2^31 qpart + 2^31-1 */ + /* 2^31 qpart m <= xvm <= 2^31 qpart m + (2^31-1)m */ + /* 2^31 qpart m <= 2^31 x <= 2^31 qpart m + (2^31-1)m + x(m-1) */ + /* 0 <= 2^31 newx <= (2^31-1)m + x(m-1) */ + /* 0 <= newx <= (1-1/2^31)m + x(m-1)/2^31 */ + /* 0 <= newx <= (1-1/2^31)(2^14-1) + (2^32-1)((2^14-1)-1)/2^31 */ + + x -= qpart * m; + *q += qpart; + /* x <= 49146 */ + + qpart = (x * (uint64_t) v) >> 31; + /* 0 <= newx <= (1-1/2^31)m + x(m-1)/2^31 */ + /* 0 <= newx <= m + 49146(2^14-1)/2^31 */ + /* 0 <= newx <= m + 0.4 */ + /* 0 <= newx <= m */ + + x -= qpart * m; + *q += qpart; + /* x <= m */ + + x -= m; + *q += 1; + mask = -(x >> 31); + x += mask & (uint32_t) m; + *q += mask; + /* x < m */ + + *r = x; +} + + +static uint16_t +uint32_mod_uint14 (uint32_t x, uint16_t m) +{ + uint32_t q; + uint16_t r; + uint32_divmod_uint14 (&q, &r, x, m); + return r; +} + +/* from supercop-20201130/crypto_kem/sntrup761/ref/int32.c */ + +static void +int32_divmod_uint14 (int32_t * q, uint16_t * r, int32_t x, uint16_t m) +{ + uint32_t uq, uq2; + uint16_t ur, ur2; + uint32_t mask; + + uint32_divmod_uint14 (&uq, &ur, 0x80000000 + (uint32_t) x, m); + uint32_divmod_uint14 (&uq2, &ur2, 0x80000000, m); + ur -= ur2; + uq -= uq2; + mask = -(uint32_t) (ur >> 15); + ur += mask & m; + uq += mask; + *r = ur; + *q = uq; +} + + +static uint16_t +int32_mod_uint14 (int32_t x, uint16_t m) +{ + int32_t q; + uint16_t r; + int32_divmod_uint14 (&q, &r, x, m); + return r; +} + +/* from supercop-20201130/crypto_kem/sntrup761/ref/paramsmenu.h */ +#define p 761 +#define q 4591 +#define Rounded_bytes 1007 +#define Rq_bytes 1158 +#define w 286 + +/* from supercop-20201130/crypto_kem/sntrup761/ref/Decode.h */ + +/* Decode(R,s,M,len) */ +/* assumes 0 < M[i] < 16384 */ +/* produces 0 <= R[i] < M[i] */ + +/* from supercop-20201130/crypto_kem/sntrup761/ref/Decode.c */ + +static void +Decode (uint16_t * out, const unsigned char *S, const uint16_t * M, + long long len) +{ + if (len == 1) + { + if (M[0] == 1) + *out = 0; + else if (M[0] <= 256) + *out = uint32_mod_uint14 (S[0], M[0]); + else + *out = uint32_mod_uint14 (S[0] + (((uint16_t) S[1]) << 8), M[0]); + } + if (len > 1) + { + uint16_t R2[(len + 1) / 2]; + uint16_t M2[(len + 1) / 2]; + uint16_t bottomr[len / 2]; + uint32_t bottomt[len / 2]; + long long i; + for (i = 0; i < len - 1; i += 2) + { + uint32_t m = M[i] * (uint32_t) M[i + 1]; + if (m > 256 * 16383) + { + bottomt[i / 2] = 256 * 256; + bottomr[i / 2] = S[0] + 256 * S[1]; + S += 2; + M2[i / 2] = (((m + 255) >> 8) + 255) >> 8; + } + else if (m >= 16384) + { + bottomt[i / 2] = 256; + bottomr[i / 2] = S[0]; + S += 1; + M2[i / 2] = (m + 255) >> 8; + } + else + { + bottomt[i / 2] = 1; + bottomr[i / 2] = 0; + M2[i / 2] = m; + } + } + if (i < len) + M2[i / 2] = M[i]; + Decode (R2, S, M2, (len + 1) / 2); + for (i = 0; i < len - 1; i += 2) + { + uint32_t r = bottomr[i / 2]; + uint32_t r1; + uint16_t r0; + r += bottomt[i / 2] * R2[i / 2]; + uint32_divmod_uint14 (&r1, &r0, r, M[i]); + r1 = uint32_mod_uint14 (r1, M[i + 1]); /* only needed for invalid inputs */ + *out++ = r0; + *out++ = r1; + } + if (i < len) + *out++ = R2[i / 2]; + } +} + +/* from supercop-20201130/crypto_kem/sntrup761/ref/Encode.h */ + +/* Encode(s,R,M,len) */ +/* assumes 0 <= R[i] < M[i] < 16384 */ + +/* from supercop-20201130/crypto_kem/sntrup761/ref/Encode.c */ + +/* 0 <= R[i] < M[i] < 16384 */ +static void +Encode (unsigned char *out, const uint16_t * R, const uint16_t * M, + long long len) +{ + if (len == 1) + { + uint16_t r = R[0]; + uint16_t m = M[0]; + while (m > 1) + { + *out++ = r; + r >>= 8; + m = (m + 255) >> 8; + } + } + if (len > 1) + { + uint16_t R2[(len + 1) / 2]; + uint16_t M2[(len + 1) / 2]; + long long i; + for (i = 0; i < len - 1; i += 2) + { + uint32_t m0 = M[i]; + uint32_t r = R[i] + R[i + 1] * m0; + uint32_t m = M[i + 1] * m0; + while (m >= 16384) + { + *out++ = r; + r >>= 8; + m = (m + 255) >> 8; + } + R2[i / 2] = r; + M2[i / 2] = m; + } + if (i < len) + { + R2[i / 2] = R[i]; + M2[i / 2] = M[i]; + } + Encode (out, R2, M2, (len + 1) / 2); + } +} + +/* from supercop-20201130/crypto_kem/sntrup761/ref/kem.c */ + +/* ----- masks */ + +/* return -1 if x!=0; else return 0 */ +static int +int16_t_nonzero_mask (int16_t x) +{ + uint16_t u = x; /* 0, else 1...65535 */ + uint32_t v = u; /* 0, else 1...65535 */ + v = -v; /* 0, else 2^32-65535...2^32-1 */ + v >>= 31; /* 0, else 1 */ + return -v; /* 0, else -1 */ +} + +/* return -1 if x<0; otherwise return 0 */ +static int +int16_t_negative_mask (int16_t x) +{ + uint16_t u = x; + u >>= 15; + return -(int) u; + /* alternative with gcc -fwrapv: */ + /* x>>15 compiles to CPU's arithmetic right shift */ +} + +/* ----- arithmetic mod 3 */ + +typedef int8_t small; + +/* F3 is always represented as -1,0,1 */ +/* so ZZ_fromF3 is a no-op */ + +/* x must not be close to top int16_t */ +static small +F3_freeze (int16_t x) +{ + return int32_mod_uint14 (x + 1, 3) - 1; +} + +/* ----- arithmetic mod q */ + +#define q12 ((q-1)/2) +typedef int16_t Fq; +/* always represented as -q12...q12 */ +/* so ZZ_fromFq is a no-op */ + +/* x must not be close to top int32 */ +static Fq +Fq_freeze (int32_t x) +{ + return int32_mod_uint14 (x + q12, q) - q12; +} + +static Fq +Fq_recip (Fq a1) +{ + int i = 1; + Fq ai = a1; + + while (i < q - 2) + { + ai = Fq_freeze (a1 * (int32_t) ai); + i += 1; + } + return ai; +} + +/* ----- small polynomials */ + +/* 0 if Weightw_is(r), else -1 */ +static int +Weightw_mask (small * r) +{ + int weight = 0; + int i; + + for (i = 0; i < p; ++i) + weight += r[i] & 1; + return int16_t_nonzero_mask (weight - w); +} + +/* R3_fromR(R_fromRq(r)) */ +static void +R3_fromRq (small * out, const Fq * r) +{ + int i; + for (i = 0; i < p; ++i) + out[i] = F3_freeze (r[i]); +} + +/* h = f*g in the ring R3 */ +static void +R3_mult (small * h, const small * f, const small * g) +{ + small fg[p + p - 1]; + small result; + int i, j; + + for (i = 0; i < p; ++i) + { + result = 0; + for (j = 0; j <= i; ++j) + result = F3_freeze (result + f[j] * g[i - j]); + fg[i] = result; + } + for (i = p; i < p + p - 1; ++i) + { + result = 0; + for (j = i - p + 1; j < p; ++j) + result = F3_freeze (result + f[j] * g[i - j]); + fg[i] = result; + } + + for (i = p + p - 2; i >= p; --i) + { + fg[i - p] = F3_freeze (fg[i - p] + fg[i]); + fg[i - p + 1] = F3_freeze (fg[i - p + 1] + fg[i]); + } + + for (i = 0; i < p; ++i) + h[i] = fg[i]; +} + +/* returns 0 if recip succeeded; else -1 */ +static int +R3_recip (small * out, const small * in) +{ + small f[p + 1], g[p + 1], v[p + 1], r[p + 1]; + int i, loop, delta; + int sign, swap, t; + + for (i = 0; i < p + 1; ++i) + v[i] = 0; + for (i = 0; i < p + 1; ++i) + r[i] = 0; + r[0] = 1; + for (i = 0; i < p; ++i) + f[i] = 0; + f[0] = 1; + f[p - 1] = f[p] = -1; + for (i = 0; i < p; ++i) + g[p - 1 - i] = in[i]; + g[p] = 0; + + delta = 1; + + for (loop = 0; loop < 2 * p - 1; ++loop) + { + for (i = p; i > 0; --i) + v[i] = v[i - 1]; + v[0] = 0; + + sign = -g[0] * f[0]; + swap = int16_t_negative_mask (-delta) & int16_t_nonzero_mask (g[0]); + delta ^= swap & (delta ^ -delta); + delta += 1; + + for (i = 0; i < p + 1; ++i) + { + t = swap & (f[i] ^ g[i]); + f[i] ^= t; + g[i] ^= t; + t = swap & (v[i] ^ r[i]); + v[i] ^= t; + r[i] ^= t; + } + + for (i = 0; i < p + 1; ++i) + g[i] = F3_freeze (g[i] + sign * f[i]); + for (i = 0; i < p + 1; ++i) + r[i] = F3_freeze (r[i] + sign * v[i]); + + for (i = 0; i < p; ++i) + g[i] = g[i + 1]; + g[p] = 0; + } + + sign = f[0]; + for (i = 0; i < p; ++i) + out[i] = sign * v[p - 1 - i]; + + return int16_t_nonzero_mask (delta); +} + +/* ----- polynomials mod q */ + +/* h = f*g in the ring Rq */ +static void +Rq_mult_small (Fq * h, const Fq * f, const small * g) +{ + Fq fg[p + p - 1]; + Fq result; + int i, j; + + for (i = 0; i < p; ++i) + { + result = 0; + for (j = 0; j <= i; ++j) + result = Fq_freeze (result + f[j] * (int32_t) g[i - j]); + fg[i] = result; + } + for (i = p; i < p + p - 1; ++i) + { + result = 0; + for (j = i - p + 1; j < p; ++j) + result = Fq_freeze (result + f[j] * (int32_t) g[i - j]); + fg[i] = result; + } + + for (i = p + p - 2; i >= p; --i) + { + fg[i - p] = Fq_freeze (fg[i - p] + fg[i]); + fg[i - p + 1] = Fq_freeze (fg[i - p + 1] + fg[i]); + } + + for (i = 0; i < p; ++i) + h[i] = fg[i]; +} + +/* h = 3f in Rq */ +static void +Rq_mult3 (Fq * h, const Fq * f) +{ + int i; + + for (i = 0; i < p; ++i) + h[i] = Fq_freeze (3 * f[i]); +} + +/* out = 1/(3*in) in Rq */ +/* returns 0 if recip succeeded; else -1 */ +static int +Rq_recip3 (Fq * out, const small * in) +{ + Fq f[p + 1], g[p + 1], v[p + 1], r[p + 1]; + int i, loop, delta; + int swap, t; + int32_t f0, g0; + Fq scale; + + for (i = 0; i < p + 1; ++i) + v[i] = 0; + for (i = 0; i < p + 1; ++i) + r[i] = 0; + r[0] = Fq_recip (3); + for (i = 0; i < p; ++i) + f[i] = 0; + f[0] = 1; + f[p - 1] = f[p] = -1; + for (i = 0; i < p; ++i) + g[p - 1 - i] = in[i]; + g[p] = 0; + + delta = 1; + + for (loop = 0; loop < 2 * p - 1; ++loop) + { + for (i = p; i > 0; --i) + v[i] = v[i - 1]; + v[0] = 0; + + swap = int16_t_negative_mask (-delta) & int16_t_nonzero_mask (g[0]); + delta ^= swap & (delta ^ -delta); + delta += 1; + + for (i = 0; i < p + 1; ++i) + { + t = swap & (f[i] ^ g[i]); + f[i] ^= t; + g[i] ^= t; + t = swap & (v[i] ^ r[i]); + v[i] ^= t; + r[i] ^= t; + } + + f0 = f[0]; + g0 = g[0]; + for (i = 0; i < p + 1; ++i) + g[i] = Fq_freeze (f0 * g[i] - g0 * f[i]); + for (i = 0; i < p + 1; ++i) + r[i] = Fq_freeze (f0 * r[i] - g0 * v[i]); + + for (i = 0; i < p; ++i) + g[i] = g[i + 1]; + g[p] = 0; + } + + scale = Fq_recip (f[0]); + for (i = 0; i < p; ++i) + out[i] = Fq_freeze (scale * (int32_t) v[p - 1 - i]); + + return int16_t_nonzero_mask (delta); +} + +/* ----- rounded polynomials mod q */ + +static void +Round (Fq * out, const Fq * a) +{ + int i; + for (i = 0; i < p; ++i) + out[i] = a[i] - F3_freeze (a[i]); +} + +/* ----- sorting to generate short polynomial */ + +static void +Short_fromlist (small * out, const uint32_t * in) +{ + uint32_t L[p]; + int i; + + for (i = 0; i < w; ++i) + L[i] = in[i] & (uint32_t) - 2; + for (i = w; i < p; ++i) + L[i] = (in[i] & (uint32_t) - 3) | 1; + crypto_sort_uint32 (L, p); + for (i = 0; i < p; ++i) + out[i] = (L[i] & 3) - 1; +} + +/* ----- underlying hash function */ + +#define Hash_bytes 32 + +/* e.g., b = 0 means out = Hash0(in) */ +static void +Hash_prefix (unsigned char *out, int b, const unsigned char *in, int inlen) +{ + unsigned char x[inlen + 1]; + unsigned char h[64]; + int i; + + x[0] = b; + for (i = 0; i < inlen; ++i) + x[i + 1] = in[i]; + crypto_hash_sha512 (h, x, inlen + 1); + for (i = 0; i < 32; ++i) + out[i] = h[i]; +} + +/* ----- higher-level randomness */ + +static uint32_t +urandom32 (void *random_ctx, sntrup761_random_func * random) +{ + unsigned char c[4]; + uint32_t out[4]; + + random (random_ctx, 4, c); + out[0] = (uint32_t) c[0]; + out[1] = ((uint32_t) c[1]) << 8; + out[2] = ((uint32_t) c[2]) << 16; + out[3] = ((uint32_t) c[3]) << 24; + return out[0] + out[1] + out[2] + out[3]; +} + +static void +Short_random (small * out, void *random_ctx, sntrup761_random_func * random) +{ + uint32_t L[p]; + int i; + + for (i = 0; i < p; ++i) + L[i] = urandom32 (random_ctx, random); + Short_fromlist (out, L); +} + +static void +Small_random (small * out, void *random_ctx, sntrup761_random_func * random) +{ + int i; + + for (i = 0; i < p; ++i) + out[i] = (((urandom32 (random_ctx, random) & 0x3fffffff) * 3) >> 30) - 1; +} + +/* ----- Streamlined NTRU Prime Core */ + +/* h,(f,ginv) = KeyGen() */ +static void +KeyGen (Fq * h, small * f, small * ginv, void *random_ctx, + sntrup761_random_func * random) +{ + small g[p]; + Fq finv[p]; + + for (;;) + { + Small_random (g, random_ctx, random); + if (R3_recip (ginv, g) == 0) + break; + } + Short_random (f, random_ctx, random); + Rq_recip3 (finv, f); /* always works */ + Rq_mult_small (h, finv, g); +} + +/* c = Encrypt(r,h) */ +static void +Encrypt (Fq * c, const small * r, const Fq * h) +{ + Fq hr[p]; + + Rq_mult_small (hr, h, r); + Round (c, hr); +} + +/* r = Decrypt(c,(f,ginv)) */ +static void +Decrypt (small * r, const Fq * c, const small * f, const small * ginv) +{ + Fq cf[p]; + Fq cf3[p]; + small e[p]; + small ev[p]; + int mask; + int i; + + Rq_mult_small (cf, c, f); + Rq_mult3 (cf3, cf); + R3_fromRq (e, cf3); + R3_mult (ev, e, ginv); + + mask = Weightw_mask (ev); /* 0 if weight w, else -1 */ + for (i = 0; i < w; ++i) + r[i] = ((ev[i] ^ 1) & ~mask) ^ 1; + for (i = w; i < p; ++i) + r[i] = ev[i] & ~mask; +} + +/* ----- encoding small polynomials (including short polynomials) */ + +#define Small_bytes ((p+3)/4) + +/* these are the only functions that rely on p mod 4 = 1 */ + +static void +Small_encode (unsigned char *s, const small * f) +{ + small x; + int i; + + for (i = 0; i < p / 4; ++i) + { + x = *f++ + 1; + x += (*f++ + 1) << 2; + x += (*f++ + 1) << 4; + x += (*f++ + 1) << 6; + *s++ = x; + } + x = *f++ + 1; + *s++ = x; +} + +static void +Small_decode (small * f, const unsigned char *s) +{ + unsigned char x; + int i; + + for (i = 0; i < p / 4; ++i) + { + x = *s++; + *f++ = ((small) (x & 3)) - 1; + x >>= 2; + *f++ = ((small) (x & 3)) - 1; + x >>= 2; + *f++ = ((small) (x & 3)) - 1; + x >>= 2; + *f++ = ((small) (x & 3)) - 1; + } + x = *s++; + *f++ = ((small) (x & 3)) - 1; +} + +/* ----- encoding general polynomials */ + +static void +Rq_encode (unsigned char *s, const Fq * r) +{ + uint16_t R[p], M[p]; + int i; + + for (i = 0; i < p; ++i) + R[i] = r[i] + q12; + for (i = 0; i < p; ++i) + M[i] = q; + Encode (s, R, M, p); +} + +static void +Rq_decode (Fq * r, const unsigned char *s) +{ + uint16_t R[p], M[p]; + int i; + + for (i = 0; i < p; ++i) + M[i] = q; + Decode (R, s, M, p); + for (i = 0; i < p; ++i) + r[i] = ((Fq) R[i]) - q12; +} + +/* ----- encoding rounded polynomials */ + +static void +Rounded_encode (unsigned char *s, const Fq * r) +{ + uint16_t R[p], M[p]; + int i; + + for (i = 0; i < p; ++i) + R[i] = ((r[i] + q12) * 10923) >> 15; + for (i = 0; i < p; ++i) + M[i] = (q + 2) / 3; + Encode (s, R, M, p); +} + +static void +Rounded_decode (Fq * r, const unsigned char *s) +{ + uint16_t R[p], M[p]; + int i; + + for (i = 0; i < p; ++i) + M[i] = (q + 2) / 3; + Decode (R, s, M, p); + for (i = 0; i < p; ++i) + r[i] = R[i] * 3 - q12; +} + +/* ----- Streamlined NTRU Prime Core plus encoding */ + +typedef small Inputs[p]; /* passed by reference */ +#define Inputs_random Short_random +#define Inputs_encode Small_encode +#define Inputs_bytes Small_bytes + +#define Ciphertexts_bytes Rounded_bytes +#define SecretKeys_bytes (2*Small_bytes) +#define PublicKeys_bytes Rq_bytes + +/* pk,sk = ZKeyGen() */ +static void +ZKeyGen (unsigned char *pk, unsigned char *sk, void *random_ctx, + sntrup761_random_func * random) +{ + Fq h[p]; + small f[p], v[p]; + + KeyGen (h, f, v, random_ctx, random); + Rq_encode (pk, h); + Small_encode (sk, f); + sk += Small_bytes; + Small_encode (sk, v); +} + +/* C = ZEncrypt(r,pk) */ +static void +ZEncrypt (unsigned char *C, const Inputs r, const unsigned char *pk) +{ + Fq h[p]; + Fq c[p]; + Rq_decode (h, pk); + Encrypt (c, r, h); + Rounded_encode (C, c); +} + +/* r = ZDecrypt(C,sk) */ +static void +ZDecrypt (Inputs r, const unsigned char *C, const unsigned char *sk) +{ + small f[p], v[p]; + Fq c[p]; + + Small_decode (f, sk); + sk += Small_bytes; + Small_decode (v, sk); + Rounded_decode (c, C); + Decrypt (r, c, f, v); +} + +/* ----- confirmation hash */ + +#define Confirm_bytes 32 + +/* h = HashConfirm(r,pk,cache); cache is Hash4(pk) */ +static void +HashConfirm (unsigned char *h, const unsigned char *r, + /* const unsigned char *pk, */ const unsigned char *cache) +{ + unsigned char x[Hash_bytes * 2]; + int i; + + Hash_prefix (x, 3, r, Inputs_bytes); + for (i = 0; i < Hash_bytes; ++i) + x[Hash_bytes + i] = cache[i]; + Hash_prefix (h, 2, x, sizeof x); +} + +/* ----- session-key hash */ + +/* k = HashSession(b,y,z) */ +static void +HashSession (unsigned char *k, int b, const unsigned char *y, + const unsigned char *z) +{ + unsigned char x[Hash_bytes + Ciphertexts_bytes + Confirm_bytes]; + int i; + + Hash_prefix (x, 3, y, Inputs_bytes); + for (i = 0; i < Ciphertexts_bytes + Confirm_bytes; ++i) + x[Hash_bytes + i] = z[i]; + Hash_prefix (k, b, x, sizeof x); +} + +/* ----- Streamlined NTRU Prime */ + +/* pk,sk = KEM_KeyGen() */ +void +sntrup761_keypair (unsigned char *pk, unsigned char *sk, void *random_ctx, + sntrup761_random_func * random) +{ + int i; + + ZKeyGen (pk, sk, random_ctx, random); + sk += SecretKeys_bytes; + for (i = 0; i < PublicKeys_bytes; ++i) + *sk++ = pk[i]; + random (random_ctx, Inputs_bytes, sk); + sk += Inputs_bytes; + Hash_prefix (sk, 4, pk, PublicKeys_bytes); +} + +/* c,r_enc = Hide(r,pk,cache); cache is Hash4(pk) */ +static void +Hide (unsigned char *c, unsigned char *r_enc, const Inputs r, + const unsigned char *pk, const unsigned char *cache) +{ + Inputs_encode (r_enc, r); + ZEncrypt (c, r, pk); + c += Ciphertexts_bytes; + HashConfirm (c, r_enc, cache); +} + +/* c,k = Encap(pk) */ +void +sntrup761_enc (unsigned char *c, unsigned char *k, const unsigned char *pk, + void *random_ctx, sntrup761_random_func * random) +{ + Inputs r; + unsigned char r_enc[Inputs_bytes]; + unsigned char cache[Hash_bytes]; + + Hash_prefix (cache, 4, pk, PublicKeys_bytes); + Inputs_random (r, random_ctx, random); + Hide (c, r_enc, r, pk, cache); + HashSession (k, 1, r_enc, c); +} + +/* 0 if matching ciphertext+confirm, else -1 */ +static int +Ciphertexts_diff_mask (const unsigned char *c, const unsigned char *c2) +{ + uint16_t differentbits = 0; + int len = Ciphertexts_bytes + Confirm_bytes; + + while (len-- > 0) + differentbits |= (*c++) ^ (*c2++); + return (1 & ((differentbits - 1) >> 8)) - 1; +} + +/* k = Decap(c,sk) */ +void +sntrup761_dec (unsigned char *k, const unsigned char *c, const unsigned char *sk) +{ + const unsigned char *pk = sk + SecretKeys_bytes; + const unsigned char *rho = pk + PublicKeys_bytes; + const unsigned char *cache = rho + Inputs_bytes; + Inputs r; + unsigned char r_enc[Inputs_bytes]; + unsigned char cnew[Ciphertexts_bytes + Confirm_bytes]; + int mask; + int i; + + ZDecrypt (r, c, sk); + Hide (cnew, r_enc, r, pk, cache); + mask = Ciphertexts_diff_mask (c, cnew); + for (i = 0; i < Inputs_bytes; ++i) + r_enc[i] ^= mask & (r_enc[i] ^ rho[i]); + HashSession (k, 1 + mask, r_enc, c); +} diff --git a/cbits/sntrup761.h b/cbits/sntrup761.h new file mode 100644 index 000000000..4b1a23bd9 --- /dev/null +++ b/cbits/sntrup761.h @@ -0,0 +1,33 @@ +/* + * Derived from public domain source, written by (in alphabetical order): + * - Daniel J. Bernstein + * - Chitchanok Chuengsatiansup + * - Tanja Lange + * - Christine van Vredendaal + */ + +#ifndef SNTRUP761_H +#define SNTRUP761_H + +#include +#include + +#define SNTRUP761_SECRETKEY_SIZE 1763 +#define SNTRUP761_PUBLICKEY_SIZE 1158 +#define SNTRUP761_CIPHERTEXT_SIZE 1039 +#define SNTRUP761_SIZE 32 + +typedef void sntrup761_random_func (void *ctx, size_t length, uint8_t *dst); + +void +sntrup761_keypair (uint8_t *pk, uint8_t *sk, + void *random_ctx, sntrup761_random_func *random); + +void +sntrup761_enc (uint8_t *c, uint8_t *k, const uint8_t *pk, + void *random_ctx, sntrup761_random_func *random); + +void +sntrup761_dec (uint8_t *k, const uint8_t *c, const uint8_t *sk); + +#endif /* SNTRUP761_H */ diff --git a/package.yaml b/package.yaml index 9f2aa77cb..8fe00fbfa 100644 --- a/package.yaml +++ b/package.yaml @@ -20,6 +20,8 @@ category: Chat, Network, Web, System, Cryptography extra-source-files: - README.md - CHANGELOG.md + - cbits/sha512.h + - cbits/sntrup761.h dependencies: - aeson == 2.2.* @@ -88,6 +90,11 @@ when: library: source-dirs: src + c-sources: + - cbits/sha512.c + - cbits/sntrup761.c + include-dirs: cbits + extra-libraries: crypto executables: smp-server: diff --git a/simplexmq.cabal b/simplexmq.cabal index 84450caf7..c8ed1f759 100644 --- a/simplexmq.cabal +++ b/simplexmq.cabal @@ -26,6 +26,8 @@ build-type: Simple extra-source-files: README.md CHANGELOG.md + cbits/sha512.h + cbits/sntrup761.h flag swift description: Enable swift JSON format @@ -99,6 +101,10 @@ library Simplex.Messaging.Crypto.File Simplex.Messaging.Crypto.Lazy Simplex.Messaging.Crypto.Ratchet + Simplex.Messaging.Crypto.SNTRUP761.Bindings + Simplex.Messaging.Crypto.SNTRUP761.Bindings.Defines + Simplex.Messaging.Crypto.SNTRUP761.Bindings.FFI + Simplex.Messaging.Crypto.SNTRUP761.Bindings.RNG Simplex.Messaging.Encoding Simplex.Messaging.Encoding.String Simplex.Messaging.Notifications.Client @@ -149,6 +155,13 @@ library hs-source-dirs: src ghc-options: -Wall -Wcompat -Werror=incomplete-patterns -Wredundant-constraints -Wincomplete-record-updates -Wincomplete-uni-patterns -Wunused-type-patterns + include-dirs: + cbits + c-sources: + cbits/sha512.c + cbits/sntrup761.c + extra-libraries: + crypto build-depends: aeson ==2.2.* , ansi-terminal >=0.10 && <0.12 diff --git a/src/Simplex/Messaging/Crypto/SNTRUP761/Bindings.hs b/src/Simplex/Messaging/Crypto/SNTRUP761/Bindings.hs new file mode 100644 index 000000000..0da90fbc9 --- /dev/null +++ b/src/Simplex/Messaging/Crypto/SNTRUP761/Bindings.hs @@ -0,0 +1,38 @@ +{-# LANGUAGE NamedFieldPuns #-} + +module Simplex.Messaging.Crypto.SNTRUP761.Bindings where + +import Data.ByteArray (ScrubbedBytes) +import qualified Data.ByteArray as BA +import Data.ByteString (ByteString) +import Simplex.Messaging.Crypto.SNTRUP761.Bindings.Defines +import Simplex.Messaging.Crypto.SNTRUP761.Bindings.FFI +import Simplex.Messaging.Crypto.SNTRUP761.Bindings.RNG (RNG (..)) + +type PublicKey = ByteString + +type SecretKey = ScrubbedBytes + +type Ciphertext = ByteString + +type Key = ScrubbedBytes + +sntrup761Keypair :: RNG -> IO (PublicKey, SecretKey) +sntrup761Keypair RNG {rngContext, rngFunc} = do + BA.allocRet c_SNTRUP761_SECRETKEY_SIZE $ \skPtr -> + BA.alloc c_SNTRUP761_PUBLICKEY_SIZE $ \pkPtr -> + c_sntrup761_keypair pkPtr skPtr rngContext rngFunc + +sntrup761Enc :: RNG -> PublicKey -> IO (Ciphertext, Key) +sntrup761Enc RNG {rngContext, rngFunc} pk = + BA.withByteArray pk $ \pkPtr -> + BA.allocRet c_SNTRUP761_SIZE $ \kPtr -> + BA.alloc c_SNTRUP761_CIPHERTEXT_SIZE $ \cPtr -> + c_sntrup761_enc cPtr kPtr pkPtr rngContext rngFunc + +sntrup761Dec :: Ciphertext -> SecretKey -> IO Key +sntrup761Dec c sk = + BA.withByteArray sk $ \skPtr -> + BA.withByteArray c $ \cPtr -> + BA.alloc c_SNTRUP761_SIZE $ \kPtr -> + c_sntrup761_dec kPtr cPtr skPtr diff --git a/src/Simplex/Messaging/Crypto/SNTRUP761/Bindings/Defines.hsc b/src/Simplex/Messaging/Crypto/SNTRUP761/Bindings/Defines.hsc new file mode 100644 index 000000000..a7ba2a09b --- /dev/null +++ b/src/Simplex/Messaging/Crypto/SNTRUP761/Bindings/Defines.hsc @@ -0,0 +1,17 @@ +module Simplex.Messaging.Crypto.SNTRUP761.Bindings.Defines where + +import Foreign.C + +#include "sntrup761.h" + +c_SNTRUP761_SECRETKEY_SIZE :: Int +c_SNTRUP761_SECRETKEY_SIZE = #{const SNTRUP761_SECRETKEY_SIZE} + +c_SNTRUP761_PUBLICKEY_SIZE :: Int +c_SNTRUP761_PUBLICKEY_SIZE = #{const SNTRUP761_PUBLICKEY_SIZE} + +c_SNTRUP761_CIPHERTEXT_SIZE :: Int +c_SNTRUP761_CIPHERTEXT_SIZE = #{const SNTRUP761_CIPHERTEXT_SIZE} + +c_SNTRUP761_SIZE :: Int +c_SNTRUP761_SIZE = #{const SNTRUP761_SIZE} diff --git a/src/Simplex/Messaging/Crypto/SNTRUP761/Bindings/FFI.hs b/src/Simplex/Messaging/Crypto/SNTRUP761/Bindings/FFI.hs new file mode 100644 index 000000000..922d54eae --- /dev/null +++ b/src/Simplex/Messaging/Crypto/SNTRUP761/Bindings/FFI.hs @@ -0,0 +1,23 @@ +{-# LANGUAGE ForeignFunctionInterface #-} + +module Simplex.Messaging.Crypto.SNTRUP761.Bindings.FFI + ( c_sntrup761_keypair, + c_sntrup761_enc, + c_sntrup761_dec, + ) where + +import Foreign +import Foreign.C +import Simplex.Messaging.Crypto.SNTRUP761.Bindings.RNG (RNGContext, RNGFunc) + +-- void sntrup761_keypair (uint8_t *pk, uint8_t *sk, void *random_ctx, sntrup761_random_func *random); +foreign import ccall "sntrup761_keypair" + c_sntrup761_keypair :: Ptr Word8 -> Ptr Word8 -> RNGContext -> FunPtr RNGFunc -> IO () + +-- void sntrup761_enc (uint8_t *c, uint8_t *k, const uint8_t *pk, void *random_ctx, sntrup761_random_func *random); +foreign import ccall "sntrup761_enc" + c_sntrup761_enc :: Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> RNGContext -> FunPtr RNGFunc -> IO () + +-- void sntrup761_dec (uint8_t *k, const uint8_t *c, const uint8_t *sk); +foreign import ccall "sntrup761_dec" + c_sntrup761_dec :: Ptr Word8 -> Ptr Word8 -> Ptr Word8 -> IO () diff --git a/src/Simplex/Messaging/Crypto/SNTRUP761/Bindings/RNG.hs b/src/Simplex/Messaging/Crypto/SNTRUP761/Bindings/RNG.hs new file mode 100644 index 000000000..4ed1d5ee0 --- /dev/null +++ b/src/Simplex/Messaging/Crypto/SNTRUP761/Bindings/RNG.hs @@ -0,0 +1,48 @@ +{-# LANGUAGE NamedFieldPuns #-} + +module Simplex.Messaging.Crypto.SNTRUP761.Bindings.RNG + ( RNG (..), + withRNG, + createRNG, + freeRNG, + RNGContext, + RNGFunc, + mkRNGFunc, + ) where + +import Foreign +import Foreign.C + +import Crypto.Random (drgNew, randomBytesGenerate) +import Data.ByteArray (ByteArrayAccess (copyByteArrayToPtr), Bytes) +import Data.IORef (atomicModifyIORef', newIORef) +import UnliftIO (bracket) + +data RNG = RNG + { rngContext :: RNGContext, + rngFunc :: FunPtr RNGFunc + } + +withRNG :: (RNG -> IO c) -> IO c +withRNG = bracket createRNG freeRNG + +createRNG :: IO RNG +createRNG = do + chachaState <- drgNew >>= newIORef -- XXX: ctxPtr could be used to store drg state, but cryptonite doesn't provide ByteAccess for ChaChaDRG + rngFunc <- mkRNGFunc $ \_ctxPtr sz buf -> do + bs <- atomicModifyIORef' chachaState $ swap . randomBytesGenerate (fromIntegral sz) :: IO Bytes + copyByteArrayToPtr bs buf + pure RNG {rngContext = nullPtr, rngFunc} + where + swap (a, b) = (b, a) + +freeRNG :: RNG -> IO () +freeRNG RNG {rngFunc} = freeHaskellFunPtr rngFunc + +type RNGContext = Ptr RNG + +-- typedef void random_func (void *ctx, size_t length, uint8_t *dst); +type RNGFunc = Ptr RNGContext -> CSize -> Ptr Word8 -> IO () + +foreign import ccall "wrapper" + mkRNGFunc :: RNGFunc -> IO (FunPtr RNGFunc) diff --git a/tests/CoreTests/CryptoTests.hs b/tests/CoreTests/CryptoTests.hs index 6ecc865c1..af613fe84 100644 --- a/tests/CoreTests/CryptoTests.hs +++ b/tests/CoreTests/CryptoTests.hs @@ -15,6 +15,8 @@ import qualified Data.Text.Lazy as LT import qualified Data.Text.Lazy.Encoding as LE import qualified Simplex.Messaging.Crypto as C import qualified Simplex.Messaging.Crypto.Lazy as LC +import Simplex.Messaging.Crypto.SNTRUP761.Bindings +import Simplex.Messaging.Crypto.SNTRUP761.Bindings.RNG import Test.Hspec import Test.Hspec.QuickCheck (modifyMaxSuccess) import Test.QuickCheck @@ -87,6 +89,8 @@ cryptoTests = do describe "Ed448" $ testEncoding C.SEd448 describe "X25519" $ testEncoding C.SX25519 describe "X448" $ testEncoding C.SX448 + describe "sntrup761" $ + it "should enc/dec key" testSNTRUP761 testPadUnpadFile :: IO () testPadUnpadFile = do @@ -197,3 +201,9 @@ testEncoding alg = it "should encode / decode key" . ioProperty $ do pure $ \(_ :: Int) -> C.decodePubKey (C.encodePubKey k) == Right k && C.decodePrivKey (C.encodePrivKey pk) == Right pk + +testSNTRUP761 :: IO () +testSNTRUP761 = withRNG $ \rng -> do + (pk, sk) <- sntrup761Keypair rng + (c, k) <- sntrup761Enc rng pk + sntrup761Dec c sk `shouldReturn` k