summaryrefslogtreecommitdiff
path: root/usr.bin/ssh
diff options
context:
space:
mode:
authorDamien Miller <djm@cvs.openbsd.org>2024-09-15 02:20:52 +0000
committerDamien Miller <djm@cvs.openbsd.org>2024-09-15 02:20:52 +0000
commit55fb48d7605ea07a625f7659ff761bcf916ffa36 (patch)
treeffa7cca9cc51d65741ed8c46d4107f27694b3360 /usr.bin/ssh
parent265cc3c6384248ea6d26ed0b9191d4b9fded5d8d (diff)
update the Streamlined NTRU Prime code from the "ref" implementation
in SUPERCOP 20201130 to the "compact" implementation in SUPERCOP 20240808. The new version is substantially faster. Thanks to Daniel J Bernstein for pointing out the new implementation (and of course for writing it). tested in snaps/ok deraadt@
Diffstat (limited to 'usr.bin/ssh')
-rw-r--r--usr.bin/ssh/kexsntrup761x25519.c6
-rw-r--r--usr.bin/ssh/sntrup761.c2884
-rw-r--r--usr.bin/ssh/sntrup761.sh57
3 files changed, 1924 insertions, 1023 deletions
diff --git a/usr.bin/ssh/kexsntrup761x25519.c b/usr.bin/ssh/kexsntrup761x25519.c
index e3f7831d39f..c2055d76c39 100644
--- a/usr.bin/ssh/kexsntrup761x25519.c
+++ b/usr.bin/ssh/kexsntrup761x25519.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: kexsntrup761x25519.c,v 1.2 2021/12/05 12:28:27 jsg Exp $ */
+/* $OpenBSD: kexsntrup761x25519.c,v 1.3 2024/09/15 02:20:51 djm Exp $ */
/*
* Copyright (c) 2019 Markus Friedl. All rights reserved.
*
@@ -35,6 +35,10 @@
#include "digest.h"
#include "ssherr.h"
+volatile crypto_int16 crypto_int16_optblocker = 0;
+volatile crypto_int32 crypto_int32_optblocker = 0;
+volatile crypto_int64 crypto_int64_optblocker = 0;
+
int
kex_kem_sntrup761x25519_keypair(struct kex *kex)
{
diff --git a/usr.bin/ssh/sntrup761.c b/usr.bin/ssh/sntrup761.c
index 3ec225a0af4..81ae5dc86d7 100644
--- a/usr.bin/ssh/sntrup761.c
+++ b/usr.bin/ssh/sntrup761.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: sntrup761.c,v 1.6 2023/01/11 02:13:52 djm Exp $ */
+/* $OpenBSD: sntrup761.c,v 1.7 2024/09/15 02:20:51 djm Exp $ */
/*
* Public Domain, Authors:
@@ -11,6 +11,8 @@
#include <string.h>
#include "crypto_api.h"
+#define crypto_declassify(x, y) do {} while (0)
+
#define int8 crypto_int8
#define uint8 crypto_uint8
#define int16 crypto_int16
@@ -19,1251 +21,2133 @@
#define uint32 crypto_uint32
#define int64 crypto_int64
#define uint64 crypto_uint64
+extern volatile crypto_int16 crypto_int16_optblocker;
+extern volatile crypto_int32 crypto_int32_optblocker;
+extern volatile crypto_int64 crypto_int64_optblocker;
-/* from supercop-20201130/crypto_sort/int32/portable4/int32_minmax.inc */
-#define int32_MINMAX(a,b) \
-do { \
- int64_t ab = (int64_t)b ^ (int64_t)a; \
- int64_t c = (int64_t)b - (int64_t)a; \
- c ^= ab & (c ^ b); \
- c >>= 31; \
- c &= ab; \
- a ^= c; \
- b ^= c; \
-} while(0)
+/* from supercop-20240808/cryptoint/crypto_int16.h */
+/* auto-generated: cd cryptoint; ./autogen */
+/* cryptoint 20240806 */
-/* from supercop-20201130/crypto_sort/int32/portable4/sort.c */
+#ifndef crypto_int16_h
+#define crypto_int16_h
+#define crypto_int16 int16_t
+#define crypto_int16_unsigned uint16_t
-static void crypto_sort_int32(void *array,long long n)
-{
- long long top,p,q,r,i,j;
- int32 *x = array;
- if (n < 2) return;
- top = 1;
- while (top < n - top) top += top;
-
- for (p = top;p >= 1;p >>= 1) {
- i = 0;
- while (i + 2 * p <= n) {
- for (j = i;j < i + p;++j)
- int32_MINMAX(x[j],x[j+p]);
- i += 2 * p;
- }
- for (j = i;j < n - p;++j)
- int32_MINMAX(x[j],x[j+p]);
- i = 0;
- j = 0;
- for (q = top;q > p;q >>= 1) {
- if (j != i) for (;;) {
- if (j == n - q) goto done;
- int32 a = x[j + p];
- for (r = q;r > p;r >>= 1)
- int32_MINMAX(a,x[j + r]);
- x[j + p] = a;
- ++j;
- if (j == i + p) {
- i += 2 * p;
- break;
- }
- }
- while (i + p <= n - q) {
- for (j = i;j < i + p;++j) {
- int32 a = x[j + p];
- for (r = q;r > p;r >>= 1)
- int32_MINMAX(a,x[j+r]);
- x[j + p] = a;
- }
- i += 2 * p;
- }
- /* now i + p > n - q */
- j = i;
- while (j < n - q) {
- int32 a = x[j + p];
- for (r = q;r > p;r >>= 1)
- int32_MINMAX(a,x[j+r]);
- x[j + p] = a;
- ++j;
- }
-
- done: ;
- }
- }
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_load(const unsigned char *crypto_int16_s) {
+ crypto_int16 crypto_int16_z = 0;
+ crypto_int16_z |= ((crypto_int16) (*crypto_int16_s++)) << 0;
+ crypto_int16_z |= ((crypto_int16) (*crypto_int16_s++)) << 8;
+ return crypto_int16_z;
}
-/* from supercop-20201130/crypto_sort/uint32/useint32/sort.c */
-
-/* can save time by vectorizing xor loops */
-/* can save time by integrating xor loops with int32_sort */
-
-static void crypto_sort_uint32(void *array,long long n)
-{
- crypto_uint32 *x = array;
- long long j;
- for (j = 0;j < n;++j) x[j] ^= 0x80000000;
- crypto_sort_int32(array,n);
- for (j = 0;j < n;++j) x[j] ^= 0x80000000;
+__attribute__((unused))
+static inline
+void crypto_int16_store(unsigned char *crypto_int16_s,crypto_int16 crypto_int16_x) {
+ *crypto_int16_s++ = crypto_int16_x >> 0;
+ *crypto_int16_s++ = crypto_int16_x >> 8;
}
-/* from supercop-20201130/crypto_kem/sntrup761/ref/uint32.c */
-
-/*
-CPU division instruction typically takes time depending on x.
-This software is designed to take time independent of x.
-Time still varies depending on m; user must ensure that m is constant.
-Time also varies on CPUs where multiplication is variable-time.
-There could be more CPU issues.
-There could also be compiler issues.
-*/
-
-static void uint32_divmod_uint14(uint32 *q,uint16 *r,uint32 x,uint16 m)
-{
- uint32 v = 0x80000000;
- uint32 qpart;
- uint32 mask;
-
- v /= m;
-
- /* caller guarantees m > 0 */
- /* caller guarantees m < 16384 */
- /* vm <= 2^31 <= vm+m-1 */
- /* xvm <= 2^31 x <= xvm+x(m-1) */
-
- *q = 0;
-
- qpart = (x*(uint64)v)>>31;
- /* 2^31 qpart <= xv <= 2^31 qpart + 2^31-1 */
- /* 2^31 qpart m <= xvm <= 2^31 qpart m + (2^31-1)m */
- /* 2^31 qpart m <= 2^31 x <= 2^31 qpart m + (2^31-1)m + x(m-1) */
- /* 0 <= 2^31 newx <= (2^31-1)m + x(m-1) */
- /* 0 <= newx <= (1-1/2^31)m + x(m-1)/2^31 */
- /* 0 <= newx <= (1-1/2^31)(2^14-1) + (2^32-1)((2^14-1)-1)/2^31 */
-
- x -= qpart*m; *q += qpart;
- /* x <= 49146 */
-
- qpart = (x*(uint64)v)>>31;
- /* 0 <= newx <= (1-1/2^31)m + x(m-1)/2^31 */
- /* 0 <= newx <= m + 49146(2^14-1)/2^31 */
- /* 0 <= newx <= m + 0.4 */
- /* 0 <= newx <= m */
-
- x -= qpart*m; *q += qpart;
- /* x <= m */
-
- x -= m; *q += 1;
- mask = -(x>>31);
- x += mask&(uint32)m; *q += mask;
- /* x < m */
-
- *r = x;
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_negative_mask(crypto_int16 crypto_int16_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("sarw $15,%0" : "+r"(crypto_int16_x) : : "cc");
+ return crypto_int16_x;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int16 crypto_int16_y;
+ __asm__ ("sbfx %w0,%w1,15,1" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : );
+ return crypto_int16_y;
+#else
+ crypto_int16_x >>= 16-6;
+ crypto_int16_x ^= crypto_int16_optblocker;
+ crypto_int16_x >>= 5;
+ return crypto_int16_x;
+#endif
}
-
-static uint16 uint32_mod_uint14(uint32 x,uint16 m)
-{
- uint32 q;
- uint16 r;
- uint32_divmod_uint14(&q,&r,x,m);
- return r;
+__attribute__((unused))
+static inline
+crypto_int16_unsigned crypto_int16_unsigned_topbit_01(crypto_int16_unsigned crypto_int16_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("shrw $15,%0" : "+r"(crypto_int16_x) : : "cc");
+ return crypto_int16_x;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int16 crypto_int16_y;
+ __asm__ ("ubfx %w0,%w1,15,1" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : );
+ return crypto_int16_y;
+#else
+ crypto_int16_x >>= 16-6;
+ crypto_int16_x ^= crypto_int16_optblocker;
+ crypto_int16_x >>= 5;
+ return crypto_int16_x;
+#endif
}
-/* from supercop-20201130/crypto_kem/sntrup761/ref/int32.c */
-
-static void int32_divmod_uint14(int32 *q,uint16 *r,int32 x,uint16 m)
-{
- uint32 uq,uq2;
- uint16 ur,ur2;
- uint32 mask;
-
- uint32_divmod_uint14(&uq,&ur,0x80000000+(uint32)x,m);
- uint32_divmod_uint14(&uq2,&ur2,0x80000000,m);
- ur -= ur2; uq -= uq2;
- mask = -(uint32)(ur>>15);
- ur += mask&m; uq += mask;
- *r = ur; *q = uq;
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_negative_01(crypto_int16 crypto_int16_x) {
+ return crypto_int16_unsigned_topbit_01(crypto_int16_x);
}
-
-static uint16 int32_mod_uint14(int32 x,uint16 m)
-{
- int32 q;
- uint16 r;
- int32_divmod_uint14(&q,&r,x,m);
- return r;
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_topbit_mask(crypto_int16 crypto_int16_x) {
+ return crypto_int16_negative_mask(crypto_int16_x);
}
-/* from supercop-20201130/crypto_kem/sntrup761/ref/paramsmenu.h */
-/* pick one of these three: */
-#define SIZE761
-#undef SIZE653
-#undef SIZE857
-
-/* pick one of these two: */
-#define SNTRUP /* Streamlined NTRU Prime */
-#undef LPR /* NTRU LPRime */
-
-/* from supercop-20201130/crypto_kem/sntrup761/ref/params.h */
-#ifndef params_H
-#define params_H
-
-/* menu of parameter choices: */
-
-
-/* what the menu means: */
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_topbit_01(crypto_int16 crypto_int16_x) {
+ return crypto_int16_unsigned_topbit_01(crypto_int16_x);
+}
-#if defined(SIZE761)
-#define p 761
-#define q 4591
-#define Rounded_bytes 1007
-#ifndef LPR
-#define Rq_bytes 1158
-#define w 286
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_bottombit_mask(crypto_int16 crypto_int16_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("andw $1,%0" : "+r"(crypto_int16_x) : : "cc");
+ return -crypto_int16_x;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int16 crypto_int16_y;
+ __asm__ ("sbfx %w0,%w1,0,1" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : );
+ return crypto_int16_y;
#else
-#define w 250
-#define tau0 2156
-#define tau1 114
-#define tau2 2007
-#define tau3 287
+ crypto_int16_x &= 1 ^ crypto_int16_optblocker;
+ return -crypto_int16_x;
#endif
+}
-#elif defined(SIZE653)
-#define p 653
-#define q 4621
-#define Rounded_bytes 865
-#ifndef LPR
-#define Rq_bytes 994
-#define w 288
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_bottombit_01(crypto_int16 crypto_int16_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("andw $1,%0" : "+r"(crypto_int16_x) : : "cc");
+ return crypto_int16_x;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int16 crypto_int16_y;
+ __asm__ ("ubfx %w0,%w1,0,1" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : );
+ return crypto_int16_y;
#else
-#define w 252
-#define tau0 2175
-#define tau1 113
-#define tau2 2031
-#define tau3 290
+ crypto_int16_x &= 1 ^ crypto_int16_optblocker;
+ return crypto_int16_x;
#endif
+}
-#elif defined(SIZE857)
-#define p 857
-#define q 5167
-#define Rounded_bytes 1152
-#ifndef LPR
-#define Rq_bytes 1322
-#define w 322
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_bitinrangepublicpos_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("sarw %%cl,%0" : "+r"(crypto_int16_x) : "c"(crypto_int16_s) : "cc");
+#elif defined(__GNUC__) && defined(__aarch64__)
+ __asm__ ("sxth %w0,%w0\n asr %w0,%w0,%w1" : "+&r"(crypto_int16_x) : "r"(crypto_int16_s) : );
#else
-#define w 281
-#define tau0 2433
-#define tau1 101
-#define tau2 2265
-#define tau3 324
+ crypto_int16_x >>= crypto_int16_s ^ crypto_int16_optblocker;
#endif
+ return crypto_int16_bottombit_mask(crypto_int16_x);
+}
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_bitinrangepublicpos_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("sarw %%cl,%0" : "+r"(crypto_int16_x) : "c"(crypto_int16_s) : "cc");
+#elif defined(__GNUC__) && defined(__aarch64__)
+ __asm__ ("sxth %w0,%w0\n asr %w0,%w0,%w1" : "+&r"(crypto_int16_x) : "r"(crypto_int16_s) : );
#else
-#error "no parameter set defined"
+ crypto_int16_x >>= crypto_int16_s ^ crypto_int16_optblocker;
#endif
+ return crypto_int16_bottombit_01(crypto_int16_x);
+}
-#ifdef LPR
-#define I 256
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_shlmod(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int16_s &= 15;
+ __asm__ ("shlw %%cl,%0" : "+r"(crypto_int16_x) : "c"(crypto_int16_s) : "cc");
+#elif defined(__GNUC__) && defined(__aarch64__)
+ __asm__ ("and %w0,%w0,15\n and %w1,%w1,65535\n lsl %w1,%w1,%w0" : "+&r"(crypto_int16_s), "+r"(crypto_int16_x) : : );
+#else
+ int crypto_int16_k, crypto_int16_l;
+ for (crypto_int16_l = 0,crypto_int16_k = 1;crypto_int16_k < 16;++crypto_int16_l,crypto_int16_k *= 2)
+ crypto_int16_x ^= (crypto_int16_x ^ (crypto_int16_x << crypto_int16_k)) & crypto_int16_bitinrangepublicpos_mask(crypto_int16_s,crypto_int16_l);
#endif
+ return crypto_int16_x;
+}
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_shrmod(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int16_s &= 15;
+ __asm__ ("sarw %%cl,%0" : "+r"(crypto_int16_x) : "c"(crypto_int16_s) : "cc");
+#elif defined(__GNUC__) && defined(__aarch64__)
+ __asm__ ("and %w0,%w0,15\n sxth %w1,%w1\n asr %w1,%w1,%w0" : "+&r"(crypto_int16_s), "+r"(crypto_int16_x) : : );
+#else
+ int crypto_int16_k, crypto_int16_l;
+ for (crypto_int16_l = 0,crypto_int16_k = 1;crypto_int16_k < 16;++crypto_int16_l,crypto_int16_k *= 2)
+ crypto_int16_x ^= (crypto_int16_x ^ (crypto_int16_x >> crypto_int16_k)) & crypto_int16_bitinrangepublicpos_mask(crypto_int16_s,crypto_int16_l);
#endif
-
-/* from supercop-20201130/crypto_kem/sntrup761/ref/Decode.h */
-#ifndef Decode_H
-#define Decode_H
-
-
-/* Decode(R,s,M,len) */
-/* assumes 0 < M[i] < 16384 */
-/* produces 0 <= R[i] < M[i] */
-
+ return crypto_int16_x;
+}
+
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_bitmod_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) {
+ crypto_int16_x = crypto_int16_shrmod(crypto_int16_x,crypto_int16_s);
+ return crypto_int16_bottombit_mask(crypto_int16_x);
+}
+
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_bitmod_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) {
+ crypto_int16_x = crypto_int16_shrmod(crypto_int16_x,crypto_int16_s);
+ return crypto_int16_bottombit_01(crypto_int16_x);
+}
+
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_nonzero_mask(crypto_int16 crypto_int16_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int16 crypto_int16_q,crypto_int16_z;
+ __asm__ ("xorw %0,%0\n movw $-1,%1\n testw %2,%2\n cmovnew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc");
+ return crypto_int16_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int16 crypto_int16_z;
+ __asm__ ("tst %w1,65535\n csetm %w0,ne" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc");
+ return crypto_int16_z;
+#else
+ crypto_int16_x |= -crypto_int16_x;
+ return crypto_int16_negative_mask(crypto_int16_x);
#endif
-
-/* from supercop-20201130/crypto_kem/sntrup761/ref/Decode.c */
-
-static void Decode(uint16 *out,const unsigned char *S,const uint16 *M,long long len)
-{
- if (len == 1) {
- if (M[0] == 1)
- *out = 0;
- else if (M[0] <= 256)
- *out = uint32_mod_uint14(S[0],M[0]);
- else
- *out = uint32_mod_uint14(S[0]+(((uint16)S[1])<<8),M[0]);
- }
- if (len > 1) {
- uint16 R2[(len+1)/2];
- uint16 M2[(len+1)/2];
- uint16 bottomr[len/2];
- uint32 bottomt[len/2];
- long long i;
- for (i = 0;i < len-1;i += 2) {
- uint32 m = M[i]*(uint32) M[i+1];
- if (m > 256*16383) {
- bottomt[i/2] = 256*256;
- bottomr[i/2] = S[0]+256*S[1];
- S += 2;
- M2[i/2] = (((m+255)>>8)+255)>>8;
- } else if (m >= 16384) {
- bottomt[i/2] = 256;
- bottomr[i/2] = S[0];
- S += 1;
- M2[i/2] = (m+255)>>8;
- } else {
- bottomt[i/2] = 1;
- bottomr[i/2] = 0;
- M2[i/2] = m;
- }
- }
- if (i < len)
- M2[i/2] = M[i];
- Decode(R2,S,M2,(len+1)/2);
- for (i = 0;i < len-1;i += 2) {
- uint32 r = bottomr[i/2];
- uint32 r1;
- uint16 r0;
- r += bottomt[i/2]*R2[i/2];
- uint32_divmod_uint14(&r1,&r0,r,M[i]);
- r1 = uint32_mod_uint14(r1,M[i+1]); /* only needed for invalid inputs */
- *out++ = r0;
- *out++ = r1;
- }
- if (i < len)
- *out++ = R2[i/2];
- }
}
-/* from supercop-20201130/crypto_kem/sntrup761/ref/Encode.h */
-#ifndef Encode_H
-#define Encode_H
-
-
-/* Encode(s,R,M,len) */
-/* assumes 0 <= R[i] < M[i] < 16384 */
-
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_nonzero_01(crypto_int16 crypto_int16_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int16 crypto_int16_q,crypto_int16_z;
+ __asm__ ("xorw %0,%0\n movw $1,%1\n testw %2,%2\n cmovnew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc");
+ return crypto_int16_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int16 crypto_int16_z;
+ __asm__ ("tst %w1,65535\n cset %w0,ne" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc");
+ return crypto_int16_z;
+#else
+ crypto_int16_x |= -crypto_int16_x;
+ return crypto_int16_unsigned_topbit_01(crypto_int16_x);
#endif
-
-/* from supercop-20201130/crypto_kem/sntrup761/ref/Encode.c */
-
-/* 0 <= R[i] < M[i] < 16384 */
-static void Encode(unsigned char *out,const uint16 *R,const uint16 *M,long long len)
-{
- if (len == 1) {
- uint16 r = R[0];
- uint16 m = M[0];
- while (m > 1) {
- *out++ = r;
- r >>= 8;
- m = (m+255)>>8;
- }
- }
- if (len > 1) {
- uint16 R2[(len+1)/2];
- uint16 M2[(len+1)/2];
- long long i;
- for (i = 0;i < len-1;i += 2) {
- uint32 m0 = M[i];
- uint32 r = R[i]+R[i+1]*m0;
- uint32 m = M[i+1]*m0;
- while (m >= 16384) {
- *out++ = r;
- r >>= 8;
- m = (m+255)>>8;
- }
- R2[i/2] = r;
- M2[i/2] = m;
- }
- if (i < len) {
- R2[i/2] = R[i];
- M2[i/2] = M[i];
- }
- Encode(out,R2,M2,(len+1)/2);
- }
}
-/* from supercop-20201130/crypto_kem/sntrup761/ref/kem.c */
-
-#ifdef LPR
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_positive_mask(crypto_int16 crypto_int16_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int16 crypto_int16_q,crypto_int16_z;
+ __asm__ ("xorw %0,%0\n movw $-1,%1\n testw %2,%2\n cmovgw %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc");
+ return crypto_int16_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int16 crypto_int16_z;
+ __asm__ ("sxth %w0,%w1\n cmp %w0,0\n csetm %w0,gt" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc");
+ return crypto_int16_z;
+#else
+ crypto_int16 crypto_int16_z = -crypto_int16_x;
+ crypto_int16_z ^= crypto_int16_x & crypto_int16_z;
+ return crypto_int16_negative_mask(crypto_int16_z);
#endif
-
-
-/* ----- masks */
-
-#ifndef LPR
-
-/* return -1 if x!=0; else return 0 */
-static int int16_nonzero_mask(int16 x)
-{
- uint16 u = x; /* 0, else 1...65535 */
- uint32 v = u; /* 0, else 1...65535 */
- v = -v; /* 0, else 2^32-65535...2^32-1 */
- v >>= 31; /* 0, else 1 */
- return -v; /* 0, else -1 */
}
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_positive_01(crypto_int16 crypto_int16_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int16 crypto_int16_q,crypto_int16_z;
+ __asm__ ("xorw %0,%0\n movw $1,%1\n testw %2,%2\n cmovgw %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc");
+ return crypto_int16_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int16 crypto_int16_z;
+ __asm__ ("sxth %w0,%w1\n cmp %w0,0\n cset %w0,gt" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc");
+ return crypto_int16_z;
+#else
+ crypto_int16 crypto_int16_z = -crypto_int16_x;
+ crypto_int16_z ^= crypto_int16_x & crypto_int16_z;
+ return crypto_int16_unsigned_topbit_01(crypto_int16_z);
#endif
-
-/* return -1 if x<0; otherwise return 0 */
-static int int16_negative_mask(int16 x)
-{
- uint16 u = x;
- u >>= 15;
- return -(int) u;
- /* alternative with gcc -fwrapv: */
- /* x>>15 compiles to CPU's arithmetic right shift */
}
-/* ----- arithmetic mod 3 */
-
-typedef int8 small;
-
-/* F3 is always represented as -1,0,1 */
-/* so ZZ_fromF3 is a no-op */
-
-/* x must not be close to top int16 */
-static small F3_freeze(int16 x)
-{
- return int32_mod_uint14(x+1,3)-1;
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_zero_mask(crypto_int16 crypto_int16_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int16 crypto_int16_q,crypto_int16_z;
+ __asm__ ("xorw %0,%0\n movw $-1,%1\n testw %2,%2\n cmovew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc");
+ return crypto_int16_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int16 crypto_int16_z;
+ __asm__ ("tst %w1,65535\n csetm %w0,eq" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc");
+ return crypto_int16_z;
+#else
+ return ~crypto_int16_nonzero_mask(crypto_int16_x);
+#endif
}
-/* ----- arithmetic mod q */
-
-#define q12 ((q-1)/2)
-typedef int16 Fq;
-/* always represented as -q12...q12 */
-/* so ZZ_fromFq is a no-op */
-
-/* x must not be close to top int32 */
-static Fq Fq_freeze(int32 x)
-{
- return int32_mod_uint14(x+q12,q)-q12;
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_zero_01(crypto_int16 crypto_int16_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int16 crypto_int16_q,crypto_int16_z;
+ __asm__ ("xorw %0,%0\n movw $1,%1\n testw %2,%2\n cmovew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc");
+ return crypto_int16_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int16 crypto_int16_z;
+ __asm__ ("tst %w1,65535\n cset %w0,eq" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc");
+ return crypto_int16_z;
+#else
+ return 1-crypto_int16_nonzero_01(crypto_int16_x);
+#endif
}
-#ifndef LPR
-
-static Fq Fq_recip(Fq a1)
-{
- int i = 1;
- Fq ai = a1;
-
- while (i < q-2) {
- ai = Fq_freeze(a1*(int32)ai);
- i += 1;
- }
- return ai;
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_unequal_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int16 crypto_int16_q,crypto_int16_z;
+ __asm__ ("xorw %0,%0\n movw $-1,%1\n cmpw %3,%2\n cmovnew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc");
+ return crypto_int16_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int16 crypto_int16_z;
+ __asm__ ("and %w0,%w1,65535\n cmp %w0,%w2,uxth\n csetm %w0,ne" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc");
+ return crypto_int16_z;
+#else
+ return crypto_int16_nonzero_mask(crypto_int16_x ^ crypto_int16_y);
+#endif
}
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_unequal_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int16 crypto_int16_q,crypto_int16_z;
+ __asm__ ("xorw %0,%0\n movw $1,%1\n cmpw %3,%2\n cmovnew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc");
+ return crypto_int16_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int16 crypto_int16_z;
+ __asm__ ("and %w0,%w1,65535\n cmp %w0,%w2,uxth\n cset %w0,ne" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc");
+ return crypto_int16_z;
+#else
+ return crypto_int16_nonzero_01(crypto_int16_x ^ crypto_int16_y);
#endif
-
-/* ----- Top and Right */
-
-#ifdef LPR
-#define tau 16
-
-static int8 Top(Fq C)
-{
- return (tau1*(int32)(C+tau0)+16384)>>15;
}
-static Fq Right(int8 T)
-{
- return Fq_freeze(tau3*(int32)T-tau2);
-}
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_equal_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int16 crypto_int16_q,crypto_int16_z;
+ __asm__ ("xorw %0,%0\n movw $-1,%1\n cmpw %3,%2\n cmovew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc");
+ return crypto_int16_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int16 crypto_int16_z;
+ __asm__ ("and %w0,%w1,65535\n cmp %w0,%w2,uxth\n csetm %w0,eq" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc");
+ return crypto_int16_z;
+#else
+ return ~crypto_int16_unequal_mask(crypto_int16_x,crypto_int16_y);
#endif
-
-/* ----- small polynomials */
-
-#ifndef LPR
-
-/* 0 if Weightw_is(r), else -1 */
-static int Weightw_mask(small *r)
-{
- int weight = 0;
- int i;
-
- for (i = 0;i < p;++i) weight += r[i]&1;
- return int16_nonzero_mask(weight-w);
}
-/* R3_fromR(R_fromRq(r)) */
-static void R3_fromRq(small *out,const Fq *r)
-{
- int i;
- for (i = 0;i < p;++i) out[i] = F3_freeze(r[i]);
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_equal_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int16 crypto_int16_q,crypto_int16_z;
+ __asm__ ("xorw %0,%0\n movw $1,%1\n cmpw %3,%2\n cmovew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc");
+ return crypto_int16_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int16 crypto_int16_z;
+ __asm__ ("and %w0,%w1,65535\n cmp %w0,%w2,uxth\n cset %w0,eq" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc");
+ return crypto_int16_z;
+#else
+ return 1-crypto_int16_unequal_01(crypto_int16_x,crypto_int16_y);
+#endif
}
-/* h = f*g in the ring R3 */
-static void R3_mult(small *h,const small *f,const small *g)
-{
- small fg[p+p-1];
- small result;
- int i,j;
-
- for (i = 0;i < p;++i) {
- result = 0;
- for (j = 0;j <= i;++j) result = F3_freeze(result+f[j]*g[i-j]);
- fg[i] = result;
- }
- for (i = p;i < p+p-1;++i) {
- result = 0;
- for (j = i-p+1;j < p;++j) result = F3_freeze(result+f[j]*g[i-j]);
- fg[i] = result;
- }
-
- for (i = p+p-2;i >= p;--i) {
- fg[i-p] = F3_freeze(fg[i-p]+fg[i]);
- fg[i-p+1] = F3_freeze(fg[i-p+1]+fg[i]);
- }
-
- for (i = 0;i < p;++i) h[i] = fg[i];
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_min(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("cmpw %1,%0\n cmovgw %1,%0" : "+r"(crypto_int16_x) : "r"(crypto_int16_y) : "cc");
+ return crypto_int16_x;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ __asm__ ("sxth %w0,%w0\n cmp %w0,%w1,sxth\n csel %w0,%w0,%w1,lt" : "+&r"(crypto_int16_x) : "r"(crypto_int16_y) : "cc");
+ return crypto_int16_x;
+#else
+ crypto_int16 crypto_int16_r = crypto_int16_y ^ crypto_int16_x;
+ crypto_int16 crypto_int16_z = crypto_int16_y - crypto_int16_x;
+ crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_y);
+ crypto_int16_z = crypto_int16_negative_mask(crypto_int16_z);
+ crypto_int16_z &= crypto_int16_r;
+ return crypto_int16_x ^ crypto_int16_z;
+#endif
}
-/* returns 0 if recip succeeded; else -1 */
-static int R3_recip(small *out,const small *in)
-{
- small f[p+1],g[p+1],v[p+1],r[p+1];
- int i,loop,delta;
- int sign,swap,t;
-
- for (i = 0;i < p+1;++i) v[i] = 0;
- for (i = 0;i < p+1;++i) r[i] = 0;
- r[0] = 1;
- for (i = 0;i < p;++i) f[i] = 0;
- f[0] = 1; f[p-1] = f[p] = -1;
- for (i = 0;i < p;++i) g[p-1-i] = in[i];
- g[p] = 0;
-
- delta = 1;
-
- for (loop = 0;loop < 2*p-1;++loop) {
- for (i = p;i > 0;--i) v[i] = v[i-1];
- v[0] = 0;
-
- sign = -g[0]*f[0];
- swap = int16_negative_mask(-delta) & int16_nonzero_mask(g[0]);
- delta ^= swap&(delta^-delta);
- delta += 1;
-
- for (i = 0;i < p+1;++i) {
- t = swap&(f[i]^g[i]); f[i] ^= t; g[i] ^= t;
- t = swap&(v[i]^r[i]); v[i] ^= t; r[i] ^= t;
- }
-
- for (i = 0;i < p+1;++i) g[i] = F3_freeze(g[i]+sign*f[i]);
- for (i = 0;i < p+1;++i) r[i] = F3_freeze(r[i]+sign*v[i]);
-
- for (i = 0;i < p;++i) g[i] = g[i+1];
- g[p] = 0;
- }
-
- sign = f[0];
- for (i = 0;i < p;++i) out[i] = sign*v[p-1-i];
-
- return int16_nonzero_mask(delta);
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_max(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("cmpw %1,%0\n cmovlw %1,%0" : "+r"(crypto_int16_x) : "r"(crypto_int16_y) : "cc");
+ return crypto_int16_x;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ __asm__ ("sxth %w0,%w0\n cmp %w0,%w1,sxth\n csel %w0,%w1,%w0,lt" : "+&r"(crypto_int16_x) : "r"(crypto_int16_y) : "cc");
+ return crypto_int16_x;
+#else
+ crypto_int16 crypto_int16_r = crypto_int16_y ^ crypto_int16_x;
+ crypto_int16 crypto_int16_z = crypto_int16_y - crypto_int16_x;
+ crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_y);
+ crypto_int16_z = crypto_int16_negative_mask(crypto_int16_z);
+ crypto_int16_z &= crypto_int16_r;
+ return crypto_int16_y ^ crypto_int16_z;
+#endif
}
+__attribute__((unused))
+static inline
+void crypto_int16_minmax(crypto_int16 *crypto_int16_p,crypto_int16 *crypto_int16_q) {
+ crypto_int16 crypto_int16_x = *crypto_int16_p;
+ crypto_int16 crypto_int16_y = *crypto_int16_q;
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int16 crypto_int16_z;
+ __asm__ ("cmpw %2,%1\n movw %1,%0\n cmovgw %2,%1\n cmovgw %0,%2" : "=&r"(crypto_int16_z), "+&r"(crypto_int16_x), "+r"(crypto_int16_y) : : "cc");
+ *crypto_int16_p = crypto_int16_x;
+ *crypto_int16_q = crypto_int16_y;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int16 crypto_int16_r, crypto_int16_s;
+ __asm__ ("sxth %w0,%w0\n cmp %w0,%w3,sxth\n csel %w1,%w0,%w3,lt\n csel %w2,%w3,%w0,lt" : "+&r"(crypto_int16_x), "=&r"(crypto_int16_r), "=r"(crypto_int16_s) : "r"(crypto_int16_y) : "cc");
+ *crypto_int16_p = crypto_int16_r;
+ *crypto_int16_q = crypto_int16_s;
+#else
+ crypto_int16 crypto_int16_r = crypto_int16_y ^ crypto_int16_x;
+ crypto_int16 crypto_int16_z = crypto_int16_y - crypto_int16_x;
+ crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_y);
+ crypto_int16_z = crypto_int16_negative_mask(crypto_int16_z);
+ crypto_int16_z &= crypto_int16_r;
+ crypto_int16_x ^= crypto_int16_z;
+ crypto_int16_y ^= crypto_int16_z;
+ *crypto_int16_p = crypto_int16_x;
+ *crypto_int16_q = crypto_int16_y;
#endif
+}
-/* ----- polynomials mod q */
-
-/* h = f*g in the ring Rq */
-static void Rq_mult_small(Fq *h,const Fq *f,const small *g)
-{
- Fq fg[p+p-1];
- Fq result;
- int i,j;
-
- for (i = 0;i < p;++i) {
- result = 0;
- for (j = 0;j <= i;++j) result = Fq_freeze(result+f[j]*(int32)g[i-j]);
- fg[i] = result;
- }
- for (i = p;i < p+p-1;++i) {
- result = 0;
- for (j = i-p+1;j < p;++j) result = Fq_freeze(result+f[j]*(int32)g[i-j]);
- fg[i] = result;
- }
-
- for (i = p+p-2;i >= p;--i) {
- fg[i-p] = Fq_freeze(fg[i-p]+fg[i]);
- fg[i-p+1] = Fq_freeze(fg[i-p+1]+fg[i]);
- }
-
- for (i = 0;i < p;++i) h[i] = fg[i];
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_smaller_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int16 crypto_int16_q,crypto_int16_z;
+ __asm__ ("xorw %0,%0\n movw $-1,%1\n cmpw %3,%2\n cmovlw %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc");
+ return crypto_int16_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int16 crypto_int16_z;
+ __asm__ ("sxth %w0,%w1\n cmp %w0,%w2,sxth\n csetm %w0,lt" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc");
+ return crypto_int16_z;
+#else
+ crypto_int16 crypto_int16_r = crypto_int16_x ^ crypto_int16_y;
+ crypto_int16 crypto_int16_z = crypto_int16_x - crypto_int16_y;
+ crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_x);
+ return crypto_int16_negative_mask(crypto_int16_z);
+#endif
}
-#ifndef LPR
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_smaller_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int16 crypto_int16_q,crypto_int16_z;
+ __asm__ ("xorw %0,%0\n movw $1,%1\n cmpw %3,%2\n cmovlw %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc");
+ return crypto_int16_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int16 crypto_int16_z;
+ __asm__ ("sxth %w0,%w1\n cmp %w0,%w2,sxth\n cset %w0,lt" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc");
+ return crypto_int16_z;
+#else
+ crypto_int16 crypto_int16_r = crypto_int16_x ^ crypto_int16_y;
+ crypto_int16 crypto_int16_z = crypto_int16_x - crypto_int16_y;
+ crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_x);
+ return crypto_int16_unsigned_topbit_01(crypto_int16_z);
+#endif
+}
-/* h = 3f in Rq */
-static void Rq_mult3(Fq *h,const Fq *f)
-{
- int i;
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_leq_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int16 crypto_int16_q,crypto_int16_z;
+ __asm__ ("xorw %0,%0\n movw $-1,%1\n cmpw %3,%2\n cmovlew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc");
+ return crypto_int16_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int16 crypto_int16_z;
+ __asm__ ("sxth %w0,%w1\n cmp %w0,%w2,sxth\n csetm %w0,le" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc");
+ return crypto_int16_z;
+#else
+ return ~crypto_int16_smaller_mask(crypto_int16_y,crypto_int16_x);
+#endif
+}
- for (i = 0;i < p;++i) h[i] = Fq_freeze(3*f[i]);
+__attribute__((unused))
+static inline
+crypto_int16 crypto_int16_leq_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int16 crypto_int16_q,crypto_int16_z;
+ __asm__ ("xorw %0,%0\n movw $1,%1\n cmpw %3,%2\n cmovlew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc");
+ return crypto_int16_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int16 crypto_int16_z;
+ __asm__ ("sxth %w0,%w1\n cmp %w0,%w2,sxth\n cset %w0,le" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc");
+ return crypto_int16_z;
+#else
+ return 1-crypto_int16_smaller_01(crypto_int16_y,crypto_int16_x);
+#endif
}
-/* out = 1/(3*in) in Rq */
-/* returns 0 if recip succeeded; else -1 */
-static int Rq_recip3(Fq *out,const small *in)
-{
- Fq f[p+1],g[p+1],v[p+1],r[p+1];
- int i,loop,delta;
- int swap,t;
- int32 f0,g0;
- Fq scale;
-
- for (i = 0;i < p+1;++i) v[i] = 0;
- for (i = 0;i < p+1;++i) r[i] = 0;
- r[0] = Fq_recip(3);
- for (i = 0;i < p;++i) f[i] = 0;
- f[0] = 1; f[p-1] = f[p] = -1;
- for (i = 0;i < p;++i) g[p-1-i] = in[i];
- g[p] = 0;
+__attribute__((unused))
+static inline
+int crypto_int16_ones_num(crypto_int16 crypto_int16_x) {
+ crypto_int16_unsigned crypto_int16_y = crypto_int16_x;
+ const crypto_int16 C0 = 0x5555;
+ const crypto_int16 C1 = 0x3333;
+ const crypto_int16 C2 = 0x0f0f;
+ crypto_int16_y -= ((crypto_int16_y >> 1) & C0);
+ crypto_int16_y = (crypto_int16_y & C1) + ((crypto_int16_y >> 2) & C1);
+ crypto_int16_y = (crypto_int16_y + (crypto_int16_y >> 4)) & C2;
+ crypto_int16_y = (crypto_int16_y + (crypto_int16_y >> 8)) & 0xff;
+ return crypto_int16_y;
+}
+
+__attribute__((unused))
+static inline
+int crypto_int16_bottomzeros_num(crypto_int16 crypto_int16_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int16 fallback = 16;
+ __asm__ ("bsfw %0,%0\n cmovew %1,%0" : "+&r"(crypto_int16_x) : "r"(fallback) : "cc");
+ return crypto_int16_x;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ int64_t crypto_int16_y;
+ __asm__ ("orr %w0,%w1,-65536\n rbit %w0,%w0\n clz %w0,%w0" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : );
+ return crypto_int16_y;
+#else
+ crypto_int16 crypto_int16_y = crypto_int16_x ^ (crypto_int16_x-1);
+ crypto_int16_y = ((crypto_int16) crypto_int16_y) >> 1;
+ crypto_int16_y &= ~(crypto_int16_x & (((crypto_int16) 1) << (16-1)));
+ return crypto_int16_ones_num(crypto_int16_y);
+#endif
+}
- delta = 1;
+#endif
- for (loop = 0;loop < 2*p-1;++loop) {
- for (i = p;i > 0;--i) v[i] = v[i-1];
- v[0] = 0;
+/* from supercop-20240808/cryptoint/crypto_int32.h */
+/* auto-generated: cd cryptoint; ./autogen */
+/* cryptoint 20240806 */
- swap = int16_negative_mask(-delta) & int16_nonzero_mask(g[0]);
- delta ^= swap&(delta^-delta);
- delta += 1;
+#ifndef crypto_int32_h
+#define crypto_int32_h
- for (i = 0;i < p+1;++i) {
- t = swap&(f[i]^g[i]); f[i] ^= t; g[i] ^= t;
- t = swap&(v[i]^r[i]); v[i] ^= t; r[i] ^= t;
- }
+#define crypto_int32 int32_t
+#define crypto_int32_unsigned uint32_t
- f0 = f[0];
- g0 = g[0];
- for (i = 0;i < p+1;++i) g[i] = Fq_freeze(f0*g[i]-g0*f[i]);
- for (i = 0;i < p+1;++i) r[i] = Fq_freeze(f0*r[i]-g0*v[i]);
- for (i = 0;i < p;++i) g[i] = g[i+1];
- g[p] = 0;
- }
- scale = Fq_recip(f[0]);
- for (i = 0;i < p;++i) out[i] = Fq_freeze(scale*(int32)v[p-1-i]);
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_load(const unsigned char *crypto_int32_s) {
+ crypto_int32 crypto_int32_z = 0;
+ crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 0;
+ crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 8;
+ crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 16;
+ crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 24;
+ return crypto_int32_z;
+}
- return int16_nonzero_mask(delta);
+__attribute__((unused))
+static inline
+void crypto_int32_store(unsigned char *crypto_int32_s,crypto_int32 crypto_int32_x) {
+ *crypto_int32_s++ = crypto_int32_x >> 0;
+ *crypto_int32_s++ = crypto_int32_x >> 8;
+ *crypto_int32_s++ = crypto_int32_x >> 16;
+ *crypto_int32_s++ = crypto_int32_x >> 24;
}
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_negative_mask(crypto_int32 crypto_int32_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("sarl $31,%0" : "+r"(crypto_int32_x) : : "cc");
+ return crypto_int32_x;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int32 crypto_int32_y;
+ __asm__ ("asr %w0,%w1,31" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : );
+ return crypto_int32_y;
+#else
+ crypto_int32_x >>= 32-6;
+ crypto_int32_x ^= crypto_int32_optblocker;
+ crypto_int32_x >>= 5;
+ return crypto_int32_x;
#endif
-
-/* ----- rounded polynomials mod q */
-
-static void Round(Fq *out,const Fq *a)
-{
- int i;
- for (i = 0;i < p;++i) out[i] = a[i]-F3_freeze(a[i]);
}
-/* ----- sorting to generate short polynomial */
-
-static void Short_fromlist(small *out,const uint32 *in)
-{
- uint32 L[p];
- int i;
-
- for (i = 0;i < w;++i) L[i] = in[i]&(uint32)-2;
- for (i = w;i < p;++i) L[i] = (in[i]&(uint32)-3)|1;
- crypto_sort_uint32(L,p);
- for (i = 0;i < p;++i) out[i] = (L[i]&3)-1;
+__attribute__((unused))
+static inline
+crypto_int32_unsigned crypto_int32_unsigned_topbit_01(crypto_int32_unsigned crypto_int32_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("shrl $31,%0" : "+r"(crypto_int32_x) : : "cc");
+ return crypto_int32_x;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int32 crypto_int32_y;
+ __asm__ ("lsr %w0,%w1,31" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : );
+ return crypto_int32_y;
+#else
+ crypto_int32_x >>= 32-6;
+ crypto_int32_x ^= crypto_int32_optblocker;
+ crypto_int32_x >>= 5;
+ return crypto_int32_x;
+#endif
}
-/* ----- underlying hash function */
-
-#define Hash_bytes 32
-
-/* e.g., b = 0 means out = Hash0(in) */
-static void Hash_prefix(unsigned char *out,int b,const unsigned char *in,int inlen)
-{
- unsigned char x[inlen+1];
- unsigned char h[64];
- int i;
-
- x[0] = b;
- for (i = 0;i < inlen;++i) x[i+1] = in[i];
- crypto_hash_sha512(h,x,inlen+1);
- for (i = 0;i < 32;++i) out[i] = h[i];
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_negative_01(crypto_int32 crypto_int32_x) {
+ return crypto_int32_unsigned_topbit_01(crypto_int32_x);
}
-/* ----- higher-level randomness */
-
-static uint32 urandom32(void)
-{
- unsigned char c[4];
- uint32 out[4];
-
- randombytes(c,4);
- out[0] = (uint32)c[0];
- out[1] = ((uint32)c[1])<<8;
- out[2] = ((uint32)c[2])<<16;
- out[3] = ((uint32)c[3])<<24;
- return out[0]+out[1]+out[2]+out[3];
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_topbit_mask(crypto_int32 crypto_int32_x) {
+ return crypto_int32_negative_mask(crypto_int32_x);
}
-static void Short_random(small *out)
-{
- uint32 L[p];
- int i;
-
- for (i = 0;i < p;++i) L[i] = urandom32();
- Short_fromlist(out,L);
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_topbit_01(crypto_int32 crypto_int32_x) {
+ return crypto_int32_unsigned_topbit_01(crypto_int32_x);
}
-#ifndef LPR
-
-static void Small_random(small *out)
-{
- int i;
-
- for (i = 0;i < p;++i) out[i] = (((urandom32()&0x3fffffff)*3)>>30)-1;
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_bottombit_mask(crypto_int32 crypto_int32_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("andl $1,%0" : "+r"(crypto_int32_x) : : "cc");
+ return -crypto_int32_x;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int32 crypto_int32_y;
+ __asm__ ("sbfx %w0,%w1,0,1" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : );
+ return crypto_int32_y;
+#else
+ crypto_int32_x &= 1 ^ crypto_int32_optblocker;
+ return -crypto_int32_x;
+#endif
}
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_bottombit_01(crypto_int32 crypto_int32_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("andl $1,%0" : "+r"(crypto_int32_x) : : "cc");
+ return crypto_int32_x;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int32 crypto_int32_y;
+ __asm__ ("ubfx %w0,%w1,0,1" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : );
+ return crypto_int32_y;
+#else
+ crypto_int32_x &= 1 ^ crypto_int32_optblocker;
+ return crypto_int32_x;
#endif
+}
-/* ----- Streamlined NTRU Prime Core */
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_bitinrangepublicpos_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("sarl %%cl,%0" : "+r"(crypto_int32_x) : "c"(crypto_int32_s) : "cc");
+#elif defined(__GNUC__) && defined(__aarch64__)
+ __asm__ ("asr %w0,%w0,%w1" : "+r"(crypto_int32_x) : "r"(crypto_int32_s) : );
+#else
+ crypto_int32_x >>= crypto_int32_s ^ crypto_int32_optblocker;
+#endif
+ return crypto_int32_bottombit_mask(crypto_int32_x);
+}
-#ifndef LPR
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_bitinrangepublicpos_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("sarl %%cl,%0" : "+r"(crypto_int32_x) : "c"(crypto_int32_s) : "cc");
+#elif defined(__GNUC__) && defined(__aarch64__)
+ __asm__ ("asr %w0,%w0,%w1" : "+r"(crypto_int32_x) : "r"(crypto_int32_s) : );
+#else
+ crypto_int32_x >>= crypto_int32_s ^ crypto_int32_optblocker;
+#endif
+ return crypto_int32_bottombit_01(crypto_int32_x);
+}
-/* h,(f,ginv) = KeyGen() */
-static void KeyGen(Fq *h,small *f,small *ginv)
-{
- small g[p];
- Fq finv[p];
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_shlmod(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("shll %%cl,%0" : "+r"(crypto_int32_x) : "c"(crypto_int32_s) : "cc");
+#elif defined(__GNUC__) && defined(__aarch64__)
+ __asm__ ("lsl %w0,%w0,%w1" : "+r"(crypto_int32_x) : "r"(crypto_int32_s) : );
+#else
+ int crypto_int32_k, crypto_int32_l;
+ for (crypto_int32_l = 0,crypto_int32_k = 1;crypto_int32_k < 32;++crypto_int32_l,crypto_int32_k *= 2)
+ crypto_int32_x ^= (crypto_int32_x ^ (crypto_int32_x << crypto_int32_k)) & crypto_int32_bitinrangepublicpos_mask(crypto_int32_s,crypto_int32_l);
+#endif
+ return crypto_int32_x;
+}
- for (;;) {
- Small_random(g);
- if (R3_recip(ginv,g) == 0) break;
- }
- Short_random(f);
- Rq_recip3(finv,f); /* always works */
- Rq_mult_small(h,finv,g);
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_shrmod(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("sarl %%cl,%0" : "+r"(crypto_int32_x) : "c"(crypto_int32_s) : "cc");
+#elif defined(__GNUC__) && defined(__aarch64__)
+ __asm__ ("asr %w0,%w0,%w1" : "+r"(crypto_int32_x) : "r"(crypto_int32_s) : );
+#else
+ int crypto_int32_k, crypto_int32_l;
+ for (crypto_int32_l = 0,crypto_int32_k = 1;crypto_int32_k < 32;++crypto_int32_l,crypto_int32_k *= 2)
+ crypto_int32_x ^= (crypto_int32_x ^ (crypto_int32_x >> crypto_int32_k)) & crypto_int32_bitinrangepublicpos_mask(crypto_int32_s,crypto_int32_l);
+#endif
+ return crypto_int32_x;
+}
+
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_bitmod_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) {
+ crypto_int32_x = crypto_int32_shrmod(crypto_int32_x,crypto_int32_s);
+ return crypto_int32_bottombit_mask(crypto_int32_x);
+}
+
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_bitmod_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) {
+ crypto_int32_x = crypto_int32_shrmod(crypto_int32_x,crypto_int32_s);
+ return crypto_int32_bottombit_01(crypto_int32_x);
+}
+
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_nonzero_mask(crypto_int32 crypto_int32_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int32 crypto_int32_q,crypto_int32_z;
+ __asm__ ("xorl %0,%0\n movl $-1,%1\n testl %2,%2\n cmovnel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc");
+ return crypto_int32_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int32 crypto_int32_z;
+ __asm__ ("cmp %w1,0\n csetm %w0,ne" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc");
+ return crypto_int32_z;
+#else
+ crypto_int32_x |= -crypto_int32_x;
+ return crypto_int32_negative_mask(crypto_int32_x);
+#endif
}
-/* c = Encrypt(r,h) */
-static void Encrypt(Fq *c,const small *r,const Fq *h)
-{
- Fq hr[p];
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_nonzero_01(crypto_int32 crypto_int32_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int32 crypto_int32_q,crypto_int32_z;
+ __asm__ ("xorl %0,%0\n movl $1,%1\n testl %2,%2\n cmovnel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc");
+ return crypto_int32_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int32 crypto_int32_z;
+ __asm__ ("cmp %w1,0\n cset %w0,ne" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc");
+ return crypto_int32_z;
+#else
+ crypto_int32_x |= -crypto_int32_x;
+ return crypto_int32_unsigned_topbit_01(crypto_int32_x);
+#endif
+}
- Rq_mult_small(hr,h,r);
- Round(c,hr);
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_positive_mask(crypto_int32 crypto_int32_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int32 crypto_int32_q,crypto_int32_z;
+ __asm__ ("xorl %0,%0\n movl $-1,%1\n testl %2,%2\n cmovgl %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc");
+ return crypto_int32_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int32 crypto_int32_z;
+ __asm__ ("cmp %w1,0\n csetm %w0,gt" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc");
+ return crypto_int32_z;
+#else
+ crypto_int32 crypto_int32_z = -crypto_int32_x;
+ crypto_int32_z ^= crypto_int32_x & crypto_int32_z;
+ return crypto_int32_negative_mask(crypto_int32_z);
+#endif
}
-/* r = Decrypt(c,(f,ginv)) */
-static void Decrypt(small *r,const Fq *c,const small *f,const small *ginv)
-{
- Fq cf[p];
- Fq cf3[p];
- small e[p];
- small ev[p];
- int mask;
- int i;
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_positive_01(crypto_int32 crypto_int32_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int32 crypto_int32_q,crypto_int32_z;
+ __asm__ ("xorl %0,%0\n movl $1,%1\n testl %2,%2\n cmovgl %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc");
+ return crypto_int32_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int32 crypto_int32_z;
+ __asm__ ("cmp %w1,0\n cset %w0,gt" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc");
+ return crypto_int32_z;
+#else
+ crypto_int32 crypto_int32_z = -crypto_int32_x;
+ crypto_int32_z ^= crypto_int32_x & crypto_int32_z;
+ return crypto_int32_unsigned_topbit_01(crypto_int32_z);
+#endif
+}
- Rq_mult_small(cf,c,f);
- Rq_mult3(cf3,cf);
- R3_fromRq(e,cf3);
- R3_mult(ev,e,ginv);
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_zero_mask(crypto_int32 crypto_int32_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int32 crypto_int32_q,crypto_int32_z;
+ __asm__ ("xorl %0,%0\n movl $-1,%1\n testl %2,%2\n cmovel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc");
+ return crypto_int32_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int32 crypto_int32_z;
+ __asm__ ("cmp %w1,0\n csetm %w0,eq" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc");
+ return crypto_int32_z;
+#else
+ return ~crypto_int32_nonzero_mask(crypto_int32_x);
+#endif
+}
- mask = Weightw_mask(ev); /* 0 if weight w, else -1 */
- for (i = 0;i < w;++i) r[i] = ((ev[i]^1)&~mask)^1;
- for (i = w;i < p;++i) r[i] = ev[i]&~mask;
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_zero_01(crypto_int32 crypto_int32_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int32 crypto_int32_q,crypto_int32_z;
+ __asm__ ("xorl %0,%0\n movl $1,%1\n testl %2,%2\n cmovel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc");
+ return crypto_int32_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int32 crypto_int32_z;
+ __asm__ ("cmp %w1,0\n cset %w0,eq" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc");
+ return crypto_int32_z;
+#else
+ return 1-crypto_int32_nonzero_01(crypto_int32_x);
+#endif
}
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_unequal_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int32 crypto_int32_q,crypto_int32_z;
+ __asm__ ("xorl %0,%0\n movl $-1,%1\n cmpl %3,%2\n cmovnel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc");
+ return crypto_int32_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int32 crypto_int32_z;
+ __asm__ ("cmp %w1,%w2\n csetm %w0,ne" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc");
+ return crypto_int32_z;
+#else
+ return crypto_int32_nonzero_mask(crypto_int32_x ^ crypto_int32_y);
#endif
+}
-/* ----- NTRU LPRime Core */
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_unequal_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int32 crypto_int32_q,crypto_int32_z;
+ __asm__ ("xorl %0,%0\n movl $1,%1\n cmpl %3,%2\n cmovnel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc");
+ return crypto_int32_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int32 crypto_int32_z;
+ __asm__ ("cmp %w1,%w2\n cset %w0,ne" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc");
+ return crypto_int32_z;
+#else
+ return crypto_int32_nonzero_01(crypto_int32_x ^ crypto_int32_y);
+#endif
+}
-#ifdef LPR
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_equal_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int32 crypto_int32_q,crypto_int32_z;
+ __asm__ ("xorl %0,%0\n movl $-1,%1\n cmpl %3,%2\n cmovel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc");
+ return crypto_int32_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int32 crypto_int32_z;
+ __asm__ ("cmp %w1,%w2\n csetm %w0,eq" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc");
+ return crypto_int32_z;
+#else
+ return ~crypto_int32_unequal_mask(crypto_int32_x,crypto_int32_y);
+#endif
+}
-/* (G,A),a = KeyGen(G); leaves G unchanged */
-static void KeyGen(Fq *A,small *a,const Fq *G)
-{
- Fq aG[p];
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_equal_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int32 crypto_int32_q,crypto_int32_z;
+ __asm__ ("xorl %0,%0\n movl $1,%1\n cmpl %3,%2\n cmovel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc");
+ return crypto_int32_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int32 crypto_int32_z;
+ __asm__ ("cmp %w1,%w2\n cset %w0,eq" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc");
+ return crypto_int32_z;
+#else
+ return 1-crypto_int32_unequal_01(crypto_int32_x,crypto_int32_y);
+#endif
+}
- Short_random(a);
- Rq_mult_small(aG,G,a);
- Round(A,aG);
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_min(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("cmpl %1,%0\n cmovgl %1,%0" : "+r"(crypto_int32_x) : "r"(crypto_int32_y) : "cc");
+ return crypto_int32_x;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ __asm__ ("cmp %w0,%w1\n csel %w0,%w0,%w1,lt" : "+r"(crypto_int32_x) : "r"(crypto_int32_y) : "cc");
+ return crypto_int32_x;
+#else
+ crypto_int32 crypto_int32_r = crypto_int32_y ^ crypto_int32_x;
+ crypto_int32 crypto_int32_z = crypto_int32_y - crypto_int32_x;
+ crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_y);
+ crypto_int32_z = crypto_int32_negative_mask(crypto_int32_z);
+ crypto_int32_z &= crypto_int32_r;
+ return crypto_int32_x ^ crypto_int32_z;
+#endif
}
-/* B,T = Encrypt(r,(G,A),b) */
-static void Encrypt(Fq *B,int8 *T,const int8 *r,const Fq *G,const Fq *A,const small *b)
-{
- Fq bG[p];
- Fq bA[p];
- int i;
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_max(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("cmpl %1,%0\n cmovll %1,%0" : "+r"(crypto_int32_x) : "r"(crypto_int32_y) : "cc");
+ return crypto_int32_x;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ __asm__ ("cmp %w0,%w1\n csel %w0,%w1,%w0,lt" : "+r"(crypto_int32_x) : "r"(crypto_int32_y) : "cc");
+ return crypto_int32_x;
+#else
+ crypto_int32 crypto_int32_r = crypto_int32_y ^ crypto_int32_x;
+ crypto_int32 crypto_int32_z = crypto_int32_y - crypto_int32_x;
+ crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_y);
+ crypto_int32_z = crypto_int32_negative_mask(crypto_int32_z);
+ crypto_int32_z &= crypto_int32_r;
+ return crypto_int32_y ^ crypto_int32_z;
+#endif
+}
- Rq_mult_small(bG,G,b);
- Round(B,bG);
- Rq_mult_small(bA,A,b);
- for (i = 0;i < I;++i) T[i] = Top(Fq_freeze(bA[i]+r[i]*q12));
+__attribute__((unused))
+static inline
+void crypto_int32_minmax(crypto_int32 *crypto_int32_p,crypto_int32 *crypto_int32_q) {
+ crypto_int32 crypto_int32_x = *crypto_int32_p;
+ crypto_int32 crypto_int32_y = *crypto_int32_q;
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int32 crypto_int32_z;
+ __asm__ ("cmpl %2,%1\n movl %1,%0\n cmovgl %2,%1\n cmovgl %0,%2" : "=&r"(crypto_int32_z), "+&r"(crypto_int32_x), "+r"(crypto_int32_y) : : "cc");
+ *crypto_int32_p = crypto_int32_x;
+ *crypto_int32_q = crypto_int32_y;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int32 crypto_int32_r, crypto_int32_s;
+ __asm__ ("cmp %w2,%w3\n csel %w0,%w2,%w3,lt\n csel %w1,%w3,%w2,lt" : "=&r"(crypto_int32_r), "=r"(crypto_int32_s) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc");
+ *crypto_int32_p = crypto_int32_r;
+ *crypto_int32_q = crypto_int32_s;
+#else
+ crypto_int32 crypto_int32_r = crypto_int32_y ^ crypto_int32_x;
+ crypto_int32 crypto_int32_z = crypto_int32_y - crypto_int32_x;
+ crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_y);
+ crypto_int32_z = crypto_int32_negative_mask(crypto_int32_z);
+ crypto_int32_z &= crypto_int32_r;
+ crypto_int32_x ^= crypto_int32_z;
+ crypto_int32_y ^= crypto_int32_z;
+ *crypto_int32_p = crypto_int32_x;
+ *crypto_int32_q = crypto_int32_y;
+#endif
}
-/* r = Decrypt((B,T),a) */
-static void Decrypt(int8 *r,const Fq *B,const int8 *T,const small *a)
-{
- Fq aB[p];
- int i;
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_smaller_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int32 crypto_int32_q,crypto_int32_z;
+ __asm__ ("xorl %0,%0\n movl $-1,%1\n cmpl %3,%2\n cmovll %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc");
+ return crypto_int32_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int32 crypto_int32_z;
+ __asm__ ("cmp %w1,%w2\n csetm %w0,lt" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc");
+ return crypto_int32_z;
+#else
+ crypto_int32 crypto_int32_r = crypto_int32_x ^ crypto_int32_y;
+ crypto_int32 crypto_int32_z = crypto_int32_x - crypto_int32_y;
+ crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_x);
+ return crypto_int32_negative_mask(crypto_int32_z);
+#endif
+}
- Rq_mult_small(aB,B,a);
- for (i = 0;i < I;++i)
- r[i] = -int16_negative_mask(Fq_freeze(Right(T[i])-aB[i]+4*w+1));
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_smaller_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int32 crypto_int32_q,crypto_int32_z;
+ __asm__ ("xorl %0,%0\n movl $1,%1\n cmpl %3,%2\n cmovll %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc");
+ return crypto_int32_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int32 crypto_int32_z;
+ __asm__ ("cmp %w1,%w2\n cset %w0,lt" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc");
+ return crypto_int32_z;
+#else
+ crypto_int32 crypto_int32_r = crypto_int32_x ^ crypto_int32_y;
+ crypto_int32 crypto_int32_z = crypto_int32_x - crypto_int32_y;
+ crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_x);
+ return crypto_int32_unsigned_topbit_01(crypto_int32_z);
+#endif
}
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_leq_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int32 crypto_int32_q,crypto_int32_z;
+ __asm__ ("xorl %0,%0\n movl $-1,%1\n cmpl %3,%2\n cmovlel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc");
+ return crypto_int32_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int32 crypto_int32_z;
+ __asm__ ("cmp %w1,%w2\n csetm %w0,le" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc");
+ return crypto_int32_z;
+#else
+ return ~crypto_int32_smaller_mask(crypto_int32_y,crypto_int32_x);
#endif
+}
-/* ----- encoding I-bit inputs */
+__attribute__((unused))
+static inline
+crypto_int32 crypto_int32_leq_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int32 crypto_int32_q,crypto_int32_z;
+ __asm__ ("xorl %0,%0\n movl $1,%1\n cmpl %3,%2\n cmovlel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc");
+ return crypto_int32_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int32 crypto_int32_z;
+ __asm__ ("cmp %w1,%w2\n cset %w0,le" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc");
+ return crypto_int32_z;
+#else
+ return 1-crypto_int32_smaller_01(crypto_int32_y,crypto_int32_x);
+#endif
+}
-#ifdef LPR
+__attribute__((unused))
+static inline
+int crypto_int32_ones_num(crypto_int32 crypto_int32_x) {
+ crypto_int32_unsigned crypto_int32_y = crypto_int32_x;
+ const crypto_int32 C0 = 0x55555555;
+ const crypto_int32 C1 = 0x33333333;
+ const crypto_int32 C2 = 0x0f0f0f0f;
+ crypto_int32_y -= ((crypto_int32_y >> 1) & C0);
+ crypto_int32_y = (crypto_int32_y & C1) + ((crypto_int32_y >> 2) & C1);
+ crypto_int32_y = (crypto_int32_y + (crypto_int32_y >> 4)) & C2;
+ crypto_int32_y += crypto_int32_y >> 8;
+ crypto_int32_y = (crypto_int32_y + (crypto_int32_y >> 16)) & 0xff;
+ return crypto_int32_y;
+}
+
+__attribute__((unused))
+static inline
+int crypto_int32_bottomzeros_num(crypto_int32 crypto_int32_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int32 fallback = 32;
+ __asm__ ("bsfl %0,%0\n cmovel %1,%0" : "+&r"(crypto_int32_x) : "r"(fallback) : "cc");
+ return crypto_int32_x;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ int64_t crypto_int32_y;
+ __asm__ ("rbit %w0,%w1\n clz %w0,%w0" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : );
+ return crypto_int32_y;
+#else
+ crypto_int32 crypto_int32_y = crypto_int32_x ^ (crypto_int32_x-1);
+ crypto_int32_y = ((crypto_int32) crypto_int32_y) >> 1;
+ crypto_int32_y &= ~(crypto_int32_x & (((crypto_int32) 1) << (32-1)));
+ return crypto_int32_ones_num(crypto_int32_y);
+#endif
+}
-#define Inputs_bytes (I/8)
-typedef int8 Inputs[I]; /* passed by reference */
+#endif
-static void Inputs_encode(unsigned char *s,const Inputs r)
-{
- int i;
- for (i = 0;i < Inputs_bytes;++i) s[i] = 0;
- for (i = 0;i < I;++i) s[i>>3] |= r[i]<<(i&7);
+/* from supercop-20240808/cryptoint/crypto_int64.h */
+/* auto-generated: cd cryptoint; ./autogen */
+/* cryptoint 20240806 */
+
+#ifndef crypto_int64_h
+#define crypto_int64_h
+
+#define crypto_int64 int64_t
+#define crypto_int64_unsigned uint64_t
+
+
+
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_load(const unsigned char *crypto_int64_s) {
+ crypto_int64 crypto_int64_z = 0;
+ crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 0;
+ crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 8;
+ crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 16;
+ crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 24;
+ crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 32;
+ crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 40;
+ crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 48;
+ crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 56;
+ return crypto_int64_z;
+}
+
+__attribute__((unused))
+static inline
+void crypto_int64_store(unsigned char *crypto_int64_s,crypto_int64 crypto_int64_x) {
+ *crypto_int64_s++ = crypto_int64_x >> 0;
+ *crypto_int64_s++ = crypto_int64_x >> 8;
+ *crypto_int64_s++ = crypto_int64_x >> 16;
+ *crypto_int64_s++ = crypto_int64_x >> 24;
+ *crypto_int64_s++ = crypto_int64_x >> 32;
+ *crypto_int64_s++ = crypto_int64_x >> 40;
+ *crypto_int64_s++ = crypto_int64_x >> 48;
+ *crypto_int64_s++ = crypto_int64_x >> 56;
+}
+
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_negative_mask(crypto_int64 crypto_int64_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("sarq $63,%0" : "+r"(crypto_int64_x) : : "cc");
+ return crypto_int64_x;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int64 crypto_int64_y;
+ __asm__ ("asr %0,%1,63" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : );
+ return crypto_int64_y;
+#else
+ crypto_int64_x >>= 64-6;
+ crypto_int64_x ^= crypto_int64_optblocker;
+ crypto_int64_x >>= 5;
+ return crypto_int64_x;
+#endif
}
+__attribute__((unused))
+static inline
+crypto_int64_unsigned crypto_int64_unsigned_topbit_01(crypto_int64_unsigned crypto_int64_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("shrq $63,%0" : "+r"(crypto_int64_x) : : "cc");
+ return crypto_int64_x;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int64 crypto_int64_y;
+ __asm__ ("lsr %0,%1,63" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : );
+ return crypto_int64_y;
+#else
+ crypto_int64_x >>= 64-6;
+ crypto_int64_x ^= crypto_int64_optblocker;
+ crypto_int64_x >>= 5;
+ return crypto_int64_x;
#endif
+}
-/* ----- Expand */
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_negative_01(crypto_int64 crypto_int64_x) {
+ return crypto_int64_unsigned_topbit_01(crypto_int64_x);
+}
-#ifdef LPR
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_topbit_mask(crypto_int64 crypto_int64_x) {
+ return crypto_int64_negative_mask(crypto_int64_x);
+}
-static const unsigned char aes_nonce[16] = {0};
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_topbit_01(crypto_int64 crypto_int64_x) {
+ return crypto_int64_unsigned_topbit_01(crypto_int64_x);
+}
-static void Expand(uint32 *L,const unsigned char *k)
-{
- int i;
- crypto_stream_aes256ctr((unsigned char *) L,4*p,aes_nonce,k);
- for (i = 0;i < p;++i) {
- uint32 L0 = ((unsigned char *) L)[4*i];
- uint32 L1 = ((unsigned char *) L)[4*i+1];
- uint32 L2 = ((unsigned char *) L)[4*i+2];
- uint32 L3 = ((unsigned char *) L)[4*i+3];
- L[i] = L0+(L1<<8)+(L2<<16)+(L3<<24);
- }
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_bottombit_mask(crypto_int64 crypto_int64_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("andq $1,%0" : "+r"(crypto_int64_x) : : "cc");
+ return -crypto_int64_x;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int64 crypto_int64_y;
+ __asm__ ("sbfx %0,%1,0,1" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : );
+ return crypto_int64_y;
+#else
+ crypto_int64_x &= 1 ^ crypto_int64_optblocker;
+ return -crypto_int64_x;
+#endif
}
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_bottombit_01(crypto_int64 crypto_int64_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("andq $1,%0" : "+r"(crypto_int64_x) : : "cc");
+ return crypto_int64_x;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int64 crypto_int64_y;
+ __asm__ ("ubfx %0,%1,0,1" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : );
+ return crypto_int64_y;
+#else
+ crypto_int64_x &= 1 ^ crypto_int64_optblocker;
+ return crypto_int64_x;
#endif
+}
-/* ----- Seeds */
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_bitinrangepublicpos_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("sarq %%cl,%0" : "+r"(crypto_int64_x) : "c"(crypto_int64_s) : "cc");
+#elif defined(__GNUC__) && defined(__aarch64__)
+ __asm__ ("asr %0,%0,%1" : "+r"(crypto_int64_x) : "r"(crypto_int64_s) : );
+#else
+ crypto_int64_x >>= crypto_int64_s ^ crypto_int64_optblocker;
+#endif
+ return crypto_int64_bottombit_mask(crypto_int64_x);
+}
-#ifdef LPR
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_bitinrangepublicpos_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("sarq %%cl,%0" : "+r"(crypto_int64_x) : "c"(crypto_int64_s) : "cc");
+#elif defined(__GNUC__) && defined(__aarch64__)
+ __asm__ ("asr %0,%0,%1" : "+r"(crypto_int64_x) : "r"(crypto_int64_s) : );
+#else
+ crypto_int64_x >>= crypto_int64_s ^ crypto_int64_optblocker;
+#endif
+ return crypto_int64_bottombit_01(crypto_int64_x);
+}
-#define Seeds_bytes 32
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_shlmod(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("shlq %%cl,%0" : "+r"(crypto_int64_x) : "c"(crypto_int64_s) : "cc");
+#elif defined(__GNUC__) && defined(__aarch64__)
+ __asm__ ("lsl %0,%0,%1" : "+r"(crypto_int64_x) : "r"(crypto_int64_s) : );
+#else
+ int crypto_int64_k, crypto_int64_l;
+ for (crypto_int64_l = 0,crypto_int64_k = 1;crypto_int64_k < 64;++crypto_int64_l,crypto_int64_k *= 2)
+ crypto_int64_x ^= (crypto_int64_x ^ (crypto_int64_x << crypto_int64_k)) & crypto_int64_bitinrangepublicpos_mask(crypto_int64_s,crypto_int64_l);
+#endif
+ return crypto_int64_x;
+}
-static void Seeds_random(unsigned char *s)
-{
- randombytes(s,Seeds_bytes);
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_shrmod(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("sarq %%cl,%0" : "+r"(crypto_int64_x) : "c"(crypto_int64_s) : "cc");
+#elif defined(__GNUC__) && defined(__aarch64__)
+ __asm__ ("asr %0,%0,%1" : "+r"(crypto_int64_x) : "r"(crypto_int64_s) : );
+#else
+ int crypto_int64_k, crypto_int64_l;
+ for (crypto_int64_l = 0,crypto_int64_k = 1;crypto_int64_k < 64;++crypto_int64_l,crypto_int64_k *= 2)
+ crypto_int64_x ^= (crypto_int64_x ^ (crypto_int64_x >> crypto_int64_k)) & crypto_int64_bitinrangepublicpos_mask(crypto_int64_s,crypto_int64_l);
+#endif
+ return crypto_int64_x;
+}
+
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_bitmod_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) {
+ crypto_int64_x = crypto_int64_shrmod(crypto_int64_x,crypto_int64_s);
+ return crypto_int64_bottombit_mask(crypto_int64_x);
+}
+
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_bitmod_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) {
+ crypto_int64_x = crypto_int64_shrmod(crypto_int64_x,crypto_int64_s);
+ return crypto_int64_bottombit_01(crypto_int64_x);
+}
+
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_nonzero_mask(crypto_int64 crypto_int64_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int64 crypto_int64_q,crypto_int64_z;
+ __asm__ ("xorq %0,%0\n movq $-1,%1\n testq %2,%2\n cmovneq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc");
+ return crypto_int64_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int64 crypto_int64_z;
+ __asm__ ("cmp %1,0\n csetm %0,ne" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc");
+ return crypto_int64_z;
+#else
+ crypto_int64_x |= -crypto_int64_x;
+ return crypto_int64_negative_mask(crypto_int64_x);
+#endif
}
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_nonzero_01(crypto_int64 crypto_int64_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int64 crypto_int64_q,crypto_int64_z;
+ __asm__ ("xorq %0,%0\n movq $1,%1\n testq %2,%2\n cmovneq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc");
+ return crypto_int64_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int64 crypto_int64_z;
+ __asm__ ("cmp %1,0\n cset %0,ne" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc");
+ return crypto_int64_z;
+#else
+ crypto_int64_x |= -crypto_int64_x;
+ return crypto_int64_unsigned_topbit_01(crypto_int64_x);
#endif
+}
-/* ----- Generator, HashShort */
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_positive_mask(crypto_int64 crypto_int64_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int64 crypto_int64_q,crypto_int64_z;
+ __asm__ ("xorq %0,%0\n movq $-1,%1\n testq %2,%2\n cmovgq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc");
+ return crypto_int64_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int64 crypto_int64_z;
+ __asm__ ("cmp %1,0\n csetm %0,gt" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc");
+ return crypto_int64_z;
+#else
+ crypto_int64 crypto_int64_z = -crypto_int64_x;
+ crypto_int64_z ^= crypto_int64_x & crypto_int64_z;
+ return crypto_int64_negative_mask(crypto_int64_z);
+#endif
+}
-#ifdef LPR
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_positive_01(crypto_int64 crypto_int64_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int64 crypto_int64_q,crypto_int64_z;
+ __asm__ ("xorq %0,%0\n movq $1,%1\n testq %2,%2\n cmovgq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc");
+ return crypto_int64_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int64 crypto_int64_z;
+ __asm__ ("cmp %1,0\n cset %0,gt" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc");
+ return crypto_int64_z;
+#else
+ crypto_int64 crypto_int64_z = -crypto_int64_x;
+ crypto_int64_z ^= crypto_int64_x & crypto_int64_z;
+ return crypto_int64_unsigned_topbit_01(crypto_int64_z);
+#endif
+}
-/* G = Generator(k) */
-static void Generator(Fq *G,const unsigned char *k)
-{
- uint32 L[p];
- int i;
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_zero_mask(crypto_int64 crypto_int64_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int64 crypto_int64_q,crypto_int64_z;
+ __asm__ ("xorq %0,%0\n movq $-1,%1\n testq %2,%2\n cmoveq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc");
+ return crypto_int64_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int64 crypto_int64_z;
+ __asm__ ("cmp %1,0\n csetm %0,eq" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc");
+ return crypto_int64_z;
+#else
+ return ~crypto_int64_nonzero_mask(crypto_int64_x);
+#endif
+}
- Expand(L,k);
- for (i = 0;i < p;++i) G[i] = uint32_mod_uint14(L[i],q)-q12;
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_zero_01(crypto_int64 crypto_int64_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int64 crypto_int64_q,crypto_int64_z;
+ __asm__ ("xorq %0,%0\n movq $1,%1\n testq %2,%2\n cmoveq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc");
+ return crypto_int64_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int64 crypto_int64_z;
+ __asm__ ("cmp %1,0\n cset %0,eq" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc");
+ return crypto_int64_z;
+#else
+ return 1-crypto_int64_nonzero_01(crypto_int64_x);
+#endif
}
-/* out = HashShort(r) */
-static void HashShort(small *out,const Inputs r)
-{
- unsigned char s[Inputs_bytes];
- unsigned char h[Hash_bytes];
- uint32 L[p];
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_unequal_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int64 crypto_int64_q,crypto_int64_z;
+ __asm__ ("xorq %0,%0\n movq $-1,%1\n cmpq %3,%2\n cmovneq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
+ return crypto_int64_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int64 crypto_int64_z;
+ __asm__ ("cmp %1,%2\n csetm %0,ne" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
+ return crypto_int64_z;
+#else
+ return crypto_int64_nonzero_mask(crypto_int64_x ^ crypto_int64_y);
+#endif
+}
- Inputs_encode(s,r);
- Hash_prefix(h,5,s,sizeof s);
- Expand(L,h);
- Short_fromlist(out,L);
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_unequal_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int64 crypto_int64_q,crypto_int64_z;
+ __asm__ ("xorq %0,%0\n movq $1,%1\n cmpq %3,%2\n cmovneq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
+ return crypto_int64_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int64 crypto_int64_z;
+ __asm__ ("cmp %1,%2\n cset %0,ne" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
+ return crypto_int64_z;
+#else
+ return crypto_int64_nonzero_01(crypto_int64_x ^ crypto_int64_y);
+#endif
}
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_equal_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int64 crypto_int64_q,crypto_int64_z;
+ __asm__ ("xorq %0,%0\n movq $-1,%1\n cmpq %3,%2\n cmoveq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
+ return crypto_int64_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int64 crypto_int64_z;
+ __asm__ ("cmp %1,%2\n csetm %0,eq" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
+ return crypto_int64_z;
+#else
+ return ~crypto_int64_unequal_mask(crypto_int64_x,crypto_int64_y);
#endif
+}
-/* ----- NTRU LPRime Expand */
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_equal_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int64 crypto_int64_q,crypto_int64_z;
+ __asm__ ("xorq %0,%0\n movq $1,%1\n cmpq %3,%2\n cmoveq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
+ return crypto_int64_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int64 crypto_int64_z;
+ __asm__ ("cmp %1,%2\n cset %0,eq" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
+ return crypto_int64_z;
+#else
+ return 1-crypto_int64_unequal_01(crypto_int64_x,crypto_int64_y);
+#endif
+}
-#ifdef LPR
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_min(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("cmpq %1,%0\n cmovgq %1,%0" : "+r"(crypto_int64_x) : "r"(crypto_int64_y) : "cc");
+ return crypto_int64_x;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ __asm__ ("cmp %0,%1\n csel %0,%0,%1,lt" : "+r"(crypto_int64_x) : "r"(crypto_int64_y) : "cc");
+ return crypto_int64_x;
+#else
+ crypto_int64 crypto_int64_r = crypto_int64_y ^ crypto_int64_x;
+ crypto_int64 crypto_int64_z = crypto_int64_y - crypto_int64_x;
+ crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_y);
+ crypto_int64_z = crypto_int64_negative_mask(crypto_int64_z);
+ crypto_int64_z &= crypto_int64_r;
+ return crypto_int64_x ^ crypto_int64_z;
+#endif
+}
-/* (S,A),a = XKeyGen() */
-static void XKeyGen(unsigned char *S,Fq *A,small *a)
-{
- Fq G[p];
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_max(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ __asm__ ("cmpq %1,%0\n cmovlq %1,%0" : "+r"(crypto_int64_x) : "r"(crypto_int64_y) : "cc");
+ return crypto_int64_x;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ __asm__ ("cmp %0,%1\n csel %0,%1,%0,lt" : "+r"(crypto_int64_x) : "r"(crypto_int64_y) : "cc");
+ return crypto_int64_x;
+#else
+ crypto_int64 crypto_int64_r = crypto_int64_y ^ crypto_int64_x;
+ crypto_int64 crypto_int64_z = crypto_int64_y - crypto_int64_x;
+ crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_y);
+ crypto_int64_z = crypto_int64_negative_mask(crypto_int64_z);
+ crypto_int64_z &= crypto_int64_r;
+ return crypto_int64_y ^ crypto_int64_z;
+#endif
+}
- Seeds_random(S);
- Generator(G,S);
- KeyGen(A,a,G);
+__attribute__((unused))
+static inline
+void crypto_int64_minmax(crypto_int64 *crypto_int64_p,crypto_int64 *crypto_int64_q) {
+ crypto_int64 crypto_int64_x = *crypto_int64_p;
+ crypto_int64 crypto_int64_y = *crypto_int64_q;
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int64 crypto_int64_z;
+ __asm__ ("cmpq %2,%1\n movq %1,%0\n cmovgq %2,%1\n cmovgq %0,%2" : "=&r"(crypto_int64_z), "+&r"(crypto_int64_x), "+r"(crypto_int64_y) : : "cc");
+ *crypto_int64_p = crypto_int64_x;
+ *crypto_int64_q = crypto_int64_y;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int64 crypto_int64_r, crypto_int64_s;
+ __asm__ ("cmp %2,%3\n csel %0,%2,%3,lt\n csel %1,%3,%2,lt" : "=&r"(crypto_int64_r), "=r"(crypto_int64_s) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
+ *crypto_int64_p = crypto_int64_r;
+ *crypto_int64_q = crypto_int64_s;
+#else
+ crypto_int64 crypto_int64_r = crypto_int64_y ^ crypto_int64_x;
+ crypto_int64 crypto_int64_z = crypto_int64_y - crypto_int64_x;
+ crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_y);
+ crypto_int64_z = crypto_int64_negative_mask(crypto_int64_z);
+ crypto_int64_z &= crypto_int64_r;
+ crypto_int64_x ^= crypto_int64_z;
+ crypto_int64_y ^= crypto_int64_z;
+ *crypto_int64_p = crypto_int64_x;
+ *crypto_int64_q = crypto_int64_y;
+#endif
}
-/* B,T = XEncrypt(r,(S,A)) */
-static void XEncrypt(Fq *B,int8 *T,const int8 *r,const unsigned char *S,const Fq *A)
-{
- Fq G[p];
- small b[p];
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_smaller_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int64 crypto_int64_q,crypto_int64_z;
+ __asm__ ("xorq %0,%0\n movq $-1,%1\n cmpq %3,%2\n cmovlq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
+ return crypto_int64_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int64 crypto_int64_z;
+ __asm__ ("cmp %1,%2\n csetm %0,lt" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
+ return crypto_int64_z;
+#else
+ crypto_int64 crypto_int64_r = crypto_int64_x ^ crypto_int64_y;
+ crypto_int64 crypto_int64_z = crypto_int64_x - crypto_int64_y;
+ crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_x);
+ return crypto_int64_negative_mask(crypto_int64_z);
+#endif
+}
- Generator(G,S);
- HashShort(b,r);
- Encrypt(B,T,r,G,A,b);
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_smaller_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int64 crypto_int64_q,crypto_int64_z;
+ __asm__ ("xorq %0,%0\n movq $1,%1\n cmpq %3,%2\n cmovlq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
+ return crypto_int64_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int64 crypto_int64_z;
+ __asm__ ("cmp %1,%2\n cset %0,lt" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
+ return crypto_int64_z;
+#else
+ crypto_int64 crypto_int64_r = crypto_int64_x ^ crypto_int64_y;
+ crypto_int64 crypto_int64_z = crypto_int64_x - crypto_int64_y;
+ crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_x);
+ return crypto_int64_unsigned_topbit_01(crypto_int64_z);
+#endif
}
-#define XDecrypt Decrypt
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_leq_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int64 crypto_int64_q,crypto_int64_z;
+ __asm__ ("xorq %0,%0\n movq $-1,%1\n cmpq %3,%2\n cmovleq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
+ return crypto_int64_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int64 crypto_int64_z;
+ __asm__ ("cmp %1,%2\n csetm %0,le" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
+ return crypto_int64_z;
+#else
+ return ~crypto_int64_smaller_mask(crypto_int64_y,crypto_int64_x);
+#endif
+}
+__attribute__((unused))
+static inline
+crypto_int64 crypto_int64_leq_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int64 crypto_int64_q,crypto_int64_z;
+ __asm__ ("xorq %0,%0\n movq $1,%1\n cmpq %3,%2\n cmovleq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
+ return crypto_int64_z;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ crypto_int64 crypto_int64_z;
+ __asm__ ("cmp %1,%2\n cset %0,le" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
+ return crypto_int64_z;
+#else
+ return 1-crypto_int64_smaller_01(crypto_int64_y,crypto_int64_x);
#endif
+}
-/* ----- encoding small polynomials (including short polynomials) */
+__attribute__((unused))
+static inline
+int crypto_int64_ones_num(crypto_int64 crypto_int64_x) {
+ crypto_int64_unsigned crypto_int64_y = crypto_int64_x;
+ const crypto_int64 C0 = 0x5555555555555555;
+ const crypto_int64 C1 = 0x3333333333333333;
+ const crypto_int64 C2 = 0x0f0f0f0f0f0f0f0f;
+ crypto_int64_y -= ((crypto_int64_y >> 1) & C0);
+ crypto_int64_y = (crypto_int64_y & C1) + ((crypto_int64_y >> 2) & C1);
+ crypto_int64_y = (crypto_int64_y + (crypto_int64_y >> 4)) & C2;
+ crypto_int64_y += crypto_int64_y >> 8;
+ crypto_int64_y += crypto_int64_y >> 16;
+ crypto_int64_y = (crypto_int64_y + (crypto_int64_y >> 32)) & 0xff;
+ return crypto_int64_y;
+}
+
+__attribute__((unused))
+static inline
+int crypto_int64_bottomzeros_num(crypto_int64 crypto_int64_x) {
+#if defined(__GNUC__) && defined(__x86_64__)
+ crypto_int64 fallback = 64;
+ __asm__ ("bsfq %0,%0\n cmoveq %1,%0" : "+&r"(crypto_int64_x) : "r"(fallback) : "cc");
+ return crypto_int64_x;
+#elif defined(__GNUC__) && defined(__aarch64__)
+ int64_t crypto_int64_y;
+ __asm__ ("rbit %0,%1\n clz %0,%0" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : );
+ return crypto_int64_y;
+#else
+ crypto_int64 crypto_int64_y = crypto_int64_x ^ (crypto_int64_x-1);
+ crypto_int64_y = ((crypto_int64) crypto_int64_y) >> 1;
+ crypto_int64_y &= ~(crypto_int64_x & (((crypto_int64) 1) << (64-1)));
+ return crypto_int64_ones_num(crypto_int64_y);
+#endif
+}
-#define Small_bytes ((p+3)/4)
+#endif
-/* these are the only functions that rely on p mod 4 = 1 */
+/* from supercop-20240808/crypto_sort/int32/portable4/sort.c */
+#define int32_MINMAX(a,b) crypto_int32_minmax(&a,&b)
-static void Small_encode(unsigned char *s,const small *f)
+static void crypto_sort_int32(void *array,long long n)
{
- small x;
- int i;
+ long long top,p,q,r,i,j;
+ int32 *x = array;
- for (i = 0;i < p/4;++i) {
- x = *f++ + 1;
- x += (*f++ + 1)<<2;
- x += (*f++ + 1)<<4;
- x += (*f++ + 1)<<6;
- *s++ = x;
- }
- x = *f++ + 1;
- *s++ = x;
-}
+ if (n < 2) return;
+ top = 1;
+ while (top < n - top) top += top;
-static void Small_decode(small *f,const unsigned char *s)
-{
- unsigned char x;
- int i;
+ for (p = top;p >= 1;p >>= 1) {
+ i = 0;
+ while (i + 2 * p <= n) {
+ for (j = i;j < i + p;++j)
+ int32_MINMAX(x[j],x[j+p]);
+ i += 2 * p;
+ }
+ for (j = i;j < n - p;++j)
+ int32_MINMAX(x[j],x[j+p]);
- for (i = 0;i < p/4;++i) {
- x = *s++;
- *f++ = ((small)(x&3))-1; x >>= 2;
- *f++ = ((small)(x&3))-1; x >>= 2;
- *f++ = ((small)(x&3))-1; x >>= 2;
- *f++ = ((small)(x&3))-1;
+ i = 0;
+ j = 0;
+ for (q = top;q > p;q >>= 1) {
+ if (j != i) for (;;) {
+ if (j == n - q) goto done;
+ int32 a = x[j + p];
+ for (r = q;r > p;r >>= 1)
+ int32_MINMAX(a,x[j + r]);
+ x[j + p] = a;
+ ++j;
+ if (j == i + p) {
+ i += 2 * p;
+ break;
+ }
+ }
+ while (i + p <= n - q) {
+ for (j = i;j < i + p;++j) {
+ int32 a = x[j + p];
+ for (r = q;r > p;r >>= 1)
+ int32_MINMAX(a,x[j+r]);
+ x[j + p] = a;
+ }
+ i += 2 * p;
+ }
+ /* now i + p > n - q */
+ j = i;
+ while (j < n - q) {
+ int32 a = x[j + p];
+ for (r = q;r > p;r >>= 1)
+ int32_MINMAX(a,x[j+r]);
+ x[j + p] = a;
+ ++j;
+ }
+
+ done: ;
+ }
}
- x = *s++;
- *f++ = ((small)(x&3))-1;
}
-/* ----- encoding general polynomials */
+/* from supercop-20240808/crypto_sort/uint32/useint32/sort.c */
-#ifndef LPR
+/* can save time by vectorizing xor loops */
+/* can save time by integrating xor loops with int32_sort */
-static void Rq_encode(unsigned char *s,const Fq *r)
+static void crypto_sort_uint32(void *array,long long n)
{
- uint16 R[p],M[p];
- int i;
-
- for (i = 0;i < p;++i) R[i] = r[i]+q12;
- for (i = 0;i < p;++i) M[i] = q;
- Encode(s,R,M,p);
+ crypto_uint32 *x = array;
+ long long j;
+ for (j = 0;j < n;++j) x[j] ^= 0x80000000;
+ crypto_sort_int32(array,n);
+ for (j = 0;j < n;++j) x[j] ^= 0x80000000;
}
-static void Rq_decode(Fq *r,const unsigned char *s)
-{
- uint16 R[p],M[p];
- int i;
+/* from supercop-20240808/crypto_kem/sntrup761/compact/kem.c */
+// 20240806 djb: some automated conversion to cryptoint
- for (i = 0;i < p;++i) M[i] = q;
- Decode(R,s,M,p);
- for (i = 0;i < p;++i) r[i] = ((Fq)R[i])-q12;
-}
+#define p 761
+#define q 4591
+#define w 286
+#define q12 ((q - 1) / 2)
+typedef int8_t small;
+typedef int16_t Fq;
+#define Hash_bytes 32
+#define Small_bytes ((p + 3) / 4)
+typedef small Inputs[p];
+#define SecretKeys_bytes (2 * Small_bytes)
+#define Confirm_bytes 32
-#endif
+static small F3_freeze(int16_t x) { return x - 3 * ((10923 * x + 16384) >> 15); }
+
+static Fq Fq_freeze(int32_t x) {
+ const int32_t q16 = (0x10000 + q / 2) / q;
+ const int32_t q20 = (0x100000 + q / 2) / q;
+ const int32_t q28 = (0x10000000 + q / 2) / q;
+ x -= q * ((q16 * x) >> 16);
+ x -= q * ((q20 * x) >> 20);
+ return x - q * ((q28 * x + 0x8000000) >> 28);
+}
+
+static int Weightw_mask(small *r) {
+ int i, weight = 0;
+ for (i = 0; i < p; ++i) weight += crypto_int64_bottombit_01(r[i]);
+ return crypto_int16_nonzero_mask(weight - w);
+}
+
+static void uint32_divmod_uint14(uint32_t *Q, uint16_t *r, uint32_t x, uint16_t m) {
+ uint32_t qpart, mask, v = 0x80000000 / m;
+ qpart = (x * (uint64_t)v) >> 31;
+ x -= qpart * m;
+ *Q = qpart;
+ qpart = (x * (uint64_t)v) >> 31;
+ x -= qpart * m;
+ *Q += qpart;
+ x -= m;
+ *Q += 1;
+ mask = crypto_int32_negative_mask(x);
+ x += mask & (uint32_t)m;
+ *Q += mask;
+ *r = x;
+}
-/* ----- encoding rounded polynomials */
+static uint16_t uint32_mod_uint14(uint32_t x, uint16_t m) {
+ uint32_t Q;
+ uint16_t r;
+ uint32_divmod_uint14(&Q, &r, x, m);
+ return r;
+}
-static void Rounded_encode(unsigned char *s,const Fq *r)
-{
- uint16 R[p],M[p];
- int i;
+static void Encode(unsigned char *out, const uint16_t *R, const uint16_t *M, long long len) {
+ if (len == 1) {
+ uint16_t r = R[0], m = M[0];
+ while (m > 1) {
+ *out++ = r;
+ r >>= 8;
+ m = (m + 255) >> 8;
+ }
+ }
+ if (len > 1) {
+ uint16_t R2[(len + 1) / 2], M2[(len + 1) / 2];
+ long long i;
+ for (i = 0; i < len - 1; i += 2) {
+ uint32_t m0 = M[i];
+ uint32_t r = R[i] + R[i + 1] * m0;
+ uint32_t m = M[i + 1] * m0;
+ while (m >= 16384) {
+ *out++ = r;
+ r >>= 8;
+ m = (m + 255) >> 8;
+ }
+ R2[i / 2] = r;
+ M2[i / 2] = m;
+ }
+ if (i < len) {
+ R2[i / 2] = R[i];
+ M2[i / 2] = M[i];
+ }
+ Encode(out, R2, M2, (len + 1) / 2);
+ }
+}
- for (i = 0;i < p;++i) R[i] = ((r[i]+q12)*10923)>>15;
- for (i = 0;i < p;++i) M[i] = (q+2)/3;
- Encode(s,R,M,p);
+static void Decode(uint16_t *out, const unsigned char *S, const uint16_t *M, long long len) {
+ if (len == 1) {
+ if (M[0] == 1)
+ *out = 0;
+ else if (M[0] <= 256)
+ *out = uint32_mod_uint14(S[0], M[0]);
+ else
+ *out = uint32_mod_uint14(S[0] + (((uint16_t)S[1]) << 8), M[0]);
+ }
+ if (len > 1) {
+ uint16_t R2[(len + 1) / 2], M2[(len + 1) / 2], bottomr[len / 2];
+ uint32_t bottomt[len / 2];
+ long long i;
+ for (i = 0; i < len - 1; i += 2) {
+ uint32_t m = M[i] * (uint32_t)M[i + 1];
+ if (m > 256 * 16383) {
+ bottomt[i / 2] = 256 * 256;
+ bottomr[i / 2] = S[0] + 256 * S[1];
+ S += 2;
+ M2[i / 2] = (((m + 255) >> 8) + 255) >> 8;
+ } else if (m >= 16384) {
+ bottomt[i / 2] = 256;
+ bottomr[i / 2] = S[0];
+ S += 1;
+ M2[i / 2] = (m + 255) >> 8;
+ } else {
+ bottomt[i / 2] = 1;
+ bottomr[i / 2] = 0;
+ M2[i / 2] = m;
+ }
+ }
+ if (i < len) M2[i / 2] = M[i];
+ Decode(R2, S, M2, (len + 1) / 2);
+ for (i = 0; i < len - 1; i += 2) {
+ uint32_t r1, r = bottomr[i / 2];
+ uint16_t r0;
+ r += bottomt[i / 2] * R2[i / 2];
+ uint32_divmod_uint14(&r1, &r0, r, M[i]);
+ r1 = uint32_mod_uint14(r1, M[i + 1]);
+ *out++ = r0;
+ *out++ = r1;
+ }
+ if (i < len) *out++ = R2[i / 2];
+ }
}
-static void Rounded_decode(Fq *r,const unsigned char *s)
-{
- uint16 R[p],M[p];
+static void R3_fromRq(small *out, const Fq *r) {
int i;
-
- for (i = 0;i < p;++i) M[i] = (q+2)/3;
- Decode(R,s,M,p);
- for (i = 0;i < p;++i) r[i] = R[i]*3-q12;
+ for (i = 0; i < p; ++i) out[i] = F3_freeze(r[i]);
}
-/* ----- encoding top polynomials */
+static void R3_mult(small *h, const small *f, const small *g) {
+ int16_t fg[p + p - 1];
+ int i, j;
+ for (i = 0; i < p + p - 1; ++i) fg[i] = 0;
+ for (i = 0; i < p; ++i)
+ for (j = 0; j < p; ++j) fg[i + j] += f[i] * (int16_t)g[j];
+ for (i = p; i < p + p - 1; ++i) fg[i - p] += fg[i];
+ for (i = p; i < p + p - 1; ++i) fg[i - p + 1] += fg[i];
+ for (i = 0; i < p; ++i) h[i] = F3_freeze(fg[i]);
+}
-#ifdef LPR
+static int R3_recip(small *out, const small *in) {
+ small f[p + 1], g[p + 1], v[p + 1], r[p + 1];
+ int sign, swap, t, i, loop, delta = 1;
+ for (i = 0; i < p + 1; ++i) v[i] = 0;
+ for (i = 0; i < p + 1; ++i) r[i] = 0;
+ r[0] = 1;
+ for (i = 0; i < p; ++i) f[i] = 0;
+ f[0] = 1;
+ f[p - 1] = f[p] = -1;
+ for (i = 0; i < p; ++i) g[p - 1 - i] = in[i];
+ g[p] = 0;
+ for (loop = 0; loop < 2 * p - 1; ++loop) {
+ for (i = p; i > 0; --i) v[i] = v[i - 1];
+ v[0] = 0;
+ sign = -g[0] * f[0];
+ swap = crypto_int16_negative_mask(-delta) & crypto_int16_nonzero_mask(g[0]);
+ delta ^= swap & (delta ^ -delta);
+ delta += 1;
+ for (i = 0; i < p + 1; ++i) {
+ t = swap & (f[i] ^ g[i]);
+ f[i] ^= t;
+ g[i] ^= t;
+ t = swap & (v[i] ^ r[i]);
+ v[i] ^= t;
+ r[i] ^= t;
+ }
+ for (i = 0; i < p + 1; ++i) g[i] = F3_freeze(g[i] + sign * f[i]);
+ for (i = 0; i < p + 1; ++i) r[i] = F3_freeze(r[i] + sign * v[i]);
+ for (i = 0; i < p; ++i) g[i] = g[i + 1];
+ g[p] = 0;
+ }
+ sign = f[0];
+ for (i = 0; i < p; ++i) out[i] = sign * v[p - 1 - i];
+ return crypto_int16_nonzero_mask(delta);
+}
-#define Top_bytes (I/2)
+static void Rq_mult_small(Fq *h, const Fq *f, const small *g) {
+ int32_t fg[p + p - 1];
+ int i, j;
+ for (i = 0; i < p + p - 1; ++i) fg[i] = 0;
+ for (i = 0; i < p; ++i)
+ for (j = 0; j < p; ++j) fg[i + j] += f[i] * (int32_t)g[j];
+ for (i = p; i < p + p - 1; ++i) fg[i - p] += fg[i];
+ for (i = p; i < p + p - 1; ++i) fg[i - p + 1] += fg[i];
+ for (i = 0; i < p; ++i) h[i] = Fq_freeze(fg[i]);
+}
-static void Top_encode(unsigned char *s,const int8 *T)
-{
+static void Rq_mult3(Fq *h, const Fq *f) {
int i;
- for (i = 0;i < Top_bytes;++i)
- s[i] = T[2*i]+(T[2*i+1]<<4);
+ for (i = 0; i < p; ++i) h[i] = Fq_freeze(3 * f[i]);
}
-static void Top_decode(int8 *T,const unsigned char *s)
-{
- int i;
- for (i = 0;i < Top_bytes;++i) {
- T[2*i] = s[i]&15;
- T[2*i+1] = s[i]>>4;
+static Fq Fq_recip(Fq a1) {
+ int i = 1;
+ Fq ai = a1;
+ while (i < q - 2) {
+ ai = Fq_freeze(a1 * (int32_t)ai);
+ i += 1;
}
+ return ai;
}
-#endif
-
-/* ----- Streamlined NTRU Prime Core plus encoding */
-
-#ifndef LPR
-
-typedef small Inputs[p]; /* passed by reference */
-#define Inputs_random Short_random
-#define Inputs_encode Small_encode
-#define Inputs_bytes Small_bytes
-
-#define Ciphertexts_bytes Rounded_bytes
-#define SecretKeys_bytes (2*Small_bytes)
-#define PublicKeys_bytes Rq_bytes
-
-/* pk,sk = ZKeyGen() */
-static void ZKeyGen(unsigned char *pk,unsigned char *sk)
-{
- Fq h[p];
- small f[p],v[p];
-
- KeyGen(h,f,v);
- Rq_encode(pk,h);
- Small_encode(sk,f); sk += Small_bytes;
- Small_encode(sk,v);
+static int Rq_recip3(Fq *out, const small *in) {
+ Fq f[p + 1], g[p + 1], v[p + 1], r[p + 1], scale;
+ int swap, t, i, loop, delta = 1;
+ int32_t f0, g0;
+ for (i = 0; i < p + 1; ++i) v[i] = 0;
+ for (i = 0; i < p + 1; ++i) r[i] = 0;
+ r[0] = Fq_recip(3);
+ for (i = 0; i < p; ++i) f[i] = 0;
+ f[0] = 1;
+ f[p - 1] = f[p] = -1;
+ for (i = 0; i < p; ++i) g[p - 1 - i] = in[i];
+ g[p] = 0;
+ for (loop = 0; loop < 2 * p - 1; ++loop) {
+ for (i = p; i > 0; --i) v[i] = v[i - 1];
+ v[0] = 0;
+ swap = crypto_int16_negative_mask(-delta) & crypto_int16_nonzero_mask(g[0]);
+ delta ^= swap & (delta ^ -delta);
+ delta += 1;
+ for (i = 0; i < p + 1; ++i) {
+ t = swap & (f[i] ^ g[i]);
+ f[i] ^= t;
+ g[i] ^= t;
+ t = swap & (v[i] ^ r[i]);
+ v[i] ^= t;
+ r[i] ^= t;
+ }
+ f0 = f[0];
+ g0 = g[0];
+ for (i = 0; i < p + 1; ++i) g[i] = Fq_freeze(f0 * g[i] - g0 * f[i]);
+ for (i = 0; i < p + 1; ++i) r[i] = Fq_freeze(f0 * r[i] - g0 * v[i]);
+ for (i = 0; i < p; ++i) g[i] = g[i + 1];
+ g[p] = 0;
+ }
+ scale = Fq_recip(f[0]);
+ for (i = 0; i < p; ++i) out[i] = Fq_freeze(scale * (int32_t)v[p - 1 - i]);
+ return crypto_int16_nonzero_mask(delta);
}
-/* C = ZEncrypt(r,pk) */
-static void ZEncrypt(unsigned char *C,const Inputs r,const unsigned char *pk)
-{
- Fq h[p];
- Fq c[p];
- Rq_decode(h,pk);
- Encrypt(c,r,h);
- Rounded_encode(C,c);
+static void Round(Fq *out, const Fq *a) {
+ int i;
+ for (i = 0; i < p; ++i) out[i] = a[i] - F3_freeze(a[i]);
}
-/* r = ZDecrypt(C,sk) */
-static void ZDecrypt(Inputs r,const unsigned char *C,const unsigned char *sk)
-{
- small f[p],v[p];
- Fq c[p];
-
- Small_decode(f,sk); sk += Small_bytes;
- Small_decode(v,sk);
- Rounded_decode(c,C);
- Decrypt(r,c,f,v);
+static void Short_fromlist(small *out, const uint32_t *in) {
+ uint32_t L[p];
+ int i;
+ for (i = 0; i < w; ++i) L[i] = in[i] & (uint32_t)-2;
+ for (i = w; i < p; ++i) L[i] = (in[i] & (uint32_t)-3) | 1;
+ crypto_sort_uint32(L, p);
+ for (i = 0; i < p; ++i) out[i] = (L[i] & 3) - 1;
}
-#endif
-
-/* ----- NTRU LPRime Expand plus encoding */
-
-#ifdef LPR
-
-#define Ciphertexts_bytes (Rounded_bytes+Top_bytes)
-#define SecretKeys_bytes Small_bytes
-#define PublicKeys_bytes (Seeds_bytes+Rounded_bytes)
-
-static void Inputs_random(Inputs r)
-{
- unsigned char s[Inputs_bytes];
+static void Hash_prefix(unsigned char *out, int b, const unsigned char *in, int inlen) {
+ unsigned char x[inlen + 1], h[64];
int i;
-
- randombytes(s,sizeof s);
- for (i = 0;i < I;++i) r[i] = 1&(s[i>>3]>>(i&7));
+ x[0] = b;
+ for (i = 0; i < inlen; ++i) x[i + 1] = in[i];
+ crypto_hash_sha512(h, x, inlen + 1);
+ for (i = 0; i < 32; ++i) out[i] = h[i];
}
-/* pk,sk = ZKeyGen() */
-static void ZKeyGen(unsigned char *pk,unsigned char *sk)
-{
- Fq A[p];
- small a[p];
-
- XKeyGen(pk,A,a); pk += Seeds_bytes;
- Rounded_encode(pk,A);
- Small_encode(sk,a);
+static uint32_t urandom32(void) {
+ unsigned char c[4];
+ uint32_t result = 0;
+ int i;
+ randombytes(c, 4);
+ for (i = 0; i < 4; ++i) result += ((uint32_t)c[i]) << (8 * i);
+ return result;
}
-/* c = ZEncrypt(r,pk) */
-static void ZEncrypt(unsigned char *c,const Inputs r,const unsigned char *pk)
-{
- Fq A[p];
- Fq B[p];
- int8 T[I];
-
- Rounded_decode(A,pk+Seeds_bytes);
- XEncrypt(B,T,r,pk,A);
- Rounded_encode(c,B); c += Rounded_bytes;
- Top_encode(c,T);
+static void Short_random(small *out) {
+ uint32_t L[p];
+ int i;
+ for (i = 0; i < p; ++i) L[i] = urandom32();
+ Short_fromlist(out, L);
}
-/* r = ZDecrypt(C,sk) */
-static void ZDecrypt(Inputs r,const unsigned char *c,const unsigned char *sk)
-{
- small a[p];
- Fq B[p];
- int8 T[I];
-
- Small_decode(a,sk);
- Rounded_decode(B,c);
- Top_decode(T,c+Rounded_bytes);
- XDecrypt(r,B,T,a);
+static void Small_random(small *out) {
+ int i;
+ for (i = 0; i < p; ++i) out[i] = (((urandom32() & 0x3fffffff) * 3) >> 30) - 1;
}
-#endif
-
-/* ----- confirmation hash */
+static void KeyGen(Fq *h, small *f, small *ginv) {
+ small g[p];
+ Fq finv[p];
+ for (;;) {
+ int result;
+ Small_random(g);
+ result = R3_recip(ginv, g);
+ crypto_declassify(&result, sizeof result);
+ if (result == 0) break;
+ }
+ Short_random(f);
+ Rq_recip3(finv, f);
+ Rq_mult_small(h, finv, g);
+}
-#define Confirm_bytes 32
+static void Encrypt(Fq *c, const small *r, const Fq *h) {
+ Fq hr[p];
+ Rq_mult_small(hr, h, r);
+ Round(c, hr);
+}
+
+static void Decrypt(small *r, const Fq *c, const small *f, const small *ginv) {
+ Fq cf[p], cf3[p];
+ small e[p], ev[p];
+ int mask, i;
+ Rq_mult_small(cf, c, f);
+ Rq_mult3(cf3, cf);
+ R3_fromRq(e, cf3);
+ R3_mult(ev, e, ginv);
+ mask = Weightw_mask(ev);
+ for (i = 0; i < w; ++i) r[i] = ((ev[i] ^ 1) & ~mask) ^ 1;
+ for (i = w; i < p; ++i) r[i] = ev[i] & ~mask;
+}
+
+static void Small_encode(unsigned char *s, const small *f) {
+ int i, j;
+ for (i = 0; i < p / 4; ++i) {
+ small x = 0;
+ for (j = 0;j < 4;++j) x += (*f++ + 1) << (2 * j);
+ *s++ = x;
+ }
+ *s = *f++ + 1;
+}
-/* h = HashConfirm(r,pk,cache); cache is Hash4(pk) */
-static void HashConfirm(unsigned char *h,const unsigned char *r,const unsigned char *pk,const unsigned char *cache)
-{
-#ifndef LPR
- unsigned char x[Hash_bytes*2];
- int i;
+static void Small_decode(small *f, const unsigned char *s) {
+ int i, j;
+ for (i = 0; i < p / 4; ++i) {
+ unsigned char x = *s++;
+ for (j = 0;j < 4;++j) *f++ = ((small)((x >> (2 * j)) & 3)) - 1;
+ }
+ *f++ = ((small)(*s & 3)) - 1;
+}
- Hash_prefix(x,3,r,Inputs_bytes);
- for (i = 0;i < Hash_bytes;++i) x[Hash_bytes+i] = cache[i];
-#else
- unsigned char x[Inputs_bytes+Hash_bytes];
+static void Rq_encode(unsigned char *s, const Fq *r) {
+ uint16_t R[p], M[p];
int i;
-
- for (i = 0;i < Inputs_bytes;++i) x[i] = r[i];
- for (i = 0;i < Hash_bytes;++i) x[Inputs_bytes+i] = cache[i];
-#endif
- Hash_prefix(h,2,x,sizeof x);
+ for (i = 0; i < p; ++i) R[i] = r[i] + q12;
+ for (i = 0; i < p; ++i) M[i] = q;
+ Encode(s, R, M, p);
}
-/* ----- session-key hash */
-
-/* k = HashSession(b,y,z) */
-static void HashSession(unsigned char *k,int b,const unsigned char *y,const unsigned char *z)
-{
-#ifndef LPR
- unsigned char x[Hash_bytes+Ciphertexts_bytes+Confirm_bytes];
+static void Rq_decode(Fq *r, const unsigned char *s) {
+ uint16_t R[p], M[p];
int i;
+ for (i = 0; i < p; ++i) M[i] = q;
+ Decode(R, s, M, p);
+ for (i = 0; i < p; ++i) r[i] = ((Fq)R[i]) - q12;
+}
- Hash_prefix(x,3,y,Inputs_bytes);
- for (i = 0;i < Ciphertexts_bytes+Confirm_bytes;++i) x[Hash_bytes+i] = z[i];
-#else
- unsigned char x[Inputs_bytes+Ciphertexts_bytes+Confirm_bytes];
+static void Rounded_encode(unsigned char *s, const Fq *r) {
+ uint16_t R[p], M[p];
int i;
-
- for (i = 0;i < Inputs_bytes;++i) x[i] = y[i];
- for (i = 0;i < Ciphertexts_bytes+Confirm_bytes;++i) x[Inputs_bytes+i] = z[i];
-#endif
- Hash_prefix(k,b,x,sizeof x);
+ for (i = 0; i < p; ++i) R[i] = ((r[i] + q12) * 10923) >> 15;
+ for (i = 0; i < p; ++i) M[i] = (q + 2) / 3;
+ Encode(s, R, M, p);
}
-/* ----- Streamlined NTRU Prime and NTRU LPRime */
-
-/* pk,sk = KEM_KeyGen() */
-static void KEM_KeyGen(unsigned char *pk,unsigned char *sk)
-{
+static void Rounded_decode(Fq *r, const unsigned char *s) {
+ uint16_t R[p], M[p];
int i;
-
- ZKeyGen(pk,sk); sk += SecretKeys_bytes;
- for (i = 0;i < PublicKeys_bytes;++i) *sk++ = pk[i];
- randombytes(sk,Inputs_bytes); sk += Inputs_bytes;
- Hash_prefix(sk,4,pk,PublicKeys_bytes);
+ for (i = 0; i < p; ++i) M[i] = (q + 2) / 3;
+ Decode(R, s, M, p);
+ for (i = 0; i < p; ++i) r[i] = R[i] * 3 - q12;
}
-/* c,r_enc = Hide(r,pk,cache); cache is Hash4(pk) */
-static void Hide(unsigned char *c,unsigned char *r_enc,const Inputs r,const unsigned char *pk,const unsigned char *cache)
-{
- Inputs_encode(r_enc,r);
- ZEncrypt(c,r,pk); c += Ciphertexts_bytes;
- HashConfirm(c,r_enc,pk,cache);
+static void ZKeyGen(unsigned char *pk, unsigned char *sk) {
+ Fq h[p];
+ small f[p], v[p];
+ KeyGen(h, f, v);
+ Rq_encode(pk, h);
+ Small_encode(sk, f);
+ Small_encode(sk + Small_bytes, v);
}
-/* c,k = Encap(pk) */
-static void Encap(unsigned char *c,unsigned char *k,const unsigned char *pk)
-{
- Inputs r;
- unsigned char r_enc[Inputs_bytes];
- unsigned char cache[Hash_bytes];
-
- Hash_prefix(cache,4,pk,PublicKeys_bytes);
- Inputs_random(r);
- Hide(c,r_enc,r,pk,cache);
- HashSession(k,1,r_enc,c);
+static void ZEncrypt(unsigned char *C, const Inputs r, const unsigned char *pk) {
+ Fq h[p], c[p];
+ Rq_decode(h, pk);
+ Encrypt(c, r, h);
+ Rounded_encode(C, c);
}
-/* 0 if matching ciphertext+confirm, else -1 */
-static int Ciphertexts_diff_mask(const unsigned char *c,const unsigned char *c2)
-{
- uint16 differentbits = 0;
- int len = Ciphertexts_bytes+Confirm_bytes;
-
- while (len-- > 0) differentbits |= (*c++)^(*c2++);
- return (1&((differentbits-1)>>8))-1;
+static void ZDecrypt(Inputs r, const unsigned char *C, const unsigned char *sk) {
+ small f[p], v[p];
+ Fq c[p];
+ Small_decode(f, sk);
+ Small_decode(v, sk + Small_bytes);
+ Rounded_decode(c, C);
+ Decrypt(r, c, f, v);
}
-/* k = Decap(c,sk) */
-static void Decap(unsigned char *k,const unsigned char *c,const unsigned char *sk)
-{
- const unsigned char *pk = sk + SecretKeys_bytes;
- const unsigned char *rho = pk + PublicKeys_bytes;
- const unsigned char *cache = rho + Inputs_bytes;
- Inputs r;
- unsigned char r_enc[Inputs_bytes];
- unsigned char cnew[Ciphertexts_bytes+Confirm_bytes];
- int mask;
+static void HashConfirm(unsigned char *h, const unsigned char *r, const unsigned char *cache) {
+ unsigned char x[Hash_bytes * 2];
int i;
+ Hash_prefix(x, 3, r, Small_bytes);
+ for (i = 0; i < Hash_bytes; ++i) x[Hash_bytes + i] = cache[i];
+ Hash_prefix(h, 2, x, sizeof x);
+}
- ZDecrypt(r,c,sk);
- Hide(cnew,r_enc,r,pk,cache);
- mask = Ciphertexts_diff_mask(c,cnew);
- for (i = 0;i < Inputs_bytes;++i) r_enc[i] ^= mask&(r_enc[i]^rho[i]);
- HashSession(k,1+mask,r_enc,c);
+static void HashSession(unsigned char *k, int b, const unsigned char *y, const unsigned char *z) {
+ unsigned char x[Hash_bytes + crypto_kem_sntrup761_CIPHERTEXTBYTES];
+ int i;
+ Hash_prefix(x, 3, y, Small_bytes);
+ for (i = 0; i < crypto_kem_sntrup761_CIPHERTEXTBYTES; ++i) x[Hash_bytes + i] = z[i];
+ Hash_prefix(k, b, x, sizeof x);
}
-/* ----- crypto_kem API */
+int crypto_kem_sntrup761_keypair(unsigned char *pk, unsigned char *sk) {
+ int i;
+ ZKeyGen(pk, sk);
+ sk += SecretKeys_bytes;
+ for (i = 0; i < crypto_kem_sntrup761_PUBLICKEYBYTES; ++i) *sk++ = pk[i];
+ randombytes(sk, Small_bytes);
+ Hash_prefix(sk + Small_bytes, 4, pk, crypto_kem_sntrup761_PUBLICKEYBYTES);
+ return 0;
+}
+static void Hide(unsigned char *c, unsigned char *r_enc, const Inputs r, const unsigned char *pk, const unsigned char *cache) {
+ Small_encode(r_enc, r);
+ ZEncrypt(c, r, pk);
+ HashConfirm(c + crypto_kem_sntrup761_CIPHERTEXTBYTES - Confirm_bytes, r_enc, cache);
+}
-int crypto_kem_sntrup761_keypair(unsigned char *pk,unsigned char *sk)
-{
- KEM_KeyGen(pk,sk);
+int crypto_kem_sntrup761_enc(unsigned char *c, unsigned char *k, const unsigned char *pk) {
+ Inputs r;
+ unsigned char r_enc[Small_bytes], cache[Hash_bytes];
+ Hash_prefix(cache, 4, pk, crypto_kem_sntrup761_PUBLICKEYBYTES);
+ Short_random(r);
+ Hide(c, r_enc, r, pk, cache);
+ HashSession(k, 1, r_enc, c);
return 0;
}
-int crypto_kem_sntrup761_enc(unsigned char *c,unsigned char *k,const unsigned char *pk)
-{
- Encap(c,k,pk);
- return 0;
+static int Ciphertexts_diff_mask(const unsigned char *c, const unsigned char *c2) {
+ uint16_t differentbits = 0;
+ int len = crypto_kem_sntrup761_CIPHERTEXTBYTES;
+ while (len-- > 0) differentbits |= (*c++) ^ (*c2++);
+ return (crypto_int64_bitmod_01((differentbits - 1),8)) - 1;
}
-int crypto_kem_sntrup761_dec(unsigned char *k,const unsigned char *c,const unsigned char *sk)
-{
- Decap(k,c,sk);
+int crypto_kem_sntrup761_dec(unsigned char *k, const unsigned char *c, const unsigned char *sk) {
+ const unsigned char *pk = sk + SecretKeys_bytes;
+ const unsigned char *rho = pk + crypto_kem_sntrup761_PUBLICKEYBYTES;
+ const unsigned char *cache = rho + Small_bytes;
+ Inputs r;
+ unsigned char r_enc[Small_bytes], cnew[crypto_kem_sntrup761_CIPHERTEXTBYTES];
+ int mask, i;
+ ZDecrypt(r, c, sk);
+ Hide(cnew, r_enc, r, pk, cache);
+ mask = Ciphertexts_diff_mask(c, cnew);
+ for (i = 0; i < Small_bytes; ++i) r_enc[i] ^= mask & (r_enc[i] ^ rho[i]);
+ HashSession(k, 1 + mask, r_enc, c);
return 0;
}
diff --git a/usr.bin/ssh/sntrup761.sh b/usr.bin/ssh/sntrup761.sh
index db4e9aed08a..92c803bb1a4 100644
--- a/usr.bin/ssh/sntrup761.sh
+++ b/usr.bin/ssh/sntrup761.sh
@@ -1,25 +1,18 @@
#!/bin/sh
-# $OpenBSD: sntrup761.sh,v 1.7 2023/01/11 02:13:52 djm Exp $
+# $OpenBSD: sntrup761.sh,v 1.8 2024/09/15 02:20:51 djm Exp $
# Placed in the Public Domain.
#
-AUTHOR="supercop-20201130/crypto_kem/sntrup761/ref/implementors"
-FILES="
- supercop-20201130/crypto_sort/int32/portable4/int32_minmax.inc
- supercop-20201130/crypto_sort/int32/portable4/sort.c
- supercop-20201130/crypto_sort/uint32/useint32/sort.c
- supercop-20201130/crypto_kem/sntrup761/ref/uint32.c
- supercop-20201130/crypto_kem/sntrup761/ref/int32.c
- supercop-20201130/crypto_kem/sntrup761/ref/paramsmenu.h
- supercop-20201130/crypto_kem/sntrup761/ref/params.h
- supercop-20201130/crypto_kem/sntrup761/ref/Decode.h
- supercop-20201130/crypto_kem/sntrup761/ref/Decode.c
- supercop-20201130/crypto_kem/sntrup761/ref/Encode.h
- supercop-20201130/crypto_kem/sntrup761/ref/Encode.c
- supercop-20201130/crypto_kem/sntrup761/ref/kem.c
+AUTHOR="supercop-20240808/crypto_kem/sntrup761/ref/implementors"
+FILES=" supercop-20240808/cryptoint/crypto_int16.h
+ supercop-20240808/cryptoint/crypto_int32.h
+ supercop-20240808/cryptoint/crypto_int64.h
+ supercop-20240808/crypto_sort/int32/portable4/sort.c
+ supercop-20240808/crypto_sort/uint32/useint32/sort.c
+ supercop-20240808/crypto_kem/sntrup761/compact/kem.c
"
###
-set -e
+set -euo pipefail
cd $1
echo -n '/* $'
echo 'OpenBSD: $ */'
@@ -32,12 +25,19 @@ echo
echo '#include <string.h>'
echo '#include "crypto_api.h"'
echo
+echo '#define crypto_declassify(x, y) do {} while (0)'
+echo
# Map the types used in this code to the ones in crypto_api.h. We use #define
# instead of typedef since some systems have existing intXX types and do not
# permit multiple typedefs even if they do not conflict.
for t in int8 uint8 int16 uint16 int32 uint32 int64 uint64; do
echo "#define $t crypto_${t}"
done
+
+for x in 16 32 64 ; do
+ echo "extern volatile crypto_int$x crypto_int${x}_optblocker;"
+done
+
echo
for i in $FILES; do
echo "/* from $i */"
@@ -57,14 +57,27 @@ for i in $FILES; do
-e 's/[ ]*$//' \
$i | \
case "$i" in
- # Use int64_t for intermediate values in int32_MINMAX to prevent signed
- # 32-bit integer overflow when called by crypto_sort_uint32.
- */int32_minmax.inc)
- sed -e "s/int32 ab = b ^ a/int64_t ab = (int64_t)b ^ (int64_t)a/" \
- -e "s/int32 c = b - a/int64_t c = (int64_t)b - (int64_t)a/"
+ */cryptoint/crypto_int16.h)
+ sed -e "s/static void crypto_int16_store/void crypto_int16_store/" \
+ -e "s/^[#]define crypto_int16_optblocker.*//" \
+ -e "s/static void crypto_int16_minmax/void crypto_int16_minmax/"
+ ;;
+ */cryptoint/crypto_int32.h)
+ sed -e "s/static void crypto_int32_store/void crypto_int32_store/" \
+ -e "s/^[#]define crypto_int32_optblocker.*//" \
+ -e "s/static void crypto_int32_minmax/void crypto_int32_minmax/"
+ ;;
+ */cryptoint/crypto_int64.h)
+ sed -e "s/static void crypto_int64_store/void crypto_int64_store/" \
+ -e "s/^[#]define crypto_int64_optblocker.*//" \
+ -e "s/static void crypto_int64_minmax/void crypto_int64_minmax/"
;;
*/int32/portable4/sort.c)
- sed -e "s/void crypto_sort/void crypto_sort_int32/g"
+ sed -e "s/void crypto_sort[(]/void crypto_sort_int32(/g"
+ ;;
+ */int32/portable5/sort.c)
+ sed -e "s/crypto_sort_smallindices/crypto_sort_int32_smallindices/"\
+ -e "s/void crypto_sort[(]/void crypto_sort_int32(/g"
;;
*/uint32/useint32/sort.c)
sed -e "s/void crypto_sort/void crypto_sort_uint32/g"