summary refs log tree commit diff
path: root/crypto/src/pqc
diff options
context:
space:
mode:
author David Hook <dgh@cryptoworkshop.com> 2022-08-08 17:37:50 +1000
committer David Hook <dgh@cryptoworkshop.com> 2022-08-08 17:37:50 +1000
commit 2c6872aad29f8187ddc6535c5c4702e2a0238ecc (patch)
tree ab1d02cd6e820c877a86bdbce700f7f00db812d8 /crypto/src/pqc
parent move KEMExtractor to KemExtractor (diff)
download BouncyCastle.NET-ed25519-2c6872aad29f8187ddc6535c5c4702e2a0238ecc.tar.xz
Initial Falcon implementation
Diffstat (limited to 'crypto/src/pqc')
-rw-r--r-- crypto/src/pqc/crypto/falcon/FPREngine.cs 1311
-rw-r--r-- crypto/src/pqc/crypto/falcon/FalconCodec.cs 576
-rw-r--r-- crypto/src/pqc/crypto/falcon/FalconCommon.cs 304
-rw-r--r-- crypto/src/pqc/crypto/falcon/FalconConversions.cs 66
-rw-r--r-- crypto/src/pqc/crypto/falcon/FalconFFT.cs 711
-rw-r--r-- crypto/src/pqc/crypto/falcon/FalconFPR.cs 13
-rw-r--r-- crypto/src/pqc/crypto/falcon/FalconKeyGenerationParameters.cs 22
-rw-r--r-- crypto/src/pqc/crypto/falcon/FalconKeyPairGenerator.cs 55
-rw-r--r-- crypto/src/pqc/crypto/falcon/FalconKeyParameters.cs 22
-rw-r--r-- crypto/src/pqc/crypto/falcon/FalconKeygen.cs 3673
-rw-r--r-- crypto/src/pqc/crypto/falcon/FalconNIST.cs 303
-rw-r--r-- crypto/src/pqc/crypto/falcon/FalconParameters.cs 38
-rw-r--r-- crypto/src/pqc/crypto/falcon/FalconPrivateKeyParameters.cs 24
-rw-r--r-- crypto/src/pqc/crypto/falcon/FalconPublicKeyParameters.cs 23
-rw-r--r-- crypto/src/pqc/crypto/falcon/FalconRNG.cs 261
-rw-r--r-- crypto/src/pqc/crypto/falcon/FalconSign.cs 974
-rw-r--r-- crypto/src/pqc/crypto/falcon/FalconSigner.cs 76
-rw-r--r-- crypto/src/pqc/crypto/falcon/FalconSmallPrime.cs 46
-rw-r--r-- crypto/src/pqc/crypto/falcon/FalconSmallPrimes.cs 536
-rw-r--r-- crypto/src/pqc/crypto/falcon/FalconVrfy.cs 860
-rw-r--r-- crypto/src/pqc/crypto/falcon/SHAKE256.cs 569
-rw-r--r-- crypto/src/pqc/crypto/falcon/SamplerZ.cs 229
22 files changed, 10692 insertions, 0 deletions
diff --git a/crypto/src/pqc/crypto/falcon/FPREngine.cs b/crypto/src/pqc/crypto/falcon/FPREngine.cs
new file mode 100644
index 000000000..d92c23235
--- /dev/null
+++ b/crypto/src/pqc/crypto/falcon/FPREngine.cs
@@ -0,0 +1,1311 @@
+using System;
+
+namespace Org.BouncyCastle.Pqc.Crypto.Falcon
+{
+    class FPREngine
+    {
+        /// <summary>
+        /// Constructs a <see cref="FalconFPR"/> from the given double.
+        /// </summary>
+        /// <param name="v">The double handed to the FalconFPR constructor.</param>
+        /// <returns>The newly constructed FalconFPR.</returns>
+        internal FalconFPR FPR(double v)
+        {
+            FalconFPR wrapped = new FalconFPR(v);
+            return wrapped;
+        }
+
+        /* 
+        * License from the reference C code (the code was copied then modified
+        * to function in C#):
+        * ==========================(LICENSE BEGIN)============================
+        *
+        * Copyright (c) 2017-2019  Falcon Project
+        *
+        * Permission is hereby granted, free of charge, to any person obtaining
+        * a copy of this software and associated documentation files (the
+        * "Software"), to deal in the Software without restriction, including
+        * without limitation the rights to use, copy, modify, merge, publish,
+        * distribute, sublicense, and/or sell copies of the Software, and to
+        * permit persons to whom the Software is furnished to do so, subject to
+        * the following conditions:
+        *
+        * The above copyright notice and this permission notice shall be
+        * included in all copies or substantial portions of the Software.
+        *
+        * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+        * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+        * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+        * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+        * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+        * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+        * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+        *
+        * ===========================(LICENSE END)=============================
+        */
+
+        /// <summary>
+        /// Converts a 64-bit signed integer to a <see cref="FalconFPR"/> by casting it to double.
+        /// </summary>
+        /// <param name="i">The integer to convert.</param>
+        /// <returns>A FalconFPR built from <c>(double)i</c>.</returns>
+        internal FalconFPR fpr_of(long i)
+        {
+            double asDouble = (double)i;
+            return FPR(asDouble);
+        }
+
+
+
+        /// <summary>
+        /// Rounds <paramref name="x"/> to the nearest 64-bit integer, with ties going to the
+        /// even neighbour, selecting the result with bit masks instead of branches.
+        /// </summary>
+        /// <param name="x">Value to round; only its <c>v</c> field is read.</param>
+        /// <returns>The rounded value as a 64-bit signed integer.</returns>
+        internal long fpr_rint(FalconFPR x)
+        {
+            /*
+            * A library rounding routine is avoided because it might not be
+            * constant-time.
+            *
+            * Suppose that x >= 0. If x >= 2^52, then it is already an
+            * integer. Otherwise, if x < 2^52, then computing x+2^52 will
+            * yield a value that will be rounded to the nearest integer
+            * with exactly the right rules (round-to-nearest-even).
+            *
+            * In order to have constant-time processing, we must do the
+            * computation for both x >= 0 and x < 0 cases, and use a
+            * cast to an integer to access the sign and select the proper
+            * value. Such casts also allow us to find out if |x| < 2^52.
+            */
+            long sx, tx, rp, rn, m;
+            uint ub;
+
+            /*
+            * Everything below runs in an unchecked context on purpose:
+            * reinterpreting a negative tx as ulong and computing
+            * (0 or 1) - 2 on a uint both depend on wrap-around, and
+            * would throw OverflowException if this file were compiled
+            * with overflow checking enabled. With the default compiler
+            * settings the results are unchanged.
+            */
+            unchecked
+            {
+                sx = (long)(x.v - 1.0);
+                tx = (long)x.v;
+                rp = (long)(x.v + 4503599627370496.0) - 4503599627370496;
+                rn = (long)(x.v - 4503599627370496.0) + 4503599627370496;
+
+                /*
+                * If tx >= 2^52 or tx < -2^52, then result is tx.
+                * Otherwise, if sx >= 0, then result is rp.
+                * Otherwise, result is rn. We use the fact that when x is
+                * close to 0 (|x| <= 0.25) then both rp and rn are correct;
+                * and if x is not close to 0, then trunc(x-1.0) yields the
+                * appropriate sign.
+                */
+
+                /*
+                * m is all ones when sx is negative and zero otherwise, so
+                * rn survives only for negative sx and rp only for
+                * non-negative sx.
+                */
+                m = sx >> 63;
+                rn &= m;
+                rp &= ~m;
+
+                /*
+                * Get the 12 upper bits of tx; if they are not all zeros or
+                * all ones, then tx >= 2^52 or tx < -2^52, and we clamp both
+                * rp and rn to zero. Otherwise, we clamp tx to zero.
+                *
+                * (ub + 1) & 0xFFF is 1 for ub == 0 and 0 for ub == 0xFFF;
+                * subtracting 2 wraps those two cases to values with bit 31
+                * set, so the shift yields 1 and m becomes all ones.
+                */
+                ub = (uint)((ulong)tx >> 52);
+                m = -(long)((((ub + 1) & 0xFFF) - 2) >> 31);
+                rp &= m;
+                rn &= m;
+                tx &= ~m;
+
+                /*
+                * Only one of tx, rn or rp (at most) can be non-zero at this
+                * point.
+                */
+                return tx | rn | rp;
+            }
+        }
+
+        /// <summary>
+        /// Returns the floor of <paramref name="x"/> as a 64-bit signed integer.
+        /// </summary>
+        /// <param name="x">Value to floor; only its <c>v</c> field is read.</param>
+        /// <returns>The truncated value, reduced by one when truncation rounded upward.</returns>
+        internal long fpr_floor(FalconFPR x)
+        {
+            /*
+            * Casting to long truncates toward zero, so for most negative
+            * inputs the cast lands one above the floor. The adjustment
+            * below stays constant-time provided the compiler materialises
+            * the floating-point comparison as a 0/1 integer without a
+            * conditional branch or another non-constant-time construction.
+            * That should be the case on all modern architectures with an
+            * FPU (and where it is not, the FPU itself is probably not
+            * constant-time either, making the point moot).
+            */
+            double value = x.v;
+            long truncated = (long)value;
+            long adjust = (value < (double)truncated) ? 1 : 0;
+            return truncated - adjust;
+        }
+
+        /// <summary>
+        /// Converts <paramref name="x"/> to a 64-bit signed integer with a plain cast
+        /// (rounding toward zero).
+        /// </summary>
+        /// <param name="x">Value to convert; only its <c>v</c> field is read.</param>
+        /// <returns><c>(long)x.v</c>.</returns>
+        internal long fpr_trunc(FalconFPR x)
+        {
+            long truncated = (long)x.v;
+            return truncated;
+        }
+
+        /// <summary>Adds two values.</summary>
+        /// <param name="x">Left operand.</param>
+        /// <param name="y">Right operand.</param>
+        /// <returns>A FalconFPR built from <c>x.v + y.v</c>.</returns>
+        internal FalconFPR fpr_add(FalconFPR x, FalconFPR y)
+        {
+            double sum = x.v + y.v;
+            return FPR(sum);
+        }
+
+        /// <summary>Subtracts <paramref name="y"/> from <paramref name="x"/>.</summary>
+        /// <param name="x">Minuend.</param>
+        /// <param name="y">Subtrahend.</param>
+        /// <returns>A FalconFPR built from <c>x.v - y.v</c>.</returns>
+        internal FalconFPR fpr_sub(FalconFPR x, FalconFPR y)
+        {
+            double difference = x.v - y.v;
+            return FPR(difference);
+        }
+
+        /// <summary>Negates a value.</summary>
+        /// <param name="x">Value to negate.</param>
+        /// <returns>A FalconFPR built from <c>-x.v</c>.</returns>
+        internal FalconFPR fpr_neg(FalconFPR x)
+        {
+            // Unary minus is kept (rather than 0.0 - v) so the sign of zero is flipped too.
+            double negated = -x.v;
+            return FPR(negated);
+        }
+
+        /// <summary>Halves a value.</summary>
+        /// <param name="x">Value to halve.</param>
+        /// <returns>A FalconFPR built from <c>x.v * 0.5</c>.</returns>
+        internal FalconFPR fpr_half(FalconFPR x)
+        {
+            double halved = x.v * 0.5;
+            return FPR(halved);
+        }
+
+        /// <summary>Doubles a value by adding it to itself.</summary>
+        /// <param name="x">Value to double.</param>
+        /// <returns>A FalconFPR built from <c>x.v + x.v</c>.</returns>
+        internal FalconFPR fpr_double(FalconFPR x)
+        {
+            double doubled = x.v + x.v;
+            return FPR(doubled);
+        }
+
+        /// <summary>Multiplies two values.</summary>
+        /// <param name="x">Left factor.</param>
+        /// <param name="y">Right factor.</param>
+        /// <returns>A FalconFPR built from <c>x.v * y.v</c>.</returns>
+        internal FalconFPR fpr_mul(FalconFPR x, FalconFPR y)
+        {
+            double product = x.v * y.v;
+            return FPR(product);
+        }
+
+        /// <summary>Squares a value.</summary>
+        /// <param name="x">Value to square.</param>
+        /// <returns>A FalconFPR built from <c>x.v * x.v</c>.</returns>
+        internal FalconFPR fpr_sqr(FalconFPR x)
+        {
+            double squared = x.v * x.v;
+            return FPR(squared);
+        }
+
+        /// <summary>Computes the reciprocal of a value.</summary>
+        /// <param name="x">Divisor; no zero check is performed here.</param>
+        /// <returns>A FalconFPR built from <c>1.0 / x.v</c>.</returns>
+        internal FalconFPR fpr_inv(FalconFPR x)
+        {
+            double reciprocal = 1.0 / x.v;
+            return FPR(reciprocal);
+        }
+
+        /// <summary>Divides <paramref name="x"/> by <paramref name="y"/>.</summary>
+        /// <param name="x">Dividend.</param>
+        /// <param name="y">Divisor; no zero check is performed here.</param>
+        /// <returns>A FalconFPR built from <c>x.v / y.v</c>.</returns>
+        internal FalconFPR fpr_div(FalconFPR x, FalconFPR y)
+        {
+            double quotient = x.v / y.v;
+            return FPR(quotient);
+        }
+
+
+        /// <summary>Computes the square root of a value via <c>System.Math.Sqrt</c>.</summary>
+        /// <param name="x">Value whose square root is taken.</param>
+        /// <returns>A FalconFPR built from <c>System.Math.Sqrt(x.v)</c>.</returns>
+        internal FalconFPR fpr_sqrt(FalconFPR x)
+        {
+            double root = System.Math.Sqrt(x.v);
+            return FPR(root);
+        }
+
+        /// <summary>Tests whether <paramref name="x"/> is strictly less than <paramref name="y"/>.</summary>
+        /// <param name="x">Left operand.</param>
+        /// <param name="y">Right operand.</param>
+        /// <returns>The result of <c>x.v &lt; y.v</c>.</returns>
+        internal bool fpr_lt(FalconFPR x, FalconFPR y)
+        {
+            bool isLess = x.v < y.v;
+            return isLess;
+        }
+
+        /// <summary>
+        /// Evaluates a fixed 13-coefficient polynomial in <c>x.v</c> (described below as an
+        /// approximation of exp(-x)), multiplies the result by <c>ccs.v</c>, scales it by
+        /// <c>fpr_ptwo63.v</c> and casts the product to an unsigned 64-bit integer.
+        /// </summary>
+        /// <param name="x">Polynomial argument; only its <c>v</c> field is read.</param>
+        /// <param name="ccs">Multiplier applied to the polynomial value before scaling.</param>
+        /// <returns>The scaled value converted to <c>ulong</c>.</returns>
+        internal ulong fpr_expm_p63(FalconFPR x, FalconFPR ccs)
+        {
+            /*
+            * Polynomial approximation of exp(-x) is taken from FACCT:
+            *   https://eprint.iacr.org/2018/1234
+            * Specifically, values are extracted from the implementation
+            * referenced from the FACCT article, and available at:
+            *   https://github.com/raykzhao/gaussian
+            * Tests over more than 24 billion random inputs in the
+            * 0..log(2) range have never shown a deviation larger than
+            * 2^(-50) from the true mathematical value.
+            */
+
+
+            /*
+            * Normal implementation uses Horner's method, which minimizes
+            * the number of operations.
+            */
+
+            double d, y;
+
+            d = x.v;
+            /*
+            * Each step computes (next coefficient) - y * d, starting from
+            * the highest-order coefficient; the subtraction supplies the
+            * alternating signs of the polynomial in d.
+            */
+            y = 0.000000002073772366009083061987;
+            y = 0.000000025299506379442070029551 - y * d;
+            y = 0.000000275607356160477811864927 - y * d;
+            y = 0.000002755586350219122514855659 - y * d;
+            y = 0.000024801566833585381209939524 - y * d;
+            y = 0.000198412739277311890541063977 - y * d;
+            y = 0.001388888894063186997887560103 - y * d;
+            y = 0.008333333327800835146903501993 - y * d;
+            y = 0.041666666666110491190622155955 - y * d;
+            y = 0.166666666666984014666397229121 - y * d;
+            y = 0.500000000000019206858326015208 - y * d;
+            y = 0.999999999999994892974086724280 - y * d;
+            y = 1.000000000000000000000000000000 - y * d;
+            y *= ccs.v;
+            /* NOTE(review): fpr_ptwo63 is declared outside this view; presumably 2^63 - confirm. */
+            return (ulong)(y * fpr_ptwo63.v);
+
+        }
+
+        internal FalconFPR[] fpr_gm_tab = {
+                new FalconFPR(0), new FalconFPR(0), /* unused */
+                new FalconFPR(-0.000000000000000000000000000), new FalconFPR( 1.000000000000000000000000000),
+                new FalconFPR( 0.707106781186547524400844362), new FalconFPR( 0.707106781186547524400844362),
+                new FalconFPR(-0.707106781186547524400844362), new FalconFPR( 0.707106781186547524400844362),
+                new FalconFPR( 0.923879532511286756128183189), new FalconFPR( 0.382683432365089771728459984),
+                new FalconFPR(-0.382683432365089771728459984), new FalconFPR( 0.923879532511286756128183189),
+                new FalconFPR( 0.382683432365089771728459984), new FalconFPR( 0.923879532511286756128183189),
+                new FalconFPR(-0.923879532511286756128183189), new FalconFPR( 0.382683432365089771728459984),
+                new FalconFPR( 0.980785280403230449126182236), new FalconFPR( 0.195090322016128267848284868),
+                new FalconFPR(-0.195090322016128267848284868), new FalconFPR( 0.980785280403230449126182236),
+                new FalconFPR( 0.555570233019602224742830814), new FalconFPR( 0.831469612302545237078788378),
+                new FalconFPR(-0.831469612302545237078788378), new FalconFPR( 0.555570233019602224742830814),
+                new FalconFPR( 0.831469612302545237078788378), new FalconFPR( 0.555570233019602224742830814),
+                new FalconFPR(-0.555570233019602224742830814), new FalconFPR( 0.831469612302545237078788378),
+                new FalconFPR( 0.195090322016128267848284868), new FalconFPR( 0.980785280403230449126182236),
+                new FalconFPR(-0.980785280403230449126182236), new FalconFPR( 0.195090322016128267848284868),
+                new FalconFPR( 0.995184726672196886244836953), new FalconFPR( 0.098017140329560601994195564),
+                new FalconFPR(-0.098017140329560601994195564), new FalconFPR( 0.995184726672196886244836953),
+                new FalconFPR( 0.634393284163645498215171613), new FalconFPR( 0.773010453362736960810906610),
+                new FalconFPR(-0.773010453362736960810906610), new FalconFPR( 0.634393284163645498215171613),
+                new FalconFPR( 0.881921264348355029712756864), new FalconFPR( 0.471396736825997648556387626),
+                new FalconFPR(-0.471396736825997648556387626), new FalconFPR( 0.881921264348355029712756864),
+                new FalconFPR( 0.290284677254462367636192376), new FalconFPR( 0.956940335732208864935797887),
+                new FalconFPR(-0.956940335732208864935797887), new FalconFPR( 0.290284677254462367636192376),
+                new FalconFPR( 0.956940335732208864935797887), new FalconFPR( 0.290284677254462367636192376),
+                new FalconFPR(-0.290284677254462367636192376), new FalconFPR( 0.956940335732208864935797887),
+                new FalconFPR( 0.471396736825997648556387626), new FalconFPR( 0.881921264348355029712756864),
+                new FalconFPR(-0.881921264348355029712756864), new FalconFPR( 0.471396736825997648556387626),
+                new FalconFPR( 0.773010453362736960810906610), new FalconFPR( 0.634393284163645498215171613),
+                new FalconFPR(-0.634393284163645498215171613), new FalconFPR( 0.773010453362736960810906610),
+                new FalconFPR( 0.098017140329560601994195564), new FalconFPR( 0.995184726672196886244836953),
+                new FalconFPR(-0.995184726672196886244836953), new FalconFPR( 0.098017140329560601994195564),
+                new FalconFPR( 0.998795456205172392714771605), new FalconFPR( 0.049067674327418014254954977),
+                new FalconFPR(-0.049067674327418014254954977), new FalconFPR( 0.998795456205172392714771605),
+                new FalconFPR( 0.671558954847018400625376850), new FalconFPR( 0.740951125354959091175616897),
+                new FalconFPR(-0.740951125354959091175616897), new FalconFPR( 0.671558954847018400625376850),
+                new FalconFPR( 0.903989293123443331586200297), new FalconFPR( 0.427555093430282094320966857),
+                new FalconFPR(-0.427555093430282094320966857), new FalconFPR( 0.903989293123443331586200297),
+                new FalconFPR( 0.336889853392220050689253213), new FalconFPR( 0.941544065183020778412509403),
+                new FalconFPR(-0.941544065183020778412509403), new FalconFPR( 0.336889853392220050689253213),
+                new FalconFPR( 0.970031253194543992603984207), new FalconFPR( 0.242980179903263889948274162),
+                new FalconFPR(-0.242980179903263889948274162), new FalconFPR( 0.970031253194543992603984207),
+                new FalconFPR( 0.514102744193221726593693839), new FalconFPR( 0.857728610000272069902269984),
+                new FalconFPR(-0.857728610000272069902269984), new FalconFPR( 0.514102744193221726593693839),
+                new FalconFPR( 0.803207531480644909806676513), new FalconFPR( 0.595699304492433343467036529),
+                new FalconFPR(-0.595699304492433343467036529), new FalconFPR( 0.803207531480644909806676513),
+                new FalconFPR( 0.146730474455361751658850130), new FalconFPR( 0.989176509964780973451673738),
+                new FalconFPR(-0.989176509964780973451673738), new FalconFPR( 0.146730474455361751658850130),
+                new FalconFPR( 0.989176509964780973451673738), new FalconFPR( 0.146730474455361751658850130),
+                new FalconFPR(-0.146730474455361751658850130), new FalconFPR( 0.989176509964780973451673738),
+                new FalconFPR( 0.595699304492433343467036529), new FalconFPR( 0.803207531480644909806676513),
+                new FalconFPR(-0.803207531480644909806676513), new FalconFPR( 0.595699304492433343467036529),
+                new FalconFPR( 0.857728610000272069902269984), new FalconFPR( 0.514102744193221726593693839),
+                new FalconFPR(-0.514102744193221726593693839), new FalconFPR( 0.857728610000272069902269984),
+                new FalconFPR( 0.242980179903263889948274162), new FalconFPR( 0.970031253194543992603984207),
+                new FalconFPR(-0.970031253194543992603984207), new FalconFPR( 0.242980179903263889948274162),
+                new FalconFPR( 0.941544065183020778412509403), new FalconFPR( 0.336889853392220050689253213),
+                new FalconFPR(-0.336889853392220050689253213), new FalconFPR( 0.941544065183020778412509403),
+                new FalconFPR( 0.427555093430282094320966857), new FalconFPR( 0.903989293123443331586200297),
+                new FalconFPR(-0.903989293123443331586200297), new FalconFPR( 0.427555093430282094320966857),
+                new FalconFPR( 0.740951125354959091175616897), new FalconFPR( 0.671558954847018400625376850),
+                new FalconFPR(-0.671558954847018400625376850), new FalconFPR( 0.740951125354959091175616897),
+                new FalconFPR( 0.049067674327418014254954977), new FalconFPR( 0.998795456205172392714771605),
+                new FalconFPR(-0.998795456205172392714771605), new FalconFPR( 0.049067674327418014254954977),
+                new FalconFPR( 0.999698818696204220115765650), new FalconFPR( 0.024541228522912288031734529),
+                new FalconFPR(-0.024541228522912288031734529), new FalconFPR( 0.999698818696204220115765650),
+                new FalconFPR( 0.689540544737066924616730630), new FalconFPR( 0.724247082951466920941069243),
+                new FalconFPR(-0.724247082951466920941069243), new FalconFPR( 0.689540544737066924616730630),
+                new FalconFPR( 0.914209755703530654635014829), new FalconFPR( 0.405241314004989870908481306),
+                new FalconFPR(-0.405241314004989870908481306), new FalconFPR( 0.914209755703530654635014829),
+                new FalconFPR( 0.359895036534988148775104572), new FalconFPR( 0.932992798834738887711660256),
+                new FalconFPR(-0.932992798834738887711660256), new FalconFPR( 0.359895036534988148775104572),
+                new FalconFPR( 0.975702130038528544460395766), new FalconFPR( 0.219101240156869797227737547),
+                new FalconFPR(-0.219101240156869797227737547), new FalconFPR( 0.975702130038528544460395766),
+                new FalconFPR( 0.534997619887097210663076905), new FalconFPR( 0.844853565249707073259571205),
+                new FalconFPR(-0.844853565249707073259571205), new FalconFPR( 0.534997619887097210663076905),
+                new FalconFPR( 0.817584813151583696504920884), new FalconFPR( 0.575808191417845300745972454),
+                new FalconFPR(-0.575808191417845300745972454), new FalconFPR( 0.817584813151583696504920884),
+                new FalconFPR( 0.170961888760301226363642357), new FalconFPR( 0.985277642388941244774018433),
+                new FalconFPR(-0.985277642388941244774018433), new FalconFPR( 0.170961888760301226363642357),
+                new FalconFPR( 0.992479534598709998156767252), new FalconFPR( 0.122410675199216198498704474),
+                new FalconFPR(-0.122410675199216198498704474), new FalconFPR( 0.992479534598709998156767252),
+                new FalconFPR( 0.615231590580626845484913563), new FalconFPR( 0.788346427626606262009164705),
+                new FalconFPR(-0.788346427626606262009164705), new FalconFPR( 0.615231590580626845484913563),
+                new FalconFPR( 0.870086991108711418652292404), new FalconFPR( 0.492898192229784036873026689),
+                new FalconFPR(-0.492898192229784036873026689), new FalconFPR( 0.870086991108711418652292404),
+                new FalconFPR( 0.266712757474898386325286515), new FalconFPR( 0.963776065795439866686464356),
+                new FalconFPR(-0.963776065795439866686464356), new FalconFPR( 0.266712757474898386325286515),
+                new FalconFPR( 0.949528180593036667195936074), new FalconFPR( 0.313681740398891476656478846),
+                new FalconFPR(-0.313681740398891476656478846), new FalconFPR( 0.949528180593036667195936074),
+                new FalconFPR( 0.449611329654606600046294579), new FalconFPR( 0.893224301195515320342416447),
+                new FalconFPR(-0.893224301195515320342416447), new FalconFPR( 0.449611329654606600046294579),
+                new FalconFPR( 0.757208846506484547575464054), new FalconFPR( 0.653172842953776764084203014),
+                new FalconFPR(-0.653172842953776764084203014), new FalconFPR( 0.757208846506484547575464054),
+                new FalconFPR( 0.073564563599667423529465622), new FalconFPR( 0.997290456678690216135597140),
+                new FalconFPR(-0.997290456678690216135597140), new FalconFPR( 0.073564563599667423529465622),
+                new FalconFPR( 0.997290456678690216135597140), new FalconFPR( 0.073564563599667423529465622),
+                new FalconFPR(-0.073564563599667423529465622), new FalconFPR( 0.997290456678690216135597140),
+                new FalconFPR( 0.653172842953776764084203014), new FalconFPR( 0.757208846506484547575464054),
+                new FalconFPR(-0.757208846506484547575464054), new FalconFPR( 0.653172842953776764084203014),
+                new FalconFPR( 0.893224301195515320342416447), new FalconFPR( 0.449611329654606600046294579),
+                new FalconFPR(-0.449611329654606600046294579), new FalconFPR( 0.893224301195515320342416447),
+                new FalconFPR( 0.313681740398891476656478846), new FalconFPR( 0.949528180593036667195936074),
+                new FalconFPR(-0.949528180593036667195936074), new FalconFPR( 0.313681740398891476656478846),
+                new FalconFPR( 0.963776065795439866686464356), new FalconFPR( 0.266712757474898386325286515),
+                new FalconFPR(-0.266712757474898386325286515), new FalconFPR( 0.963776065795439866686464356),
+                new FalconFPR( 0.492898192229784036873026689), new FalconFPR( 0.870086991108711418652292404),
+                new FalconFPR(-0.870086991108711418652292404), new FalconFPR( 0.492898192229784036873026689),
+                new FalconFPR( 0.788346427626606262009164705), new FalconFPR( 0.615231590580626845484913563),
+                new FalconFPR(-0.615231590580626845484913563), new FalconFPR( 0.788346427626606262009164705),
+                new FalconFPR( 0.122410675199216198498704474), new FalconFPR( 0.992479534598709998156767252),
+                new FalconFPR(-0.992479534598709998156767252), new FalconFPR( 0.122410675199216198498704474),
+                new FalconFPR( 0.985277642388941244774018433), new FalconFPR( 0.170961888760301226363642357),
+                new FalconFPR(-0.170961888760301226363642357), new FalconFPR( 0.985277642388941244774018433),
+                new FalconFPR( 0.575808191417845300745972454), new FalconFPR( 0.817584813151583696504920884),
+                new FalconFPR(-0.817584813151583696504920884), new FalconFPR( 0.575808191417845300745972454),
+                new FalconFPR( 0.844853565249707073259571205), new FalconFPR( 0.534997619887097210663076905),
+                new FalconFPR(-0.534997619887097210663076905), new FalconFPR( 0.844853565249707073259571205),
+                new FalconFPR( 0.219101240156869797227737547), new FalconFPR( 0.975702130038528544460395766),
+                new FalconFPR(-0.975702130038528544460395766), new FalconFPR( 0.219101240156869797227737547),
+                new FalconFPR( 0.932992798834738887711660256), new FalconFPR( 0.359895036534988148775104572),
+                new FalconFPR(-0.359895036534988148775104572), new FalconFPR( 0.932992798834738887711660256),
+                new FalconFPR( 0.405241314004989870908481306), new FalconFPR( 0.914209755703530654635014829),
+                new FalconFPR(-0.914209755703530654635014829), new FalconFPR( 0.405241314004989870908481306),
+                new FalconFPR( 0.724247082951466920941069243), new FalconFPR( 0.689540544737066924616730630),
+                new FalconFPR(-0.689540544737066924616730630), new FalconFPR( 0.724247082951466920941069243),
+                new FalconFPR( 0.024541228522912288031734529), new FalconFPR( 0.999698818696204220115765650),
+                new FalconFPR(-0.999698818696204220115765650), new FalconFPR( 0.024541228522912288031734529),
+                new FalconFPR( 0.999924701839144540921646491), new FalconFPR( 0.012271538285719926079408262),
+                new FalconFPR(-0.012271538285719926079408262), new FalconFPR( 0.999924701839144540921646491),
+                new FalconFPR( 0.698376249408972853554813503), new FalconFPR( 0.715730825283818654125532623),
+                new FalconFPR(-0.715730825283818654125532623), new FalconFPR( 0.698376249408972853554813503),
+                new FalconFPR( 0.919113851690057743908477789), new FalconFPR( 0.393992040061048108596188661),
+                new FalconFPR(-0.393992040061048108596188661), new FalconFPR( 0.919113851690057743908477789),
+                new FalconFPR( 0.371317193951837543411934967), new FalconFPR( 0.928506080473215565937167396),
+                new FalconFPR(-0.928506080473215565937167396), new FalconFPR( 0.371317193951837543411934967),
+                new FalconFPR( 0.978317370719627633106240097), new FalconFPR( 0.207111376192218549708116020),
+                new FalconFPR(-0.207111376192218549708116020), new FalconFPR( 0.978317370719627633106240097),
+                new FalconFPR( 0.545324988422046422313987347), new FalconFPR( 0.838224705554838043186996856),
+                new FalconFPR(-0.838224705554838043186996856), new FalconFPR( 0.545324988422046422313987347),
+                new FalconFPR( 0.824589302785025264474803737), new FalconFPR( 0.565731810783613197389765011),
+                new FalconFPR(-0.565731810783613197389765011), new FalconFPR( 0.824589302785025264474803737),
+                new FalconFPR( 0.183039887955140958516532578), new FalconFPR( 0.983105487431216327180301155),
+                new FalconFPR(-0.983105487431216327180301155), new FalconFPR( 0.183039887955140958516532578),
+                new FalconFPR( 0.993906970002356041546922813), new FalconFPR( 0.110222207293883058807899140),
+                new FalconFPR(-0.110222207293883058807899140), new FalconFPR( 0.993906970002356041546922813),
+                new FalconFPR( 0.624859488142386377084072816), new FalconFPR( 0.780737228572094478301588484),
+                new FalconFPR(-0.780737228572094478301588484), new FalconFPR( 0.624859488142386377084072816),
+                new FalconFPR( 0.876070094195406607095844268), new FalconFPR( 0.482183772079122748517344481),
+                new FalconFPR(-0.482183772079122748517344481), new FalconFPR( 0.876070094195406607095844268),
+                new FalconFPR( 0.278519689385053105207848526), new FalconFPR( 0.960430519415565811199035138),
+                new FalconFPR(-0.960430519415565811199035138), new FalconFPR( 0.278519689385053105207848526),
+                new FalconFPR( 0.953306040354193836916740383), new FalconFPR( 0.302005949319228067003463232),
+                new FalconFPR(-0.302005949319228067003463232), new FalconFPR( 0.953306040354193836916740383),
+                new FalconFPR( 0.460538710958240023633181487), new FalconFPR( 0.887639620402853947760181617),
+                new FalconFPR(-0.887639620402853947760181617), new FalconFPR( 0.460538710958240023633181487),
+                new FalconFPR( 0.765167265622458925888815999), new FalconFPR( 0.643831542889791465068086063),
+                new FalconFPR(-0.643831542889791465068086063), new FalconFPR( 0.765167265622458925888815999),
+                new FalconFPR( 0.085797312344439890461556332), new FalconFPR( 0.996312612182778012627226190),
+                new FalconFPR(-0.996312612182778012627226190), new FalconFPR( 0.085797312344439890461556332),
+                new FalconFPR( 0.998118112900149207125155861), new FalconFPR( 0.061320736302208577782614593),
+                new FalconFPR(-0.061320736302208577782614593), new FalconFPR( 0.998118112900149207125155861),
+                new FalconFPR( 0.662415777590171761113069817), new FalconFPR( 0.749136394523459325469203257),
+                new FalconFPR(-0.749136394523459325469203257), new FalconFPR( 0.662415777590171761113069817),
+                new FalconFPR( 0.898674465693953843041976744), new FalconFPR( 0.438616238538527637647025738),
+                new FalconFPR(-0.438616238538527637647025738), new FalconFPR( 0.898674465693953843041976744),
+                new FalconFPR( 0.325310292162262934135954708), new FalconFPR( 0.945607325380521325730945387),
+                new FalconFPR(-0.945607325380521325730945387), new FalconFPR( 0.325310292162262934135954708),
+                new FalconFPR( 0.966976471044852109087220226), new FalconFPR( 0.254865659604514571553980779),
+                new FalconFPR(-0.254865659604514571553980779), new FalconFPR( 0.966976471044852109087220226),
+                new FalconFPR( 0.503538383725717558691867071), new FalconFPR( 0.863972856121586737918147054),
+                new FalconFPR(-0.863972856121586737918147054), new FalconFPR( 0.503538383725717558691867071),
+                new FalconFPR( 0.795836904608883536262791915), new FalconFPR( 0.605511041404325513920626941),
+                new FalconFPR(-0.605511041404325513920626941), new FalconFPR( 0.795836904608883536262791915),
+                new FalconFPR( 0.134580708507126186316358409), new FalconFPR( 0.990902635427780025108237011),
+                new FalconFPR(-0.990902635427780025108237011), new FalconFPR( 0.134580708507126186316358409),
+                new FalconFPR( 0.987301418157858382399815802), new FalconFPR( 0.158858143333861441684385360),
+                new FalconFPR(-0.158858143333861441684385360), new FalconFPR( 0.987301418157858382399815802),
+                new FalconFPR( 0.585797857456438860328080838), new FalconFPR( 0.810457198252594791726703434),
+                new FalconFPR(-0.810457198252594791726703434), new FalconFPR( 0.585797857456438860328080838),
+                new FalconFPR( 0.851355193105265142261290312), new FalconFPR( 0.524589682678468906215098464),
+                new FalconFPR(-0.524589682678468906215098464), new FalconFPR( 0.851355193105265142261290312),
+                new FalconFPR( 0.231058108280671119643236018), new FalconFPR( 0.972939952205560145467720114),
+                new FalconFPR(-0.972939952205560145467720114), new FalconFPR( 0.231058108280671119643236018),
+                new FalconFPR( 0.937339011912574923201899593), new FalconFPR( 0.348418680249434568419308588),
+                new FalconFPR(-0.348418680249434568419308588), new FalconFPR( 0.937339011912574923201899593),
+                new FalconFPR( 0.416429560097637182562598911), new FalconFPR( 0.909167983090522376563884788),
+                new FalconFPR(-0.909167983090522376563884788), new FalconFPR( 0.416429560097637182562598911),
+                new FalconFPR( 0.732654271672412834615546649), new FalconFPR( 0.680600997795453050594430464),
+                new FalconFPR(-0.680600997795453050594430464), new FalconFPR( 0.732654271672412834615546649),
+                new FalconFPR( 0.036807222941358832324332691), new FalconFPR( 0.999322384588349500896221011),
+                new FalconFPR(-0.999322384588349500896221011), new FalconFPR( 0.036807222941358832324332691),
+                new FalconFPR( 0.999322384588349500896221011), new FalconFPR( 0.036807222941358832324332691),
+                new FalconFPR(-0.036807222941358832324332691), new FalconFPR( 0.999322384588349500896221011),
+                new FalconFPR( 0.680600997795453050594430464), new FalconFPR( 0.732654271672412834615546649),
+                new FalconFPR(-0.732654271672412834615546649), new FalconFPR( 0.680600997795453050594430464),
+                new FalconFPR( 0.909167983090522376563884788), new FalconFPR( 0.416429560097637182562598911),
+                new FalconFPR(-0.416429560097637182562598911), new FalconFPR( 0.909167983090522376563884788),
+                new FalconFPR( 0.348418680249434568419308588), new FalconFPR( 0.937339011912574923201899593),
+                new FalconFPR(-0.937339011912574923201899593), new FalconFPR( 0.348418680249434568419308588),
+                new FalconFPR( 0.972939952205560145467720114), new FalconFPR( 0.231058108280671119643236018),
+                new FalconFPR(-0.231058108280671119643236018), new FalconFPR( 0.972939952205560145467720114),
+                new FalconFPR( 0.524589682678468906215098464), new FalconFPR( 0.851355193105265142261290312),
+                new FalconFPR(-0.851355193105265142261290312), new FalconFPR( 0.524589682678468906215098464),
+                new FalconFPR( 0.810457198252594791726703434), new FalconFPR( 0.585797857456438860328080838),
+                new FalconFPR(-0.585797857456438860328080838), new FalconFPR( 0.810457198252594791726703434),
+                new FalconFPR( 0.158858143333861441684385360), new FalconFPR( 0.987301418157858382399815802),
+                new FalconFPR(-0.987301418157858382399815802), new FalconFPR( 0.158858143333861441684385360),
+                new FalconFPR( 0.990902635427780025108237011), new FalconFPR( 0.134580708507126186316358409),
+                new FalconFPR(-0.134580708507126186316358409), new FalconFPR( 0.990902635427780025108237011),
+                new FalconFPR( 0.605511041404325513920626941), new FalconFPR( 0.795836904608883536262791915),
+                new FalconFPR(-0.795836904608883536262791915), new FalconFPR( 0.605511041404325513920626941),
+                new FalconFPR( 0.863972856121586737918147054), new FalconFPR( 0.503538383725717558691867071),
+                new FalconFPR(-0.503538383725717558691867071), new FalconFPR( 0.863972856121586737918147054),
+                new FalconFPR( 0.254865659604514571553980779), new FalconFPR( 0.966976471044852109087220226),
+                new FalconFPR(-0.966976471044852109087220226), new FalconFPR( 0.254865659604514571553980779),
+                new FalconFPR( 0.945607325380521325730945387), new FalconFPR( 0.325310292162262934135954708),
+                new FalconFPR(-0.325310292162262934135954708), new FalconFPR( 0.945607325380521325730945387),
+                new FalconFPR( 0.438616238538527637647025738), new FalconFPR( 0.898674465693953843041976744),
+                new FalconFPR(-0.898674465693953843041976744), new FalconFPR( 0.438616238538527637647025738),
+                new FalconFPR( 0.749136394523459325469203257), new FalconFPR( 0.662415777590171761113069817),
+                new FalconFPR(-0.662415777590171761113069817), new FalconFPR( 0.749136394523459325469203257),
+                new FalconFPR( 0.061320736302208577782614593), new FalconFPR( 0.998118112900149207125155861),
+                new FalconFPR(-0.998118112900149207125155861), new FalconFPR( 0.061320736302208577782614593),
+                new FalconFPR( 0.996312612182778012627226190), new FalconFPR( 0.085797312344439890461556332),
+                new FalconFPR(-0.085797312344439890461556332), new FalconFPR( 0.996312612182778012627226190),
+                new FalconFPR( 0.643831542889791465068086063), new FalconFPR( 0.765167265622458925888815999),
+                new FalconFPR(-0.765167265622458925888815999), new FalconFPR( 0.643831542889791465068086063),
+                new FalconFPR( 0.887639620402853947760181617), new FalconFPR( 0.460538710958240023633181487),
+                new FalconFPR(-0.460538710958240023633181487), new FalconFPR( 0.887639620402853947760181617),
+                new FalconFPR( 0.302005949319228067003463232), new FalconFPR( 0.953306040354193836916740383),
+                new FalconFPR(-0.953306040354193836916740383), new FalconFPR( 0.302005949319228067003463232),
+                new FalconFPR( 0.960430519415565811199035138), new FalconFPR( 0.278519689385053105207848526),
+                new FalconFPR(-0.278519689385053105207848526), new FalconFPR( 0.960430519415565811199035138),
+                new FalconFPR( 0.482183772079122748517344481), new FalconFPR( 0.876070094195406607095844268),
+                new FalconFPR(-0.876070094195406607095844268), new FalconFPR( 0.482183772079122748517344481),
+                new FalconFPR( 0.780737228572094478301588484), new FalconFPR( 0.624859488142386377084072816),
+                new FalconFPR(-0.624859488142386377084072816), new FalconFPR( 0.780737228572094478301588484),
+                new FalconFPR( 0.110222207293883058807899140), new FalconFPR( 0.993906970002356041546922813),
+                new FalconFPR(-0.993906970002356041546922813), new FalconFPR( 0.110222207293883058807899140),
+                new FalconFPR( 0.983105487431216327180301155), new FalconFPR( 0.183039887955140958516532578),
+                new FalconFPR(-0.183039887955140958516532578), new FalconFPR( 0.983105487431216327180301155),
+                new FalconFPR( 0.565731810783613197389765011), new FalconFPR( 0.824589302785025264474803737),
+                new FalconFPR(-0.824589302785025264474803737), new FalconFPR( 0.565731810783613197389765011),
+                new FalconFPR( 0.838224705554838043186996856), new FalconFPR( 0.545324988422046422313987347),
+                new FalconFPR(-0.545324988422046422313987347), new FalconFPR( 0.838224705554838043186996856),
+                new FalconFPR( 0.207111376192218549708116020), new FalconFPR( 0.978317370719627633106240097),
+                new FalconFPR(-0.978317370719627633106240097), new FalconFPR( 0.207111376192218549708116020),
+                new FalconFPR( 0.928506080473215565937167396), new FalconFPR( 0.371317193951837543411934967),
+                new FalconFPR(-0.371317193951837543411934967), new FalconFPR( 0.928506080473215565937167396),
+                new FalconFPR( 0.393992040061048108596188661), new FalconFPR( 0.919113851690057743908477789),
+                new FalconFPR(-0.919113851690057743908477789), new FalconFPR( 0.393992040061048108596188661),
+                new FalconFPR( 0.715730825283818654125532623), new FalconFPR( 0.698376249408972853554813503),
+                new FalconFPR(-0.698376249408972853554813503), new FalconFPR( 0.715730825283818654125532623),
+                new FalconFPR( 0.012271538285719926079408262), new FalconFPR( 0.999924701839144540921646491),
+                new FalconFPR(-0.999924701839144540921646491), new FalconFPR( 0.012271538285719926079408262),
+                new FalconFPR( 0.999981175282601142656990438), new FalconFPR( 0.006135884649154475359640235),
+                new FalconFPR(-0.006135884649154475359640235), new FalconFPR( 0.999981175282601142656990438),
+                new FalconFPR( 0.702754744457225302452914421), new FalconFPR( 0.711432195745216441522130290),
+                new FalconFPR(-0.711432195745216441522130290), new FalconFPR( 0.702754744457225302452914421),
+                new FalconFPR( 0.921514039342041943465396332), new FalconFPR( 0.388345046698826291624993541),
+                new FalconFPR(-0.388345046698826291624993541), new FalconFPR( 0.921514039342041943465396332),
+                new FalconFPR( 0.377007410216418256726567823), new FalconFPR( 0.926210242138311341974793388),
+                new FalconFPR(-0.926210242138311341974793388), new FalconFPR( 0.377007410216418256726567823),
+                new FalconFPR( 0.979569765685440534439326110), new FalconFPR( 0.201104634842091911558443546),
+                new FalconFPR(-0.201104634842091911558443546), new FalconFPR( 0.979569765685440534439326110),
+                new FalconFPR( 0.550457972936604802977289893), new FalconFPR( 0.834862874986380056304401383),
+                new FalconFPR(-0.834862874986380056304401383), new FalconFPR( 0.550457972936604802977289893),
+                new FalconFPR( 0.828045045257755752067527592), new FalconFPR( 0.560661576197336023839710223),
+                new FalconFPR(-0.560661576197336023839710223), new FalconFPR( 0.828045045257755752067527592),
+                new FalconFPR( 0.189068664149806212754997837), new FalconFPR( 0.981963869109555264072848154),
+                new FalconFPR(-0.981963869109555264072848154), new FalconFPR( 0.189068664149806212754997837),
+                new FalconFPR( 0.994564570734255452119106243), new FalconFPR( 0.104121633872054579120943880),
+                new FalconFPR(-0.104121633872054579120943880), new FalconFPR( 0.994564570734255452119106243),
+                new FalconFPR( 0.629638238914927025372981341), new FalconFPR( 0.776888465673232450040827983),
+                new FalconFPR(-0.776888465673232450040827983), new FalconFPR( 0.629638238914927025372981341),
+                new FalconFPR( 0.879012226428633477831323711), new FalconFPR( 0.476799230063322133342158117),
+                new FalconFPR(-0.476799230063322133342158117), new FalconFPR( 0.879012226428633477831323711),
+                new FalconFPR( 0.284407537211271843618310615), new FalconFPR( 0.958703474895871555374645792),
+                new FalconFPR(-0.958703474895871555374645792), new FalconFPR( 0.284407537211271843618310615),
+                new FalconFPR( 0.955141168305770721498157712), new FalconFPR( 0.296150888243623824121786128),
+                new FalconFPR(-0.296150888243623824121786128), new FalconFPR( 0.955141168305770721498157712),
+                new FalconFPR( 0.465976495767966177902756065), new FalconFPR( 0.884797098430937780104007041),
+                new FalconFPR(-0.884797098430937780104007041), new FalconFPR( 0.465976495767966177902756065),
+                new FalconFPR( 0.769103337645579639346626069), new FalconFPR( 0.639124444863775743801488193),
+                new FalconFPR(-0.639124444863775743801488193), new FalconFPR( 0.769103337645579639346626069),
+                new FalconFPR( 0.091908956497132728624990979), new FalconFPR( 0.995767414467659793982495643),
+                new FalconFPR(-0.995767414467659793982495643), new FalconFPR( 0.091908956497132728624990979),
+                new FalconFPR( 0.998475580573294752208559038), new FalconFPR( 0.055195244349689939809447526),
+                new FalconFPR(-0.055195244349689939809447526), new FalconFPR( 0.998475580573294752208559038),
+                new FalconFPR( 0.666999922303637506650154222), new FalconFPR( 0.745057785441465962407907310),
+                new FalconFPR(-0.745057785441465962407907310), new FalconFPR( 0.666999922303637506650154222),
+                new FalconFPR( 0.901348847046022014570746093), new FalconFPR( 0.433093818853151968484222638),
+                new FalconFPR(-0.433093818853151968484222638), new FalconFPR( 0.901348847046022014570746093),
+                new FalconFPR( 0.331106305759876401737190737), new FalconFPR( 0.943593458161960361495301445),
+                new FalconFPR(-0.943593458161960361495301445), new FalconFPR( 0.331106305759876401737190737),
+                new FalconFPR( 0.968522094274417316221088329), new FalconFPR( 0.248927605745720168110682816),
+                new FalconFPR(-0.248927605745720168110682816), new FalconFPR( 0.968522094274417316221088329),
+                new FalconFPR( 0.508830142543107036931749324), new FalconFPR( 0.860866938637767279344583877),
+                new FalconFPR(-0.860866938637767279344583877), new FalconFPR( 0.508830142543107036931749324),
+                new FalconFPR( 0.799537269107905033500246232), new FalconFPR( 0.600616479383868926653875896),
+                new FalconFPR(-0.600616479383868926653875896), new FalconFPR( 0.799537269107905033500246232),
+                new FalconFPR( 0.140658239332849230714788846), new FalconFPR( 0.990058210262297105505906464),
+                new FalconFPR(-0.990058210262297105505906464), new FalconFPR( 0.140658239332849230714788846),
+                new FalconFPR( 0.988257567730749491404792538), new FalconFPR( 0.152797185258443427720336613),
+                new FalconFPR(-0.152797185258443427720336613), new FalconFPR( 0.988257567730749491404792538),
+                new FalconFPR( 0.590759701858874228423887908), new FalconFPR( 0.806847553543799272206514313),
+                new FalconFPR(-0.806847553543799272206514313), new FalconFPR( 0.590759701858874228423887908),
+                new FalconFPR( 0.854557988365400520767862276), new FalconFPR( 0.519355990165589587361829932),
+                new FalconFPR(-0.519355990165589587361829932), new FalconFPR( 0.854557988365400520767862276),
+                new FalconFPR( 0.237023605994367206867735915), new FalconFPR( 0.971503890986251775537099622),
+                new FalconFPR(-0.971503890986251775537099622), new FalconFPR( 0.237023605994367206867735915),
+                new FalconFPR( 0.939459223602189911962669246), new FalconFPR( 0.342660717311994397592781983),
+                new FalconFPR(-0.342660717311994397592781983), new FalconFPR( 0.939459223602189911962669246),
+                new FalconFPR( 0.422000270799799685941287941), new FalconFPR( 0.906595704514915365332960588),
+                new FalconFPR(-0.906595704514915365332960588), new FalconFPR( 0.422000270799799685941287941),
+                new FalconFPR( 0.736816568877369875090132520), new FalconFPR( 0.676092703575315960360419228),
+                new FalconFPR(-0.676092703575315960360419228), new FalconFPR( 0.736816568877369875090132520),
+                new FalconFPR( 0.042938256934940823077124540), new FalconFPR( 0.999077727752645382888781997),
+                new FalconFPR(-0.999077727752645382888781997), new FalconFPR( 0.042938256934940823077124540),
+                new FalconFPR( 0.999529417501093163079703322), new FalconFPR( 0.030674803176636625934021028),
+                new FalconFPR(-0.030674803176636625934021028), new FalconFPR( 0.999529417501093163079703322),
+                new FalconFPR( 0.685083667772700381362052545), new FalconFPR( 0.728464390448225196492035438),
+                new FalconFPR(-0.728464390448225196492035438), new FalconFPR( 0.685083667772700381362052545),
+                new FalconFPR( 0.911706032005429851404397325), new FalconFPR( 0.410843171057903942183466675),
+                new FalconFPR(-0.410843171057903942183466675), new FalconFPR( 0.911706032005429851404397325),
+                new FalconFPR( 0.354163525420490382357395796), new FalconFPR( 0.935183509938947577642207480),
+                new FalconFPR(-0.935183509938947577642207480), new FalconFPR( 0.354163525420490382357395796),
+                new FalconFPR( 0.974339382785575860518721668), new FalconFPR( 0.225083911359792835991642120),
+                new FalconFPR(-0.225083911359792835991642120), new FalconFPR( 0.974339382785575860518721668),
+                new FalconFPR( 0.529803624686294668216054671), new FalconFPR( 0.848120344803297251279133563),
+                new FalconFPR(-0.848120344803297251279133563), new FalconFPR( 0.529803624686294668216054671),
+                new FalconFPR( 0.814036329705948361654516690), new FalconFPR( 0.580813958095764545075595272),
+                new FalconFPR(-0.580813958095764545075595272), new FalconFPR( 0.814036329705948361654516690),
+                new FalconFPR( 0.164913120489969921418189113), new FalconFPR( 0.986308097244598647863297524),
+                new FalconFPR(-0.986308097244598647863297524), new FalconFPR( 0.164913120489969921418189113),
+                new FalconFPR( 0.991709753669099522860049931), new FalconFPR( 0.128498110793793172624415589),
+                new FalconFPR(-0.128498110793793172624415589), new FalconFPR( 0.991709753669099522860049931),
+                new FalconFPR( 0.610382806276309452716352152), new FalconFPR( 0.792106577300212351782342879),
+                new FalconFPR(-0.792106577300212351782342879), new FalconFPR( 0.610382806276309452716352152),
+                new FalconFPR( 0.867046245515692651480195629), new FalconFPR( 0.498227666972781852410983869),
+                new FalconFPR(-0.498227666972781852410983869), new FalconFPR( 0.867046245515692651480195629),
+                new FalconFPR( 0.260794117915275518280186509), new FalconFPR( 0.965394441697689374550843858),
+                new FalconFPR(-0.965394441697689374550843858), new FalconFPR( 0.260794117915275518280186509),
+                new FalconFPR( 0.947585591017741134653387321), new FalconFPR( 0.319502030816015677901518272),
+                new FalconFPR(-0.319502030816015677901518272), new FalconFPR( 0.947585591017741134653387321),
+                new FalconFPR( 0.444122144570429231642069418), new FalconFPR( 0.895966249756185155914560282),
+                new FalconFPR(-0.895966249756185155914560282), new FalconFPR( 0.444122144570429231642069418),
+                new FalconFPR( 0.753186799043612482483430486), new FalconFPR( 0.657806693297078656931182264),
+                new FalconFPR(-0.657806693297078656931182264), new FalconFPR( 0.753186799043612482483430486),
+                new FalconFPR( 0.067443919563664057897972422), new FalconFPR( 0.997723066644191609848546728),
+                new FalconFPR(-0.997723066644191609848546728), new FalconFPR( 0.067443919563664057897972422),
+                new FalconFPR( 0.996820299291165714972629398), new FalconFPR( 0.079682437971430121147120656),
+                new FalconFPR(-0.079682437971430121147120656), new FalconFPR( 0.996820299291165714972629398),
+                new FalconFPR( 0.648514401022112445084560551), new FalconFPR( 0.761202385484261814029709836),
+                new FalconFPR(-0.761202385484261814029709836), new FalconFPR( 0.648514401022112445084560551),
+                new FalconFPR( 0.890448723244757889952150560), new FalconFPR( 0.455083587126343823535869268),
+                new FalconFPR(-0.455083587126343823535869268), new FalconFPR( 0.890448723244757889952150560),
+                new FalconFPR( 0.307849640041534893682063646), new FalconFPR( 0.951435020969008369549175569),
+                new FalconFPR(-0.951435020969008369549175569), new FalconFPR( 0.307849640041534893682063646),
+                new FalconFPR( 0.962121404269041595429604316), new FalconFPR( 0.272621355449948984493347477),
+                new FalconFPR(-0.272621355449948984493347477), new FalconFPR( 0.962121404269041595429604316),
+                new FalconFPR( 0.487550160148435954641485027), new FalconFPR( 0.873094978418290098636085973),
+                new FalconFPR(-0.873094978418290098636085973), new FalconFPR( 0.487550160148435954641485027),
+                new FalconFPR( 0.784556597155575233023892575), new FalconFPR( 0.620057211763289178646268191),
+                new FalconFPR(-0.620057211763289178646268191), new FalconFPR( 0.784556597155575233023892575),
+                new FalconFPR( 0.116318630911904767252544319), new FalconFPR( 0.993211949234794533104601012),
+                new FalconFPR(-0.993211949234794533104601012), new FalconFPR( 0.116318630911904767252544319),
+                new FalconFPR( 0.984210092386929073193874387), new FalconFPR( 0.177004220412148756196839844),
+                new FalconFPR(-0.177004220412148756196839844), new FalconFPR( 0.984210092386929073193874387),
+                new FalconFPR( 0.570780745886967280232652864), new FalconFPR( 0.821102514991104679060430820),
+                new FalconFPR(-0.821102514991104679060430820), new FalconFPR( 0.570780745886967280232652864),
+                new FalconFPR( 0.841554977436898409603499520), new FalconFPR( 0.540171472729892881297845480),
+                new FalconFPR(-0.540171472729892881297845480), new FalconFPR( 0.841554977436898409603499520),
+                new FalconFPR( 0.213110319916091373967757518), new FalconFPR( 0.977028142657754351485866211),
+                new FalconFPR(-0.977028142657754351485866211), new FalconFPR( 0.213110319916091373967757518),
+                new FalconFPR( 0.930766961078983731944872340), new FalconFPR( 0.365612997804773870011745909),
+                new FalconFPR(-0.365612997804773870011745909), new FalconFPR( 0.930766961078983731944872340),
+                new FalconFPR( 0.399624199845646828544117031), new FalconFPR( 0.916679059921042663116457013),
+                new FalconFPR(-0.916679059921042663116457013), new FalconFPR( 0.399624199845646828544117031),
+                new FalconFPR( 0.720002507961381629076682999), new FalconFPR( 0.693971460889654009003734389),
+                new FalconFPR(-0.693971460889654009003734389), new FalconFPR( 0.720002507961381629076682999),
+                new FalconFPR( 0.018406729905804820927366313), new FalconFPR( 0.999830581795823422015722275),
+                new FalconFPR(-0.999830581795823422015722275), new FalconFPR( 0.018406729905804820927366313),
+                new FalconFPR( 0.999830581795823422015722275), new FalconFPR( 0.018406729905804820927366313),
+                new FalconFPR(-0.018406729905804820927366313), new FalconFPR( 0.999830581795823422015722275),
+                new FalconFPR( 0.693971460889654009003734389), new FalconFPR( 0.720002507961381629076682999),
+                new FalconFPR(-0.720002507961381629076682999), new FalconFPR( 0.693971460889654009003734389),
+                new FalconFPR( 0.916679059921042663116457013), new FalconFPR( 0.399624199845646828544117031),
+                new FalconFPR(-0.399624199845646828544117031), new FalconFPR( 0.916679059921042663116457013),
+                new FalconFPR( 0.365612997804773870011745909), new FalconFPR( 0.930766961078983731944872340),
+                new FalconFPR(-0.930766961078983731944872340), new FalconFPR( 0.365612997804773870011745909),
+                new FalconFPR( 0.977028142657754351485866211), new FalconFPR( 0.213110319916091373967757518),
+                new FalconFPR(-0.213110319916091373967757518), new FalconFPR( 0.977028142657754351485866211),
+                new FalconFPR( 0.540171472729892881297845480), new FalconFPR( 0.841554977436898409603499520),
+                new FalconFPR(-0.841554977436898409603499520), new FalconFPR( 0.540171472729892881297845480),
+                new FalconFPR( 0.821102514991104679060430820), new FalconFPR( 0.570780745886967280232652864),
+                new FalconFPR(-0.570780745886967280232652864), new FalconFPR( 0.821102514991104679060430820),
+                new FalconFPR( 0.177004220412148756196839844), new FalconFPR( 0.984210092386929073193874387),
+                new FalconFPR(-0.984210092386929073193874387), new FalconFPR( 0.177004220412148756196839844),
+                new FalconFPR( 0.993211949234794533104601012), new FalconFPR( 0.116318630911904767252544319),
+                new FalconFPR(-0.116318630911904767252544319), new FalconFPR( 0.993211949234794533104601012),
+                new FalconFPR( 0.620057211763289178646268191), new FalconFPR( 0.784556597155575233023892575),
+                new FalconFPR(-0.784556597155575233023892575), new FalconFPR( 0.620057211763289178646268191),
+                new FalconFPR( 0.873094978418290098636085973), new FalconFPR( 0.487550160148435954641485027),
+                new FalconFPR(-0.487550160148435954641485027), new FalconFPR( 0.873094978418290098636085973),
+                new FalconFPR( 0.272621355449948984493347477), new FalconFPR( 0.962121404269041595429604316),
+                new FalconFPR(-0.962121404269041595429604316), new FalconFPR( 0.272621355449948984493347477),
+                new FalconFPR( 0.951435020969008369549175569), new FalconFPR( 0.307849640041534893682063646),
+                new FalconFPR(-0.307849640041534893682063646), new FalconFPR( 0.951435020969008369549175569),
+                new FalconFPR( 0.455083587126343823535869268), new FalconFPR( 0.890448723244757889952150560),
+                new FalconFPR(-0.890448723244757889952150560), new FalconFPR( 0.455083587126343823535869268),
+                new FalconFPR( 0.761202385484261814029709836), new FalconFPR( 0.648514401022112445084560551),
+                new FalconFPR(-0.648514401022112445084560551), new FalconFPR( 0.761202385484261814029709836),
+                new FalconFPR( 0.079682437971430121147120656), new FalconFPR( 0.996820299291165714972629398),
+                new FalconFPR(-0.996820299291165714972629398), new FalconFPR( 0.079682437971430121147120656),
+                new FalconFPR( 0.997723066644191609848546728), new FalconFPR( 0.067443919563664057897972422),
+                new FalconFPR(-0.067443919563664057897972422), new FalconFPR( 0.997723066644191609848546728),
+                new FalconFPR( 0.657806693297078656931182264), new FalconFPR( 0.753186799043612482483430486),
+                new FalconFPR(-0.753186799043612482483430486), new FalconFPR( 0.657806693297078656931182264),
+                new FalconFPR( 0.895966249756185155914560282), new FalconFPR( 0.444122144570429231642069418),
+                new FalconFPR(-0.444122144570429231642069418), new FalconFPR( 0.895966249756185155914560282),
+                new FalconFPR( 0.319502030816015677901518272), new FalconFPR( 0.947585591017741134653387321),
+                new FalconFPR(-0.947585591017741134653387321), new FalconFPR( 0.319502030816015677901518272),
+                new FalconFPR( 0.965394441697689374550843858), new FalconFPR( 0.260794117915275518280186509),
+                new FalconFPR(-0.260794117915275518280186509), new FalconFPR( 0.965394441697689374550843858),
+                new FalconFPR( 0.498227666972781852410983869), new FalconFPR( 0.867046245515692651480195629),
+                new FalconFPR(-0.867046245515692651480195629), new FalconFPR( 0.498227666972781852410983869),
+                new FalconFPR( 0.792106577300212351782342879), new FalconFPR( 0.610382806276309452716352152),
+                new FalconFPR(-0.610382806276309452716352152), new FalconFPR( 0.792106577300212351782342879),
+                new FalconFPR( 0.128498110793793172624415589), new FalconFPR( 0.991709753669099522860049931),
+                new FalconFPR(-0.991709753669099522860049931), new FalconFPR( 0.128498110793793172624415589),
+                new FalconFPR( 0.986308097244598647863297524), new FalconFPR( 0.164913120489969921418189113),
+                new FalconFPR(-0.164913120489969921418189113), new FalconFPR( 0.986308097244598647863297524),
+                new FalconFPR( 0.580813958095764545075595272), new FalconFPR( 0.814036329705948361654516690),
+                new FalconFPR(-0.814036329705948361654516690), new FalconFPR( 0.580813958095764545075595272),
+                new FalconFPR( 0.848120344803297251279133563), new FalconFPR( 0.529803624686294668216054671),
+                new FalconFPR(-0.529803624686294668216054671), new FalconFPR( 0.848120344803297251279133563),
+                new FalconFPR( 0.225083911359792835991642120), new FalconFPR( 0.974339382785575860518721668),
+                new FalconFPR(-0.974339382785575860518721668), new FalconFPR( 0.225083911359792835991642120),
+                new FalconFPR( 0.935183509938947577642207480), new FalconFPR( 0.354163525420490382357395796),
+                new FalconFPR(-0.354163525420490382357395796), new FalconFPR( 0.935183509938947577642207480),
+                new FalconFPR( 0.410843171057903942183466675), new FalconFPR( 0.911706032005429851404397325),
+                new FalconFPR(-0.911706032005429851404397325), new FalconFPR( 0.410843171057903942183466675),
+                new FalconFPR( 0.728464390448225196492035438), new FalconFPR( 0.685083667772700381362052545),
+                new FalconFPR(-0.685083667772700381362052545), new FalconFPR( 0.728464390448225196492035438),
+                new FalconFPR( 0.030674803176636625934021028), new FalconFPR( 0.999529417501093163079703322),
+                new FalconFPR(-0.999529417501093163079703322), new FalconFPR( 0.030674803176636625934021028),
+                new FalconFPR( 0.999077727752645382888781997), new FalconFPR( 0.042938256934940823077124540),
+                new FalconFPR(-0.042938256934940823077124540), new FalconFPR( 0.999077727752645382888781997),
+                new FalconFPR( 0.676092703575315960360419228), new FalconFPR( 0.736816568877369875090132520),
+                new FalconFPR(-0.736816568877369875090132520), new FalconFPR( 0.676092703575315960360419228),
+                new FalconFPR( 0.906595704514915365332960588), new FalconFPR( 0.422000270799799685941287941),
+                new FalconFPR(-0.422000270799799685941287941), new FalconFPR( 0.906595704514915365332960588),
+                new FalconFPR( 0.342660717311994397592781983), new FalconFPR( 0.939459223602189911962669246),
+                new FalconFPR(-0.939459223602189911962669246), new FalconFPR( 0.342660717311994397592781983),
+                new FalconFPR( 0.971503890986251775537099622), new FalconFPR( 0.237023605994367206867735915),
+                new FalconFPR(-0.237023605994367206867735915), new FalconFPR( 0.971503890986251775537099622),
+                new FalconFPR( 0.519355990165589587361829932), new FalconFPR( 0.854557988365400520767862276),
+                new FalconFPR(-0.854557988365400520767862276), new FalconFPR( 0.519355990165589587361829932),
+                new FalconFPR( 0.806847553543799272206514313), new FalconFPR( 0.590759701858874228423887908),
+                new FalconFPR(-0.590759701858874228423887908), new FalconFPR( 0.806847553543799272206514313),
+                new FalconFPR( 0.152797185258443427720336613), new FalconFPR( 0.988257567730749491404792538),
+                new FalconFPR(-0.988257567730749491404792538), new FalconFPR( 0.152797185258443427720336613),
+                new FalconFPR( 0.990058210262297105505906464), new FalconFPR( 0.140658239332849230714788846),
+                new FalconFPR(-0.140658239332849230714788846), new FalconFPR( 0.990058210262297105505906464),
+                new FalconFPR( 0.600616479383868926653875896), new FalconFPR( 0.799537269107905033500246232),
+                new FalconFPR(-0.799537269107905033500246232), new FalconFPR( 0.600616479383868926653875896),
+                new FalconFPR( 0.860866938637767279344583877), new FalconFPR( 0.508830142543107036931749324),
+                new FalconFPR(-0.508830142543107036931749324), new FalconFPR( 0.860866938637767279344583877),
+                new FalconFPR( 0.248927605745720168110682816), new FalconFPR( 0.968522094274417316221088329),
+                new FalconFPR(-0.968522094274417316221088329), new FalconFPR( 0.248927605745720168110682816),
+                new FalconFPR( 0.943593458161960361495301445), new FalconFPR( 0.331106305759876401737190737),
+                new FalconFPR(-0.331106305759876401737190737), new FalconFPR( 0.943593458161960361495301445),
+                new FalconFPR( 0.433093818853151968484222638), new FalconFPR( 0.901348847046022014570746093),
+                new FalconFPR(-0.901348847046022014570746093), new FalconFPR( 0.433093818853151968484222638),
+                new FalconFPR( 0.745057785441465962407907310), new FalconFPR( 0.666999922303637506650154222),
+                new FalconFPR(-0.666999922303637506650154222), new FalconFPR( 0.745057785441465962407907310),
+                new FalconFPR( 0.055195244349689939809447526), new FalconFPR( 0.998475580573294752208559038),
+                new FalconFPR(-0.998475580573294752208559038), new FalconFPR( 0.055195244349689939809447526),
+                new FalconFPR( 0.995767414467659793982495643), new FalconFPR( 0.091908956497132728624990979),
+                new FalconFPR(-0.091908956497132728624990979), new FalconFPR( 0.995767414467659793982495643),
+                new FalconFPR( 0.639124444863775743801488193), new FalconFPR( 0.769103337645579639346626069),
+                new FalconFPR(-0.769103337645579639346626069), new FalconFPR( 0.639124444863775743801488193),
+                new FalconFPR( 0.884797098430937780104007041), new FalconFPR( 0.465976495767966177902756065),
+                new FalconFPR(-0.465976495767966177902756065), new FalconFPR( 0.884797098430937780104007041),
+                new FalconFPR( 0.296150888243623824121786128), new FalconFPR( 0.955141168305770721498157712),
+                new FalconFPR(-0.955141168305770721498157712), new FalconFPR( 0.296150888243623824121786128),
+                new FalconFPR( 0.958703474895871555374645792), new FalconFPR( 0.284407537211271843618310615),
+                new FalconFPR(-0.284407537211271843618310615), new FalconFPR( 0.958703474895871555374645792),
+                new FalconFPR( 0.476799230063322133342158117), new FalconFPR( 0.879012226428633477831323711),
+                new FalconFPR(-0.879012226428633477831323711), new FalconFPR( 0.476799230063322133342158117),
+                new FalconFPR( 0.776888465673232450040827983), new FalconFPR( 0.629638238914927025372981341),
+                new FalconFPR(-0.629638238914927025372981341), new FalconFPR( 0.776888465673232450040827983),
+                new FalconFPR( 0.104121633872054579120943880), new FalconFPR( 0.994564570734255452119106243),
+                new FalconFPR(-0.994564570734255452119106243), new FalconFPR( 0.104121633872054579120943880),
+                new FalconFPR( 0.981963869109555264072848154), new FalconFPR( 0.189068664149806212754997837),
+                new FalconFPR(-0.189068664149806212754997837), new FalconFPR( 0.981963869109555264072848154),
+                new FalconFPR( 0.560661576197336023839710223), new FalconFPR( 0.828045045257755752067527592),
+                new FalconFPR(-0.828045045257755752067527592), new FalconFPR( 0.560661576197336023839710223),
+                new FalconFPR( 0.834862874986380056304401383), new FalconFPR( 0.550457972936604802977289893),
+                new FalconFPR(-0.550457972936604802977289893), new FalconFPR( 0.834862874986380056304401383),
+                new FalconFPR( 0.201104634842091911558443546), new FalconFPR( 0.979569765685440534439326110),
+                new FalconFPR(-0.979569765685440534439326110), new FalconFPR( 0.201104634842091911558443546),
+                new FalconFPR( 0.926210242138311341974793388), new FalconFPR( 0.377007410216418256726567823),
+                new FalconFPR(-0.377007410216418256726567823), new FalconFPR( 0.926210242138311341974793388),
+                new FalconFPR( 0.388345046698826291624993541), new FalconFPR( 0.921514039342041943465396332),
+                new FalconFPR(-0.921514039342041943465396332), new FalconFPR( 0.388345046698826291624993541),
+                new FalconFPR( 0.711432195745216441522130290), new FalconFPR( 0.702754744457225302452914421),
+                new FalconFPR(-0.702754744457225302452914421), new FalconFPR( 0.711432195745216441522130290),
+                new FalconFPR( 0.006135884649154475359640235), new FalconFPR( 0.999981175282601142656990438),
+                new FalconFPR(-0.999981175282601142656990438), new FalconFPR( 0.006135884649154475359640235),
+                new FalconFPR( 0.999995293809576171511580126), new FalconFPR( 0.003067956762965976270145365),
+                new FalconFPR(-0.003067956762965976270145365), new FalconFPR( 0.999995293809576171511580126),
+                new FalconFPR( 0.704934080375904908852523758), new FalconFPR( 0.709272826438865651316533772),
+                new FalconFPR(-0.709272826438865651316533772), new FalconFPR( 0.704934080375904908852523758),
+                new FalconFPR( 0.922701128333878570437264227), new FalconFPR( 0.385516053843918864075607949),
+                new FalconFPR(-0.385516053843918864075607949), new FalconFPR( 0.922701128333878570437264227),
+                new FalconFPR( 0.379847208924051170576281147), new FalconFPR( 0.925049240782677590302371869),
+                new FalconFPR(-0.925049240782677590302371869), new FalconFPR( 0.379847208924051170576281147),
+                new FalconFPR( 0.980182135968117392690210009), new FalconFPR( 0.198098410717953586179324918),
+                new FalconFPR(-0.198098410717953586179324918), new FalconFPR( 0.980182135968117392690210009),
+                new FalconFPR( 0.553016705580027531764226988), new FalconFPR( 0.833170164701913186439915922),
+                new FalconFPR(-0.833170164701913186439915922), new FalconFPR( 0.553016705580027531764226988),
+                new FalconFPR( 0.829761233794523042469023765), new FalconFPR( 0.558118531220556115693702964),
+                new FalconFPR(-0.558118531220556115693702964), new FalconFPR( 0.829761233794523042469023765),
+                new FalconFPR( 0.192080397049892441679288205), new FalconFPR( 0.981379193313754574318224190),
+                new FalconFPR(-0.981379193313754574318224190), new FalconFPR( 0.192080397049892441679288205),
+                new FalconFPR( 0.994879330794805620591166107), new FalconFPR( 0.101069862754827824987887585),
+                new FalconFPR(-0.101069862754827824987887585), new FalconFPR( 0.994879330794805620591166107),
+                new FalconFPR( 0.632018735939809021909403706), new FalconFPR( 0.774953106594873878359129282),
+                new FalconFPR(-0.774953106594873878359129282), new FalconFPR( 0.632018735939809021909403706),
+                new FalconFPR( 0.880470889052160770806542929), new FalconFPR( 0.474100214650550014398580015),
+                new FalconFPR(-0.474100214650550014398580015), new FalconFPR( 0.880470889052160770806542929),
+                new FalconFPR( 0.287347459544729526477331841), new FalconFPR( 0.957826413027532890321037029),
+                new FalconFPR(-0.957826413027532890321037029), new FalconFPR( 0.287347459544729526477331841),
+                new FalconFPR( 0.956045251349996443270479823), new FalconFPR( 0.293219162694258650606608599),
+                new FalconFPR(-0.293219162694258650606608599), new FalconFPR( 0.956045251349996443270479823),
+                new FalconFPR( 0.468688822035827933697617870), new FalconFPR( 0.883363338665731594736308015),
+                new FalconFPR(-0.883363338665731594736308015), new FalconFPR( 0.468688822035827933697617870),
+                new FalconFPR( 0.771060524261813773200605759), new FalconFPR( 0.636761861236284230413943435),
+                new FalconFPR(-0.636761861236284230413943435), new FalconFPR( 0.771060524261813773200605759),
+                new FalconFPR( 0.094963495329638998938034312), new FalconFPR( 0.995480755491926941769171600),
+                new FalconFPR(-0.995480755491926941769171600), new FalconFPR( 0.094963495329638998938034312),
+                new FalconFPR( 0.998640218180265222418199049), new FalconFPR( 0.052131704680283321236358216),
+                new FalconFPR(-0.052131704680283321236358216), new FalconFPR( 0.998640218180265222418199049),
+                new FalconFPR( 0.669282588346636065720696366), new FalconFPR( 0.743007952135121693517362293),
+                new FalconFPR(-0.743007952135121693517362293), new FalconFPR( 0.669282588346636065720696366),
+                new FalconFPR( 0.902673318237258806751502391), new FalconFPR( 0.430326481340082633908199031),
+                new FalconFPR(-0.430326481340082633908199031), new FalconFPR( 0.902673318237258806751502391),
+                new FalconFPR( 0.333999651442009404650865481), new FalconFPR( 0.942573197601446879280758735),
+                new FalconFPR(-0.942573197601446879280758735), new FalconFPR( 0.333999651442009404650865481),
+                new FalconFPR( 0.969281235356548486048290738), new FalconFPR( 0.245955050335794611599924709),
+                new FalconFPR(-0.245955050335794611599924709), new FalconFPR( 0.969281235356548486048290738),
+                new FalconFPR( 0.511468850437970399504391001), new FalconFPR( 0.859301818357008404783582139),
+                new FalconFPR(-0.859301818357008404783582139), new FalconFPR( 0.511468850437970399504391001),
+                new FalconFPR( 0.801376171723140219430247777), new FalconFPR( 0.598160706996342311724958652),
+                new FalconFPR(-0.598160706996342311724958652), new FalconFPR( 0.801376171723140219430247777),
+                new FalconFPR( 0.143695033150294454819773349), new FalconFPR( 0.989622017463200834623694454),
+                new FalconFPR(-0.989622017463200834623694454), new FalconFPR( 0.143695033150294454819773349),
+                new FalconFPR( 0.988721691960323767604516485), new FalconFPR( 0.149764534677321517229695737),
+                new FalconFPR(-0.149764534677321517229695737), new FalconFPR( 0.988721691960323767604516485),
+                new FalconFPR( 0.593232295039799808047809426), new FalconFPR( 0.805031331142963597922659282),
+                new FalconFPR(-0.805031331142963597922659282), new FalconFPR( 0.593232295039799808047809426),
+                new FalconFPR( 0.856147328375194481019630732), new FalconFPR( 0.516731799017649881508753876),
+                new FalconFPR(-0.516731799017649881508753876), new FalconFPR( 0.856147328375194481019630732),
+                new FalconFPR( 0.240003022448741486568922365), new FalconFPR( 0.970772140728950302138169611),
+                new FalconFPR(-0.970772140728950302138169611), new FalconFPR( 0.240003022448741486568922365),
+                new FalconFPR( 0.940506070593268323787291309), new FalconFPR( 0.339776884406826857828825803),
+                new FalconFPR(-0.339776884406826857828825803), new FalconFPR( 0.940506070593268323787291309),
+                new FalconFPR( 0.424779681209108833357226189), new FalconFPR( 0.905296759318118774354048329),
+                new FalconFPR(-0.905296759318118774354048329), new FalconFPR( 0.424779681209108833357226189),
+                new FalconFPR( 0.738887324460615147933116508), new FalconFPR( 0.673829000378756060917568372),
+                new FalconFPR(-0.673829000378756060917568372), new FalconFPR( 0.738887324460615147933116508),
+                new FalconFPR( 0.046003182130914628814301788), new FalconFPR( 0.998941293186856850633930266),
+                new FalconFPR(-0.998941293186856850633930266), new FalconFPR( 0.046003182130914628814301788),
+                new FalconFPR( 0.999618822495178597116830637), new FalconFPR( 0.027608145778965741612354872),
+                new FalconFPR(-0.027608145778965741612354872), new FalconFPR( 0.999618822495178597116830637),
+                new FalconFPR( 0.687315340891759108199186948), new FalconFPR( 0.726359155084345976817494315),
+                new FalconFPR(-0.726359155084345976817494315), new FalconFPR( 0.687315340891759108199186948),
+                new FalconFPR( 0.912962190428398164628018233), new FalconFPR( 0.408044162864978680820747499),
+                new FalconFPR(-0.408044162864978680820747499), new FalconFPR( 0.912962190428398164628018233),
+                new FalconFPR( 0.357030961233430032614954036), new FalconFPR( 0.934092550404258914729877883),
+                new FalconFPR(-0.934092550404258914729877883), new FalconFPR( 0.357030961233430032614954036),
+                new FalconFPR( 0.975025345066994146844913468), new FalconFPR( 0.222093620973203534094094721),
+                new FalconFPR(-0.222093620973203534094094721), new FalconFPR( 0.975025345066994146844913468),
+                new FalconFPR( 0.532403127877197971442805218), new FalconFPR( 0.846490938774052078300544488),
+                new FalconFPR(-0.846490938774052078300544488), new FalconFPR( 0.532403127877197971442805218),
+                new FalconFPR( 0.815814410806733789010772660), new FalconFPR( 0.578313796411655563342245019),
+                new FalconFPR(-0.578313796411655563342245019), new FalconFPR( 0.815814410806733789010772660),
+                new FalconFPR( 0.167938294974731178054745536), new FalconFPR( 0.985797509167567424700995000),
+                new FalconFPR(-0.985797509167567424700995000), new FalconFPR( 0.167938294974731178054745536),
+                new FalconFPR( 0.992099313142191757112085445), new FalconFPR( 0.125454983411546238542336453),
+                new FalconFPR(-0.125454983411546238542336453), new FalconFPR( 0.992099313142191757112085445),
+                new FalconFPR( 0.612810082429409703935211936), new FalconFPR( 0.790230221437310055030217152),
+                new FalconFPR(-0.790230221437310055030217152), new FalconFPR( 0.612810082429409703935211936),
+                new FalconFPR( 0.868570705971340895340449876), new FalconFPR( 0.495565261825772531150266670),
+                new FalconFPR(-0.495565261825772531150266670), new FalconFPR( 0.868570705971340895340449876),
+                new FalconFPR( 0.263754678974831383611349322), new FalconFPR( 0.964589793289812723836432159),
+                new FalconFPR(-0.964589793289812723836432159), new FalconFPR( 0.263754678974831383611349322),
+                new FalconFPR( 0.948561349915730288158494826), new FalconFPR( 0.316593375556165867243047035),
+                new FalconFPR(-0.316593375556165867243047035), new FalconFPR( 0.948561349915730288158494826),
+                new FalconFPR( 0.446868840162374195353044389), new FalconFPR( 0.894599485631382678433072126),
+                new FalconFPR(-0.894599485631382678433072126), new FalconFPR( 0.446868840162374195353044389),
+                new FalconFPR( 0.755201376896536527598710756), new FalconFPR( 0.655492852999615385312679701),
+                new FalconFPR(-0.655492852999615385312679701), new FalconFPR( 0.755201376896536527598710756),
+                new FalconFPR( 0.070504573389613863027351471), new FalconFPR( 0.997511456140303459699448390),
+                new FalconFPR(-0.997511456140303459699448390), new FalconFPR( 0.070504573389613863027351471),
+                new FalconFPR( 0.997060070339482978987989949), new FalconFPR( 0.076623861392031492278332463),
+                new FalconFPR(-0.076623861392031492278332463), new FalconFPR( 0.997060070339482978987989949),
+                new FalconFPR( 0.650846684996380915068975573), new FalconFPR( 0.759209188978388033485525443),
+                new FalconFPR(-0.759209188978388033485525443), new FalconFPR( 0.650846684996380915068975573),
+                new FalconFPR( 0.891840709392342727796478697), new FalconFPR( 0.452349587233770874133026703),
+                new FalconFPR(-0.452349587233770874133026703), new FalconFPR( 0.891840709392342727796478697),
+                new FalconFPR( 0.310767152749611495835997250), new FalconFPR( 0.950486073949481721759926101),
+                new FalconFPR(-0.950486073949481721759926101), new FalconFPR( 0.310767152749611495835997250),
+                new FalconFPR( 0.962953266873683886347921481), new FalconFPR( 0.269668325572915106525464462),
+                new FalconFPR(-0.269668325572915106525464462), new FalconFPR( 0.962953266873683886347921481),
+                new FalconFPR( 0.490226483288291154229598449), new FalconFPR( 0.871595086655951034842481435),
+                new FalconFPR(-0.871595086655951034842481435), new FalconFPR( 0.490226483288291154229598449),
+                new FalconFPR( 0.786455213599085757522319464), new FalconFPR( 0.617647307937803932403979402),
+                new FalconFPR(-0.617647307937803932403979402), new FalconFPR( 0.786455213599085757522319464),
+                new FalconFPR( 0.119365214810991364593637790), new FalconFPR( 0.992850414459865090793563344),
+                new FalconFPR(-0.992850414459865090793563344), new FalconFPR( 0.119365214810991364593637790),
+                new FalconFPR( 0.984748501801904218556553176), new FalconFPR( 0.173983873387463827950700807),
+                new FalconFPR(-0.173983873387463827950700807), new FalconFPR( 0.984748501801904218556553176),
+                new FalconFPR( 0.573297166698042212820171239), new FalconFPR( 0.819347520076796960824689637),
+                new FalconFPR(-0.819347520076796960824689637), new FalconFPR( 0.573297166698042212820171239),
+                new FalconFPR( 0.843208239641845437161743865), new FalconFPR( 0.537587076295645482502214932),
+                new FalconFPR(-0.537587076295645482502214932), new FalconFPR( 0.843208239641845437161743865),
+                new FalconFPR( 0.216106797076219509948385131), new FalconFPR( 0.976369731330021149312732194),
+                new FalconFPR(-0.976369731330021149312732194), new FalconFPR( 0.216106797076219509948385131),
+                new FalconFPR( 0.931884265581668106718557199), new FalconFPR( 0.362755724367397216204854462),
+                new FalconFPR(-0.362755724367397216204854462), new FalconFPR( 0.931884265581668106718557199),
+                new FalconFPR( 0.402434650859418441082533934), new FalconFPR( 0.915448716088267819566431292),
+                new FalconFPR(-0.915448716088267819566431292), new FalconFPR( 0.402434650859418441082533934),
+                new FalconFPR( 0.722128193929215321243607198), new FalconFPR( 0.691759258364157774906734132),
+                new FalconFPR(-0.691759258364157774906734132), new FalconFPR( 0.722128193929215321243607198),
+                new FalconFPR( 0.021474080275469507418374898), new FalconFPR( 0.999769405351215321657617036),
+                new FalconFPR(-0.999769405351215321657617036), new FalconFPR( 0.021474080275469507418374898),
+                new FalconFPR( 0.999882347454212525633049627), new FalconFPR( 0.015339206284988101044151868),
+                new FalconFPR(-0.015339206284988101044151868), new FalconFPR( 0.999882347454212525633049627),
+                new FalconFPR( 0.696177131491462944788582591), new FalconFPR( 0.717870045055731736211325329),
+                new FalconFPR(-0.717870045055731736211325329), new FalconFPR( 0.696177131491462944788582591),
+                new FalconFPR( 0.917900775621390457642276297), new FalconFPR( 0.396809987416710328595290911),
+                new FalconFPR(-0.396809987416710328595290911), new FalconFPR( 0.917900775621390457642276297),
+                new FalconFPR( 0.368466829953372331712746222), new FalconFPR( 0.929640895843181265457918066),
+                new FalconFPR(-0.929640895843181265457918066), new FalconFPR( 0.368466829953372331712746222),
+                new FalconFPR( 0.977677357824509979943404762), new FalconFPR( 0.210111836880469621717489972),
+                new FalconFPR(-0.210111836880469621717489972), new FalconFPR( 0.977677357824509979943404762),
+                new FalconFPR( 0.542750784864515906586768661), new FalconFPR( 0.839893794195999504583383987),
+                new FalconFPR(-0.839893794195999504583383987), new FalconFPR( 0.542750784864515906586768661),
+                new FalconFPR( 0.822849781375826332046780034), new FalconFPR( 0.568258952670131549790548489),
+                new FalconFPR(-0.568258952670131549790548489), new FalconFPR( 0.822849781375826332046780034),
+                new FalconFPR( 0.180022901405699522679906590), new FalconFPR( 0.983662419211730274396237776),
+                new FalconFPR(-0.983662419211730274396237776), new FalconFPR( 0.180022901405699522679906590),
+                new FalconFPR( 0.993564135520595333782021697), new FalconFPR( 0.113270952177564349018228733),
+                new FalconFPR(-0.113270952177564349018228733), new FalconFPR( 0.993564135520595333782021697),
+                new FalconFPR( 0.622461279374149972519166721), new FalconFPR( 0.782650596166575738458949301),
+                new FalconFPR(-0.782650596166575738458949301), new FalconFPR( 0.622461279374149972519166721),
+                new FalconFPR( 0.874586652278176112634431897), new FalconFPR( 0.484869248000791101822951699),
+                new FalconFPR(-0.484869248000791101822951699), new FalconFPR( 0.874586652278176112634431897),
+                new FalconFPR( 0.275571819310958163076425168), new FalconFPR( 0.961280485811320641748659653),
+                new FalconFPR(-0.961280485811320641748659653), new FalconFPR( 0.275571819310958163076425168),
+                new FalconFPR( 0.952375012719765858529893608), new FalconFPR( 0.304929229735402406490728633),
+                new FalconFPR(-0.304929229735402406490728633), new FalconFPR( 0.952375012719765858529893608),
+                new FalconFPR( 0.457813303598877221904961155), new FalconFPR( 0.889048355854664562540777729),
+                new FalconFPR(-0.889048355854664562540777729), new FalconFPR( 0.457813303598877221904961155),
+                new FalconFPR( 0.763188417263381271704838297), new FalconFPR( 0.646176012983316364832802220),
+                new FalconFPR(-0.646176012983316364832802220), new FalconFPR( 0.763188417263381271704838297),
+                new FalconFPR( 0.082740264549375693111987083), new FalconFPR( 0.996571145790554847093566910),
+                new FalconFPR(-0.996571145790554847093566910), new FalconFPR( 0.082740264549375693111987083),
+                new FalconFPR( 0.997925286198596012623025462), new FalconFPR( 0.064382630929857460819324537),
+                new FalconFPR(-0.064382630929857460819324537), new FalconFPR( 0.997925286198596012623025462),
+                new FalconFPR( 0.660114342067420478559490747), new FalconFPR( 0.751165131909686411205819422),
+                new FalconFPR(-0.751165131909686411205819422), new FalconFPR( 0.660114342067420478559490747),
+                new FalconFPR( 0.897324580705418281231391836), new FalconFPR( 0.441371268731716692879988968),
+                new FalconFPR(-0.441371268731716692879988968), new FalconFPR( 0.897324580705418281231391836),
+                new FalconFPR( 0.322407678801069848384807478), new FalconFPR( 0.946600913083283570044599823),
+                new FalconFPR(-0.946600913083283570044599823), new FalconFPR( 0.322407678801069848384807478),
+                new FalconFPR( 0.966190003445412555433832961), new FalconFPR( 0.257831102162159005614471295),
+                new FalconFPR(-0.257831102162159005614471295), new FalconFPR( 0.966190003445412555433832961),
+                new FalconFPR( 0.500885382611240786241285004), new FalconFPR( 0.865513624090569082825488358),
+                new FalconFPR(-0.865513624090569082825488358), new FalconFPR( 0.500885382611240786241285004),
+                new FalconFPR( 0.793975477554337164895083757), new FalconFPR( 0.607949784967773667243642671),
+                new FalconFPR(-0.607949784967773667243642671), new FalconFPR( 0.793975477554337164895083757),
+                new FalconFPR( 0.131540028702883111103387493), new FalconFPR( 0.991310859846115418957349799),
+                new FalconFPR(-0.991310859846115418957349799), new FalconFPR( 0.131540028702883111103387493),
+                new FalconFPR( 0.986809401814185476970235952), new FalconFPR( 0.161886393780111837641387995),
+                new FalconFPR(-0.161886393780111837641387995), new FalconFPR( 0.986809401814185476970235952),
+                new FalconFPR( 0.583308652937698294392830961), new FalconFPR( 0.812250586585203913049744181),
+                new FalconFPR(-0.812250586585203913049744181), new FalconFPR( 0.583308652937698294392830961),
+                new FalconFPR( 0.849741768000852489471268395), new FalconFPR( 0.527199134781901348464274575),
+                new FalconFPR(-0.527199134781901348464274575), new FalconFPR( 0.849741768000852489471268395),
+                new FalconFPR( 0.228072083170885739254457379), new FalconFPR( 0.973644249650811925318383912),
+                new FalconFPR(-0.973644249650811925318383912), new FalconFPR( 0.228072083170885739254457379),
+                new FalconFPR( 0.936265667170278246576310996), new FalconFPR( 0.351292756085567125601307623),
+                new FalconFPR(-0.351292756085567125601307623), new FalconFPR( 0.936265667170278246576310996),
+                new FalconFPR( 0.413638312238434547471944324), new FalconFPR( 0.910441292258067196934095369),
+                new FalconFPR(-0.910441292258067196934095369), new FalconFPR( 0.413638312238434547471944324),
+                new FalconFPR( 0.730562769227827561177758850), new FalconFPR( 0.682845546385248068164596123),
+                new FalconFPR(-0.682845546385248068164596123), new FalconFPR( 0.730562769227827561177758850),
+                new FalconFPR( 0.033741171851377584833716112), new FalconFPR( 0.999430604555461772019008327),
+                new FalconFPR(-0.999430604555461772019008327), new FalconFPR( 0.033741171851377584833716112),
+                new FalconFPR( 0.999204758618363895492950001), new FalconFPR( 0.039872927587739811128578738),
+                new FalconFPR(-0.039872927587739811128578738), new FalconFPR( 0.999204758618363895492950001),
+                new FalconFPR( 0.678350043129861486873655042), new FalconFPR( 0.734738878095963464563223604),
+                new FalconFPR(-0.734738878095963464563223604), new FalconFPR( 0.678350043129861486873655042),
+                new FalconFPR( 0.907886116487666212038681480), new FalconFPR( 0.419216888363223956433010020),
+                new FalconFPR(-0.419216888363223956433010020), new FalconFPR( 0.907886116487666212038681480),
+                new FalconFPR( 0.345541324963989065539191723), new FalconFPR( 0.938403534063108112192420774),
+                new FalconFPR(-0.938403534063108112192420774), new FalconFPR( 0.345541324963989065539191723),
+                new FalconFPR( 0.972226497078936305708321144), new FalconFPR( 0.234041958583543423191242045),
+                new FalconFPR(-0.234041958583543423191242045), new FalconFPR( 0.972226497078936305708321144),
+                new FalconFPR( 0.521975292937154342694258318), new FalconFPR( 0.852960604930363657746588082),
+                new FalconFPR(-0.852960604930363657746588082), new FalconFPR( 0.521975292937154342694258318),
+                new FalconFPR( 0.808656181588174991946968128), new FalconFPR( 0.588281548222645304786439813),
+                new FalconFPR(-0.588281548222645304786439813), new FalconFPR( 0.808656181588174991946968128),
+                new FalconFPR( 0.155828397654265235743101486), new FalconFPR( 0.987784141644572154230969032),
+                new FalconFPR(-0.987784141644572154230969032), new FalconFPR( 0.155828397654265235743101486),
+                new FalconFPR( 0.990485084256457037998682243), new FalconFPR( 0.137620121586486044948441663),
+                new FalconFPR(-0.137620121586486044948441663), new FalconFPR( 0.990485084256457037998682243),
+                new FalconFPR( 0.603066598540348201693430617), new FalconFPR( 0.797690840943391108362662755),
+                new FalconFPR(-0.797690840943391108362662755), new FalconFPR( 0.603066598540348201693430617),
+                new FalconFPR( 0.862423956111040538690933878), new FalconFPR( 0.506186645345155291048942344),
+                new FalconFPR(-0.506186645345155291048942344), new FalconFPR( 0.862423956111040538690933878),
+                new FalconFPR( 0.251897818154216950498106628), new FalconFPR( 0.967753837093475465243391912),
+                new FalconFPR(-0.967753837093475465243391912), new FalconFPR( 0.251897818154216950498106628),
+                new FalconFPR( 0.944604837261480265659265493), new FalconFPR( 0.328209843579092526107916817),
+                new FalconFPR(-0.328209843579092526107916817), new FalconFPR( 0.944604837261480265659265493),
+                new FalconFPR( 0.435857079922255491032544080), new FalconFPR( 0.900015892016160228714535267),
+                new FalconFPR(-0.900015892016160228714535267), new FalconFPR( 0.435857079922255491032544080),
+                new FalconFPR( 0.747100605980180144323078847), new FalconFPR( 0.664710978203344868130324985),
+                new FalconFPR(-0.664710978203344868130324985), new FalconFPR( 0.747100605980180144323078847),
+                new FalconFPR( 0.058258264500435759613979782), new FalconFPR( 0.998301544933892840738782163),
+                new FalconFPR(-0.998301544933892840738782163), new FalconFPR( 0.058258264500435759613979782),
+                new FalconFPR( 0.996044700901251989887944810), new FalconFPR( 0.088853552582524596561586535),
+                new FalconFPR(-0.088853552582524596561586535), new FalconFPR( 0.996044700901251989887944810),
+                new FalconFPR( 0.641481012808583151988739898), new FalconFPR( 0.767138911935820381181694573),
+                new FalconFPR(-0.767138911935820381181694573), new FalconFPR( 0.641481012808583151988739898),
+                new FalconFPR( 0.886222530148880631647990821), new FalconFPR( 0.463259783551860197390719637),
+                new FalconFPR(-0.463259783551860197390719637), new FalconFPR( 0.886222530148880631647990821),
+                new FalconFPR( 0.299079826308040476750336973), new FalconFPR( 0.954228095109105629780430732),
+                new FalconFPR(-0.954228095109105629780430732), new FalconFPR( 0.299079826308040476750336973),
+                new FalconFPR( 0.959571513081984528335528181), new FalconFPR( 0.281464937925757984095231007),
+                new FalconFPR(-0.281464937925757984095231007), new FalconFPR( 0.959571513081984528335528181),
+                new FalconFPR( 0.479493757660153026679839798), new FalconFPR( 0.877545290207261291668470750),
+                new FalconFPR(-0.877545290207261291668470750), new FalconFPR( 0.479493757660153026679839798),
+                new FalconFPR( 0.778816512381475953374724325), new FalconFPR( 0.627251815495144113509622565),
+                new FalconFPR(-0.627251815495144113509622565), new FalconFPR( 0.778816512381475953374724325),
+                new FalconFPR( 0.107172424956808849175529148), new FalconFPR( 0.994240449453187946358413442),
+                new FalconFPR(-0.994240449453187946358413442), new FalconFPR( 0.107172424956808849175529148),
+                new FalconFPR( 0.982539302287441255907040396), new FalconFPR( 0.186055151663446648105438304),
+                new FalconFPR(-0.186055151663446648105438304), new FalconFPR( 0.982539302287441255907040396),
+                new FalconFPR( 0.563199344013834115007363772), new FalconFPR( 0.826321062845663480311195452),
+                new FalconFPR(-0.826321062845663480311195452), new FalconFPR( 0.563199344013834115007363772),
+                new FalconFPR( 0.836547727223511984524285790), new FalconFPR( 0.547894059173100165608820571),
+                new FalconFPR(-0.547894059173100165608820571), new FalconFPR( 0.836547727223511984524285790),
+                new FalconFPR( 0.204108966092816874181696950), new FalconFPR( 0.978948175319062194715480124),
+                new FalconFPR(-0.978948175319062194715480124), new FalconFPR( 0.204108966092816874181696950),
+                new FalconFPR( 0.927362525650401087274536959), new FalconFPR( 0.374164062971457997104393020),
+                new FalconFPR(-0.374164062971457997104393020), new FalconFPR( 0.927362525650401087274536959),
+                new FalconFPR( 0.391170384302253888687512949), new FalconFPR( 0.920318276709110566440076541),
+                new FalconFPR(-0.920318276709110566440076541), new FalconFPR( 0.391170384302253888687512949),
+                new FalconFPR( 0.713584868780793592903125099), new FalconFPR( 0.700568793943248366792866380),
+                new FalconFPR(-0.700568793943248366792866380), new FalconFPR( 0.713584868780793592903125099),
+                new FalconFPR( 0.009203754782059819315102378), new FalconFPR( 0.999957644551963866333120920),
+                new FalconFPR(-0.999957644551963866333120920), new FalconFPR( 0.009203754782059819315102378),
+                new FalconFPR( 0.999957644551963866333120920), new FalconFPR( 0.009203754782059819315102378),
+                new FalconFPR(-0.009203754782059819315102378), new FalconFPR( 0.999957644551963866333120920),
+                new FalconFPR( 0.700568793943248366792866380), new FalconFPR( 0.713584868780793592903125099),
+                new FalconFPR(-0.713584868780793592903125099), new FalconFPR( 0.700568793943248366792866380),
+                new FalconFPR( 0.920318276709110566440076541), new FalconFPR( 0.391170384302253888687512949),
+                new FalconFPR(-0.391170384302253888687512949), new FalconFPR( 0.920318276709110566440076541),
+                new FalconFPR( 0.374164062971457997104393020), new FalconFPR( 0.927362525650401087274536959),
+                new FalconFPR(-0.927362525650401087274536959), new FalconFPR( 0.374164062971457997104393020),
+                new FalconFPR( 0.978948175319062194715480124), new FalconFPR( 0.204108966092816874181696950),
+                new FalconFPR(-0.204108966092816874181696950), new FalconFPR( 0.978948175319062194715480124),
+                new FalconFPR( 0.547894059173100165608820571), new FalconFPR( 0.836547727223511984524285790),
+                new FalconFPR(-0.836547727223511984524285790), new FalconFPR( 0.547894059173100165608820571),
+                new FalconFPR( 0.826321062845663480311195452), new FalconFPR( 0.563199344013834115007363772),
+                new FalconFPR(-0.563199344013834115007363772), new FalconFPR( 0.826321062845663480311195452),
+                new FalconFPR( 0.186055151663446648105438304), new FalconFPR( 0.982539302287441255907040396),
+                new FalconFPR(-0.982539302287441255907040396), new FalconFPR( 0.186055151663446648105438304),
+                new FalconFPR( 0.994240449453187946358413442), new FalconFPR( 0.107172424956808849175529148),
+                new FalconFPR(-0.107172424956808849175529148), new FalconFPR( 0.994240449453187946358413442),
+                new FalconFPR( 0.627251815495144113509622565), new FalconFPR( 0.778816512381475953374724325),
+                new FalconFPR(-0.778816512381475953374724325), new FalconFPR( 0.627251815495144113509622565),
+                new FalconFPR( 0.877545290207261291668470750), new FalconFPR( 0.479493757660153026679839798),
+                new FalconFPR(-0.479493757660153026679839798), new FalconFPR( 0.877545290207261291668470750),
+                new FalconFPR( 0.281464937925757984095231007), new FalconFPR( 0.959571513081984528335528181),
+                new FalconFPR(-0.959571513081984528335528181), new FalconFPR( 0.281464937925757984095231007),
+                new FalconFPR( 0.954228095109105629780430732), new FalconFPR( 0.299079826308040476750336973),
+                new FalconFPR(-0.299079826308040476750336973), new FalconFPR( 0.954228095109105629780430732),
+                new FalconFPR( 0.463259783551860197390719637), new FalconFPR( 0.886222530148880631647990821),
+                new FalconFPR(-0.886222530148880631647990821), new FalconFPR( 0.463259783551860197390719637),
+                new FalconFPR( 0.767138911935820381181694573), new FalconFPR( 0.641481012808583151988739898),
+                new FalconFPR(-0.641481012808583151988739898), new FalconFPR( 0.767138911935820381181694573),
+                new FalconFPR( 0.088853552582524596561586535), new FalconFPR( 0.996044700901251989887944810),
+                new FalconFPR(-0.996044700901251989887944810), new FalconFPR( 0.088853552582524596561586535),
+                new FalconFPR( 0.998301544933892840738782163), new FalconFPR( 0.058258264500435759613979782),
+                new FalconFPR(-0.058258264500435759613979782), new FalconFPR( 0.998301544933892840738782163),
+                new FalconFPR( 0.664710978203344868130324985), new FalconFPR( 0.747100605980180144323078847),
+                new FalconFPR(-0.747100605980180144323078847), new FalconFPR( 0.664710978203344868130324985),
+                new FalconFPR( 0.900015892016160228714535267), new FalconFPR( 0.435857079922255491032544080),
+                new FalconFPR(-0.435857079922255491032544080), new FalconFPR( 0.900015892016160228714535267),
+                new FalconFPR( 0.328209843579092526107916817), new FalconFPR( 0.944604837261480265659265493),
+                new FalconFPR(-0.944604837261480265659265493), new FalconFPR( 0.328209843579092526107916817),
+                new FalconFPR( 0.967753837093475465243391912), new FalconFPR( 0.251897818154216950498106628),
+                new FalconFPR(-0.251897818154216950498106628), new FalconFPR( 0.967753837093475465243391912),
+                new FalconFPR( 0.506186645345155291048942344), new FalconFPR( 0.862423956111040538690933878),
+                new FalconFPR(-0.862423956111040538690933878), new FalconFPR( 0.506186645345155291048942344),
+                new FalconFPR( 0.797690840943391108362662755), new FalconFPR( 0.603066598540348201693430617),
+                new FalconFPR(-0.603066598540348201693430617), new FalconFPR( 0.797690840943391108362662755),
+                new FalconFPR( 0.137620121586486044948441663), new FalconFPR( 0.990485084256457037998682243),
+                new FalconFPR(-0.990485084256457037998682243), new FalconFPR( 0.137620121586486044948441663),
+                new FalconFPR( 0.987784141644572154230969032), new FalconFPR( 0.155828397654265235743101486),
+                new FalconFPR(-0.155828397654265235743101486), new FalconFPR( 0.987784141644572154230969032),
+                new FalconFPR( 0.588281548222645304786439813), new FalconFPR( 0.808656181588174991946968128),
+                new FalconFPR(-0.808656181588174991946968128), new FalconFPR( 0.588281548222645304786439813),
+                new FalconFPR( 0.852960604930363657746588082), new FalconFPR( 0.521975292937154342694258318),
+                new FalconFPR(-0.521975292937154342694258318), new FalconFPR( 0.852960604930363657746588082),
+                new FalconFPR( 0.234041958583543423191242045), new FalconFPR( 0.972226497078936305708321144),
+                new FalconFPR(-0.972226497078936305708321144), new FalconFPR( 0.234041958583543423191242045),
+                new FalconFPR( 0.938403534063108112192420774), new FalconFPR( 0.345541324963989065539191723),
+                new FalconFPR(-0.345541324963989065539191723), new FalconFPR( 0.938403534063108112192420774),
+                new FalconFPR( 0.419216888363223956433010020), new FalconFPR( 0.907886116487666212038681480),
+                new FalconFPR(-0.907886116487666212038681480), new FalconFPR( 0.419216888363223956433010020),
+                new FalconFPR( 0.734738878095963464563223604), new FalconFPR( 0.678350043129861486873655042),
+                new FalconFPR(-0.678350043129861486873655042), new FalconFPR( 0.734738878095963464563223604),
+                new FalconFPR( 0.039872927587739811128578738), new FalconFPR( 0.999204758618363895492950001),
+                new FalconFPR(-0.999204758618363895492950001), new FalconFPR( 0.039872927587739811128578738),
+                new FalconFPR( 0.999430604555461772019008327), new FalconFPR( 0.033741171851377584833716112),
+                new FalconFPR(-0.033741171851377584833716112), new FalconFPR( 0.999430604555461772019008327),
+                new FalconFPR( 0.682845546385248068164596123), new FalconFPR( 0.730562769227827561177758850),
+                new FalconFPR(-0.730562769227827561177758850), new FalconFPR( 0.682845546385248068164596123),
+                new FalconFPR( 0.910441292258067196934095369), new FalconFPR( 0.413638312238434547471944324),
+                new FalconFPR(-0.413638312238434547471944324), new FalconFPR( 0.910441292258067196934095369),
+                new FalconFPR( 0.351292756085567125601307623), new FalconFPR( 0.936265667170278246576310996),
+                new FalconFPR(-0.936265667170278246576310996), new FalconFPR( 0.351292756085567125601307623),
+                new FalconFPR( 0.973644249650811925318383912), new FalconFPR( 0.228072083170885739254457379),
+                new FalconFPR(-0.228072083170885739254457379), new FalconFPR( 0.973644249650811925318383912),
+                new FalconFPR( 0.527199134781901348464274575), new FalconFPR( 0.849741768000852489471268395),
+                new FalconFPR(-0.849741768000852489471268395), new FalconFPR( 0.527199134781901348464274575),
+                new FalconFPR( 0.812250586585203913049744181), new FalconFPR( 0.583308652937698294392830961),
+                new FalconFPR(-0.583308652937698294392830961), new FalconFPR( 0.812250586585203913049744181),
+                new FalconFPR( 0.161886393780111837641387995), new FalconFPR( 0.986809401814185476970235952),
+                new FalconFPR(-0.986809401814185476970235952), new FalconFPR( 0.161886393780111837641387995),
+                new FalconFPR( 0.991310859846115418957349799), new FalconFPR( 0.131540028702883111103387493),
+                new FalconFPR(-0.131540028702883111103387493), new FalconFPR( 0.991310859846115418957349799),
+                new FalconFPR( 0.607949784967773667243642671), new FalconFPR( 0.793975477554337164895083757),
+                new FalconFPR(-0.793975477554337164895083757), new FalconFPR( 0.607949784967773667243642671),
+                new FalconFPR( 0.865513624090569082825488358), new FalconFPR( 0.500885382611240786241285004),
+                new FalconFPR(-0.500885382611240786241285004), new FalconFPR( 0.865513624090569082825488358),
+                new FalconFPR( 0.257831102162159005614471295), new FalconFPR( 0.966190003445412555433832961),
+                new FalconFPR(-0.966190003445412555433832961), new FalconFPR( 0.257831102162159005614471295),
+                new FalconFPR( 0.946600913083283570044599823), new FalconFPR( 0.322407678801069848384807478),
+                new FalconFPR(-0.322407678801069848384807478), new FalconFPR( 0.946600913083283570044599823),
+                new FalconFPR( 0.441371268731716692879988968), new FalconFPR( 0.897324580705418281231391836),
+                new FalconFPR(-0.897324580705418281231391836), new FalconFPR( 0.441371268731716692879988968),
+                new FalconFPR( 0.751165131909686411205819422), new FalconFPR( 0.660114342067420478559490747),
+                new FalconFPR(-0.660114342067420478559490747), new FalconFPR( 0.751165131909686411205819422),
+                new FalconFPR( 0.064382630929857460819324537), new FalconFPR( 0.997925286198596012623025462),
+                new FalconFPR(-0.997925286198596012623025462), new FalconFPR( 0.064382630929857460819324537),
+                new FalconFPR( 0.996571145790554847093566910), new FalconFPR( 0.082740264549375693111987083),
+                new FalconFPR(-0.082740264549375693111987083), new FalconFPR( 0.996571145790554847093566910),
+                new FalconFPR( 0.646176012983316364832802220), new FalconFPR( 0.763188417263381271704838297),
+                new FalconFPR(-0.763188417263381271704838297), new FalconFPR( 0.646176012983316364832802220),
+                new FalconFPR( 0.889048355854664562540777729), new FalconFPR( 0.457813303598877221904961155),
+                new FalconFPR(-0.457813303598877221904961155), new FalconFPR( 0.889048355854664562540777729),
+                new FalconFPR( 0.304929229735402406490728633), new FalconFPR( 0.952375012719765858529893608),
+                new FalconFPR(-0.952375012719765858529893608), new FalconFPR( 0.304929229735402406490728633),
+                new FalconFPR( 0.961280485811320641748659653), new FalconFPR( 0.275571819310958163076425168),
+                new FalconFPR(-0.275571819310958163076425168), new FalconFPR( 0.961280485811320641748659653),
+                new FalconFPR( 0.484869248000791101822951699), new FalconFPR( 0.874586652278176112634431897),
+                new FalconFPR(-0.874586652278176112634431897), new FalconFPR( 0.484869248000791101822951699),
+                new FalconFPR( 0.782650596166575738458949301), new FalconFPR( 0.622461279374149972519166721),
+                new FalconFPR(-0.622461279374149972519166721), new FalconFPR( 0.782650596166575738458949301),
+                new FalconFPR( 0.113270952177564349018228733), new FalconFPR( 0.993564135520595333782021697),
+                new FalconFPR(-0.993564135520595333782021697), new FalconFPR( 0.113270952177564349018228733),
+                new FalconFPR( 0.983662419211730274396237776), new FalconFPR( 0.180022901405699522679906590),
+                new FalconFPR(-0.180022901405699522679906590), new FalconFPR( 0.983662419211730274396237776),
+                new FalconFPR( 0.568258952670131549790548489), new FalconFPR( 0.822849781375826332046780034),
+                new FalconFPR(-0.822849781375826332046780034), new FalconFPR( 0.568258952670131549790548489),
+                new FalconFPR( 0.839893794195999504583383987), new FalconFPR( 0.542750784864515906586768661),
+                new FalconFPR(-0.542750784864515906586768661), new FalconFPR( 0.839893794195999504583383987),
+                new FalconFPR( 0.210111836880469621717489972), new FalconFPR( 0.977677357824509979943404762),
+                new FalconFPR(-0.977677357824509979943404762), new FalconFPR( 0.210111836880469621717489972),
+                new FalconFPR( 0.929640895843181265457918066), new FalconFPR( 0.368466829953372331712746222),
+                new FalconFPR(-0.368466829953372331712746222), new FalconFPR( 0.929640895843181265457918066),
+                new FalconFPR( 0.396809987416710328595290911), new FalconFPR( 0.917900775621390457642276297),
+                new FalconFPR(-0.917900775621390457642276297), new FalconFPR( 0.396809987416710328595290911),
+                new FalconFPR( 0.717870045055731736211325329), new FalconFPR( 0.696177131491462944788582591),
+                new FalconFPR(-0.696177131491462944788582591), new FalconFPR( 0.717870045055731736211325329),
+                new FalconFPR( 0.015339206284988101044151868), new FalconFPR( 0.999882347454212525633049627),
+                new FalconFPR(-0.999882347454212525633049627), new FalconFPR( 0.015339206284988101044151868),
+                new FalconFPR( 0.999769405351215321657617036), new FalconFPR( 0.021474080275469507418374898),
+                new FalconFPR(-0.021474080275469507418374898), new FalconFPR( 0.999769405351215321657617036),
+                new FalconFPR( 0.691759258364157774906734132), new FalconFPR( 0.722128193929215321243607198),
+                new FalconFPR(-0.722128193929215321243607198), new FalconFPR( 0.691759258364157774906734132),
+                new FalconFPR( 0.915448716088267819566431292), new FalconFPR( 0.402434650859418441082533934),
+                new FalconFPR(-0.402434650859418441082533934), new FalconFPR( 0.915448716088267819566431292),
+                new FalconFPR( 0.362755724367397216204854462), new FalconFPR( 0.931884265581668106718557199),
+                new FalconFPR(-0.931884265581668106718557199), new FalconFPR( 0.362755724367397216204854462),
+                new FalconFPR( 0.976369731330021149312732194), new FalconFPR( 0.216106797076219509948385131),
+                new FalconFPR(-0.216106797076219509948385131), new FalconFPR( 0.976369731330021149312732194),
+                new FalconFPR( 0.537587076295645482502214932), new FalconFPR( 0.843208239641845437161743865),
+                new FalconFPR(-0.843208239641845437161743865), new FalconFPR( 0.537587076295645482502214932),
+                new FalconFPR( 0.819347520076796960824689637), new FalconFPR( 0.573297166698042212820171239),
+                new FalconFPR(-0.573297166698042212820171239), new FalconFPR( 0.819347520076796960824689637),
+                new FalconFPR( 0.173983873387463827950700807), new FalconFPR( 0.984748501801904218556553176),
+                new FalconFPR(-0.984748501801904218556553176), new FalconFPR( 0.173983873387463827950700807),
+                new FalconFPR( 0.992850414459865090793563344), new FalconFPR( 0.119365214810991364593637790),
+                new FalconFPR(-0.119365214810991364593637790), new FalconFPR( 0.992850414459865090793563344),
+                new FalconFPR( 0.617647307937803932403979402), new FalconFPR( 0.786455213599085757522319464),
+                new FalconFPR(-0.786455213599085757522319464), new FalconFPR( 0.617647307937803932403979402),
+                new FalconFPR( 0.871595086655951034842481435), new FalconFPR( 0.490226483288291154229598449),
+                new FalconFPR(-0.490226483288291154229598449), new FalconFPR( 0.871595086655951034842481435),
+                new FalconFPR( 0.269668325572915106525464462), new FalconFPR( 0.962953266873683886347921481),
+                new FalconFPR(-0.962953266873683886347921481), new FalconFPR( 0.269668325572915106525464462),
+                new FalconFPR( 0.950486073949481721759926101), new FalconFPR( 0.310767152749611495835997250),
+                new FalconFPR(-0.310767152749611495835997250), new FalconFPR( 0.950486073949481721759926101),
+                new FalconFPR( 0.452349587233770874133026703), new FalconFPR( 0.891840709392342727796478697),
+                new FalconFPR(-0.891840709392342727796478697), new FalconFPR( 0.452349587233770874133026703),
+                new FalconFPR( 0.759209188978388033485525443), new FalconFPR( 0.650846684996380915068975573),
+                new FalconFPR(-0.650846684996380915068975573), new FalconFPR( 0.759209188978388033485525443),
+                new FalconFPR( 0.076623861392031492278332463), new FalconFPR( 0.997060070339482978987989949),
+                new FalconFPR(-0.997060070339482978987989949), new FalconFPR( 0.076623861392031492278332463),
+                new FalconFPR( 0.997511456140303459699448390), new FalconFPR( 0.070504573389613863027351471),
+                new FalconFPR(-0.070504573389613863027351471), new FalconFPR( 0.997511456140303459699448390),
+                new FalconFPR( 0.655492852999615385312679701), new FalconFPR( 0.755201376896536527598710756),
+                new FalconFPR(-0.755201376896536527598710756), new FalconFPR( 0.655492852999615385312679701),
+                new FalconFPR( 0.894599485631382678433072126), new FalconFPR( 0.446868840162374195353044389),
+                new FalconFPR(-0.446868840162374195353044389), new FalconFPR( 0.894599485631382678433072126),
+                new FalconFPR( 0.316593375556165867243047035), new FalconFPR( 0.948561349915730288158494826),
+                new FalconFPR(-0.948561349915730288158494826), new FalconFPR( 0.316593375556165867243047035),
+                new FalconFPR( 0.964589793289812723836432159), new FalconFPR( 0.263754678974831383611349322),
+                new FalconFPR(-0.263754678974831383611349322), new FalconFPR( 0.964589793289812723836432159),
+                new FalconFPR( 0.495565261825772531150266670), new FalconFPR( 0.868570705971340895340449876),
+                new FalconFPR(-0.868570705971340895340449876), new FalconFPR( 0.495565261825772531150266670),
+                new FalconFPR( 0.790230221437310055030217152), new FalconFPR( 0.612810082429409703935211936),
+                new FalconFPR(-0.612810082429409703935211936), new FalconFPR( 0.790230221437310055030217152),
+                new FalconFPR( 0.125454983411546238542336453), new FalconFPR( 0.992099313142191757112085445),
+                new FalconFPR(-0.992099313142191757112085445), new FalconFPR( 0.125454983411546238542336453),
+                new FalconFPR( 0.985797509167567424700995000), new FalconFPR( 0.167938294974731178054745536),
+                new FalconFPR(-0.167938294974731178054745536), new FalconFPR( 0.985797509167567424700995000),
+                new FalconFPR( 0.578313796411655563342245019), new FalconFPR( 0.815814410806733789010772660),
+                new FalconFPR(-0.815814410806733789010772660), new FalconFPR( 0.578313796411655563342245019),
+                new FalconFPR( 0.846490938774052078300544488), new FalconFPR( 0.532403127877197971442805218),
+                new FalconFPR(-0.532403127877197971442805218), new FalconFPR( 0.846490938774052078300544488),
+                new FalconFPR( 0.222093620973203534094094721), new FalconFPR( 0.975025345066994146844913468),
+                new FalconFPR(-0.975025345066994146844913468), new FalconFPR( 0.222093620973203534094094721),
+                new FalconFPR( 0.934092550404258914729877883), new FalconFPR( 0.357030961233430032614954036),
+                new FalconFPR(-0.357030961233430032614954036), new FalconFPR( 0.934092550404258914729877883),
+                new FalconFPR( 0.408044162864978680820747499), new FalconFPR( 0.912962190428398164628018233),
+                new FalconFPR(-0.912962190428398164628018233), new FalconFPR( 0.408044162864978680820747499),
+                new FalconFPR( 0.726359155084345976817494315), new FalconFPR( 0.687315340891759108199186948),
+                new FalconFPR(-0.687315340891759108199186948), new FalconFPR( 0.726359155084345976817494315),
+                new FalconFPR( 0.027608145778965741612354872), new FalconFPR( 0.999618822495178597116830637),
+                new FalconFPR(-0.999618822495178597116830637), new FalconFPR( 0.027608145778965741612354872),
+                new FalconFPR( 0.998941293186856850633930266), new FalconFPR( 0.046003182130914628814301788),
+                new FalconFPR(-0.046003182130914628814301788), new FalconFPR( 0.998941293186856850633930266),
+                new FalconFPR( 0.673829000378756060917568372), new FalconFPR( 0.738887324460615147933116508),
+                new FalconFPR(-0.738887324460615147933116508), new FalconFPR( 0.673829000378756060917568372),
+                new FalconFPR( 0.905296759318118774354048329), new FalconFPR( 0.424779681209108833357226189),
+                new FalconFPR(-0.424779681209108833357226189), new FalconFPR( 0.905296759318118774354048329),
+                new FalconFPR( 0.339776884406826857828825803), new FalconFPR( 0.940506070593268323787291309),
+                new FalconFPR(-0.940506070593268323787291309), new FalconFPR( 0.339776884406826857828825803),
+                new FalconFPR( 0.970772140728950302138169611), new FalconFPR( 0.240003022448741486568922365),
+                new FalconFPR(-0.240003022448741486568922365), new FalconFPR( 0.970772140728950302138169611),
+                new FalconFPR( 0.516731799017649881508753876), new FalconFPR( 0.856147328375194481019630732),
+                new FalconFPR(-0.856147328375194481019630732), new FalconFPR( 0.516731799017649881508753876),
+                new FalconFPR( 0.805031331142963597922659282), new FalconFPR( 0.593232295039799808047809426),
+                new FalconFPR(-0.593232295039799808047809426), new FalconFPR( 0.805031331142963597922659282),
+                new FalconFPR( 0.149764534677321517229695737), new FalconFPR( 0.988721691960323767604516485),
+                new FalconFPR(-0.988721691960323767604516485), new FalconFPR( 0.149764534677321517229695737),
+                new FalconFPR( 0.989622017463200834623694454), new FalconFPR( 0.143695033150294454819773349),
+                new FalconFPR(-0.143695033150294454819773349), new FalconFPR( 0.989622017463200834623694454),
+                new FalconFPR( 0.598160706996342311724958652), new FalconFPR( 0.801376171723140219430247777),
+                new FalconFPR(-0.801376171723140219430247777), new FalconFPR( 0.598160706996342311724958652),
+                new FalconFPR( 0.859301818357008404783582139), new FalconFPR( 0.511468850437970399504391001),
+                new FalconFPR(-0.511468850437970399504391001), new FalconFPR( 0.859301818357008404783582139),
+                new FalconFPR( 0.245955050335794611599924709), new FalconFPR( 0.969281235356548486048290738),
+                new FalconFPR(-0.969281235356548486048290738), new FalconFPR( 0.245955050335794611599924709),
+                new FalconFPR( 0.942573197601446879280758735), new FalconFPR( 0.333999651442009404650865481),
+                new FalconFPR(-0.333999651442009404650865481), new FalconFPR( 0.942573197601446879280758735),
+                new FalconFPR( 0.430326481340082633908199031), new FalconFPR( 0.902673318237258806751502391),
+                new FalconFPR(-0.902673318237258806751502391), new FalconFPR( 0.430326481340082633908199031),
+                new FalconFPR( 0.743007952135121693517362293), new FalconFPR( 0.669282588346636065720696366),
+                new FalconFPR(-0.669282588346636065720696366), new FalconFPR( 0.743007952135121693517362293),
+                new FalconFPR( 0.052131704680283321236358216), new FalconFPR( 0.998640218180265222418199049),
+                new FalconFPR(-0.998640218180265222418199049), new FalconFPR( 0.052131704680283321236358216),
+                new FalconFPR( 0.995480755491926941769171600), new FalconFPR( 0.094963495329638998938034312),
+                new FalconFPR(-0.094963495329638998938034312), new FalconFPR( 0.995480755491926941769171600),
+                new FalconFPR( 0.636761861236284230413943435), new FalconFPR( 0.771060524261813773200605759),
+                new FalconFPR(-0.771060524261813773200605759), new FalconFPR( 0.636761861236284230413943435),
+                new FalconFPR( 0.883363338665731594736308015), new FalconFPR( 0.468688822035827933697617870),
+                new FalconFPR(-0.468688822035827933697617870), new FalconFPR( 0.883363338665731594736308015),
+                new FalconFPR( 0.293219162694258650606608599), new FalconFPR( 0.956045251349996443270479823),
+                new FalconFPR(-0.956045251349996443270479823), new FalconFPR( 0.293219162694258650606608599),
+                new FalconFPR( 0.957826413027532890321037029), new FalconFPR( 0.287347459544729526477331841),
+                new FalconFPR(-0.287347459544729526477331841), new FalconFPR( 0.957826413027532890321037029),
+                new FalconFPR( 0.474100214650550014398580015), new FalconFPR( 0.880470889052160770806542929),
+                new FalconFPR(-0.880470889052160770806542929), new FalconFPR( 0.474100214650550014398580015),
+                new FalconFPR( 0.774953106594873878359129282), new FalconFPR( 0.632018735939809021909403706),
+                new FalconFPR(-0.632018735939809021909403706), new FalconFPR( 0.774953106594873878359129282),
+                new FalconFPR( 0.101069862754827824987887585), new FalconFPR( 0.994879330794805620591166107),
+                new FalconFPR(-0.994879330794805620591166107), new FalconFPR( 0.101069862754827824987887585),
+                new FalconFPR( 0.981379193313754574318224190), new FalconFPR( 0.192080397049892441679288205),
+                new FalconFPR(-0.192080397049892441679288205), new FalconFPR( 0.981379193313754574318224190),
+                new FalconFPR( 0.558118531220556115693702964), new FalconFPR( 0.829761233794523042469023765),
+                new FalconFPR(-0.829761233794523042469023765), new FalconFPR( 0.558118531220556115693702964),
+                new FalconFPR( 0.833170164701913186439915922), new FalconFPR( 0.553016705580027531764226988),
+                new FalconFPR(-0.553016705580027531764226988), new FalconFPR( 0.833170164701913186439915922),
+                new FalconFPR( 0.198098410717953586179324918), new FalconFPR( 0.980182135968117392690210009),
+                new FalconFPR(-0.980182135968117392690210009), new FalconFPR( 0.198098410717953586179324918),
+                new FalconFPR( 0.925049240782677590302371869), new FalconFPR( 0.379847208924051170576281147),
+                new FalconFPR(-0.379847208924051170576281147), new FalconFPR( 0.925049240782677590302371869),
+                new FalconFPR( 0.385516053843918864075607949), new FalconFPR( 0.922701128333878570437264227),
+                new FalconFPR(-0.922701128333878570437264227), new FalconFPR( 0.385516053843918864075607949),
+                new FalconFPR( 0.709272826438865651316533772), new FalconFPR( 0.704934080375904908852523758),
+                new FalconFPR(-0.704934080375904908852523758), new FalconFPR( 0.709272826438865651316533772),
+                new FalconFPR( 0.003067956762965976270145365), new FalconFPR( 0.999995293809576171511580126),
+                new FalconFPR(-0.999995293809576171511580126), new FalconFPR( 0.003067956762965976270145365)
+        };
+
+        internal FalconFPR[] fpr_p2_tab = { // Table of powers of two: entry i holds 2^(1 - i), for i = 0..10.
+                new FalconFPR( 2.00000000000 ), // 2^1
+                new FalconFPR( 1.00000000000 ), // 2^0
+                new FalconFPR( 0.50000000000 ), // 2^-1
+                new FalconFPR( 0.25000000000 ), // 2^-2
+                new FalconFPR( 0.12500000000 ), // 2^-3
+                new FalconFPR( 0.06250000000 ), // 2^-4
+                new FalconFPR( 0.03125000000 ), // 2^-5
+                new FalconFPR( 0.01562500000 ), // 2^-6
+                new FalconFPR( 0.00781250000 ), // 2^-7
+                new FalconFPR( 0.00390625000 ), // 2^-8
+                new FalconFPR( 0.00195312500 ) // 2^-9
+        }; // NOTE(review): presumably indexed by logn (log2 of the polynomial degree) -- confirm against callers.
+        internal FalconFPR fpr_log2 = new FalconFPR(0.69314718055994530941723212146); // ln(2)
+        internal FalconFPR fpr_inv_log2 = new FalconFPR(1.4426950408889634073599246810); // 1 / ln(2)
+        internal FalconFPR fpr_bnorm_max = new FalconFPR(16822.4121); // NOTE(review): presumably an upper bound on a basis norm checked during key generation -- confirm against callers.
+        internal FalconFPR fpr_zero = new FalconFPR(0.0);
+        internal FalconFPR fpr_one = new FalconFPR(1.0);
+        internal FalconFPR fpr_two = new FalconFPR(2.0);
+        internal FalconFPR fpr_onehalf = new FalconFPR(0.5);
+        internal FalconFPR fpr_invsqrt2 = new FalconFPR(0.707106781186547524400844362105); // 1 / sqrt(2)
+        internal FalconFPR fpr_invsqrt8 = new FalconFPR(0.353553390593273762200422181052); // 1 / sqrt(8)
+        internal FalconFPR fpr_ptwo31 = new FalconFPR(2147483648.0); // 2^31
+        internal FalconFPR fpr_ptwo31m1 = new FalconFPR(2147483647.0); // 2^31 - 1
+        internal FalconFPR fpr_mtwo31m1 = new FalconFPR(-2147483647.0); // -(2^31 - 1)
+        internal FalconFPR fpr_ptwo63m1 = new FalconFPR(9223372036854775807.0); // Written as 2^63 - 1; that value is not exactly representable as a double, so the literal rounds to 2^63.
+        internal FalconFPR fpr_mtwo63m1 = new FalconFPR(-9223372036854775807.0); // Written as -(2^63 - 1); as a double literal this rounds to -(2^63).
+        internal FalconFPR fpr_ptwo63 = new FalconFPR(9223372036854775808.0); // 2^63
+        internal FalconFPR fpr_q = new FalconFPR(12289.0); // NOTE(review): presumably the Falcon modulus q = 12289 -- confirm.
+        internal FalconFPR fpr_inverse_of_q = new FalconFPR(1.0 / 12289.0); // 1 / 12289
+        internal FalconFPR fpr_inv_2sqrsigma0 = new FalconFPR(0.150865048875372721532312163019); // NOTE(review): name suggests 1 / (2 * sigma0^2) for the base sampler -- confirm against the sampler code.
+        internal FalconFPR[] fpr_inv_sigma = { // 11 entries; index 0 is a placeholder. NOTE(review): presumably 1/sigma indexed by logn = 1..10 -- confirm against callers.
+            new FalconFPR( 0.0 ), /* unused */
+            new FalconFPR( 0.0069054793295940891952143765991630516 ),
+            new FalconFPR( 0.0068102267767177975961393730687908629 ),
+            new FalconFPR( 0.0067188101910722710707826117910434131 ),
+            new FalconFPR( 0.0065883354370073665545865037227681924 ),
+            new FalconFPR( 0.0064651781207602900738053897763485516 ),
+            new FalconFPR( 0.0063486788828078995327741182928037856 ),
+            new FalconFPR( 0.0062382586529084374473367528433697537 ),
+            new FalconFPR( 0.0061334065020930261548984001431770281 ),
+            new FalconFPR( 0.0060336696681577241031668062510953022 ),
+            new FalconFPR( 0.0059386453095331159950250124336477482 )
+        };
+        internal FalconFPR[] fpr_sigma_min = { // 11 entries; index 0 is a placeholder. NOTE(review): presumably the minimum sampler sigma indexed by logn = 1..10 -- confirm against callers.
+            new FalconFPR( 0.0 ), /* unused */
+            new FalconFPR( 1.1165085072329102588881898380334015 ),
+            new FalconFPR( 1.1321247692325272405718031785357108 ),
+            new FalconFPR( 1.1475285353733668684571123112513188 ),
+            new FalconFPR( 1.1702540788534828939713084716509250 ),
+            new FalconFPR( 1.1925466358390344011122170489094133 ),
+            new FalconFPR( 1.2144300507766139921088487776957699 ),
+            new FalconFPR( 1.2359260567719808790104525941706723 ),
+            new FalconFPR( 1.2570545284063214162779743112075080 ),
+            new FalconFPR( 1.2778336969128335860256340575729042 ),
+            new FalconFPR( 1.2982803343442918539708792538826807 )
+        };
+    }
+}
diff --git a/crypto/src/pqc/crypto/falcon/FalconCodec.cs b/crypto/src/pqc/crypto/falcon/FalconCodec.cs
new file mode 100644
index 000000000..062e006e4
--- /dev/null
+++ b/crypto/src/pqc/crypto/falcon/FalconCodec.cs
@@ -0,0 +1,576 @@
+using System;
+
+namespace Org.BouncyCastle.Pqc.Crypto.Falcon
+{
+    class FalconCodec
+    {
+        /* Nothing to set up: the bit-width tables of this class use field initialisers. */
+        internal FalconCodec()
+        {
+        }
+
+        /* 
+        * License from the reference C code (the code was copied then modified
+        * to function in C#):
+        * ==========================(LICENSE BEGIN)============================
+        *
+        * Copyright (c) 2017-2019  Falcon Project
+        *
+        * Permission is hereby granted, free of charge, to any person obtaining
+        * a copy of this software and associated documentation files (the
+        * "Software"), to deal in the Software without restriction, including
+        * without limitation the rights to use, copy, modify, merge, publish,
+        * distribute, sublicense, and/or sell copies of the Software, and to
+        * permit persons to whom the Software is furnished to do so, subject to
+        * the following conditions:
+        *
+        * The above copyright notice and this permission notice shall be
+        * included in all copies or substantial portions of the Software.
+        *
+        * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+        * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+        * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+        * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+        * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+        * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+        * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+        *
+        * ===========================(LICENSE END)=============================
+        */
+
+        /*
+        * Packs the n = 2^logn values xsrc[x..x+n-1] into outarrsrc (from
+        * index outarr) as 14-bit fields, most significant bit first.
+        * Returns the byte count, or 0 if a value is >= 12289 or
+        * max_out_len is too small; a null outarrsrc only yields the count.
+        */
+        internal int modq_encode(
+            byte[] outarrsrc, int outarr, int max_out_len,
+            ushort[] xsrc, int x, uint logn)
+        {
+            int count = 1 << (int)logn;
+            for (int i = 0; i < count; i++) {
+                if (xsrc[x + i] >= 12289) {
+                    return 0;
+                }
+            }
+            int needed = ((count * 14) + 7) >> 3;
+            if (outarrsrc == null) {
+                return needed;
+            }
+            if (needed > max_out_len) {
+                return 0;
+            }
+            int wr = outarr, pending = 0;
+            uint window = 0;
+            for (int i = 0; i < count; i++) {
+                window = (window << 14) | xsrc[x + i];
+                pending += 14;
+                while (pending >= 8) {
+                    pending -= 8;
+                    outarrsrc[wr++] = (byte)(window >> pending);
+                }
+            }
+            if (pending > 0) {
+                outarrsrc[wr] = (byte)(window << (8 - pending));
+            }
+            return needed;
+        }
+
+        /*
+        * Reads n = 2^logn 14-bit fields (most significant bit first) from
+        * inarrsrc, starting at index inarr, into xsrc[x..x+n-1]. Returns
+        * the number of bytes consumed, or 0 if max_in_len is too small, a
+        * value is >= 12289, or the bits after the last value are not zero.
+        */
+        internal int modq_decode(
+            ushort[] xsrc, int x, uint logn,
+            byte[] inarrsrc, int inarr, int max_in_len)
+        {
+            int count = 1 << (int)logn;
+            int needed = ((count * 14) + 7) >> 3;
+            if (needed > max_in_len) {
+                return 0;
+            }
+            int rd = inarr;
+            uint window = 0;
+            int avail = 0;
+            for (int done = 0; done < count; ) {
+                window = (window << 8) | inarrsrc[rd++];
+                avail += 8;
+                if (avail < 14) {
+                    continue;
+                }
+                avail -= 14;
+                uint value = (window >> avail) & 0x3FFF;
+                if (value >= 12289) {
+                    return 0;
+                }
+                xsrc[x + done] = (ushort)value;
+                done++;
+            }
+            uint padding = window & ((1u << avail) - 1);
+            if (padding != 0) {
+                return 0;
+            }
+            return needed;
+        }
+
+        /*
+        * Packs the n = 2^logn signed values xsrc[x..x+n-1] into outarrsrc
+        * (from index outarr) as fields of 'bits' bits each, most significant
+        * bit first, keeping the low 'bits' bits of every value. Returns the
+        * byte count, or 0 if a value is outside -(2^(bits-1)-1)..2^(bits-1)-1
+        * or max_out_len is too small; a null outarrsrc only yields the count.
+        */
+        internal int trim_i16_encode(
+            byte[] outarrsrc, int outarr, int max_out_len,
+            short[] xsrc, int x, uint logn, uint bits)
+        {
+            int count = 1 << (int)logn;
+            int limit = (1 << (int)(bits - 1)) - 1;
+            for (int i = 0; i < count; i++) {
+                short v = xsrc[x + i];
+                if (v < -limit || v > limit) {
+                    return 0;
+                }
+            }
+            int needed = (int)((count * bits) + 7) >> 3;
+            if (outarrsrc == null) {
+                return needed;
+            }
+            if (needed > max_out_len) {
+                return 0;
+            }
+            int wr = outarr;
+            uint window = 0, pending = 0;
+            uint fieldMask = (1u << (int)bits) - 1;
+            for (int i = 0; i < count; i++) {
+                window = (window << (int)bits) | ((ushort)xsrc[x + i] & fieldMask);
+                pending += bits;
+                while (pending >= 8) {
+                    pending -= 8;
+                    outarrsrc[wr++] = (byte)(window >> (int)pending);
+                }
+            }
+            if (pending > 0) {
+                outarrsrc[wr] = (byte)(window << (int)(8 - pending));
+            }
+            return needed;
+        }
+
+        /*
+        * Reads n = 2^logn signed fields of 'bits' bits each (most significant
+        * bit first, two's complement) from inarrsrc, starting at index
+        * inarr, into xsrc[x..x+n-1]. Returns the number of bytes consumed,
+        * or 0 if max_in_len is too small, a field holds the forbidden value
+        * -2^(bits-1), or the unused bits of the last byte are not zero.
+        */
+        internal int trim_i16_decode(
+            short[] xsrc, int x, uint logn, uint bits,
+            byte[] inarrsrc, int inarr, int max_in_len)
+        {
+            int n = 1 << (int)logn;
+            int in_len = (int)((n * bits) + 7) >> 3;
+            if (in_len > max_in_len) {
+                return 0;
+            }
+            int buf = inarr;
+            int u = 0;
+            uint acc = 0;
+            uint acc_len = 0;
+            uint mask1 = ((uint)1 << (int)bits) - 1;
+            uint mask2 = (uint)1 << (int)(bits - 1);
+            /*
+            * 32-bit image of -2^(bits-1). It has to be built with unsigned
+            * arithmetic: C# evaluates "-mask2" as a negative long, which
+            * never compares equal to the (non-negative) uint w.
+            */
+            uint forbidden = unchecked(0u - mask2);
+
+            while (u < n) {
+                acc = (acc << 8) | inarrsrc[buf ++];
+                acc_len += 8;
+                while (acc_len >= bits && u < n) {
+                    acc_len -= bits;
+                    uint w = (acc >> (int)acc_len) & mask1;
+                    /* Sign-extend the field from 'bits' bits to 32 bits. */
+                    w |= unchecked(0u - (w & mask2));
+                    if (w == forbidden) {
+                        /* The -2^(bits-1) value is forbidden. */
+                        return 0;
+                    }
+                    xsrc[x + u] = unchecked((short)(int)w);
+                    u++;
+                }
+            }
+            if ((acc & (((uint)1 << (int)acc_len) - 1)) != 0) {
+                /* Extra bits in the last byte must be zero. */
+                return 0;
+            }
+            return in_len;
+        }
+
+        /*
+        * Packs the n = 2^logn signed bytes xsrc[x..x+n-1] into outarrsrc
+        * (from index outarr) as fields of 'bits' bits each, most significant
+        * bit first, keeping the low 'bits' bits of every value. Returns the
+        * byte count, or 0 if a value is outside -(2^(bits-1)-1)..2^(bits-1)-1
+        * or max_out_len is too small; a null outarrsrc only yields the count.
+        */
+        internal int trim_i8_encode(
+            byte[] outarrsrc, int outarr, int max_out_len,
+            sbyte[] xsrc, int x, uint logn, uint bits)
+        {
+            int count = 1 << (int)logn;
+            int limit = (1 << (int)(bits - 1)) - 1;
+            for (int i = 0; i < count; i++) {
+                sbyte v = xsrc[x + i];
+                if (v < -limit || v > limit) {
+                    return 0;
+                }
+            }
+            int needed = (int)((count * bits) + 7) >> 3;
+            if (outarrsrc == null) {
+                return needed;
+            }
+            if (needed > max_out_len) {
+                return 0;
+            }
+            int wr = outarr;
+            uint window = 0, pending = 0;
+            uint fieldMask = (1u << (int)bits) - 1;
+            for (int i = 0; i < count; i++) {
+                window = (window << (int)bits) | ((byte)xsrc[x + i] & fieldMask);
+                pending += bits;
+                while (pending >= 8) {
+                    pending -= 8;
+                    outarrsrc[wr++] = (byte)(window >> (int)pending);
+                }
+            }
+            if (pending > 0) {
+                outarrsrc[wr] = (byte)(window << (int)(8 - pending));
+            }
+            return needed;
+        }
+
+        /*
+        * Reads n = 2^logn signed fields of 'bits' bits each (most significant
+        * bit first, two's complement) from inarrsrc, from index inarr, into
+        * xsrc[x..x+n-1]. Returns the bytes consumed, or 0 if max_in_len is too
+        * small, a field is -2^(bits-1), or the trailing bits are not zero.
+        */
+        internal int trim_i8_decode(
+            sbyte[] xsrc, int x, uint logn, uint bits,
+            byte[] inarrsrc, int inarr, int max_in_len)
+        {
+            int n = 1 << (int)logn;
+            int in_len = (int)((n * bits) + 7) >> 3;
+            if (in_len > max_in_len) {
+                return 0;
+            }
+            int buf = inarr;
+            int u = 0;
+            uint acc = 0;
+            uint acc_len = 0;
+            uint mask1 = ((uint)1 << (int)bits) - 1;
+            uint mask2 = (uint)1 << (int)(bits - 1);
+            /*
+            * 32-bit image of -2^(bits-1). It has to be built with unsigned
+            * arithmetic: C# evaluates "-mask2" as a negative long, which
+            * never compares equal to the (non-negative) uint w.
+            */
+            uint forbidden = unchecked(0u - mask2);
+            while (u < n) {
+                acc = (acc << 8) | inarrsrc[buf ++];
+                acc_len += 8;
+                while (acc_len >= bits && u < n) {
+                    acc_len -= bits;
+                    uint w = (acc >> (int)acc_len) & mask1;
+                    /* Sign-extend the field from 'bits' bits to 32 bits. */
+                    w |= unchecked(0u - (w & mask2));
+                    if (w == forbidden) {
+                        /* The -2^(bits-1) value is forbidden. */
+                        return 0;
+                    }
+                    xsrc[x + u] = unchecked((sbyte)(int)w);
+                    u++;
+                }
+            }
+            if ((acc & (((uint)1 << (int)acc_len) - 1)) != 0) {
+                /* Extra bits in the last byte must be zero. */
+                return 0;
+            }
+            return in_len;
+        }
+
+        /*
+        * Compressed encoding of the n = 2^logn signed values xsrc[x..x+n-1].
+        * Each value becomes: one sign bit, the low 7 bits of the absolute
+        * value, then (|value| >> 7) zero bits closed by a single one bit.
+        * Bytes go to outarrsrc, starting at index outarr.
+        *
+        * Returns the number of bytes produced, or 0 if a value is outside
+        * -2047..+2047 or more than max_out_len bytes would be written. With
+        * a null outarrsrc nothing is written and max_out_len is not checked:
+        * only the length is computed.
+        */
+        internal int comp_encode(
+            byte[] outarrsrc, int outarr, int max_out_len,
+            short[] xsrc, int x, uint logn)
+        {
+            int count = 1 << (int)logn;
+
+            /*
+            * Validate everything up front: a single out-of-range value
+            * rejects the whole input.
+            */
+            for (int i = 0; i < count; i++) {
+                short c = xsrc[x + i];
+                if (c < -2047 || c > 2047) {
+                    return 0;
+                }
+            }
+
+            /*
+            * 'window' collects bits (newest in the low positions), 'pending'
+            * counts the bits of it not yet written, 'written' counts bytes.
+            */
+            uint window = 0;
+            uint pending = 0;
+            int written = 0;
+            for (int i = 0; i < count; i++) {
+                /*
+                * Sign bit first; from here on only the magnitude matters.
+                */
+                int value = xsrc[x + i];
+                window <<= 1;
+                if (value < 0) {
+                    value = -value;
+                    window |= 1;
+                }
+                uint magnitude = (uint)value;
+
+                /*
+                * Low 7 bits of the magnitude, which makes 8 bits so far.
+                */
+                window = (window << 7) | (magnitude & 127u);
+                uint high = magnitude >> 7;
+                pending += 8;
+
+                /*
+                * Unary part: 'high' zeros and a closing one. The magnitude is
+                * at most 2047, so high <= 15 and this adds at most 16 bits;
+                * with the 8 bits above and up to 7 left from the previous
+                * value that is 31 bits at most, which fits the uint window.
+                */
+                window = (window << (int)(high + 1)) | 1u;
+                pending += high + 1;
+
+                /*
+                * Write out every complete byte.
+                */
+                while (pending >= 8) {
+                    pending -= 8;
+                    if (outarrsrc != null) {
+                        if (written >= max_out_len) {
+                            return 0;
+                        }
+                        outarrsrc[outarr + written] = (byte)(window >> (int)pending);
+                    }
+                    written++;
+                }
+            }
+
+            /*
+            * Left-align whatever is left into one final byte; its low
+            * bits are zero padding.
+            */
+            if (pending > 0) {
+                if (outarrsrc != null) {
+                    if (written >= max_out_len) {
+                        return 0;
+                    }
+                    outarrsrc[outarr + written] = (byte)(window << (int)(8 - pending));
+                }
+                written++;
+            }
+
+            return written;
+        }
+
+        /*
+        * Decodes the format produced by comp_encode: reads n = 2^logn values
+        * from inarrsrc (from index inarr) into xsrc[x..x+n-1].
+        *
+        * Returns the number of bytes consumed, or 0 if the input runs past
+        * max_in_len bytes, a magnitude exceeds 2047, a "-0" is found, or the
+        * unused bits of the last byte are not all zero.
+        */
+        internal int comp_decode(
+            short[] xsrc, int x, uint logn,
+            byte[] inarrsrc, int inarr, int max_in_len)
+        {
+            uint window = 0;
+            uint spare = 0;
+            int used = 0;
+            int count = 1 << (int)logn;
+            for (int i = 0; i < count; i++) {
+                /*
+                * Pull in one more byte; together with the 'spare' bits kept
+                * from before, the next 8 unread bits are the sign and the
+                * low seven bits of the absolute value.
+                */
+                if (used >= max_in_len) {
+                    return 0;
+                }
+                window = (window << 8) | (uint)inarrsrc[inarr + used];
+                used++;
+                uint head = window >> (int)spare;
+                bool negative = (head & 128) != 0;
+                uint magnitude = head & 127;
+
+                /*
+                * Unary part: every zero bit adds 128, a one bit ends the value.
+                */
+                while (true) {
+                    if (spare == 0) {
+                        if (used >= max_in_len) {
+                            return 0;
+                        }
+                        window = (window << 8) | (uint)inarrsrc[inarr + used];
+                        used++;
+                        spare = 8;
+                    }
+                    spare--;
+                    if (((window >> (int)spare) & 1) != 0) {
+                        break;
+                    }
+                    magnitude += 128;
+                    if (magnitude > 2047) {
+                        return 0;
+                    }
+                }
+
+                /*
+                * "-0" is forbidden.
+                */
+                if (negative && magnitude == 0) {
+                    return 0;
+                }
+                xsrc[x + i] = (short)(negative ? -(int)magnitude : (int)magnitude);
+            }
+
+            /*
+            * Unused bits in the last byte must be zero.
+            */
+            if ((window & ((1u << (int)spare) - 1u)) != 0) {
+                return 0;
+            }
+
+            return used;
+        }
+
+        /*
+        * Key elements and signatures are polynomials with small integer
+        * coefficients. Here are some statistics gathered over many
+        * generated key pairs (10000 or more for each degree):
+        *
+        *   log(n)     n   max(f,g)   std(f,g)   max(F,G)   std(F,G)
+        *      1       2     129       56.31       143       60.02
+        *      2       4     123       40.93       160       46.52
+        *      3       8      97       28.97       159       38.01
+        *      4      16     100       21.48       154       32.50
+        *      5      32      71       15.41       151       29.36
+        *      6      64      59       11.07       138       27.77
+        *      7     128      39        7.91       144       27.00
+        *      8     256      32        5.63       148       26.61
+        *      9     512      22        4.00       137       26.46
+        *     10    1024      15        2.84       146       26.41
+        *
+        * We want a compact storage format for private key, and, as part of
+        * key generation, we are allowed to reject some keys which would
+        * otherwise be fine (this does not induce any noticeable vulnerability
+        * as long as we reject only a small proportion of possible keys).
+        * Hence, we enforce at key generation time maximum values for the
+        * elements of f, g, F and G, so that their encoding can be expressed
+        * in fixed-width values. Limits have been chosen so that generated
+        * keys are almost always within bounds, thus impacting neither
+        * security nor performance.
+        *
+        * IMPORTANT: the code assumes that all coefficients of f, g, F and G
+        * ultimately fit in the -127..+127 range. Thus, none of the elements
+        * of max_fg_bits[] and max_FG_bits[] shall be greater than 8.
+        */
+
+        internal byte[] max_fg_bits = { /* fixed field width, in bits, for coefficients of f and g; presumably indexed by logn (1..10) */
+            0, /* unused */
+            8,
+            8,
+            8,
+            8,
+            8,
+            7,
+            7,
+            6,
+            6,
+            5
+        };
+
+        internal byte[] max_FG_bits = { /* fixed field width, in bits, for coefficients of F and G; presumably indexed by logn (1..10) */
+            0, /* unused */
+            8,
+            8,
+            8,
+            8,
+            8,
+            8,
+            8,
+            8,
+            8,
+            8
+        };
+
+        /*
+        * When generating a new key pair, we can always reject keys which
+        * feature an abnormally large coefficient. This can also be done for
+        * signatures, albeit with some care: in case the signature process is
+        * used in a derandomized setup (explicitly seeded with the message and
+        * private key), we have to follow the specification faithfully, and the
+        * specification only enforces a limit on the L2 norm of the signature
+        * vector. The limit on the L2 norm implies that the absolute value of
+        * a coefficient of the signature cannot be more than the following:
+        *
+        *   log(n)     n   max sig coeff (theoretical)
+        *      1       2       412
+        *      2       4       583
+        *      3       8       824
+        *      4      16      1166
+        *      5      32      1649
+        *      6      64      2332
+        *      7     128      3299
+        *      8     256      4665
+        *      9     512      6598
+        *     10    1024      9331
+        *
+        * However, the largest signature coefficient observed during our
+        * experiments was 1077 (in absolute value), hence we can assume that,
+        * with overwhelming probability, signature coefficients will fit
+        * in -2047..2047, i.e. 12 bits.
+        */
+
+        internal byte[] max_sig_bits = { /* bit width allowed for a signature coefficient; presumably indexed by logn (1..10) */
+            0, /* unused */
+            10,
+            11,
+            11,
+            12,
+            12,
+            12,
+            12,
+            12,
+            12,
+            12
+        };
+    }
+}
diff --git a/crypto/src/pqc/crypto/falcon/FalconCommon.cs b/crypto/src/pqc/crypto/falcon/FalconCommon.cs
new file mode 100644
index 000000000..e92237936
--- /dev/null
+++ b/crypto/src/pqc/crypto/falcon/FalconCommon.cs
@@ -0,0 +1,304 @@
+using System;
+
+namespace Org.BouncyCastle.Pqc.Crypto.Falcon
+{
+    class FalconCommon
+    {
+        /* 
+        * License from the reference C code (the code was copied then modified
+        * to function in C#):
+        * ==========================(LICENSE BEGIN)============================
+        *
+        * Copyright (c) 2017-2019  Falcon Project
+        *
+        * Permission is hereby granted, free of charge, to any person obtaining
+        * a copy of this software and associated documentation files (the
+        * "Software"), to deal in the Software without restriction, including
+        * without limitation the rights to use, copy, modify, merge, publish,
+        * distribute, sublicense, and/or sell copies of the Software, and to
+        * permit persons to whom the Software is furnished to do so, subject to
+        * the following conditions:
+        *
+        * The above copyright notice and this permission notice shall be
+        * included in all copies or substantial portions of the Software.
+        *
+        * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+        * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+        * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+        * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+        * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+        * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+        * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+        *
+        * ===========================(LICENSE END)=============================
+        */
+        /*
+        * Fills xsrc[x..x+n-1], n = 2^logn, with values in 0..12288. Two bytes
+        * at a time are taken from sc and read as a big-endian 16-bit sample;
+        * samples >= 61445 (= 5*12289) are discarded, others reduced mod 12289.
+        */
+        internal void hash_to_point_vartime(
+            SHAKE256 sc,
+            ushort[] xsrc, int x, uint logn)
+        {
+            /*
+            * This is the straightforward per-the-spec implementation. It
+            * is not constant-time, thus it might reveal information on the
+            * plaintext (at least, enough to check the plaintext against a
+            * list of potential plaintexts) in a scenario where the
+            * attacker does not have access to the signature value or to
+            * the public key, but knows the nonce (without knowledge of the
+            * nonce, the hashed output cannot be matched against potential
+            * plaintexts).
+            */
+            int remaining = 1 << (int)logn;
+            /* One scratch buffer serves every sample (assumes that
+            * i_shake256_extract fills both bytes on each call). */
+            byte[] buf = new byte[2];
+            while (remaining > 0) {
+                sc.i_shake256_extract(buf, 0, 2);
+                uint w = ((uint)buf[0] << 8) | (uint)buf[1];
+                if (w < 61445) {
+                    xsrc[x ++] = (ushort)(w % 12289);
+                    remaining --;
+                }
+            }
+        }
+
+        // void hash_to_point_ct(
+        //     SHAKE256 sc,
+        //     ushort[] xsrc, int x, uint logn, byte *tmp)
+        // {
+        //     /*
+        //     * Each 16-bit sample is a value in 0..65535. The value is
+        //     * kept if it falls in 0..61444 (because 61445 = 5*12289)
+        //     * and rejected otherwise; thus, each sample has probability
+        //     * about 0.93758 of being selected.
+        //     *
+        //     * We want to oversample enough to be sure that we will
+        //     * have enough values with probability at least 1 - 2^(-256).
+        //     * Depending on degree N, this leads to the following
+        //     * required oversampling:
+        //     *
+        //     *   logn     n  oversampling
+        //     *     1      2     65
+        //     *     2      4     67
+        //     *     3      8     71
+        //     *     4     16     77
+        //     *     5     32     86
+        //     *     6     64    100
+        //     *     7    128    122
+        //     *     8    256    154
+        //     *     9    512    205
+        //     *    10   1024    287
+        //     *
+        //     * If logn >= 7, then the provided temporary buffer is large
+        //     * enough. Otherwise, we use a stack buffer of 63 entries
+        //     * (i.e. 126 bytes) for the values that do not fit in tmp[].
+        //     */
+
+        //     const ushort[] overtab = {
+        //         0, /* unused */
+        //         65,
+        //         67,
+        //         71,
+        //         77,
+        //         86,
+        //         100,
+        //         122,
+        //         154,
+        //         205,
+        //         287
+        //     };
+
+        //     uint n, n2, u, m, p, over;
+        //     int tt1;
+        //     ushort[] tt2 = new ushort[63];
+
+        //     /*
+        //     * We first generate m 16-bit value. Values 0..n-1 go to x[].
+        //     * Values n..2*n-1 go to tt1[]. Values 2*n and later go to tt2[].
+        //     * We also reduce modulo q the values; rejected values are set
+        //     * to 0xFFFF.
+        //     */
+        //     n = 1U << logn;
+        //     n2 = n << 1;
+        //     over = overtab[logn];
+        //     m = n + over;
+        //     tt1 = tmp;
+        //     for (u = 0; u < m; u ++) {
+        //         byte[] buf = new byte[2];
+        //         uint w, wr;
+
+        //         // inner_shake256_extract(sc, buf, sizeof buf);
+        //         sc.i_shake256_extract(buf, 2);
+        //         w = ((uint)buf[0] << 8) | (uint)buf[1];
+        //         wr = w - ((uint)24578 & (((w - 24578) >> 31) - 1));
+        //         wr = wr - ((uint)24578 & (((wr - 24578) >> 31) - 1));
+        //         wr = wr - ((uint)12289 & (((wr - 12289) >> 31) - 1));
+        //         wr |= ((w - 61445) >> 31) - 1;
+        //         if (u < n) {
+        //             x[u] = (ushort)wr;
+        //         } else if (u < n2) {
+        //             tt1[u - n] = (ushort)wr;
+        //         } else {
+        //             tt2[u - n2] = (ushort)wr;
+        //         }
+        //     }
+
+        //     /*
+        //     * Now we must "squeeze out" the invalid values. We do this in
+        //     * a logarithmic sequence of passes; each pass computes where a
+        //     * value should go, and moves it down by 'p' slots if necessary,
+        //     * where 'p' uses an increasing powers-of-two scale. It can be
+        //     * shown that in all cases where the loop decides that a value
+        //     * has to be moved down by p slots, the destination slot is
+        //     * "free" (i.e. contains an invalid value).
+        //     */
+        //     for (p = 1; p <= over; p <<= 1) {
+        //         uint v;
+
+        //         /*
+        //         * In the loop below:
+        //         *
+        //         *   - v contains the index of the final destination of
+        //         *     the value; it is recomputed dynamically based on
+        //         *     whether values are valid or not.
+        //         *
+        //         *   - u is the index of the value we consider ("source");
+        //         *     its address is s.
+        //         *
+        //         *   - The loop may swap the value with the one at index
+        //         *     u-p. The address of the swap destination is d.
+        //         */
+        //         v = 0;
+        //         for (u = 0; u < m; u ++) {
+        //             ushort *s;
+        //             ushort *d;
+        //             uint j, sv, dv, mk;
+
+        //             if (u < n) {
+        //                 s = &x[u];
+        //             } else if (u < n2) {
+        //                 s = &tt1[u - n];
+        //             } else {
+        //                 s = &tt2[u - n2];
+        //             }
+        //             sv = *s;
+
+        //             /*
+        //             * The value in sv should ultimately go to
+        //             * address v, i.e. jump back by u-v slots.
+        //             */
+        //             j = u - v;
+
+        //             /*
+        //             * We increment v for the next iteration, but
+        //             * only if the source value is valid. The mask
+        //             * 'mk' is -1 if the value is valid, 0 otherwise,
+        //             * so we _subtract_ mk.
+        //             */
+        //             mk = (sv >> 15) - 1U;
+        //             v -= mk;
+
+        //             /*
+        //             * In this loop we consider jumps by p slots; if
+        //             * u < p then there is nothing more to do.
+        //             */
+        //             if (u < p) {
+        //                 continue;
+        //             }
+
+        //             /*
+        //             * Destination for the swap: value at address u-p.
+        //             */
+        //             if ((u - p) < n) {
+        //                 d = &x[u - p];
+        //             } else if ((u - p) < n2) {
+        //                 d = &tt1[(u - p) - n];
+        //             } else {
+        //                 d = &tt2[(u - p) - n2];
+        //             }
+        //             dv = *d;
+
+        //             /*
+        //             * The swap should be performed only if the source
+        //             * is valid AND the jump j has its 'p' bit set.
+        //             */
+        //             mk &= -(((j & p) + 0x1FF) >> 9);
+
+        //             *s = (ushort)(sv ^ (mk & (sv ^ dv)));
+        //             *d = (ushort)(dv ^ (mk & (sv ^ dv)));
+        //         }
+        //     }
+        // }
+
+        /*
+        * Acceptance bound for the (squared) l2-norm of the signature depends
+        * on the degree. This array is indexed by logn (1 to 10). These bounds
+        * are _inclusive_ (they are equal to floor(beta^2)).
+        */
+        internal uint[] l2bound = {
+            0,    /* unused */
+            101498,   /* logn = 1 */
+            208714,   /* logn = 2 */
+            428865,   /* logn = 3 */
+            892039,   /* logn = 4 */
+            1852696,  /* logn = 5 */
+            3842630,  /* logn = 6 */
+            7959734,  /* logn = 7 */
+            16468416, /* logn = 8 */
+            34034726, /* logn = 9 */
+            70265242  /* logn = 10 */
+        };
+
+        /*
+        * Returns true when the sum of the squares of s1src[s1..s1+n-1] and
+        * s2src[s2..s2+n-1], n = 2^logn, does not exceed l2bound[logn]
+        * (the bound is inclusive).
+        */
+        internal bool is_short(
+            short[] s1src, int s1, short[] s2src, int s2, uint logn)
+        {
+            /*
+            * Everything stays in 32-bit arithmetic. 'seen' ORs together every
+            * partial sum, so its top bit records whether the running total
+            * ever reached 2^31; in that case the total is forced to
+            * 2^32-1, which is above every entry of l2bound.
+            */
+            int count = 1 << (int)logn;
+            uint total = 0;
+            uint seen = 0;
+            for (int i = 0; i < count; i++) {
+                int a = s1src[s1 + i];
+                total += (uint)(a * a);
+                seen |= total;
+                int b = s2src[s2 + i];
+                total += (uint)(b * b);
+                seen |= total;
+            }
+            total |= (uint)(-(seen >> 31));
+            return total <= l2bound[logn];
+        }
+
+        /*
+        * Returns true when sqn plus the sum of the squares of
+        * s2src[s2..s2+n-1], n = 2^logn, does not exceed l2bound[logn].
+        * A sqn whose top bit is already set, or a running total that
+        * reaches 2^31, is forced to 2^32-1 and therefore fails the test.
+        */
+        internal bool is_short_half(
+            uint sqn, short[] s2src, int s2, uint logn)
+        {
+            int count = 1 << (int)logn;
+            uint seen = (uint)(-(sqn >> 31));
+            for (int i = 0; i < count; i++) {
+                int c = s2src[s2 + i];
+                sqn += (uint)(c * c);
+                seen |= sqn;
+            }
+            sqn |= (uint)(-(seen >> 31));
+            return sqn <= l2bound[logn];
+        }
+    }
+}
diff --git a/crypto/src/pqc/crypto/falcon/FalconConversions.cs b/crypto/src/pqc/crypto/falcon/FalconConversions.cs
new file mode 100644
index 000000000..36ef56fb4
--- /dev/null
+++ b/crypto/src/pqc/crypto/falcon/FalconConversions.cs
@@ -0,0 +1,66 @@
+using System;
+
+namespace Org.BouncyCastle.Pqc.Crypto.Falcon
+{
+    /// <summary>
+    /// Little-endian conversions between integers and byte arrays
+    /// (byte 0 holds the least significant 8 bits).
+    /// </summary>
+    class FalconConversions
+    {
+        internal FalconConversions(){}
+
+        /// <summary>Encodes a 32-bit integer as 4 little-endian bytes.</summary>
+        internal byte[] int_to_bytes(int x)
+        {
+            byte[] encoded = new byte[4];
+            for (int i = 0; i < encoded.Length; i++)
+            {
+                encoded[i] = (byte)(x >> (8 * i));
+            }
+            return encoded;
+        }
+
+        /// <summary>Decodes the 4 little-endian bytes at src[pos..pos+3].</summary>
+        internal uint bytes_to_uint(byte[] src, int pos)
+        {
+            uint value = 0;
+            for (int i = 0; i < 4; i++)
+            {
+                value |= ((uint)src[pos + i]) << (8 * i);
+            }
+            return value;
+        }
+
+        /// <summary>Encodes a 64-bit unsigned integer as 8 little-endian bytes.</summary>
+        internal byte[] ulong_to_bytes(ulong x)
+        {
+            byte[] encoded = new byte[8];
+            for (int i = 0; i < encoded.Length; i++)
+            {
+                encoded[i] = (byte)(x >> (8 * i));
+            }
+            return encoded;
+        }
+
+        /// <summary>Decodes the 8 little-endian bytes at src[pos..pos+7].</summary>
+        internal ulong bytes_to_ulong(byte[] src, int pos)
+        {
+            ulong value = 0;
+            for (int i = 0; i < 8; i++)
+            {
+                value |= ((ulong)src[pos + i]) << (8 * i);
+            }
+            return value;
+        }
+
+        /// <summary>
+        /// Decodes num consecutive little-endian 32-bit words starting at src[pos].
+        /// </summary>
+        internal uint[] bytes_to_uint_array(byte[] src, int pos, int num)
+        {
+            uint[] words = new uint[num];
+            int offset = pos;
+            for (int i = 0; i < num; i++, offset += 4)
+            {
+                words[i] = bytes_to_uint(src, offset);
+            }
+            return words;
+        }
+    }
+}
diff --git a/crypto/src/pqc/crypto/falcon/FalconFFT.cs b/crypto/src/pqc/crypto/falcon/FalconFFT.cs
new file mode 100644
index 000000000..aa862cc23
--- /dev/null
+++ b/crypto/src/pqc/crypto/falcon/FalconFFT.cs
@@ -0,0 +1,711 @@
+using System;
+
+namespace Org.BouncyCastle.Pqc.Crypto.Falcon
+{
+    class FalconFFT
+    {
+        /* Floating-point engine every operation of this class delegates to. */
+        private readonly FPREngine fpre;
+
+        /* Creates an instance backed by its own, freshly created engine. */
+        internal FalconFFT()
+            : this(new FPREngine())
+        {
+        }
+
+        /* Creates an instance that uses the supplied engine. */
+        internal FalconFFT(FPREngine fprengine)
+        {
+            fpre = fprengine;
+        }
+
+        /* 
+        * License from the reference C code (the code was copied then modified
+        * to function in C#):
+        * ==========================(LICENSE BEGIN)============================
+        *
+        * Copyright (c) 2017-2019  Falcon Project
+        *
+        * Permission is hereby granted, free of charge, to any person obtaining
+        * a copy of this software and associated documentation files (the
+        * "Software"), to deal in the Software without restriction, including
+        * without limitation the rights to use, copy, modify, merge, publish,
+        * distribute, sublicense, and/or sell copies of the Software, and to
+        * permit persons to whom the Software is furnished to do so, subject to
+        * the following conditions:
+        *
+        * The above copyright notice and this permission notice shall be
+        * included in all copies or substantial portions of the Software.
+        *
+        * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+        * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+        * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+        * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+        * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+        * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+        * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+        *
+        * ===========================(LICENSE END)=============================
+        */
+        /*
+        * Complex addition. Returns a two-element array holding
+        * { Re(a + b), Im(a + b) }.
+        */
+        internal FalconFPR[] FPC_ADD(FalconFPR a_re, FalconFPR a_im,
+                        FalconFPR b_re, FalconFPR b_im) {
+            return new FalconFPR[] {
+                fpre.fpr_add(a_re, b_re),
+                fpre.fpr_add(a_im, b_im)
+            };
+        }
+
+        /*
+        * Complex subtraction. Returns a two-element array holding
+        * { Re(a - b), Im(a - b) }.
+        */
+        internal FalconFPR[] FPC_SUB(FalconFPR a_re, FalconFPR a_im,
+                        FalconFPR b_re, FalconFPR b_im) {
+            return new FalconFPR[] {
+                fpre.fpr_sub(a_re, b_re),
+                fpre.fpr_sub(a_im, b_im)
+            };
+        }
+
+        /*
+        * Complex multiplication. Returns a two-element array holding
+        * { Re(a * b), Im(a * b) }, computed as
+        *    Re = a_re*b_re - a_im*b_im
+        *    Im = a_re*b_im + a_im*b_re
+        */
+        internal FalconFPR[] FPC_MUL(FalconFPR a_re, FalconFPR a_im,
+                        FalconFPR b_re, FalconFPR b_im) {
+            FalconFPR re_re = fpre.fpr_mul(a_re, b_re);
+            FalconFPR im_im = fpre.fpr_mul(a_im, b_im);
+            FalconFPR prod_re = fpre.fpr_sub(re_re, im_im);
+
+            FalconFPR re_im = fpre.fpr_mul(a_re, b_im);
+            FalconFPR im_re = fpre.fpr_mul(a_im, b_re);
+            FalconFPR prod_im = fpre.fpr_add(re_im, im_re);
+
+            return new FalconFPR[] { prod_re, prod_im };
+        }
+
+        /*
+        * Complex squaring. Returns a two-element array holding
+        * { Re(a * a), Im(a * a) }, computed as
+        *    Re = a_re^2 - a_im^2
+        *    Im = 2 * (a_re * a_im)
+        * The d_re and d_im arguments are never read; the result is only
+        * delivered through the returned array.
+        */
+        internal FalconFPR[] FPC_SQR(FalconFPR d_re, FalconFPR d_im, 
+                        FalconFPR a_re, FalconFPR a_im) {
+            FalconFPR sq_re = fpre.fpr_sqr(a_re);
+            FalconFPR sq_im = fpre.fpr_sqr(a_im);
+            FalconFPR out_re = fpre.fpr_sub(sq_re, sq_im);
+            FalconFPR out_im = fpre.fpr_double(fpre.fpr_mul(a_re, a_im));
+            return new FalconFPR[] { out_re, out_im };
+        }
+
+        /*
+        * Complex inversion. Returns a two-element array holding
+        * { Re(1 / a), Im(1 / a) }, computed as conj(a) / |a|^2.
+        */
+        internal FalconFPR[] FPC_INV(FalconFPR a_re, FalconFPR a_im) {
+            /* 1 / |a|^2 */
+            FalconFPR norm = fpre.fpr_add(fpre.fpr_sqr(a_re), fpre.fpr_sqr(a_im));
+            FalconFPR inv_norm = fpre.fpr_inv(norm);
+
+            FalconFPR out_re = fpre.fpr_mul(a_re, inv_norm);
+            FalconFPR out_im = fpre.fpr_mul(fpre.fpr_neg(a_im), inv_norm);
+            return new FalconFPR[] { out_re, out_im };
+        }
+        /*
+        * Complex division. Returns a two-element array holding
+        * { Re(a / b), Im(a / b) }, computed as a * (conj(b) / |b|^2).
+        */
+        internal FalconFPR[] FPC_DIV(FalconFPR a_re, FalconFPR a_im,
+                        FalconFPR b_re, FalconFPR b_im) {
+            /* 1 / |b|^2 */
+            FalconFPR inv_norm = fpre.fpr_inv(
+                fpre.fpr_add(fpre.fpr_sqr(b_re), fpre.fpr_sqr(b_im)));
+
+            /* ib = 1 / b */
+            FalconFPR ib_re = fpre.fpr_mul(b_re, inv_norm);
+            FalconFPR ib_im = fpre.fpr_mul(fpre.fpr_neg(b_im), inv_norm);
+
+            /* a * ib */
+            FalconFPR re_re = fpre.fpr_mul(a_re, ib_re);
+            FalconFPR im_im = fpre.fpr_mul(a_im, ib_im);
+            FalconFPR quot_re = fpre.fpr_sub(re_re, im_im);
+            FalconFPR re_im = fpre.fpr_mul(a_re, ib_im);
+            FalconFPR im_re = fpre.fpr_mul(a_im, ib_re);
+            FalconFPR quot_im = fpre.fpr_add(re_im, im_re);
+
+            return new FalconFPR[] { quot_re, quot_im };
+        }
+
+        /*
+        * Let w = exp(i*pi/N); w is a primitive 2N-th root of 1. We define the
+        * values w_j = w^(2j+1) for all j from 0 to N-1: these are the roots
+        * of X^N+1 in the field of complex numbers. A crucial property is that
+        * w_{N-1-j} = conj(w_j) = 1/w_j for all j.
+        *
+        * FFT representation of a polynomial f (taken modulo X^N+1) is the
+        * set of values f(w_j). Since f is real, conj(f(w_j)) = f(conj(w_j)),
+        * thus f(w_{N-1-j}) = conj(f(w_j)). We thus store only half the values,
+        * for j = 0 to N/2-1; the other half can be recomputed easily when (if)
+        * needed. A consequence is that FFT representation has the same size
+        * as normal representation: N/2 complex numbers use N real numbers (each
+        * complex number is the combination of a real and an imaginary part).
+        *
+        * We use a specific ordering which makes computations easier. Let rev()
+        * be the bit-reversal function over log(N) bits. For j in 0..N/2-1, we
+        * store the real and imaginary parts of f(w_j) in slots:
+        *
+        *    Re(f(w_j)) -> slot rev(j)/2
+        *    Im(f(w_j)) -> slot rev(j)/2+N/2
+        *
+        * (Note that rev(j) is even for j < N/2.)
+        */
+
+        internal void FFT(FalconFPR[] fsrc, int f, uint logn)
+        {
+            /*
+            * In-place forward FFT of the N = 2^logn values stored at
+            * fsrc[f] .. fsrc[f + N - 1]. For logn <= 1 the loop below
+            * does not execute and the array is left untouched.
+            *
+            * FFT algorithm in bit-reversal order uses the following
+            * iterative algorithm:
+            *
+            *   t = N
+            *   for m = 1; m < N; m *= 2:
+            *       ht = t/2
+            *       for i1 = 0; i1 < m; i1 ++:
+            *           j1 = i1 * t
+            *           s = GM[m + i1]
+            *           for j = j1; j < (j1 + ht); j ++:
+            *               x = fsrc[f + j]
+            *               y = s * fsrc[f + j + ht]
+            *               fsrc[f + j] = x + y
+            *               fsrc[f + j + ht] = x - y
+            *       t = ht
+            *
+            * GM[k] contains w^rev(k) for primitive root w = exp(i*pi/N).
+            *
+            * In the description above, fsrc[f + k] is supposed to contain
+            * complex numbers. In our in-memory representation, the real and
+            * imaginary parts of fsrc[f + k] are in array slots k and k+N/2.
+            *
+            * We only keep the first half of the complex numbers. We can
+            * see that after the first iteration, the first and second halves
+            * of the array of complex numbers have separate lives, so we
+            * simply ignore the second part.
+            */
+
+            uint u;
+            int t, n, hn, m;
+
+            /*
+            * First iteration: compute fsrc[f + j] + i * fsrc[f + j+N/2] for all j < N/2
+            * (because GM[1] = w^rev(1) = w^(N/2) = i).
+            * In our chosen representation, this is a no-op: everything is
+            * already where it should be.
+            */
+
+            /*
+            * Subsequent iterations are truncated to use only the first
+            * half of values.
+            */
+            n = (int)1 << (int)logn;
+            hn = n >> 1;
+            t = hn;
+            for (u = 1, m = 2; u < logn; u ++, m <<= 1) {
+                int ht, hm, i1, j1;
+
+                ht = t >> 1;
+                hm = m >> 1;
+                for (i1 = 0, j1 = 0; i1 < hm; i1 ++, j1 += t) {
+                    int j, j2;
+
+                    j2 = j1 + ht;
+                    FalconFPR s_re, s_im;
+
+                    /* twiddle factor for entry (m + i1): real part in the even slot, imaginary part in the odd slot */
+                    s_re = this.fpre.fpr_gm_tab[((m + i1) << 1) + 0];
+                    s_im = this.fpre.fpr_gm_tab[((m + i1) << 1) + 1];
+                    for (j = j1; j < j2; j ++) {
+                        FalconFPR x_re, x_im, y_re, y_im;
+                        FalconFPR[] res;
+
+                        x_re = fsrc[f + j];
+                        x_im = fsrc[f + j + hn];
+                        y_re = fsrc[f + j + ht];
+                        y_im = fsrc[f + j + ht + hn];
+                        /* y = s * y */
+                        res = FPC_MUL(y_re, y_im, s_re, s_im);
+                        y_re = res[0]; y_im = res[1];
+                        /* butterfly: slot j receives x + y, slot j + ht receives x - y */
+                        res = FPC_ADD(x_re, x_im, y_re, y_im);
+                        fsrc[f + j] = res[0]; fsrc[f + j + hn] = res[1];
+                        res = FPC_SUB(x_re, x_im, y_re, y_im);
+                        fsrc[f + j + ht] = res[0]; fsrc[f + j + ht + hn] = res[1];
+                    }
+                }
+                t = ht;
+            }
+        }
+
+        internal void iFFT(FalconFPR[] fsrc, int f, uint logn)
+        {
+            /*
+            * In-place inverse FFT of the N = 2^logn values stored at
+            * fsrc[f] .. fsrc[f + N - 1]. For logn = 0 nothing is modified.
+            *
+            * Inverse FFT algorithm in bit-reversal order uses the following
+            * iterative algorithm:
+            *
+            *   t = 1
+            *   for m = N; m > 1; m /= 2:
+            *       hm = m/2
+            *       dt = t*2
+            *       for i1 = 0; i1 < hm; i1 ++:
+            *           j1 = i1 * dt
+            *           s = iGM[hm + i1]
+            *           for j = j1; j < (j1 + t); j ++:
+            *               x = fsrc[f + j]
+            *               y = fsrc[f + j + t]
+            *               fsrc[f + j] = x + y
+            *               fsrc[f + j + t] = s * (x - y)
+            *       t = dt
+            *   for i1 = 0; i1 < N; i1 ++:
+            *       fsrc[f + i1] = fsrc[f + i1] / N
+            *
+            * iGM[k] contains (1/w)^rev(k) for primitive root w = exp(i*pi/N)
+            * (actually, iGM[k] = 1/GM[k] = conj(GM[k])).
+            *
+            * In the main loop (not counting the final division loop), in
+            * all iterations except the last, the first and second half of
+            * the fsrc[f + k] values (as an array of complex numbers) are
+            * separate. In our chosen representation, we do not keep the
+            * second half.
+            *
+            * The last iteration recombines the recomputed half with the
+            * implicit half, and should yield only real numbers since the
+            * target polynomial is real; moreover, s = i at that step.
+            * Thus, when considering x and y:
+            *    y = conj(x) since the final fsrc[f + j] must be real
+            *    Therefore, fsrc[f + j] is filled with 2*Re(x), and fsrc[f + j + t] is
+            *    filled with 2*Im(x).
+            * But we already have Re(x) and Im(x) in array slots j and j+t
+            * in our chosen representation. That last iteration is thus a
+            * simple doubling of the values in all the array.
+            *
+            * We make the last iteration a no-op by tweaking the final
+            * division into a division by N/2, not N.
+            */
+            int u, n, hn, t, m;
+
+            n = (int)1 << (int)logn;
+            t = 1;
+            m = n;
+            hn = n >> 1;
+            for (u = (int)logn; u > 1; u --) {
+                int hm, dt, i1, j1;
+
+                hm = m >> 1;
+                dt = t << 1;
+                for (i1 = 0, j1 = 0; j1 < hn; i1 ++, j1 += dt) {
+                    int j, j2;
+
+                    j2 = j1 + t;
+                    FalconFPR s_re, s_im;
+
+                    /* s = conj(GM[hm + i1]): same table as the forward FFT, imaginary part negated */
+                    s_re = this.fpre.fpr_gm_tab[((hm + i1) << 1) + 0];
+                    s_im = this.fpre.fpr_neg(this.fpre.fpr_gm_tab[((hm + i1) << 1) + 1]);
+                    for (j = j1; j < j2; j ++) {
+                        FalconFPR x_re, x_im, y_re, y_im;
+                        FalconFPR[] res;
+
+                        x_re = fsrc[f + j];
+                        x_im = fsrc[f + j + hn];
+                        y_re = fsrc[f + j + t];
+                        y_im = fsrc[f + j + t + hn];
+                        /* slot j receives x + y */
+                        res = FPC_ADD(x_re, x_im, y_re, y_im);
+                        fsrc[f + j] = res[0]; fsrc[f + j + hn] = res[1];
+
+                        /* slot j + t receives s * (x - y) */
+                        res = FPC_SUB(x_re, x_im, y_re, y_im);
+                        x_re = res[0]; x_im = res[1];
+                        res = FPC_MUL(x_re, x_im, s_re, s_im);
+                        fsrc[f + j + t] = res[0]; fsrc[f + j + t + hn] = res[1];
+                    }
+                }
+                t = dt;
+                m = hm;
+            }
+
+            /*
+            * Last iteration is a no-op, provided that we divide by N/2
+            * instead of N. We need to make a special case for logn = 0.
+            */
+            if (logn > 0) {
+                FalconFPR ni;
+
+                /* scaling factor; presumably fpr_p2_tab[logn] = 2/N -- confirm against FPREngine */
+                ni = this.fpre.fpr_p2_tab[logn];
+                for (u = 0; u < n; u ++) {
+                    fsrc[f+u] = this.fpre.fpr_mul(fsrc[f+u], ni);
+                }
+            }
+        }
+
+        /*
+        * Slot-wise addition over 2^logn slots:
+        * asrc[a + i] <- asrc[a + i] + bsrc[b + i]. bsrc is only read.
+        */
+        internal void poly_add(
+            FalconFPR[] asrc, int a, FalconFPR[] bsrc, int b, uint logn)
+        {
+            int count = 1 << (int)logn;
+
+            for (int i = 0; i < count; i++) {
+                asrc[a + i] = fpre.fpr_add(asrc[a + i], bsrc[b + i]);
+            }
+        }
+
+        /*
+        * Slot-wise subtraction over 2^logn slots:
+        * asrc[a + i] <- asrc[a + i] - bsrc[b + i]. bsrc is only read.
+        */
+        internal void poly_sub(
+            FalconFPR[] asrc, int a, FalconFPR[] bsrc, int b, uint logn)
+        {
+            int count = 1 << (int)logn;
+
+            for (int i = 0; i < count; i++) {
+                asrc[a + i] = fpre.fpr_sub(asrc[a + i], bsrc[b + i]);
+            }
+        }
+
+        /*
+        * Negates, in place, the 2^logn slots starting at asrc[a].
+        */
+        internal void poly_neg(FalconFPR[] asrc, int a, uint logn)
+        {
+            int count = 1 << (int)logn;
+
+            for (int i = 0; i < count; i++) {
+                asrc[a + i] = fpre.fpr_neg(asrc[a + i]);
+            }
+        }
+
+        /*
+        * Negates, in place, slots N/2 .. N-1 (N = 2^logn) of the array at
+        * asrc[a]. In the FFT layout used by this class those slots hold the
+        * imaginary parts, so every stored complex value is conjugated.
+        */
+        internal void poly_adj_fft(FalconFPR[] asrc, int a, uint logn)
+        {
+            int count = 1 << (int)logn;
+
+            for (int i = count >> 1; i < count; i++) {
+                asrc[a + i] = fpre.fpr_neg(asrc[a + i]);
+            }
+        }
+
+        /*
+        * Slot-wise complex multiplication of two FFT-domain arrays:
+        * a <- a * b. Real parts live in slots 0..N/2-1 and imaginary
+        * parts in slots N/2..N-1 (N = 2^logn). bsrc is only read.
+        */
+        internal void poly_mul_fft(
+            FalconFPR[] asrc, int a, FalconFPR[] bsrc, int b, uint logn)
+        {
+            int half = (1 << (int)logn) >> 1;
+
+            for (int k = 0; k < half; k++) {
+                FalconFPR x_re = asrc[a + k];
+                FalconFPR x_im = asrc[a + k + half];
+                FalconFPR y_re = bsrc[b + k];
+                FalconFPR y_im = bsrc[b + k + half];
+
+                FalconFPR[] prod = FPC_MUL(x_re, x_im, y_re, y_im);
+                asrc[a + k] = prod[0];
+                asrc[a + k + half] = prod[1];
+            }
+        }
+
+        /*
+        * Slot-wise complex multiplication by the conjugate of the second
+        * FFT-domain array: a <- a * conj(b). Real parts live in slots
+        * 0..N/2-1 and imaginary parts in slots N/2..N-1 (N = 2^logn).
+        * bsrc is only read.
+        */
+        internal void poly_muladj_fft(
+            FalconFPR[] asrc, int a, FalconFPR[] bsrc, int b, uint logn)
+        {
+            int half = (1 << (int)logn) >> 1;
+
+            for (int k = 0; k < half; k++) {
+                FalconFPR x_re = asrc[a + k];
+                FalconFPR x_im = asrc[a + k + half];
+                FalconFPR y_re = bsrc[b + k];
+                /* conjugate of b: negate the imaginary part */
+                FalconFPR y_im = fpre.fpr_neg(bsrc[b + k + half]);
+
+                FalconFPR[] prod = FPC_MUL(x_re, x_im, y_re, y_im);
+                asrc[a + k] = prod[0];
+                asrc[a + k + half] = prod[1];
+            }
+        }
+
+        /*
+        * Multiplies, in place, every complex slot of an FFT-domain array
+        * by its own conjugate: a <- a * conj(a). The product is real, so
+        * the real slot receives re^2 + im^2 and the imaginary slot is set
+        * to the engine's zero value.
+        */
+        internal void poly_mulselfadj_fft(FalconFPR[] asrc, int a, uint logn)
+        {
+            int half = (1 << (int)logn) >> 1;
+
+            for (int k = 0; k < half; k++) {
+                FalconFPR re = asrc[a + k];
+                FalconFPR im = asrc[a + k + half];
+
+                asrc[a + k] = fpre.fpr_add(fpre.fpr_sqr(re), fpre.fpr_sqr(im));
+                asrc[a + k + half] = fpre.fpr_zero;
+            }
+        }
+
+        /*
+        * Multiplies, in place, each of the 2^logn slots starting at
+        * asrc[a] by the constant x.
+        */
+        internal void poly_mulconst(FalconFPR[] asrc, int a, FalconFPR x, uint logn)
+        {
+            int count = 1 << (int)logn;
+
+            for (int i = 0; i < count; i++) {
+                asrc[a + i] = fpre.fpr_mul(asrc[a + i], x);
+            }
+        }
+
+        /*
+        * Slot-wise complex division of two FFT-domain arrays: a <- a / b.
+        * Real parts live in slots 0..N/2-1 and imaginary parts in slots
+        * N/2..N-1 (N = 2^logn). bsrc is only read.
+        */
+        internal void poly_div_fft(
+            FalconFPR[] asrc, int a, FalconFPR[] bsrc, int b, uint logn)
+        {
+            int half = (1 << (int)logn) >> 1;
+
+            for (int k = 0; k < half; k++) {
+                FalconFPR x_re = asrc[a + k];
+                FalconFPR x_im = asrc[a + k + half];
+                FalconFPR y_re = bsrc[b + k];
+                FalconFPR y_im = bsrc[b + k + half];
+
+                FalconFPR[] quot = FPC_DIV(x_re, x_im, y_re, y_im);
+                asrc[a + k] = quot[0];
+                asrc[a + k + half] = quot[1];
+            }
+        }
+
+        /*
+        * For each of the N/2 complex slots (N = 2^logn) of the FFT-domain
+        * arrays a and b, stores 1 / (|a|^2 + |b|^2) into dsrc[d + slot].
+        * Only the first N/2 entries of d are written; asrc and bsrc are
+        * only read.
+        */
+        internal void poly_invnorm2_fft(FalconFPR[] dsrc, int d,
+            FalconFPR[] asrc, int a, FalconFPR[] bsrc, int b, uint logn)
+        {
+            int half = (1 << (int)logn) >> 1;
+
+            for (int k = 0; k < half; k++) {
+                FalconFPR x_re = asrc[a + k];
+                FalconFPR x_im = asrc[a + k + half];
+                FalconFPR y_re = bsrc[b + k];
+                FalconFPR y_im = bsrc[b + k + half];
+
+                FalconFPR norm_a = fpre.fpr_add(fpre.fpr_sqr(x_re), fpre.fpr_sqr(x_im));
+                FalconFPR norm_b = fpre.fpr_add(fpre.fpr_sqr(y_re), fpre.fpr_sqr(y_im));
+                dsrc[d + k] = fpre.fpr_inv(fpre.fpr_add(norm_a, norm_b));
+            }
+        }
+
+        /*
+        * For each of the N/2 complex slots (N = 2^logn) of the FFT-domain
+        * inputs, computes d = F * conj(f) + G * conj(g). Real parts are
+        * written to dsrc[d + slot] and imaginary parts to
+        * dsrc[d + slot + N/2]. The four input arrays are only read.
+        */
+        internal void poly_add_muladj_fft(FalconFPR[] dsrc, int d,
+            FalconFPR[] Fsrc, int F, FalconFPR[] Gsrc, int G,
+            FalconFPR[] fsrc, int f, FalconFPR[] gsrc, int g, uint logn)
+        {
+            int half = (1 << (int)logn) >> 1;
+
+            for (int k = 0; k < half; k++) {
+                FalconFPR F_re = Fsrc[F + k];
+                FalconFPR F_im = Fsrc[F + k + half];
+                FalconFPR G_re = Gsrc[G + k];
+                FalconFPR G_im = Gsrc[G + k + half];
+                FalconFPR f_re = fsrc[f + k];
+                FalconFPR f_im = fsrc[f + k + half];
+                FalconFPR g_re = gsrc[g + k];
+                FalconFPR g_im = gsrc[g + k + half];
+
+                /* F * conj(f) */
+                FalconFPR[] Ff = FPC_MUL(F_re, F_im, f_re, fpre.fpr_neg(f_im));
+                /* G * conj(g) */
+                FalconFPR[] Gg = FPC_MUL(G_re, G_im, g_re, fpre.fpr_neg(g_im));
+
+                dsrc[d + k] = fpre.fpr_add(Ff[0], Gg[0]);
+                dsrc[d + k + half] = fpre.fpr_add(Ff[1], Gg[1]);
+            }
+        }
+
+        /*
+        * Multiplies, in place, each complex slot of the FFT-domain array
+        * a by the value in the matching real slot bsrc[b + slot]. Only the
+        * first N/2 entries of b are read (N = 2^logn); its imaginary slots
+        * are ignored.
+        */
+        internal void poly_mul_autoadj_fft(
+            FalconFPR[] asrc, int a, FalconFPR[] bsrc, int b, uint logn)
+        {
+            int half = (1 << (int)logn) >> 1;
+
+            for (int k = 0; k < half; k++) {
+                int re = a + k;
+                int im = re + half;
+
+                asrc[re] = fpre.fpr_mul(asrc[re], bsrc[b + k]);
+                asrc[im] = fpre.fpr_mul(asrc[im], bsrc[b + k]);
+            }
+        }
+
+        /*
+        * Divides, in place, each complex slot of the FFT-domain array a by
+        * the value in the matching real slot bsrc[b + slot]. Only the
+        * first N/2 entries of b are read (N = 2^logn); the division is
+        * done as a multiplication by the inverse.
+        */
+        internal void poly_div_autoadj_fft(
+            FalconFPR[] asrc, int a, FalconFPR[] bsrc, int b, uint logn)
+        {
+            int half = (1 << (int)logn) >> 1;
+
+            for (int k = 0; k < half; k++) {
+                FalconFPR inverse = fpre.fpr_inv(bsrc[b + k]);
+
+                asrc[a + k] = fpre.fpr_mul(asrc[a + k], inverse);
+                asrc[a + k + half] = fpre.fpr_mul(asrc[a + k + half], inverse);
+            }
+        }
+
+        /*
+        * In-place LDL step on FFT-domain arrays. For each of the N/2
+        * complex slots (N = 2^logn):
+        *    mu  = g01 / g00
+        *    g11 <- g11 - mu * conj(g01)
+        *    g01 <- conj(mu)
+        * g00 is only read.
+        */
+        internal void poly_LDL_fft(
+            FalconFPR[] g00src, int g00,
+            FalconFPR[] g01src, int g01, FalconFPR[] g11src, int g11, uint logn)
+        {
+            int half = (1 << (int)logn) >> 1;
+
+            for (int k = 0; k < half; k++) {
+                FalconFPR a_re = g00src[g00 + k];
+                FalconFPR a_im = g00src[g00 + k + half];
+                FalconFPR b_re = g01src[g01 + k];
+                FalconFPR b_im = g01src[g01 + k + half];
+                FalconFPR c_re = g11src[g11 + k];
+                FalconFPR c_im = g11src[g11 + k + half];
+
+                /* mu = g01 / g00 */
+                FalconFPR[] mu = FPC_DIV(b_re, b_im, a_re, a_im);
+                /* mu * conj(g01) */
+                FalconFPR[] corr = FPC_MUL(mu[0], mu[1], b_re, fpre.fpr_neg(b_im));
+                /* g11 - mu * conj(g01) */
+                FalconFPR[] diff = FPC_SUB(c_re, c_im, corr[0], corr[1]);
+
+                g11src[g11 + k] = diff[0];
+                g11src[g11 + k + half] = diff[1];
+                g01src[g01 + k] = mu[0];
+                g01src[g01 + k + half] = fpre.fpr_neg(mu[1]);
+            }
+        }
+
+        /*
+        * LDL step on FFT-domain arrays that writes its results to
+        * separate outputs instead of overwriting the inputs. For each of
+        * the N/2 complex slots (N = 2^logn):
+        *    mu  = g01 / g00
+        *    d11 <- g11 - mu * conj(g01)
+        *    l10 <- conj(mu)
+        * g00, g01 and g11 are only read.
+        */
+        internal void poly_LDLmv_fft(
+            FalconFPR[] d11src, int d11, FalconFPR[] l10src, int l10,
+            FalconFPR[] g00src, int g00, FalconFPR[] g01src, int g01,
+            FalconFPR[] g11src, int g11, uint logn)
+        {
+            int half = (1 << (int)logn) >> 1;
+
+            for (int k = 0; k < half; k++) {
+                FalconFPR a_re = g00src[g00 + k];
+                FalconFPR a_im = g00src[g00 + k + half];
+                FalconFPR b_re = g01src[g01 + k];
+                FalconFPR b_im = g01src[g01 + k + half];
+                FalconFPR c_re = g11src[g11 + k];
+                FalconFPR c_im = g11src[g11 + k + half];
+
+                /* mu = g01 / g00 */
+                FalconFPR[] mu = FPC_DIV(b_re, b_im, a_re, a_im);
+                /* mu * conj(g01) */
+                FalconFPR[] corr = FPC_MUL(mu[0], mu[1], b_re, fpre.fpr_neg(b_im));
+                /* g11 - mu * conj(g01) */
+                FalconFPR[] diff = FPC_SUB(c_re, c_im, corr[0], corr[1]);
+
+                d11src[d11 + k] = diff[0];
+                d11src[d11 + k + half] = diff[1];
+                l10src[l10 + k] = mu[0];
+                l10src[l10 + k + half] = fpre.fpr_neg(mu[1]);
+            }
+        }
+
+        /*
+        * Splits the FFT-domain array f (N = 2^logn slots) into two
+        * half-size FFT-domain arrays f0 and f1. For each pair of adjacent
+        * complex values (a, b) of f:
+        *    f0 <- (a + b) / 2
+        *    f1 <- ((a - b) * conj(GM[pair + N/2])) / 2
+        * fsrc is only read.
+        */
+        internal void poly_split_fft(
+            FalconFPR[] f0src, int f0, FalconFPR[] f1src, int f1,
+            FalconFPR[] fsrc, int f, uint logn)
+        {
+            /*
+            * The FFT representation we use is in bit-reversed order
+            * (element i contains f(w^(rev(i))), where rev() is the
+            * bit-reversal function over the ring degree). This changes
+            * indexes with regards to the Falcon specification.
+            */
+            int half = (1 << (int)logn) >> 1;
+            int quarter = half >> 1;
+
+            /*
+            * Complex values are processed by pairs. For logn = 1 there is
+            * a single complex value (the other one is the implicit
+            * conjugate) and the loop below is skipped, so that case is
+            * covered by these two copies.
+            */
+            f0src[f0 + 0] = fsrc[f + 0];
+            f1src[f1 + 0] = fsrc[f + half];
+
+            for (int k = 0; k < quarter; k++) {
+                int even = f + 2 * k;
+                int odd = even + 1;
+
+                FalconFPR a_re = fsrc[even];
+                FalconFPR a_im = fsrc[even + half];
+                FalconFPR b_re = fsrc[odd];
+                FalconFPR b_im = fsrc[odd + half];
+
+                FalconFPR[] sum = FPC_ADD(a_re, a_im, b_re, b_im);
+                f0src[f0 + k] = fpre.fpr_half(sum[0]);
+                f0src[f0 + k + quarter] = fpre.fpr_half(sum[1]);
+
+                FalconFPR[] diff = FPC_SUB(a_re, a_im, b_re, b_im);
+                /* multiply by the conjugate of table entry (k + N/2) */
+                int gm = 2 * (k + half);
+                FalconFPR[] rot = FPC_MUL(diff[0], diff[1],
+                    fpre.fpr_gm_tab[gm + 0],
+                    fpre.fpr_neg(fpre.fpr_gm_tab[gm + 1]));
+                f1src[f1 + k] = fpre.fpr_half(rot[0]);
+                f1src[f1 + k + quarter] = fpre.fpr_half(rot[1]);
+            }
+        }
+
+        /*
+        * Merges two half-size FFT-domain arrays f0 and f1 into the
+        * FFT-domain array f (N = 2^logn slots). For each complex slot,
+        * with a = f0 and b = f1 * GM[slot + N/2], the pair of adjacent
+        * complex values of f receives (a + b, a - b).
+        * f0src and f1src are only read.
+        */
+        internal void poly_merge_fft(
+            FalconFPR[] fsrc, int f,
+            FalconFPR[] f0src, int f0, FalconFPR[] f1src, int f1, uint logn)
+        {
+            int half = (1 << (int)logn) >> 1;
+            int quarter = half >> 1;
+
+            /*
+            * For logn = 1 the loop below is skipped; these two copies
+            * handle that case.
+            */
+            fsrc[f + 0] = f0src[f0 + 0];
+            fsrc[f + half] = f1src[f1 + 0];
+
+            for (int k = 0; k < quarter; k++) {
+                FalconFPR a_re = f0src[f0 + k];
+                FalconFPR a_im = f0src[f0 + k + quarter];
+
+                /* b = f1 * table entry (k + N/2) */
+                int gm = 2 * (k + half);
+                FalconFPR[] b = FPC_MUL(f1src[f1 + k], f1src[f1 + k + quarter],
+                    fpre.fpr_gm_tab[gm + 0],
+                    fpre.fpr_gm_tab[gm + 1]);
+
+                int even = f + 2 * k;
+                int odd = even + 1;
+
+                FalconFPR[] sum = FPC_ADD(a_re, a_im, b[0], b[1]);
+                fsrc[even] = sum[0];
+                fsrc[even + half] = sum[1];
+
+                FalconFPR[] diff = FPC_SUB(a_re, a_im, b[0], b[1]);
+                fsrc[odd] = diff[0];
+                fsrc[odd + half] = diff[1];
+            }
+        }
+    }
+}
diff --git a/crypto/src/pqc/crypto/falcon/FalconFPR.cs b/crypto/src/pqc/crypto/falcon/FalconFPR.cs
new file mode 100644
index 000000000..b3f99f944
--- /dev/null
+++ b/crypto/src/pqc/crypto/falcon/FalconFPR.cs
@@ -0,0 +1,13 @@
+using System;
+
+namespace Org.BouncyCastle.Pqc.Crypto.Falcon
+{
+    /// <summary>
+    /// Reference-type holder for a single double-precision value.
+    /// </summary>
+    class FalconFPR
+    {
+        /// <summary>Creates a holder initialised with <paramref name="v"/>.</summary>
+        internal FalconFPR(double v)
+        {
+            this.v = v;
+        }
+
+        /// <summary>The wrapped value.</summary>
+        internal double v;
+    }
+}
diff --git a/crypto/src/pqc/crypto/falcon/FalconKeyGenerationParameters.cs b/crypto/src/pqc/crypto/falcon/FalconKeyGenerationParameters.cs
new file mode 100644
index 000000000..3531a6670
--- /dev/null
+++ b/crypto/src/pqc/crypto/falcon/FalconKeyGenerationParameters.cs
@@ -0,0 +1,22 @@
+using Org.BouncyCastle.Crypto;
+using Org.BouncyCastle.Security;
+
+namespace Org.BouncyCastle.Pqc.Crypto.Falcon
+{
+    /// <summary>
+    /// Key generation parameters for Falcon: a source of randomness plus
+    /// the <see cref="FalconParameters"/> set the keys are generated for.
+    /// </summary>
+    public class FalconKeyGenerationParameters
+        : KeyGenerationParameters
+    {
+        private readonly FalconParameters m_parameters;
+
+        /// <summary>
+        /// Stores <paramref name="parameters"/> and forwards
+        /// <paramref name="random"/> to the base class with a fixed
+        /// strength value of 320.
+        /// </summary>
+        public FalconKeyGenerationParameters(SecureRandom random, FalconParameters parameters)
+            : base(random, 320)
+        {
+            m_parameters = parameters;
+        }
+
+        /// <summary>Returns the parameter set given at construction.</summary>
+        public FalconParameters GetParameters()
+        {
+            return m_parameters;
+        }
+    }
+}
diff --git a/crypto/src/pqc/crypto/falcon/FalconKeyPairGenerator.cs b/crypto/src/pqc/crypto/falcon/FalconKeyPairGenerator.cs
new file mode 100644
index 000000000..018dcd3d2
--- /dev/null
+++ b/crypto/src/pqc/crypto/falcon/FalconKeyPairGenerator.cs
@@ -0,0 +1,55 @@
+using Org.BouncyCastle.Crypto;
+using Org.BouncyCastle.Security;
+
+namespace Org.BouncyCastle.Pqc.Crypto.Falcon
+{
+    // Produces Falcon key pairs; the key material itself comes from FalconNIST.crypto_sign_keypair.
+    public class FalconKeyPairGenerator
+        : IAsymmetricCipherKeyPairGenerator
+    {
+        private FalconKeyGenerationParameters parameters;
+        private SecureRandom random;
+        private FalconNIST nist;
+        private uint logn;
+        private uint noncelen;
+
+        // Buffer lengths in bytes for the encoded keys; set by Init.
+        private int pk_size;
+        private int sk_size;
+
+        // Stores the parameters and derives the key buffer sizes; 'param' is cast to FalconKeyGenerationParameters.
+        public void Init(KeyGenerationParameters param)
+        {
+            FalconKeyGenerationParameters falconParam = (FalconKeyGenerationParameters)param;
+            this.parameters = falconParam;
+            this.random = param.Random;
+            this.logn = falconParam.GetParameters().GetLogN();
+            this.noncelen = falconParam.GetParameters().GetNonceLength();
+            this.nist = new FalconNIST(random, logn, noncelen);
+            // n = 2^logn; sk_coeff_size is 5, 6 or 7 for the listed n and 8 otherwise.
+            int n = 1 << (int)this.logn;
+            int sk_coeff_size;
+            switch (n)
+            {
+                case 1024: sk_coeff_size = 5; break;
+                case 256: case 512: sk_coeff_size = 6; break;
+                case 64: case 128: sk_coeff_size = 7; break;
+                default: sk_coeff_size = 8; break;
+            }
+            this.pk_size = 1 + (14 * n / 8);
+            this.sk_size = 1 + (2 * sk_coeff_size * n / 8) + (n);
+        }
+
+        // Allocates both buffers, has FalconNIST fill them and wraps them as key parameters.
+        public AsymmetricCipherKeyPair GenerateKeyPair()
+        {
+            byte[] pk = new byte[pk_size];
+            byte[] sk = new byte[sk_size];
+            nist.crypto_sign_keypair(pk, 0, sk, 0);
+            FalconParameters p = this.parameters.GetParameters();
+            FalconPrivateKeyParameters privk = new FalconPrivateKeyParameters(p, sk);
+            FalconPublicKeyParameters pubk = new FalconPublicKeyParameters(p, pk);
+            return new AsymmetricCipherKeyPair(pubk, privk);
+        }
+    }
+}
diff --git a/crypto/src/pqc/crypto/falcon/FalconKeyParameters.cs b/crypto/src/pqc/crypto/falcon/FalconKeyParameters.cs
new file mode 100644
index 000000000..87b0eaec7
--- /dev/null
+++ b/crypto/src/pqc/crypto/falcon/FalconKeyParameters.cs
@@ -0,0 +1,22 @@
+using Org.BouncyCastle.Crypto;
+
+
+namespace Org.BouncyCastle.Pqc.Crypto.Falcon
+{
+    // An AsymmetricKeyParameter that additionally carries a FalconParameters instance.
+    public class FalconKeyParameters
+        : AsymmetricKeyParameter
+    {
+        private readonly FalconParameters falconParameters;
+
+        // 'isprivate' is forwarded unchanged to the AsymmetricKeyParameter constructor;
+        // 'parameters' is retained and exposed through GetParameters().
+        public FalconKeyParameters(bool isprivate, FalconParameters parameters)
+            : base(isprivate)
+        {
+            falconParameters = parameters;
+        }
+        // Returns the parameter set this key was created with.
+        public FalconParameters GetParameters() { return falconParameters; }
+    }
+}
diff --git a/crypto/src/pqc/crypto/falcon/FalconKeygen.cs b/crypto/src/pqc/crypto/falcon/FalconKeygen.cs
new file mode 100644
index 000000000..7fe83056a
--- /dev/null
+++ b/crypto/src/pqc/crypto/falcon/FalconKeygen.cs
@@ -0,0 +1,3673 @@
+using System;
+
+namespace Org.BouncyCastle.Pqc.Crypto.Falcon
+{
+    class FalconKeygen
+    {
+        FPREngine fpre;
+        FalconFFT ffte;
+        FalconSmallPrime[] PRIMES;
+        FalconCodec codec;
+        FalconVrfy vrfy;
+        internal FalconKeygen() {
+            fpre = new FPREngine();
+            PRIMES = new FalconSmallPrimes().PRIMES; // table exposed by FalconSmallPrimes
+            ffte = new FalconFFT(fpre);              // FFT helper receives the FPREngine created above
+            codec = new FalconCodec();
+            vrfy = new FalconVrfy();
+        }
+        internal FalconKeygen(FalconCodec codec, FalconVrfy vrfy) {
+            fpre = new FPREngine();
+            PRIMES = new FalconSmallPrimes().PRIMES;
+            ffte = new FalconFFT(); // NOTE(review): not given fpre, unlike the parameterless constructor - confirm
+            this.codec = codec;     // caller-supplied helpers are stored as-is
+            this.vrfy = vrfy;
+        }
+
+        /* 
+        * License from the reference C code (the code was copied then modified
+        * to function in C#):
+        * ==========================(LICENSE BEGIN)============================
+        *
+        * Copyright (c) 2017-2019  Falcon Project
+        *
+        * Permission is hereby granted, free of charge, to any person obtaining
+        * a copy of this software and associated documentation files (the
+        * "Software"), to deal in the Software without restriction, including
+        * without limitation the rights to use, copy, modify, merge, publish,
+        * distribute, sublicense, and/or sell copies of the Software, and to
+        * permit persons to whom the Software is furnished to do so, subject to
+        * the following conditions:
+        *
+        * The above copyright notice and this permission notice shall be
+        * included in all copies or substantial portions of the Software.
+        *
+        * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+        * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+        * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+        * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+        * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+        * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+        * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+        *
+        * ===========================(LICENSE END)=============================
+        */
+
+        /*
+        * Reduce a small signed integer modulo a small prime. The source
+        * value x MUST be such that -p < x < p.
+        */
+        uint modp_set(int x, uint p)
+        {
+            // Same 32 bits viewed as unsigned: bit 31 is set exactly when x < 0.
+            uint r = (uint)x;
+            // Branch-free fix-up: p is added only when that sign bit is set.
+            r += (uint)(p & -(r >> 31));
+            return r;
+        }
+
+        /*
+        * Normalize a modular integer around 0.
+        */
+        int modp_norm(uint x, uint p) {
+            uint mask = ((x - ((p + 1) >> 1)) >> 31) - 1; // all-ones iff x >= (p + 1) / 2, for x in [0, p)
+            return (int)(x - (p & mask));                 // subtract p only in that case
+        }
+
+        /*
+        * Compute -1/p mod 2^31. This works for all odd integers p that fit
+        * on 31 bits.
+        */
+        uint modp_ninv31(uint p)
+        {
+            // Newton iteration for 1/p modulo 2^32: 2 - p is correct on the two low
+            // bits for odd p, and every step doubles the number of correct bits.
+            uint inv = 2 - p;
+            for (int step = 0; step < 4; step ++) {
+                inv *= 2 - p * inv;
+            }
+            // Negate and keep the low 31 bits: -1/p mod 2^31.
+            return (uint)(0x7FFFFFFF & -inv);
+        }
+
+        /*
+        * Compute R = 2^31 mod p.
+        */
+        uint modp_R(uint p)
+        {
+            /*
+            * Relies on 2^30 < p < 2^31: 2^31 then lies strictly between p
+            * and 2p, so a single subtraction of p reduces it completely.
+            */
+            return 0x80000000U - p;
+        }
+
+        /*
+        * Addition modulo p.
+        */
+        uint modp_add(uint a, uint b, uint p)
+        {
+            // For a, b in [0, p), a + b - p wraps (bit 31 set) exactly when a + b < p.
+            uint sum = a + b - p;
+            // Add p back in that case only, without branching on the operands.
+            sum += (uint)(p & -(sum >> 31));
+            return sum;
+        }
+
+        /*
+        * Subtraction modulo p.
+        */
+        uint modp_sub(uint a, uint b, uint p)
+        {
+            // For a, b in [0, p) the difference wraps (bit 31 set) exactly when a < b.
+            uint diff = a - b;
+            // Add p back in that case only, without branching on the operands.
+            diff += (uint)(p & -(diff >> 31));
+            return diff;
+        }
+
+        /*
+        * Halving modulo p.
+        */
+        /* unused
+        uint modp_half(uint a, uint p)
+        {
+            a += p & -(a & 1);
+            return a >> 1;
+        }
+        */
+
+        /*
+        * Montgomery multiplication modulo p. The 'p0i' value is -1/p mod 2^31.
+        * It is required that p is an odd integer.
+        */
+        uint modp_montymul(uint a, uint b, uint p, uint p0i)
+        {
+            // Full 64-bit product of the two operands.
+            ulong prod = (ulong)a * (ulong)b;
+            // Multiple of p chosen so that the low 31 bits of (prod + fix) are zero.
+            ulong fix = ((prod * p0i) & (ulong)0x7FFFFFFF) * p;
+            // Exact division by 2^31, followed by one conditional subtraction of p.
+            uint res = (uint)((prod + fix) >> 31) - p;
+            res += (uint)(p & -(res >> 31));
+            return res;
+        }
+
+        /*
+        * Compute R2 = 2^62 mod p.
+        */
+        uint modp_R2(uint p, uint p0i)
+        {
+            uint acc;
+            int round;
+
+            /*
+            * Start from R = 2^31 mod p, the Montgomery representation of 1,
+            * and add it to itself: acc is now 2 in Montgomery representation.
+            */
+            acc = modp_R(p);
+            acc = modp_add(acc, acc, p);
+
+            /*
+            * Five Montgomery squarings raise the represented value to the
+            * power 32: 2^32 in Montgomery representation, i.e. 2^63 mod p.
+            */
+            for (round = 0; round < 5; round ++) {
+                acc = modp_montymul(acc, acc, p, p0i);
+            }
+
+            /*
+            * Halve modulo p (p is added first when acc is odd, so the
+            * shift is exact): this gives 2^62 mod p.
+            */
+            acc = (uint)((acc + (p & -(acc & 1))) >> 1);
+            return acc;
+        }
+
+        /*
+        * Compute 2^(31*x) modulo p. This works for integers x up to 2^11.
+        * p must be prime such that 2^30 < p < 2^31; p0i must be equal to
+        * -1/p mod 2^31; R2 must be equal to 2^62 mod p.
+        */
+        uint modp_Rx(uint x, uint p, uint p0i, uint R2)
+        {
+            /*
+            * Square-and-multiply on e = x - 1, least significant bit first.
+            * 'sq' starts at R2 (2^31 in Montgomery representation) and is
+            * squared every round; 'acc' starts at R = 2^31 mod p and absorbs
+            * 'sq' for each set bit, ending as 2^(31*x) mod p. In C# shift
+            * counts wrap at 32, so x = 0 (e = 2^32 - 1) and e >= 2^31 would
+            * never leave the loop: x = 0 returns 2^0 = 1 directly and the
+            * bit index is capped at 31.
+            */
+            if (x == 0) { return 1; }
+            uint e = x - 1, sq = R2, acc = modp_R(p);
+            for (int bit = 0; bit < 32 && (1U << bit) <= e; bit ++) {
+                if ((e & (1U << bit)) != 0) {
+                    acc = modp_montymul(acc, sq, p, p0i);
+                }
+                sq = modp_montymul(sq, sq, p, p0i);
+            }
+            return acc;
+        }
+
+        /*
+        * Division modulo p. If the divisor (b) is 0, then 0 is returned.
+        * This function computes proper results only when p is prime.
+        * Parameters:
+        *   a     dividend
+        *   b     divisor
+        *   p     odd prime modulus
+        *   p0i   -1/p mod 2^31
+        *   R     2^31 mod p
+        */
+        uint modp_div(uint a, uint b, uint p, uint p0i, uint R)
+        {
+            /*
+            * Inversion through Fermat's little theorem: b^(p-2) = 1/b mod p
+            * for prime p. The exponent is scanned from bit 30 down to bit 0
+            * with a square-and-multiply ladder. The product with b is
+            * computed on every round and then kept or dropped with a mask
+            * derived from the exponent bit, so no branch depends on it.
+            * A zero divisor stays zero through every round, giving 0.
+            */
+            uint exp = p - 2;
+            uint acc = R;
+
+            for (int bit = 30; bit >= 0; bit --) {
+                acc = modp_montymul(acc, acc, p, p0i);
+                uint withB = modp_montymul(acc, b, p, p0i);
+                acc ^= (uint)((acc ^ withB) & -(uint)((exp >> bit) & 1));
+            }
+
+            /*
+            * The ladder treats b as if it were in Montgomery form (b*R).
+            * As b was given in normal form, acc now holds R^2/b rather
+            * than R/b. A Montgomery multiplication by 1 removes one
+            * factor R, and the final multiplication by a removes the
+            * other, leaving a/b mod p.
+            */
+            acc = modp_montymul(acc, 1, p, p0i);
+            return modp_montymul(a, acc, p, p0i);
+        }
+
+        /*
+        * Bit-reversal index table.
+        */
+        ushort[] REV10 = {
+            0,  512,  256,  768,  128,  640,  384,  896,   64,  576,  320,  832,
+            192,  704,  448,  960,   32,  544,  288,  800,  160,  672,  416,  928,
+            96,  608,  352,  864,  224,  736,  480,  992,   16,  528,  272,  784,
+            144,  656,  400,  912,   80,  592,  336,  848,  208,  720,  464,  976,
+            48,  560,  304,  816,  176,  688,  432,  944,  112,  624,  368,  880,
+            240,  752,  496, 1008,    8,  520,  264,  776,  136,  648,  392,  904,
+            72,  584,  328,  840,  200,  712,  456,  968,   40,  552,  296,  808,
+            168,  680,  424,  936,  104,  616,  360,  872,  232,  744,  488, 1000,
+            24,  536,  280,  792,  152,  664,  408,  920,   88,  600,  344,  856,
+            216,  728,  472,  984,   56,  568,  312,  824,  184,  696,  440,  952,
+            120,  632,  376,  888,  248,  760,  504, 1016,    4,  516,  260,  772,
+            132,  644,  388,  900,   68,  580,  324,  836,  196,  708,  452,  964,
+            36,  548,  292,  804,  164,  676,  420,  932,  100,  612,  356,  868,
+            228,  740,  484,  996,   20,  532,  276,  788,  148,  660,  404,  916,
+            84,  596,  340,  852,  212,  724,  468,  980,   52,  564,  308,  820,
+            180,  692,  436,  948,  116,  628,  372,  884,  244,  756,  500, 1012,
+            12,  524,  268,  780,  140,  652,  396,  908,   76,  588,  332,  844,
+            204,  716,  460,  972,   44,  556,  300,  812,  172,  684,  428,  940,
+            108,  620,  364,  876,  236,  748,  492, 1004,   28,  540,  284,  796,
+            156,  668,  412,  924,   92,  604,  348,  860,  220,  732,  476,  988,
+            60,  572,  316,  828,  188,  700,  444,  956,  124,  636,  380,  892,
+            252,  764,  508, 1020,    2,  514,  258,  770,  130,  642,  386,  898,
+            66,  578,  322,  834,  194,  706,  450,  962,   34,  546,  290,  802,
+            162,  674,  418,  930,   98,  610,  354,  866,  226,  738,  482,  994,
+            18,  530,  274,  786,  146,  658,  402,  914,   82,  594,  338,  850,
+            210,  722,  466,  978,   50,  562,  306,  818,  178,  690,  434,  946,
+            114,  626,  370,  882,  242,  754,  498, 1010,   10,  522,  266,  778,
+            138,  650,  394,  906,   74,  586,  330,  842,  202,  714,  458,  970,
+            42,  554,  298,  810,  170,  682,  426,  938,  106,  618,  362,  874,
+            234,  746,  490, 1002,   26,  538,  282,  794,  154,  666,  410,  922,
+            90,  602,  346,  858,  218,  730,  474,  986,   58,  570,  314,  826,
+            186,  698,  442,  954,  122,  634,  378,  890,  250,  762,  506, 1018,
+            6,  518,  262,  774,  134,  646,  390,  902,   70,  582,  326,  838,
+            198,  710,  454,  966,   38,  550,  294,  806,  166,  678,  422,  934,
+            102,  614,  358,  870,  230,  742,  486,  998,   22,  534,  278,  790,
+            150,  662,  406,  918,   86,  598,  342,  854,  214,  726,  470,  982,
+            54,  566,  310,  822,  182,  694,  438,  950,  118,  630,  374,  886,
+            246,  758,  502, 1014,   14,  526,  270,  782,  142,  654,  398,  910,
+            78,  590,  334,  846,  206,  718,  462,  974,   46,  558,  302,  814,
+            174,  686,  430,  942,  110,  622,  366,  878,  238,  750,  494, 1006,
+            30,  542,  286,  798,  158,  670,  414,  926,   94,  606,  350,  862,
+            222,  734,  478,  990,   62,  574,  318,  830,  190,  702,  446,  958,
+            126,  638,  382,  894,  254,  766,  510, 1022,    1,  513,  257,  769,
+            129,  641,  385,  897,   65,  577,  321,  833,  193,  705,  449,  961,
+            33,  545,  289,  801,  161,  673,  417,  929,   97,  609,  353,  865,
+            225,  737,  481,  993,   17,  529,  273,  785,  145,  657,  401,  913,
+            81,  593,  337,  849,  209,  721,  465,  977,   49,  561,  305,  817,
+            177,  689,  433,  945,  113,  625,  369,  881,  241,  753,  497, 1009,
+            9,  521,  265,  777,  137,  649,  393,  905,   73,  585,  329,  841,
+            201,  713,  457,  969,   41,  553,  297,  809,  169,  681,  425,  937,
+            105,  617,  361,  873,  233,  745,  489, 1001,   25,  537,  281,  793,
+            153,  665,  409,  921,   89,  601,  345,  857,  217,  729,  473,  985,
+            57,  569,  313,  825,  185,  697,  441,  953,  121,  633,  377,  889,
+            249,  761,  505, 1017,    5,  517,  261,  773,  133,  645,  389,  901,
+            69,  581,  325,  837,  197,  709,  453,  965,   37,  549,  293,  805,
+            165,  677,  421,  933,  101,  613,  357,  869,  229,  741,  485,  997,
+            21,  533,  277,  789,  149,  661,  405,  917,   85,  597,  341,  853,
+            213,  725,  469,  981,   53,  565,  309,  821,  181,  693,  437,  949,
+            117,  629,  373,  885,  245,  757,  501, 1013,   13,  525,  269,  781,
+            141,  653,  397,  909,   77,  589,  333,  845,  205,  717,  461,  973,
+            45,  557,  301,  813,  173,  685,  429,  941,  109,  621,  365,  877,
+            237,  749,  493, 1005,   29,  541,  285,  797,  157,  669,  413,  925,
+            93,  605,  349,  861,  221,  733,  477,  989,   61,  573,  317,  829,
+            189,  701,  445,  957,  125,  637,  381,  893,  253,  765,  509, 1021,
+            3,  515,  259,  771,  131,  643,  387,  899,   67,  579,  323,  835,
+            195,  707,  451,  963,   35,  547,  291,  803,  163,  675,  419,  931,
+            99,  611,  355,  867,  227,  739,  483,  995,   19,  531,  275,  787,
+            147,  659,  403,  915,   83,  595,  339,  851,  211,  723,  467,  979,
+            51,  563,  307,  819,  179,  691,  435,  947,  115,  627,  371,  883,
+            243,  755,  499, 1011,   11,  523,  267,  779,  139,  651,  395,  907,
+            75,  587,  331,  843,  203,  715,  459,  971,   43,  555,  299,  811,
+            171,  683,  427,  939,  107,  619,  363,  875,  235,  747,  491, 1003,
+            27,  539,  283,  795,  155,  667,  411,  923,   91,  603,  347,  859,
+            219,  731,  475,  987,   59,  571,  315,  827,  187,  699,  443,  955,
+            123,  635,  379,  891,  251,  763,  507, 1019,    7,  519,  263,  775,
+            135,  647,  391,  903,   71,  583,  327,  839,  199,  711,  455,  967,
+            39,  551,  295,  807,  167,  679,  423,  935,  103,  615,  359,  871,
+            231,  743,  487,  999,   23,  535,  279,  791,  151,  663,  407,  919,
+            87,  599,  343,  855,  215,  727,  471,  983,   55,  567,  311,  823,
+            183,  695,  439,  951,  119,  631,  375,  887,  247,  759,  503, 1015,
+            15,  527,  271,  783,  143,  655,  399,  911,   79,  591,  335,  847,
+            207,  719,  463,  975,   47,  559,  303,  815,  175,  687,  431,  943,
+            111,  623,  367,  879,  239,  751,  495, 1007,   31,  543,  287,  799,
+            159,  671,  415,  927,   95,  607,  351,  863,  223,  735,  479,  991,
+            63,  575,  319,  831,  191,  703,  447,  959,  127,  639,  383,  895,
+            255,  767,  511, 1023
+        };
+
+        /*
+        * Compute the roots for NTT and inverse NTT (binary case). Input
+        * parameter g is a primitive 2048-th root of 1 modulo p (i.e. g^1024 =
+        * -1 mod p). This fills gm[] and igm[] with powers of g and 1/g:
+        *   gm[rev(i)] = g^i mod p
+        *   igm[rev(i)] = (1/g)^i mod p
+        * where rev() is the "bit reversal" function over 10 bits. It fills
+        * the arrays only up to N = 2^logn values.
+        *
+        * The values stored in gm[] and igm[] are in Montgomery representation.
+        *
+        * p must be a prime such that p = 1 mod 2048.
+        */
+        void modp_mkgm2(uint[] gmsrc, int gm, uint[] igmsrc, int igm, uint logn,
+            uint g, uint p, uint p0i)
+        {
+            int count = (int)1 << (int)logn;
+
+            /*
+            * Bring g into Montgomery representation, then square it
+            * (10 - logn) times: the supplied generator has order 2048,
+            * whereas a root of order 2N is required.
+            */
+            uint R2 = modp_R2(p, p0i);
+            g = modp_montymul(g, R2, p, p0i);
+            for (uint sq = logn; sq < 10; sq ++) {
+                g = modp_montymul(g, g, p, p0i);
+            }
+
+            /*
+            * Dividing R2 by the Montgomery-form g yields 1/g, itself in
+            * Montgomery form. Successive powers of g and 1/g (starting
+            * from 1, i.e. R) are then written at bit-reversed positions.
+            */
+            uint ginv = modp_div(R2, g, p, p0i, modp_R(p));
+            int shift = (int)(10 - logn);
+            uint pw = modp_R(p), ipw = pw;
+            for (int idx = 0; idx < count; idx ++) {
+                int slot = REV10[idx << shift];
+                gmsrc[gm + slot] = pw;
+                igmsrc[igm + slot] = ipw;
+                pw = modp_montymul(pw, g, p, p0i);
+                ipw = modp_montymul(ipw, ginv, p, p0i);
+            }
+        }
+
+        /*
+        * Compute the NTT over a polynomial (binary case). Polynomial elements
+        * are a[0], a[stride], a[2 * stride]...
+        */
+        void modp_NTT2_ext(uint[] asrc, int a, int stride, uint[] gmsrc, int gm, uint logn,
+            uint p, uint p0i)
+        {
+            /*
+            * In-place transform of the n = 2^logn values located at
+            * asrc[a], asrc[a + stride], ... The roots in gmsrc[gm..] are
+            * applied with modp_montymul(), i.e. they are expected in
+            * Montgomery representation. Nothing to do for n = 1.
+            */
+            if (logn == 0) {
+                return;
+            }
+
+            int size = (int)1 << (int)logn;
+            int span = size;
+            // Each pass doubles the number of groups and halves their width.
+            for (int groups = 1; groups < size; groups <<= 1) {
+                int half = span >> 1;
+
+                for (int grp = 0, start = 0; grp < groups; grp ++, start += span) {
+                    // One root per group, read from gm[groups + grp].
+                    uint tw = gmsrc[gm + groups + grp];
+                    int lo = a + start * stride;
+                    int hi = lo + half * stride;
+                    for (int j = 0; j < half; j ++, lo += stride, hi += stride) {
+                        // Butterfly: (x, y) -> (x + tw*y, x - tw*y) mod p.
+                        uint x = asrc[lo];
+                        uint y = modp_montymul(asrc[hi], tw, p, p0i);
+                        asrc[lo] = modp_add(x, y, p);
+                        asrc[hi] = modp_sub(x, y, p);
+                    }
+                }
+                span = half;
+            }
+        }
+
+        /*
+        * Compute the inverse NTT over a polynomial (binary case).
+        */
+        void modp_iNTT2_ext(uint[] asrc, int a, int stride, uint[] igmsrc, int igm, uint logn,
+            uint p, uint p0i)
+        {
+            /*
+            * In-place inverse of the transform computed by modp_NTT2_ext(),
+            * over the n = 2^logn values at asrc[a], asrc[a + stride], ...
+            * The inverse roots in igmsrc[igm..] are applied with
+            * modp_montymul(), i.e. expected in Montgomery representation.
+            */
+            if (logn == 0) {
+                return;
+            }
+
+            int size = (int)1 << (int)logn;
+            int width = 1;
+
+            // Mirror image of the forward passes: the group count halves
+            // while the distance between butterfly partners doubles.
+            for (int groups = size; groups > 1; groups >>= 1) {
+                int halfGroups = groups >> 1;
+                int span = width << 1;
+
+                for (int grp = 0, start = 0; grp < halfGroups; grp ++, start += span) {
+                    // One inverse root per group, read from igm[halfGroups + grp].
+                    uint tw = igmsrc[igm + halfGroups + grp];
+                    int lo = a + start * stride;
+                    int hi = lo + width * stride;
+                    for (int j = 0; j < width; j ++, lo += stride, hi += stride) {
+                        // Inverse butterfly: (x, y) -> (x + y, tw*(x - y)) mod p.
+                        uint x = asrc[lo];
+                        uint y = asrc[hi];
+                        asrc[lo] = modp_add(x, y, p);
+                        asrc[hi] = modp_montymul(
+                            modp_sub(x, y, p), tw, p, p0i);
+                    }
+                }
+                width = span;
+            }
+
+            /*
+            * We need 1/n in Montgomery representation, i.e. R/n. Since
+            * 1 <= logn <= 10, R/n is an integer; moreover, R/n <= 2^30 < p,
+            * thus a simple shift will do.
+            */
+            uint ninv = (uint)1 << (int)(31 - logn);
+            for (int k = 0, pos = a; k < size; k ++, pos += stride) {
+                asrc[pos] = modp_montymul(asrc[pos], ninv, p, p0i);
+            }
+        }
+
+        /*
+        * Simplified wrappers for NTT and iNTT (binary case) when the elements
+        * are consecutive in RAM.
+        */
+        void modp_NTT2(uint[] asrc, int a, uint[] gmsrc, int gm, uint logn, uint p, uint p0i) {
+            modp_NTT2_ext(asrc, a, 1, gmsrc, gm, logn, p, p0i); // stride 1: elements are adjacent in asrc
+        }
+        void modp_iNTT2(uint[] asrc, int a, uint[] igmsrc, int igm, uint logn, uint p, uint p0i) {
+            modp_iNTT2_ext(asrc, a, 1, igmsrc, igm, logn, p, p0i); // stride 1: elements are adjacent in asrc
+        }
+
+        /*
+        * Given polynomial f in NTT representation modulo p, compute f' of degree
+        * less than N/2 such that f' = f0^2 - X*f1^2, where f0 and f1 are
+        * polynomials of degree less than N/2 such that f = f0(X^2) + X*f1(X^2).
+        *
+        * The new polynomial is written "in place" over the first N/2 elements
+        * of f.
+        *
+        * If applied logn times successively on a given polynomial, the resulting
+        * degree-0 polynomial is the resultant of f and X^N+1 modulo p.
+        *
+        * This function applies only to the binary case; it is invoked from
+        * solve_NTRU_binary_depth1().
+        */
+        void modp_poly_rec_res(uint[] fsrc, int f, uint logn,
+            uint p, uint p0i, uint R2)
+        {
+            int half = (int)1 << (int)(logn - 1);
+            for (int i = 0; i < half; i ++) {
+                // Adjacent slots 2i and 2i+1 are multiplied together into slot i.
+                uint even = fsrc[f + (i << 1) + 0];
+                uint odd = fsrc[f + (i << 1) + 1];
+                // The extra multiplication by R2 cancels the two divisions by R
+                // performed by the pair of Montgomery multiplications.
+                uint prod = modp_montymul(even, odd, p, p0i);
+                fsrc[f + i] = modp_montymul(prod, R2, p, p0i);
+            }
+        }
+
+        /* ==================================================================== */
+        /*
+        * Custom bignum implementation.
+        *
+        * This is a very reduced set of functionalities. We need to do the
+        * following operations:
+        *
+        *  - Rebuild the resultant and the polynomial coefficients from their
+        *    values modulo small primes (of length 31 bits each).
+        *
+        *  - Compute an extended GCD between the two computed resultants.
+        *
+        *  - Extract top bits and add scaled values during the successive steps
+        *    of Babai rounding.
+        *
+        * When rebuilding values using CRT, we must also recompute the product
+        * of the small prime factors. We always do it one small factor at a
+        * time, so the "complicated" operations can be done modulo the small
+        * prime with the modp_* functions. CRT coefficients (inverses) are
+        * precomputed.
+        *
+        * All values are positive until the last step: when the polynomial
+        * coefficients have been rebuilt, we normalize them around 0. But then,
+        * only additions and subtractions on the upper few bits are needed
+        * afterwards.
+        *
+        * We keep big integers as arrays of 31-bit words (in uint values);
+        * the top bit of each uint is kept equal to 0. Using 31-bit words
+        * makes it easier to keep track of carries. When negative values are
+        * used, two's complement is used.
+        */
+
+        /*
+        * Subtract integer b from integer a. Both integers are supposed to have
+        * the same size. The carry (0 or 1) is returned. Source arrays a and b
+        * MUST be distinct.
+        *
+        * The operation is performed as described above if ctl = 1. If
+        * ctl = 0, the value a[] is unmodified, but all memory accesses are
+        * still performed, and the carry is computed and returned.
+        */
+        uint zint_sub(uint[] asrc, int a, uint[] bsrc, int b, int len,
+            uint ctl)
+        {
+            uint borrow = 0;
+            // All-ones when ctl = 1, zero when ctl = 0; selects whether the
+            // computed difference replaces the stored word.
+            uint mask = (uint)(-ctl);
+            for (int i = 0; i < len; i ++) {
+                uint cur = asrc[a + i];
+                // 31-bit words: a borrow shows up as bit 31 of the raw difference.
+                uint diff = cur - bsrc[b + i] - borrow;
+                borrow = diff >> 31;
+                // Masked select; the word is written back in both cases so
+                // the memory accesses are the same whatever ctl is.
+                cur ^= ((diff & 0x7FFFFFFF) ^ cur) & mask;
+                asrc[a + i] = cur;
+            }
+            return borrow;
+        }
+
+        /*
+        * Multiply the provided big integer m with a small value x.
+        * This function assumes that x < 2^31. The carry word is returned.
+        */
+        uint zint_mul_small(uint[] msrc, int m, int mlen, uint x)
+        {
+            /*
+            * Schoolbook multiplication by a single word, in place, over
+            * 31-bit words; the carry out of the top word is returned.
+            */
+            uint carry = 0;
+            for (int i = 0; i < mlen; i ++) {
+                // 64-bit intermediate: word * x plus the incoming carry.
+                ulong wide = (ulong)msrc[m + i] * (ulong)x + carry;
+                msrc[m + i] = (uint)wide & 0x7FFFFFFF;
+                carry = (uint)(wide >> 31);
+            }
+            return carry;
+        }
+
+        /*
+        * Reduce a big integer d modulo a small integer p.
+        * Rules:
+        *  d is unsigned
+        *  p is prime
+        *  2^30 < p < 2^31
+        *  p0i = -(1/p) mod 2^31
+        *  R2 = 2^62 mod p
+        */
+        uint zint_mod_small_uint(uint[] dsrc, int d, int dlen,
+            uint p, uint p0i, uint R2)
+        {
+            /*
+            * Horner evaluation in base 2^31, most significant word first.
+            * Multiplying the accumulator by 2^31 is done with a
+            * Montgomery multiplication by R2 = 2^62 mod p (the implicit
+            * division by 2^31 leaves exactly one factor 2^31).
+            */
+            uint acc = 0;
+            int i = dlen;
+
+            while (i -- > 0) {
+                acc = modp_montymul(acc, R2, p, p0i);
+
+                // Words hold 31 bits and p > 2^30, so word < 2p: one
+                // conditional subtraction of p reduces it fully.
+                uint word = dsrc[d + i] - p;
+                word += (uint)(p & -(word >> 31));
+
+                acc = modp_add(acc, word, p);
+            }
+            return acc;
+        }
+
+        /*
+        * Similar to zint_mod_small_uint(), except that d may be signed.
+        * Extra parameter is Rx = 2^(31*dlen) mod p.
+        */
+        uint zint_mod_small_signed(uint[] dsrc, int d, int dlen,
+            uint p, uint p0i, uint R2, uint Rx)
+        {
+            if (dlen == 0) {
+                return 0;
+            }
+            // Reduce the words as if unsigned, then subtract Rx = 2^(31*dlen) mod p
+            // when bit 30 of the top word (the two's-complement sign) is set.
+            uint unsignedRes = zint_mod_small_uint(dsrc, d, dlen, p, p0i, R2);
+            uint sign = dsrc[d + dlen - 1] >> 30;
+            return modp_sub(unsignedRes, (uint)(Rx & -sign), p);
+        }
+
+        /*
+        * Add y*s to x. x and y initially have length 'len' words; the new x
+        * has length 'len+1' words. 's' must fit on 31 bits. x[] and y[] must
+        * not overlap.
+        */
+        void zint_add_mul_small(uint[] xsrc, int x,
+            uint[] ysrc, int y, int len, uint s)
+        {
+            uint carry = 0;
+            for (int i = 0; i < len; i ++) {
+                uint xi = xsrc[x + i];
+                uint yi = ysrc[y + i];
+
+                // 64-bit multiply-accumulate; the low 31 bits stay in the
+                // word and the remainder is carried into the next one.
+                ulong wide = (ulong)yi * (ulong)s + (ulong)xi + (ulong)carry;
+                xsrc[x + i] = (uint)wide & 0x7FFFFFFF;
+                carry = (uint)(wide >> 31);
+            }
+
+            // The last carry becomes word 'len' of x, which is why the
+            // destination needs room for len + 1 words.
+            xsrc[x + len] = carry;
+        }
+
+        /*
+        * Normalize a modular integer around 0: if x > p/2, then x is replaced
+        * with x - p (signed encoding with two's complement); otherwise, x is
+        * untouched. The two integers x and p are encoded over the same length.
+        */
+        void zint_norm_zero(uint[] xsrc, int x, uint[] psrc, int p, int len)
+        {
+            int u;
+            uint r, bb;
+
+            /*
+            * Compare x with p/2. We use the shifted version of p, and p
+            * is odd, so we really compare with (p-1)/2; we want to perform
+            * the subtraction if and only if x > (p-1)/2.
+            */
+            r = 0;   // comparison result so far; fixed by the first (highest) differing word
+            bb = 0;  // bit shifted out of the previously visited (higher) word of p
+            u = len; // words are visited from most to least significant
+            while (u -- > 0) {
+                uint wx, wp, cc;
+
+                /*
+                * Get the two words to compare in wx and wp (both over
+                * 31 bits exactly).
+                */
+                wx = xsrc[x+u];
+                wp = (psrc[p+u] >> 1) | (bb << 30); // word u of (p >> 1); bb supplies its top bit
+                bb = psrc[p+u] & 1;                 // refreshed only after bb was used above
+
+                /*
+                * We set cc to -1, 0 or 1, depending on whether wp is
+                * lower than, equal to, or greater than wx.
+                */
+                cc = wp - wx;
+                cc = (uint)(((uint)(-cc) >> 31) | (uint)-(cc >> 31));
+
+                /*
+                * If r != 0 then it is either 1 or -1, and we keep its
+                * value. Otherwise, if r = 0, then we replace it with cc.
+                */
+                r |= cc & ((r & 1) - 1); // (r & 1) - 1 is all-ones only while r is still 0
+            }
+
+            /*
+            * At this point, r = -1, 0 or 1, depending on whether (p-1)/2
+            * is lower than, equal to, or greater than x. We thus want to
+            * do the subtraction only if r = -1.
+            */
+            zint_sub(xsrc, x, psrc, p, len, r >> 31); // r >> 31 is 1 only for r = -1 (0xFFFFFFFF)
+        }
+
+        /*
+        * Rebuild integers from their RNS representation. There are 'num'
+        * integers, and each consists of 'xlen' words. 'xx' points at the
+        * first word of the first integer; subsequent integers are accessed
+        * by adding 'xstride' repeatedly.
+        *
+        * The words of an integer are the RNS representation of that integer,
+        * using the provided 'primes' are moduli. This function replaces
+        * each integer with its multi-word value (little-endian order).
+        *
+        * If "normalize_signed" is non-zero, then the returned value is
+        * normalized to the -m/2..m/2 interval (where m is the product of all
+        * small prime moduli); two's complement is used for negative values.
+        */
+        void zint_rebuild_CRT(uint[] xxsrc, int xx, int xlen, int xstride,
+            int num, FalconSmallPrime[] primes, int normalize_signed,
+            uint[] tmpsrc, int tmp)
+        {
+            /*
+            * tmp[] accumulates the product of the moduli handled so far;
+            * it starts out as the first prime alone.
+            */
+            tmpsrc[tmp] = primes[0].p;
+
+            for (int level = 1; level < xlen; level++) {
+                /*
+                * Invariant on entry: the low 'level' words of every integer
+                * are already rebuilt, and the low 'level' words of tmp[]
+                * hold q, the product of the primes used so far.
+                */
+                uint modulus = primes[level].p;
+                uint coeff = primes[level].s;
+                uint negInv = modp_ninv31(modulus);
+                uint montR2 = modp_R2(modulus, negInv);
+
+                int cursor = xx;
+                for (int idx = 0; idx < num; idx++) {
+                    /*
+                    * residueP: the integer modulo the current prime.
+                    * residueQ: (integer mod q) reduced modulo that prime.
+                    */
+                    uint residueP = xxsrc[cursor + level];
+                    uint residueQ = zint_mod_small_uint(
+                        xxsrc, cursor, level, modulus, negInv, montR2);
+
+                    /*
+                    * Lift: value <- (value mod q) + q * (s * (xp - xq) mod p).
+                    */
+                    uint delta = modp_sub(residueP, residueQ, modulus);
+                    uint factor = modp_montymul(coeff, delta, modulus, negInv);
+                    zint_add_mul_small(xxsrc, cursor, tmpsrc, tmp, level, factor);
+
+                    cursor += xstride;
+                }
+
+                /*
+                * Fold the current prime into the running product; the
+                * word returned by zint_mul_small() becomes its new top word.
+                */
+                tmpsrc[tmp + level] = zint_mul_small(tmpsrc, tmp, level, modulus);
+            }
+
+            if (normalize_signed == 0) {
+                return;
+            }
+
+            /*
+            * Signed output requested: normalize every value around zero,
+            * using the full product of moduli now held in tmp[].
+            */
+            int pos = xx;
+            for (int idx = 0; idx < num; idx++) {
+                zint_norm_zero(xxsrc, pos, tmpsrc, tmp, xlen);
+                pos += xstride;
+            }
+        }
+
+        /*
+        * Negate a big integer conditionally: value a is replaced with -a if
+        * and only if ctl = 1. Control value ctl must be 0 or 1.
+        */
+        void zint_negate(uint[] asrc, int a, int len, uint ctl)
+        {
+            /*
+            * Two's complement negation over 31-bit words: invert every
+            * word and add 1. With ctl = 0 the inversion mask and the
+            * initial carry are both zero, so the value is left untouched.
+            */
+            uint flip = (uint)-ctl >> 1;    /* 0x7FFFFFFF if ctl = 1, else 0 */
+            uint carry = ctl;
+
+            for (int idx = 0; idx < len; idx++) {
+                int pos = a + idx;
+                uint word = (asrc[pos] ^ flip) + carry;
+
+                /* Bit 31 is the carry into the next word. */
+                carry = word >> 31;
+                asrc[pos] = word & 0x7FFFFFFF;
+            }
+        }
+
+        /*
+        * Replace a with (a*xa+b*xb)/(2^31) and b with (a*ya+b*yb)/(2^31).
+        * The low bits are dropped (the caller should compute the coefficients
+        * such that these dropped bits are all zeros). If either or both
+        * yields a negative value, then the value is negated.
+        *
+        * Returned value is:
+        *  0  both values were positive
+        *  1  new a had to be negated
+        *  2  new b had to be negated
+        *  3  both new a and new b had to be negated
+        *
+        * Coefficients xa, xb, ya and yb may use the full signed 32-bit range.
+        */
+        uint zint_co_reduce(uint[] asrc, int a, uint[] bsrc, int b, int len,
+            long xa, long xb, long ya, long yb)
+        {
+            long carryA = 0;
+            long carryB = 0;
+
+            for (int idx = 0; idx < len; idx++) {
+                long wordA = asrc[a + idx];
+                long wordB = bsrc[b + idx];
+
+                /*
+                * 64-bit wrapping multiply-accumulate of the current words
+                * with the four coefficients, plus the running carries.
+                */
+                long sumA = wordA * xa + wordB * xb + carryA;
+                long sumB = wordA * ya + wordB * yb + carryB;
+
+                /*
+                * Results are stored one word lower, which implements the
+                * division by 2^31; the low 31 bits of the very first
+                * sums are dropped.
+                */
+                if (idx != 0) {
+                    asrc[a + idx - 1] = (uint)sumA & 0x7FFFFFFF;
+                    bsrc[b + idx - 1] = (uint)sumB & 0x7FFFFFFF;
+                }
+
+                /* Arithmetic shift: the carries are signed. */
+                carryA = sumA >> 31;
+                carryB = sumB >> 31;
+            }
+
+            /* The final carries become the top words. */
+            asrc[a + len - 1] = (uint)carryA;
+            bsrc[b + len - 1] = (uint)carryB;
+
+            /*
+            * A negative final carry means the corresponding result is
+            * negative; it is then replaced with its opposite, and the
+            * fact is reported in bit 0 (for a) or bit 1 (for b).
+            */
+            uint signA = (uint)((ulong)carryA >> 63);
+            uint signB = (uint)((ulong)carryB >> 63);
+            zint_negate(asrc, a, len, signA);
+            zint_negate(bsrc, b, len, signB);
+            return signA | (signB << 1);
+        }
+
+        /*
+        * Finish modular reduction. Rules on input parameters:
+        *
+        *   if neg = 1, then -m <= a < 0
+        *   if neg = 0, then 0 <= a < 2*m
+        *
+        * If neg = 0, then the top word of a[] is allowed to use 32 bits.
+        *
+        * Modulus m must be odd.
+        */
+        void zint_finish_mod(uint[] asrc, int a, int len, uint[] msrc, int m, uint neg)
+        {
+            /*
+            * Pass 1: compute the borrow of a - m without storing anything.
+            * The final borrow is 1 when a < m (a being read as nonnegative).
+            */
+            uint borrow = 0;
+            for (int idx = 0; idx < len; idx++) {
+                borrow = (asrc[a + idx] - msrc[m + idx] - borrow) >> 31;
+            }
+
+            /*
+            * Decide what to subtract in pass 2:
+            *   neg = 1               -> subtract -m (i.e. add m)
+            *   neg = 0, borrow = 0   -> subtract m
+            *   neg = 0, borrow = 1   -> subtract nothing
+            * negateMask turns the words of m into those of ~m (the "+1" of
+            * the two's complement comes from the initial borrow below);
+            * applyMask is all-zeros in the "subtract nothing" case.
+            */
+            uint negateMask = (uint)-neg >> 1;
+            uint applyMask = (uint)(-(neg | (1 - borrow)));
+
+            /* Pass 2: the conditional subtraction proper. */
+            borrow = neg;
+            for (int idx = 0; idx < len; idx++) {
+                uint valueWord = asrc[a + idx];
+                uint modWord = (msrc[m + idx] ^ negateMask) & applyMask;
+                uint diff = valueWord - modWord - borrow;
+
+                asrc[a + idx] = diff & 0x7FFFFFFF;
+                borrow = diff >> 31;
+            }
+        }
+
+        /*
+        * Replace a with (a*xa+b*xb)/(2^31) mod m, and b with
+        * (a*ya+b*yb)/(2^31) mod m. Modulus m must be odd; m0i = -1/m[0] mod 2^31.
+        */
+        void zint_co_reduce_mod(uint[] asrc, int a, uint[] bsrc, int b, uint[] msrc, int m, int len,
+            uint m0i, long xa, long xb, long ya, long yb)
+        {
+            /*
+            * Montgomery factors: multiples of m that, once added, make
+            * the low 31 bits of each linear combination vanish, so that
+            * the division by 2^31 below is exact. Only the low words
+            * (and 32-bit wrapping arithmetic) are needed to derive them.
+            */
+            uint lowA = asrc[a];
+            uint lowB = bsrc[b];
+            uint factorA = ((lowA * (uint)xa + lowB * (uint)xb) * m0i) & 0x7FFFFFFF;
+            uint factorB = ((lowA * (uint)ya + lowB * (uint)yb) * m0i) & 0x7FFFFFFF;
+
+            long carryA = 0;
+            long carryB = 0;
+            for (int idx = 0; idx < len; idx++) {
+                long wordA = asrc[a + idx];
+                long wordB = bsrc[b + idx];
+                long modWord = msrc[m + idx];
+
+                /* 64-bit wrapping multiply-accumulate, carries are signed. */
+                long sumA = wordA * xa + wordB * xb + modWord * factorA + carryA;
+                long sumB = wordA * ya + wordB * yb + modWord * factorB + carryB;
+
+                /* Store one word lower: this is the division by 2^31. */
+                if (idx != 0) {
+                    asrc[a + idx - 1] = (uint)sumA & 0x7FFFFFFF;
+                    bsrc[b + idx - 1] = (uint)sumB & 0x7FFFFFFF;
+                }
+
+                carryA = sumA >> 31;
+                carryB = sumB >> 31;
+            }
+            asrc[a + len - 1] = (uint)carryA;
+            bsrc[b + len - 1] = (uint)carryB;
+
+            /*
+            * Both results now lie in the -m..2*m range (top words may use
+            * a 32nd bit). The sign of each final carry tells
+            * zint_finish_mod() whether m must be added or (possibly)
+            * subtracted to complete the reduction.
+            */
+            uint signA = (uint)((ulong)carryA >> 63);
+            uint signB = (uint)((ulong)carryB >> 63);
+            zint_finish_mod(asrc, a, len, msrc, m, signA);
+            zint_finish_mod(bsrc, b, len, msrc, m, signB);
+        }
+
+        /*
+        * Compute a GCD between two positive big integers x and y. The two
+        * integers must be odd. Returned value is 1 if the GCD is 1, 0
+        * otherwise. When 1 is returned, arrays u and v are filled with values
+        * such that:
+        *   0 <= u <= y
+        *   0 <= v <= x
+        *   x*u - y*v = 1
+        * x[] and y[] are unmodified. Both input values must have the same
+        * encoded length. Temporary array must be large enough to accommodate 4
+        * extra values of that length. Arrays u, v and tmp may not overlap with
+        * each other, or with either x or y.
+        */
+        int zint_bezout(uint[] usrc, int u, uint[] vsrc, int v,
+            uint[] xsrc, int x, uint[] ysrc, int y,
+            int len, uint[] tmpsrc, int tmp)
+        {
+            /*
+            * Each (array, offset) pair designates a big integer of 'len'
+            * 31-bit words stored at array[offset .. offset+len-1]. The
+            * temporary area uses 4*len words starting at tmpsrc[tmp].
+            *
+            * Algorithm is an extended binary GCD. We maintain 6 values
+            * a, b, u0, u1, v0 and v1 with the following invariants:
+            *
+            *  a = x*u0 - y*v0
+            *  b = x*u1 - y*v1
+            *  0 <= a <= x
+            *  0 <= b <= y
+            *  0 <= u0 < y
+            *  0 <= v0 < x
+            *  0 <= u1 <= y
+            *  0 <= v1 < x
+            *
+            * Initial values are:
+            *
+            *  a = x   u0 = 1   v0 = 0
+            *  b = y   u1 = y   v1 = x-1
+            *
+            * Each iteration reduces either a or b, and maintains the
+            * invariants. Algorithm stops when a = b, at which point their
+            * common value is GCD(a,b) and (u0,v0) (or (u1,v1)) contains
+            * the values (u,v) we want to return.
+            *
+            * The formal definition of the algorithm is a sequence of steps:
+            *
+            *  - If a is even, then:
+            *        a <- a/2
+            *        u0 <- u0/2 mod y
+            *        v0 <- v0/2 mod x
+            *
+            *  - Otherwise, if b is even, then:
+            *        b <- b/2
+            *        u1 <- u1/2 mod y
+            *        v1 <- v1/2 mod x
+            *
+            *  - Otherwise, if a > b, then:
+            *        a <- (a-b)/2
+            *        u0 <- (u0-u1)/2 mod y
+            *        v0 <- (v0-v1)/2 mod x
+            *
+            *  - Otherwise:
+            *        b <- (b-a)/2
+            *        u1 <- (u1-u0)/2 mod y
+            *        v1 <- (v1-v0)/2 mod x
+            *
+            * We can show that the operations above preserve the invariants:
+            *
+            *  - If a is even, then u0 and v0 are either both even or both
+            *    odd (since a = x*u0 - y*v0, and x and y are both odd).
+            *    If u0 and v0 are both even, then (u0,v0) <- (u0/2,v0/2).
+            *    Otherwise, (u0,v0) <- ((u0+y)/2,(v0+x)/2). Either way,
+            *    the a = x*u0 - y*v0 invariant is preserved.
+            *
+            *  - The same holds for the case where b is even.
+            *
+            *  - If a and b are odd, and a > b, then:
+            *
+            *      a-b = x*(u0-u1) - y*(v0-v1)
+            *
+            *    In that situation, if u0 < u1, then x*(u0-u1) < 0, but
+            *    a-b > 0; therefore, it must be that v0 < v1, and the
+            *    first part of the update is: (u0,v0) <- (u0-u1+y,v0-v1+x),
+            *    which preserves the invariants. Otherwise, if u0 > u1,
+            *    then u0-u1 >= 1, thus x*(u0-u1) >= x. But a <= x and
+            *    b >= 0, hence a-b <= x. It follows that, in that case,
+            *    v0-v1 >= 0. The first part of the update is then:
+            *    (u0,v0) <- (u0-u1,v0-v1), which again preserves the
+            *    invariants.
+            *
+            *    Either way, once the subtraction is done, the new value of
+            *    a, which is the difference of two odd values, is even,
+            *    and the remaining of this step is a subcase of the
+            *    first algorithm case (i.e. when a is even).
+            *
+            *  - If a and b are odd, and b > a, then a similar
+            *    argument holds.
+            *
+            * The values a and b start at x and y, respectively. Since x
+            * and y are odd, their GCD is odd, and it is easily seen that
+            * all steps conserve the GCD (GCD(a-b,b) = GCD(a, b);
+            * GCD(a/2,b) = GCD(a,b) if GCD(a,b) is odd). Moreover, either a
+            * or b is reduced by at least one bit at each iteration, so
+            * the algorithm necessarily converges on the case a = b, at
+            * which point the common value is the GCD.
+            *
+            * In the algorithm expressed above, when a = b, the fourth case
+            * applies, and sets b = 0. Since a contains the GCD of x and y,
+            * which are both odd, a must be odd, and subsequent iterations
+            * (if any) will simply divide b by 2 repeatedly, which has no
+            * consequence. Thus, the algorithm can run for more iterations
+            * than necessary; the final GCD will be in a, and the (u,v)
+            * coefficients will be (u0,v0).
+            *
+            *
+            * The presentation above is bit-by-bit. It can be sped up by
+            * noticing that all decisions are taken based on the low bits
+            * and high bits of a and b. We can extract the two top words
+            * and low word of each of a and b, and compute reduction
+            * parameters pa, pb, qa and qb such that the new values for
+            * a and b are:
+            *    a' = (a*pa + b*pb) / (2^31)
+            *    b' = (a*qa + b*qb) / (2^31)
+            * the two divisions being exact. The coefficients are obtained
+            * just from the extracted words, and may be slightly off, requiring
+            * an optional correction: if a' < 0, then we replace pa with -pa
+            * and pb with -pb. Each such step will reduce the total length
+            * (sum of lengths of a and b) by at least 30 bits at each
+            * iteration.
+            */
+            int u0, u1, v0, v1, a, b;
+            uint x0i, y0i;
+            uint num, rc;
+            int j;
+
+            if (len == 0) {
+                return 0;
+            }
+
+            /*
+            * u0 and v0 are the u and v result buffers; the four other
+            * values (u1, v1, a and b) are taken from tmp[].
+            */
+            u0 = u; // usrc
+            v0 = v; // vsrc
+            u1 = tmp; // tmpsrc
+            v1 = u1 + len; // tmpsrc
+            a = v1 + len; // tmpsrc
+            b = a + len; // tmpsrc
+
+            /*
+            * We'll need the Montgomery reduction coefficients.
+            */
+            x0i = modp_ninv31(xsrc[x + 0]);
+            y0i = modp_ninv31(ysrc[y + 0]);
+
+            /*
+            * Initialize a, b, u0, u1, v0 and v1.
+            *  a = x   u0 = 1   v0 = 0
+            *  b = y   u1 = y   v1 = x-1
+            * Note that x is odd, so computing x-1 is easy.
+            */
+            // memcpy(a, x, len * sizeof *x);
+            Array.Copy(xsrc, x, tmpsrc, a, len);
+            // memcpy(b, y, len * sizeof *y);
+            Array.Copy(ysrc, y, tmpsrc, b, len);
+            usrc[u0+0] = 1;
+            // memset(u0 + 1, 0, (len - 1) * sizeof *u0);
+            // memset(v0, 0, len * sizeof *v0);
+            for (int i = 1; i < len; i++) {
+                usrc[u0+i] = 0;
+                vsrc[v0+i] = 0;
+            }
+            vsrc[v0+0] = 0;
+            // memcpy(u1, y, len * sizeof *u1);
+            Array.Copy(ysrc, y, tmpsrc, u1, len);
+            // memcpy(v1, x, len * sizeof *v1);
+            Array.Copy(xsrc, x, tmpsrc, v1, len);
+            tmpsrc[v1+0] --;
+
+            /*
+            * Each input operand may be as large as 31*len bits, and we
+            * reduce the total length by at least 30 bits at each iteration.
+            */
+            for (num = 62 * (uint)len + 30; num >= 30; num -= 30) {
+                uint c0, c1;
+                uint a0, a1, b0, b1;
+                ulong a_hi, b_hi;
+                uint a_lo, b_lo;
+                long pa, pb, qa, qb;
+                int i;
+                uint r;
+
+                /*
+                * Extract the top words of a and b. If j is the highest
+                * index >= 1 such that a[j] != 0 or b[j] != 0, then we
+                * want (a[j] << 31) + a[j-1] and (b[j] << 31) + b[j-1].
+                * If a and b are down to one word each, then we use
+                * a[0] and b[0].
+                */
+                c0 = uint.MaxValue;
+                c1 = uint.MaxValue;
+                a0 = 0;
+                a1 = 0;
+                b0 = 0;
+                b1 = 0;
+                j = len;
+                while (j -- > 0) {
+                    uint aw, bw;
+
+                    aw = tmpsrc[a+j];
+                    bw = tmpsrc[b+j];
+                    a0 ^= (a0 ^ aw) & c0;
+                    a1 ^= (a1 ^ aw) & c1;
+                    b0 ^= (b0 ^ bw) & c0;
+                    b1 ^= (b1 ^ bw) & c1;
+                    c1 = c0;
+                    c0 &= (((aw | bw) + 0x7FFFFFFF) >> 31) - (uint)1;
+                }
+
+                /*
+                * If c1 = 0, then we grabbed two words for a and b.
+                * If c1 != 0 but c0 = 0, then we grabbed one word. It
+                * is not possible that c1 != 0 and c0 != 0, because that
+                * would mean that both integers are zero.
+                */
+                a1 |= a0 & c1;
+                a0 &= ~c1;
+                b1 |= b0 & c1;
+                b0 &= ~c1;
+                a_hi = ((ulong)a0 << 31) + a1;
+                b_hi = ((ulong)b0 << 31) + b1;
+                a_lo = tmpsrc[a+0];
+                b_lo = tmpsrc[b+0];
+
+                /*
+                * Compute reduction factors:
+                *
+                *   a' = a*pa + b*pb
+                *   b' = a*qa + b*qb
+                *
+                * such that a' and b' are both multiple of 2^31, but are
+                * only marginally larger than a and b.
+                */
+                pa = 1;
+                pb = 0;
+                qa = 0;
+                qb = 1;
+                for (i = 0; i < 31; i ++) {
+                    /*
+                    * At each iteration:
+                    *
+                    *   a <- (a-b)/2 if: a is odd, b is odd, a_hi > b_hi
+                    *   b <- (b-a)/2 if: a is odd, b is odd, a_hi <= b_hi
+                    *   a <- a/2 if: a is even
+                    *   b <- b/2 if: a is odd, b is even
+                    *
+                    * We multiply a_lo and b_lo by 2 at each
+                    * iteration, thus a division by 2 really is a
+                    * non-multiplication by 2.
+                    */
+                    uint rt, oa, ob, cAB, cBA, cA;
+                    ulong rz;
+
+                    /*
+                    * rt = 1 if a_hi > b_hi, 0 otherwise.
+                    */
+                    rz = b_hi - a_hi;
+                    rt = (uint)((rz ^ ((a_hi ^ b_hi)
+                        & (a_hi ^ rz))) >> 63);
+
+                    /*
+                    * cAB = 1 if b must be subtracted from a
+                    * cBA = 1 if a must be subtracted from b
+                    * cA = 1 if a must be divided by 2
+                    *
+                    * Rules:
+                    *
+                    *   cAB and cBA cannot both be 1.
+                    *   If a is not divided by 2, b is.
+                    */
+                    oa = (a_lo >> i) & 1;
+                    ob = (b_lo >> i) & 1;
+                    cAB = oa & ob & rt;
+                    cBA = (uint)(oa & ob & ~(int)rt);
+                    cA = cAB | (oa ^ 1);
+
+                    /*
+                    * Conditional subtractions.
+                    */
+                    a_lo -= (uint)(b_lo & -cAB);
+                    a_hi -= b_hi & (ulong)-(long)cAB;
+                    pa -= (qa & -(long)cAB);
+                    pb -= (qb & -(long)cAB);
+                    b_lo -= (uint)(a_lo & -cBA);
+                    b_hi -= a_hi & (ulong)-(long)cBA;
+                    qa -= pa & -(long)cBA;
+                    qb -= pb & -(long)cBA;
+
+                    /*
+                    * Shifting.
+                    */
+                    a_lo += a_lo & (cA - 1);
+                    pa += pa & ((long)cA - 1);
+                    pb += pb & ((long)cA - 1);
+                    a_hi ^= (a_hi ^ (a_hi >> 1)) & (ulong)-(long)cA;
+                    b_lo += (uint)(b_lo & -cA);
+                    qa += qa & -(long)cA;
+                    qb += qb & -(long)cA;
+                    b_hi ^= (b_hi ^ (b_hi >> 1)) & ((ulong)cA - 1);
+                }
+
+                /*
+                * Apply the computed parameters to our values. We
+                * may have to correct pa and pb depending on the
+                * returned value of zint_co_reduce() (when a and/or b
+                * had to be negated).
+                */
+                r = zint_co_reduce(tmpsrc, a, tmpsrc, b, len, pa, pb, qa, qb);
+                pa -= (pa + pa) & -(long)(r & 1);
+                pb -= (pb + pb) & -(long)(r & 1);
+                qa -= (qa + qa) & -(long)(r >> 1);
+                qb -= (qb + qb) & -(long)(r >> 1);
+                zint_co_reduce_mod(usrc, u0, tmpsrc, u1, ysrc, y, len, y0i, pa, pb, qa, qb);
+                zint_co_reduce_mod(vsrc, v0, tmpsrc, v1, xsrc, x, len, x0i, pa, pb, qa, qb);
+            }
+
+            /*
+            * At that point, array a[] should contain the GCD, and the
+            * results (u,v) should already be set. We check that the GCD
+            * is indeed 1. We also check that the two operands x and y
+            * are odd.
+            */
+            rc = tmpsrc[a+0] ^ 1;
+            for (j = 1; j < len; j ++) {
+                rc |= tmpsrc[a+j];
+            }
+
+            /*
+            * gcdNotOne = 1 if rc != 0 (GCD differs from 1), 0 otherwise.
+            * The negation must be truncated back to 32 bits before the
+            * shift: in C#, -rc on a uint is a long, and shifting that
+            * long right by 31 yields -1 instead of 1. The expression
+            * 1 - (-1) = 2 would then make this function return
+            * 2 & x[0] & y[0], which is non-zero whenever bit 1 of both
+            * x[0] and y[0] is set, instead of the expected 0.
+            */
+            uint gcdNotOne = (rc | (uint)(-rc)) >> 31;
+            return (int)((1 - gcdNotOne) & xsrc[x+0] & ysrc[y+0]);
+        }
+
+        /*
+        * Add k*y*2^sc to x. The result is assumed to fit in the array of
+        * size xlen (truncation is applied if necessary).
+        * Scale factor 'sc' is provided as sch and scl, such that:
+        *   sch = sc / 31
+        *   scl = sc % 31
+        * xlen MUST NOT be lower than ylen.
+        *
+        * x[] and y[] are both signed integers, using two's complement for
+        * negative values.
+        */
+        void zint_add_scaled_mul_small(uint[] xsrc, int x, int xlen,
+            uint[] ysrc, int y, int ylen, int k,
+            uint sch, uint scl)
+        {
+            if (ylen == 0) {
+                return;
+            }
+
+            int wordShift = (int)sch;
+            int bitShift = (int)scl;
+
+            /*
+            * Sign extension word for y: 0x7FFFFFFF if bit 30 of its top
+            * word is set (negative value), 0 otherwise.
+            */
+            uint signWord = (uint)-(ysrc[y + ylen - 1] >> 30) >> 1;
+
+            uint spill = 0;     /* bits shifted out of the previous word of y */
+            int carry = 0;      /* signed carry between words of x */
+
+            for (int pos = wordShift; pos < xlen; pos++) {
+                /*
+                * Next word of y*2^sc: take the source word (or the sign
+                * extension once y is exhausted), shift it left by scl
+                * bits and bring in the bits spilled by the previous word.
+                */
+                int src = pos - wordShift;
+                uint raw = (src < ylen) ? ysrc[y + src] : signWord;
+                uint scaled = ((raw << bitShift) & 0x7FFFFFFF) | spill;
+                spill = raw >> (31 - bitShift);
+
+                /*
+                * Signed 64-bit multiply-accumulate; this cannot overflow
+                * since the operands are 31-bit and 32-bit values.
+                */
+                ulong acc = (ulong)((long)scaled * (long)k + (long)xsrc[x + pos] + carry);
+                xsrc[x + pos] = (uint)acc & 0x7FFFFFFF;
+
+                /*
+                * Bits 31..62 of the accumulator, reinterpreted as a
+                * signed 32-bit value, form the carry for the next word.
+                */
+                carry = (int)(uint)(acc >> 31);
+            }
+        }
+
+        /*
+        * Subtract y*2^sc from x. The result is assumed to fit in the array of
+        * size xlen (truncation is applied if necessary).
+        * Scale factor 'sc' is provided as sch and scl, such that:
+        *   sch = sc / 31
+        *   scl = sc % 31
+        * xlen MUST NOT be lower than ylen.
+        *
+        * x[] and y[] are both signed integers, using two's complement for
+        * negative values.
+        */
+        void zint_sub_scaled(uint[] xsrc, int x, int xlen,
+            uint[] ysrc, int y, int ylen, uint sch, uint scl)
+        {
+            int u;
+            uint ysign, tw;
+            uint cc;
+
+            if (ylen == 0) {
+                return;
+            }
+
+            /*
+            * Sign extension word for y: 0x7FFFFFFF if bit 30 of its top
+            * word is set (negative value), 0 otherwise. The cast to uint
+            * must happen before the shift: negating a uint yields a long
+            * in C#, and shifting that long first would produce 0xFFFFFFFF,
+            * which is not a valid 31-bit word (the extra bit happened to
+            * be absorbed for scl in 0..30, but the value was inconsistent
+            * with zint_add_scaled_mul_small()).
+            */
+            ysign = (uint)-(ysrc[y + ylen - 1] >> 30) >> 1;
+            tw = 0;     /* bits shifted out of the previous word of y */
+            cc = 0;     /* borrow between words of x */
+            for (u = (int)sch; u < xlen; u ++) {
+                int v;
+                uint w, wy, wys;
+
+                /*
+                * Get the next word of y (scaled): the source word, or
+                * the sign extension once y is exhausted, shifted left
+                * by scl bits and completed with the spilled bits of
+                * the previous word.
+                */
+                v = u - (int)sch;
+                wy = v < ylen ? ysrc[y + v] : ysign;
+                wys = ((wy << (int)scl) & 0x7FFFFFFF) | tw;
+                tw = wy >> (int)(31 - scl);
+
+                /*
+                * Subtract with borrow; bit 31 of the 32-bit difference
+                * is the borrow for the next word.
+                */
+                w = xsrc[x+u] - wys - cc;
+                xsrc[x+u] = w & 0x7FFFFFFF;
+                cc = w >> 31;
+            }
+        }
+
+        /*
+        * Convert a one-word signed big integer into a signed value.
+        */
+        int zint_one_to_plain(uint[] xsrc, int x)
+        {
+            uint word = xsrc[x];
+
+            /*
+            * Bit 30 is the sign bit of the 31-bit value; copying it into
+            * bit 31 yields the same value in 32-bit two's complement.
+            */
+            uint signBit = (word & 0x40000000) << 1;
+            return (int)(word | signBit);
+        }
+
+        /* ==================================================================== */
+
+        /*
+        * Convert a polynomial to floating-point values.
+        *
+        * Each coefficient has length flen words, and starts fstride words after
+        * the previous.
+        *
+        * IEEE-754 binary64 values can represent values in a finite range,
+        * roughly 2^(-1023) to 2^(+1023); thus, if coefficients are too large,
+        * they should be "trimmed" by pointing not at the lowest word of each
+        * coefficient, but at a higher word.
+        */
+        void poly_big_to_fp(FalconFPR[] dsrc, int d, uint[] fsrc, int f, int flen, int fstride,
+            uint logn)
+        {
+            int count = 1 << (int)logn;
+
+            /*
+            * Zero-length coefficients: every output is set to fpr_zero.
+            */
+            if (flen == 0) {
+                for (int idx = 0; idx < count; idx++) {
+                    dsrc[d + idx] = this.fpre.fpr_zero;
+                }
+                return;
+            }
+
+            int pos = f;
+            for (int idx = 0; idx < count; idx++) {
+                /*
+                * signMask is all-ones when the coefficient is negative
+                * (bit 30 of its top word set), zero otherwise. For a
+                * negative coefficient the words are inverted and 1 is
+                * added (flip/carry), which yields the absolute value;
+                * each word of that absolute value is then negated again
+                * before being converted, so the accumulated sum carries
+                * the right sign.
+                */
+                uint signMask = (uint)(-(fsrc[pos + flen - 1] >> 30));
+                uint flip = signMask >> 1;
+                uint carry = signMask & 1;
+
+                FalconFPR acc = this.fpre.fpr_zero;
+                FalconFPR scale = this.fpre.fpr_one;
+                for (int w = 0; w < flen; w++) {
+                    uint limb = (fsrc[pos + w] ^ flip) + carry;
+                    carry = limb >> 31;
+                    limb &= 0x7FFFFFFF;
+
+                    /* limb - 2*limb = -limb when the mask is all-ones. */
+                    limb -= (limb << 1) & signMask;
+
+                    FalconFPR term = this.fpre.fpr_mul(this.fpre.fpr_of((int)limb), scale);
+                    acc = this.fpre.fpr_add(acc, term);
+
+                    /* Weight of the next word (fpr_ptwo31 per step). */
+                    scale = this.fpre.fpr_mul(scale, this.fpre.fpr_ptwo31);
+                }
+
+                dsrc[d + idx] = acc;
+                pos += fstride;
+            }
+        }
+
+        /*
+        * Convert a polynomial to small integers. Source values are supposed
+        * to be one-word integers, signed over 31 bits. Returned value is 0
+        * if any of the coefficients exceeds the provided limit (in absolute
+        * value), or 1 on success.
+        *
+        * This is not constant-time; this is not a problem here, because on
+        * any failure, the NTRU-solving process will be deemed to have failed
+        * and the (f,g) polynomials will be discarded.
+        */
+        int poly_big_to_small(sbyte[] dsrc, int d, uint[] ssrc, int s, int lim, uint logn)
+        {
+            int count = 1 << (int)logn;
+
+            for (int idx = 0; idx < count; idx++) {
+                int value = zint_one_to_plain(ssrc, s + idx);
+
+                /*
+                * Stop at the first coefficient outside -lim..+lim; the
+                * entries written so far are left in place.
+                */
+                bool inRange = value >= -lim && value <= lim;
+                if (!inRange) {
+                    return 0;
+                }
+
+                dsrc[d + idx] = (sbyte)value;
+            }
+
+            return 1;
+        }
+
+        /*
+        * Subtract k*f from F, where F, f and k are polynomials modulo X^N+1.
+        * Coefficients of polynomial k are small integers (signed values in the
+        * -2^31..2^31 range) scaled by 2^sc. Value sc is provided as sch = sc / 31
+        * and scl = sc % 31.
+        *
+        * This function implements the basic quadratic multiplication algorithm,
+        * which is efficient in space (no extra buffer needed) but slow at
+        * high degree.
+        */
+        void poly_sub_scaled(uint[] Fsrc, int F, int Flen, int Fstride,
+            uint[] fsrc, int f, int flen, int fstride,
+            int[] ksrc, int k, uint sch, uint scl, uint logn)
+        {
+            int degree = 1 << (int)logn;
+
+            for (int row = 0; row < degree; row++) {
+                /*
+                * Subtracting k[row]*f is done by adding (-k[row])*f.
+                * Coefficient 'col' of f contributes to coefficient
+                * row+col of F, starting at F[row].
+                */
+                int factor = -ksrc[k + row];
+                int dst = F + row * Fstride;
+                int src = f;
+
+                for (int col = 0; col < degree; col++) {
+                    zint_add_scaled_mul_small(
+                        Fsrc, dst, Flen, fsrc, src, flen, factor, sch, scl);
+
+                    /*
+                    * Reduction modulo X^N+1: after the last coefficient
+                    * of F, wrap around to F[0] and flip the sign of the
+                    * factor (X^N = -1).
+                    */
+                    bool wraps = (row + col == degree - 1);
+                    if (wraps) {
+                        dst = F;
+                        factor = -factor;
+                    } else {
+                        dst += Fstride;
+                    }
+
+                    src += fstride;
+                }
+            }
+        }
+
+        /*
+        * Subtract k*f from F. Coefficients of polynomial k are small integers
+        * (signed values in the -2^31..2^31 range) scaled by 2^sc. This function
+        * assumes that the degree is large, and integers relatively small.
+        * The value sc is provided as sch = sc / 31 and scl = sc % 31.
+        *
+        * Parameters:
+        *   Fsrc, F, Flen, Fstride  array holding F, offset of its first
+        *                           coefficient, length handed to
+        *                           zint_sub_scaled(), and distance (in words)
+        *                           between consecutive coefficients
+        *   fsrc, f, flen, fstride  same, for f
+        *   ksrc, k                 array holding k and offset of its first
+        *                           coefficient
+        *   logn                    log2 of the degree (n = 2^logn)
+        *   tmpsrc, tmp             scratch array and offset of the scratch area
+        *
+        * Scratch layout from 'tmp' (tlen = flen + 1):
+        *   gm  : n words
+        *   igm : n words
+        *   fk  : n * tlen words (k*f, tlen words per coefficient)
+        *   t1  : n words for the NTT of k, then handed to zint_rebuild_CRT()
+        *
+        * F is updated in Fsrc[] and f is read from fsrc[]. Either may be the
+        * same array as tmpsrc[], but this is not required.
+        */
+        void poly_sub_scaled_ntt(uint[] Fsrc, int F, int Flen, int Fstride,
+            uint[] fsrc, int f, int flen, int fstride,
+            int[] ksrc, int k, uint sch, uint scl, uint logn,
+            uint[] tmpsrc, int tmp)
+        {
+            int gm, igm, fk, t1, x;
+            int y;
+            int n, u, tlen;
+            FalconSmallPrime[] primes;
+
+            n = (int)1 << (int)logn;
+            tlen = flen + 1;
+            gm = tmp;
+            igm = gm + n;
+            fk = igm + n;
+            t1 = fk + n * tlen;
+
+            primes = this.PRIMES;
+
+            /*
+            * Compute k*f in fk[], in RNS notation.
+            */
+            for (u = 0; u < tlen; u ++) {
+                uint p, p0i, R2, Rx;
+                int v;
+
+                p = primes[u].p;
+                p0i = modp_ninv31(p);
+                R2 = modp_R2(p, p0i);
+                Rx = modp_Rx((uint)flen, p, p0i, R2);
+                modp_mkgm2(tmpsrc, gm, tmpsrc, igm, logn, primes[u].g, p, p0i);
+
+                /*
+                * k modulo p, then in NTT representation, in t1[].
+                */
+                for (v = 0; v < n; v ++) {
+                    tmpsrc[t1+v] = modp_set(ksrc[k+v], p);
+                }
+                modp_NTT2(tmpsrc, t1, tmpsrc, gm, logn, p, p0i);
+
+                /*
+                * f modulo p goes into word u of each fk[] coefficient.
+                * The source is fsrc[] (the array that 'f' indexes), not
+                * the scratch array.
+                */
+                for (v = 0, y = f, x = fk + u;
+                    v < n; v ++, y += fstride, x += tlen)
+                {
+                    tmpsrc[x] = zint_mod_small_signed(fsrc, y, flen, p, p0i, R2, Rx);
+                }
+                modp_NTT2_ext(tmpsrc, fk + u, tlen, tmpsrc, gm, logn, p, p0i);
+
+                /*
+                * Pointwise product of the two NTT vectors, followed by the
+                * inverse NTT.
+                */
+                for (v = 0, x = fk + u; v < n; v ++, x += tlen) {
+                    tmpsrc[x] = modp_montymul(
+                        modp_montymul(tmpsrc[t1+v], tmpsrc[x], p, p0i), R2, p, p0i);
+                }
+                modp_iNTT2_ext(tmpsrc, fk + u, tlen, tmpsrc, igm, logn, p, p0i);
+            }
+
+            /*
+            * Rebuild k*f.
+            */
+            zint_rebuild_CRT(tmpsrc, fk, tlen, tlen, n, primes, 1, tmpsrc, t1);
+
+            /*
+            * Subtract k*f, scaled, from F. The destination is Fsrc[] (the
+            * array that 'F' indexes); k*f is read from the scratch array.
+            */
+            for (u = 0, x = F, y = fk; u < n; u ++, x += Fstride, y += tlen) {
+                zint_sub_scaled(Fsrc, x, Flen, tmpsrc, y, tlen, sch, scl);
+            }
+        }
+
+        /* ==================================================================== */
+
+        /*
+        * Draw a 64-bit unsigned integer from a SHAKE-based RNG.
+        *
+        * Eight bytes are extracted and always assembled in little-endian
+        * order (the first extracted byte is the least significant one), so
+        * that a known seed yields the same values on every platform.
+        */
+        ulong get_rng_u64(SHAKE256 rng)
+        {
+            byte[] buf = new byte[8];
+            ulong acc = 0;
+
+            rng.i_shake256_extract(buf, 0, 8);
+
+            // Fold from the most significant byte (index 7) down to index 0.
+            for (int i = buf.Length - 1; i >= 0; i --) {
+                acc = (acc << 8) | (ulong)buf[i];
+            }
+            return acc;
+        }
+
+
+        /*
+        * Table below incarnates a discrete Gaussian distribution:
+        *    D(x) = exp(-(x^2)/(2*sigma^2))
+        * where sigma = 1.17*sqrt(q/(2*N)), q = 12289, and N = 1024.
+        * Element 0 of the table is P(x = 0).
+        * For k > 0, element k is P(x >= k+1 | x > 0).
+        * Probabilities are scaled up by 2^63.
+        *
+        * The table has 27 entries and the last one is 0. mkgauss() compares
+        * a 63-bit random value against element 0 to decide whether the
+        * sample is zero, then scans elements 1 and up to pick the magnitude.
+        */
+        ulong[] gauss_1024_12289 = {
+            1283868770400643928u,  6416574995475331444u,  4078260278032692663u,
+            2353523259288686585u,  1227179971273316331u,   575931623374121527u,
+            242543240509105209u,    91437049221049666u,    30799446349977173u,
+                9255276791179340u,     2478152334826140u,      590642893610164u,
+                125206034929641u,       23590435911403u,        3948334035941u,
+                    586753615614u,          77391054539u,           9056793210u,
+                    940121950u,             86539696u,              7062824u,
+                        510971u,                32764u,                 1862u,
+                            94u,                    4u,                    0u
+        };
+
+        /*
+        * Generate a random value with a Gaussian distribution centered on 0.
+        * The RNG must be ready for extraction (already flipped).
+        *
+        * Distribution has standard deviation 1.17*sqrt(q/(2*N)). The
+        * precomputed table is for N = 1024. Since the sum of two independent
+        * values of standard deviation sigma has standard deviation
+        * sigma*sqrt(2), then we can just generate more values and add them
+        * together for lower dimensions.
+        *
+        * Parameters:
+        *   rng   SHAKE-based RNG; two 64-bit words are drawn per table sample
+        *   logn  log2 of the target degree N; 2^(10-logn) table samples are
+        *         summed (assumes logn <= 10, since 10 - logn is computed in
+        *         unsigned arithmetic -- TODO confirm callers guarantee this)
+        *
+        * Returns the sum of the generated samples as a signed integer.
+        */
+        int mkgauss(SHAKE256 rng, uint logn)
+        {
+            uint u, g;
+            int val;
+
+            g = 1U << (int)(10 - logn); // number of N = 1024 samples to add up
+            val = 0;
+            for (u = 0; u < g; u ++) {
+                /*
+                * Each iteration generates one value with the
+                * Gaussian distribution for N = 1024.
+                *
+                * We use two random 64-bit values. First value
+                * decides on whether the generated value is 0, and,
+                * if not, the sign of the value. Second random 64-bit
+                * word is used to generate the non-zero value.
+                *
+                * For constant-time code we have to read the complete
+                * table. This has negligible cost, compared with the
+                * remainder of the keygen process (solving the NTRU
+                * equation).
+                */
+                ulong r;
+                uint f, v, k, neg;
+
+                /*
+                * First value:
+                *  - flag 'neg' is randomly selected to be 0 or 1.
+                *  - flag 'f' is set to 1 if the generated value is zero,
+                *    or set to 0 otherwise.
+                */
+                r = get_rng_u64(rng);
+                neg = (uint)(r >> 63); // top bit of the random word
+                r &= ~((ulong)1 << 63); // keep the low 63 bits
+                f = (uint)((r - gauss_1024_12289[0]) >> 63); // 1 iff r < table[0] (the subtraction wraps and sets bit 63)
+
+                /*
+                * We produce a new random 63-bit integer r, and go over
+                * the array, starting at index 1. We store in v the
+                * index of the first array element which is not greater
+                * than r, unless the flag f was already 1.
+                */
+                v = 0;
+                r = get_rng_u64(rng);
+                r &= ~((ulong)1 << 63);
+                for (k = 1; k < gauss_1024_12289.Length; k ++)
+                {
+                    uint t;
+
+                    t = (uint)((r - gauss_1024_12289[k]) >> 63) ^ 1; // 1 iff table[k] <= r
+                    v |= (uint)(k & -(t & (f ^ 1))); // unary minus on a uint yields a long: 0 or -1 (all ones), i.e. a mask keeping k or nothing
+                    f |= t; // once set, later entries can no longer change v
+                }
+
+                /*
+                * We apply the sign ('neg' flag). If the value is zero,
+                * the sign has no effect.
+                */
+                v = (uint)((v ^ -neg) + neg); // two's complement negation when neg = 1 (computed as long, truncated back to 32 bits)
+
+                /*
+                * Generated value is added to val.
+                */
+                // v holds a two's complement bit pattern; the C original reinterpreted it with *(int *)&v.
+                val += (int)v; // NOTE(review): relies on the conversion being unchecked (the C# default) -- confirm the project is not built with overflow checking
+            }
+            return val;
+        }
+
+        /*
+        * The MAX_BL_SMALL[] and MAX_BL_LARGE[] contain the lengths, in 31-bit
+        * words, of intermediate values in the computation:
+        *
+        *   MAX_BL_SMALL[depth]: length for the input f and g at that depth
+        *   MAX_BL_LARGE[depth]: length for the unreduced F and G at that depth
+        *
+        * Rules:
+        *
+        *  - Within an array, values grow.
+        *
+        *  - The 'SMALL' array must have an entry for maximum depth, corresponding
+        *    to the size of values used in the binary GCD. There is no such value
+        *    for the 'LARGE' array (the binary GCD yields already reduced
+        *    coefficients).
+        *
+        *  - MAX_BL_LARGE[depth] >= MAX_BL_SMALL[depth + 1].
+        *
+        *  - Values must be large enough to handle the common cases, with some
+        *    margins.
+        *
+        *  - Values must not be "too large" either because we will convert some
+        *    integers into floating-point values by considering the top 10 words,
+        *    i.e. 310 bits; hence, for values of length more than 10 words, we
+        *    should take care to have the length centered on the expected size.
+        *
+        * The following average lengths, in bits, have been measured on thousands
+        * of random keys (fg = max length of the absolute value of coefficients
+        * of f and g at that depth; FG = idem for the unreduced F and G; for the
+        * maximum depth, F and G are the output of binary GCD, multiplied by q;
+        * for each value, the average and standard deviation are provided).
+        *
+        * Binary case:
+        *    depth: 10    fg: 6307.52 (24.48)    FG: 6319.66 (24.51)
+        *    depth:  9    fg: 3138.35 (12.25)    FG: 9403.29 (27.55)
+        *    depth:  8    fg: 1576.87 ( 7.49)    FG: 4703.30 (14.77)
+        *    depth:  7    fg:  794.17 ( 4.98)    FG: 2361.84 ( 9.31)
+        *    depth:  6    fg:  400.67 ( 3.10)    FG: 1188.68 ( 6.04)
+        *    depth:  5    fg:  202.22 ( 1.87)    FG:  599.81 ( 3.87)
+        *    depth:  4    fg:  101.62 ( 1.02)    FG:  303.49 ( 2.38)
+        *    depth:  3    fg:   50.37 ( 0.53)    FG:  153.65 ( 1.39)
+        *    depth:  2    fg:   24.07 ( 0.25)    FG:   78.20 ( 0.73)
+        *    depth:  1    fg:   10.99 ( 0.08)    FG:   39.82 ( 0.41)
+        *    depth:  0    fg:    4.00 ( 0.00)    FG:   19.61 ( 0.49)
+        *
+        * Integers are actually represented either in binary notation over
+        * 31-bit words (signed, using two's complement), or in RNS, modulo
+        * many small primes. These small primes are close to, but slightly
+        * lower than, 2^31. Use of RNS loses less than two bits, even for
+        * the largest values.
+        *
+        * IMPORTANT: if these values are modified, then the temporary buffer
+        * sizes (FALCON_KEYGEN_TEMP_*, in inner.h) must be recomputed
+        * accordingly.
+        *
+        * NOTE(review): inner.h presumably belongs to the C implementation
+        * this code was translated from; the place where this port sizes its
+        * temporary buffers is not visible here -- locate it before changing
+        * any value below.
+        */
+
+        int[] MAX_BL_SMALL = { // 11 entries, indexed by depth 0..10
+            1, 1, 2, 2, 4, 7, 14, 27, 53, 106, 209
+        };
+
+        int[] MAX_BL_LARGE = { // 10 entries, indexed by depth 0..9 (no entry for the maximum depth)
+            2, 2, 5, 7, 12, 21, 40, 78, 157, 308
+        };
+
+        /*
+        * Average and standard deviation for the maximum size (in bits) of
+        * coefficients of (f,g), depending on depth. These values are used
+        * to compute bounds for Babai's reduction.
+        *
+        * The data is kept as two parallel arrays indexed by depth (0..10):
+        * BITLENGTH_avg[depth] is the average and BITLENGTH_std[depth] the
+        * standard deviation.
+        */
+        int[] BITLENGTH_avg = { // BITLENGTH_avg[depth] = average size in bits
+                4,
+               11,
+               24,
+               50,
+              102,
+              202,
+              401,
+              794,
+             1577,
+             3138,
+             6308,
+        };
+        int[] BITLENGTH_std = { // BITLENGTH_std[depth] = standard deviation in bits
+              0,
+              1,
+              1,
+              1,
+              1,
+              2,
+              4,
+              5,
+              8,
+             13,
+             25
+        };
+
+        /*
+        * Minimal recursion depth at which we rebuild intermediate values
+        * when reconstructing f and g.
+        */
+        const int DEPTH_INT_FG = 4;
+
+        /*
+        * Squared norm of the short vector made of the 2^logn signed
+        * coefficients stored in fsrc[] from offset f.
+        *
+        * The sum is accumulated on 32 bits; if any running total has bit 31
+        * set, the returned value is saturated to 2^32-1.
+        */
+        uint poly_small_sqnorm(sbyte[] fsrc, int f, uint logn)
+        {
+            int count = 1 << (int)logn;
+            uint sum = 0;
+            uint seen = 0; // OR of all running totals, to detect bit 31
+            int idx = 0;
+
+            while (idx < count) {
+                int c = fsrc[f + idx];
+
+                sum += (uint)(c * c);
+                seen |= sum;
+                idx ++;
+            }
+
+            // Negating a uint yields a long: 0, or -1 (all ones) on overflow.
+            long saturate = -(seen >> 31);
+            return (uint)(sum | saturate);
+        }
+
+        /*
+        * Convert a small vector to floating point: each of the 2^logn signed
+        * coefficients fsrc[f + i] is passed through fpr_of() and the result
+        * is stored in xsrc[x + i].
+        */
+        void poly_small_to_fp(FalconFPR[] xsrc, int x, sbyte[] fsrc, int f, uint logn)
+        {
+            int count = 1 << (int)logn;
+            int dst = x;
+            int src = f;
+
+            for (int remaining = count; remaining > 0; remaining --) {
+                xsrc[dst] = this.fpre.fpr_of(fsrc[src]);
+                dst ++;
+                src ++;
+            }
+        }
+
+        /*
+        * Input: f,g of degree N = 2^logn; 'depth' is used only to get their
+        * individual length.
+        *
+        * Output: f',g' of degree N/2, with the length for 'depth+1'.
+        *
+        * Values are in RNS; input and/or output may also be in NTT.
+        *
+        * Parameters:
+        *   datasrc, data  work buffer and offset of the input f, g in it; the
+        *                  output f', g' are written back at the same offset
+        *   logn           log2 of the input degree
+        *   depth          index into MAX_BL_SMALL[]: slen = MAX_BL_SMALL[depth]
+        *                  words per input coefficient, tlen =
+        *                  MAX_BL_SMALL[depth + 1] words per output coefficient
+        *   in_ntt         non-zero if the input is already in NTT
+        *                  representation (no forward NTT is applied then)
+        *   out_ntt        non-zero to leave the output in NTT representation
+        *
+        * Layout of datasrc[] from 'data' while this function runs (n = 2^logn,
+        * hn = n/2):
+        *   fd  : hn * tlen words  (output f')
+        *   gd  : hn * tlen words  (output g')
+        *   fs  : n * slen words   (input f, moved here)
+        *   gs  : n * slen words   (input g, moved here)
+        *   gm  : n words          (filled by modp_mkgm2)
+        *   igm : n words          (filled by modp_mkgm2)
+        *   t1  : scratch, n words used per prime
+        */
+        void make_fg_step(uint[] datasrc, int data, uint logn, uint depth,
+            int in_ntt, int out_ntt)
+        {
+            int n, hn, u;
+            int slen, tlen;
+            int fd, gd;
+            int fs, gs;
+            int gm, igm, t1;
+            FalconSmallPrime[] primes;
+
+            n = (int)1 << (int)logn;
+            hn = n >> 1;
+            slen = MAX_BL_SMALL[depth];
+            tlen = MAX_BL_SMALL[depth + 1];
+            primes = this.PRIMES;
+
+            /*
+            * Prepare room for the result.
+            */
+            fd = data;
+            gd = fd + hn * tlen;
+            fs = gd + hn * tlen;
+            gs = fs + n * slen;
+            gm = gs + n * slen;
+            igm = gm + n;
+            t1 = igm + n;
+            // C original: memmove(fs, data, 2 * n * slen * sizeof *data); Array.Copy also copes with overlapping source and destination ranges.
+            Array.Copy(datasrc, data, datasrc, fs, 2 * n * slen);
+
+            /*
+            * First slen words: we use the input values directly, and apply
+            * inverse NTT as we go.
+            */
+            for (u = 0; u < slen; u ++) {
+                uint p, p0i, R2;
+                int v;
+                int x;
+
+                p = primes[u].p;
+                p0i = modp_ninv31(p);
+                R2 = modp_R2(p, p0i);
+                modp_mkgm2(datasrc, gm, datasrc, igm, logn, primes[u].g, p, p0i);
+
+                for (v = 0, x = fs + u; v < n; v ++, x += slen) { // gather word u of every coefficient of f into t1
+                    datasrc[t1 + v] = datasrc[x];
+                }
+                if (in_ntt == 0) {
+                    modp_NTT2(datasrc, t1, datasrc, gm, logn, p, p0i);
+                }
+                for (v = 0, x = fd + u; v < hn; v ++, x += tlen) { // word u of f': product of each pair of adjacent NTT values
+                    uint w0, w1;
+
+                    w0 = datasrc[t1 + (v << 1) + 0];
+                    w1 = datasrc[t1 + (v << 1) + 1];
+                    datasrc[x] = modp_montymul(
+                        modp_montymul(w0, w1, p, p0i), R2, p, p0i); // NOTE(review): the extra multiplication by R2 presumably cancels the Montgomery factor of the first product -- confirm against modp_montymul
+                }
+                if (in_ntt != 0) {
+                    modp_iNTT2_ext(datasrc, fs + u, slen, datasrc, igm, logn, p, p0i); // de-NTTize word u of f in place (stride slen) for the CRT rebuild below
+                }
+
+                for (v = 0, x = gs + u; v < n; v ++, x += slen) { // same treatment for g
+                    datasrc[t1 + v] = datasrc[x];
+                }
+                if (in_ntt == 0) {
+                    modp_NTT2(datasrc, t1, datasrc, gm, logn, p, p0i);
+                }
+                for (v = 0, x = gd + u; v < hn; v ++, x += tlen) {
+                    uint w0, w1;
+
+                    w0 = datasrc[t1 + (v << 1) + 0];
+                    w1 = datasrc[t1 + (v << 1) + 1];
+                    datasrc[x] = modp_montymul(
+                        modp_montymul(w0, w1, p, p0i), R2, p, p0i);
+                }
+                if (in_ntt != 0) {
+                    modp_iNTT2_ext(datasrc, gs + u, slen, datasrc, igm, logn, p, p0i);
+                }
+
+                if (out_ntt == 0) {
+                    modp_iNTT2_ext(datasrc, fd + u, tlen, datasrc, igm, logn - 1, p, p0i); // outputs have hn = 2^(logn-1) coefficients
+                    modp_iNTT2_ext(datasrc, gd + u, tlen, datasrc, igm, logn - 1, p, p0i);
+                }
+            }
+
+            /*
+            * Since the fs and gs words have been de-NTTized, we can use the
+            * CRT to rebuild the values.
+            */
+            zint_rebuild_CRT(datasrc, fs, slen, slen, n, primes, 1, datasrc, gm); // the area starting at gm is handed over as the temporary buffer
+            zint_rebuild_CRT(datasrc, gs, slen, slen, n, primes, 1, datasrc, gm);
+
+            /*
+            * Remaining words: use modular reductions to extract the values.
+            */
+            for (u = slen; u < tlen; u ++) {
+                uint p, p0i, R2, Rx;
+                int v;
+                int x;
+
+                p = primes[u].p;
+                p0i = modp_ninv31(p);
+                R2 = modp_R2(p, p0i);
+                Rx = modp_Rx((uint)slen, p, p0i, R2);
+                modp_mkgm2(datasrc, gm, datasrc, igm, logn, primes[u].g, p, p0i); // gm/igm are rebuilt here, after their use as CRT scratch above
+                for (v = 0, x = fs; v < n; v ++, x += slen) { // reduce each rebuilt coefficient of f modulo p
+                    datasrc[t1 + v] = zint_mod_small_signed(datasrc, x, slen, p, p0i, R2, Rx);
+                }
+                modp_NTT2(datasrc, t1, datasrc, gm, logn, p, p0i);
+                for (v = 0, x = fd + u; v < hn; v ++, x += tlen) {
+                    uint w0, w1;
+
+                    w0 = datasrc[t1 + (v << 1) + 0];
+                    w1 = datasrc[t1 + (v << 1) + 1];
+                    datasrc[x] = modp_montymul(
+                        modp_montymul(w0, w1, p, p0i), R2, p, p0i);
+                }
+                for (v = 0, x = gs; v < n; v ++, x += slen) { // same treatment for g
+                    datasrc[t1 + v] = zint_mod_small_signed(datasrc, x, slen, p, p0i, R2, Rx);
+                }
+                modp_NTT2(datasrc, t1, datasrc, gm, logn, p, p0i);
+                for (v = 0, x = gd + u; v < hn; v ++, x += tlen) {
+                    uint w0, w1;
+
+                    w0 = datasrc[t1 + (v << 1) + 0];
+                    w1 = datasrc[t1 + (v << 1) + 1];
+                    datasrc[x] = modp_montymul(
+                        modp_montymul(w0, w1, p, p0i), R2, p, p0i);
+                }
+
+                if (out_ntt == 0) {
+                    modp_iNTT2_ext(datasrc, fd + u, tlen, datasrc, igm, logn - 1, p, p0i);
+                    modp_iNTT2_ext(datasrc, gd + u, tlen, datasrc, igm, logn - 1, p, p0i);
+                }
+            }
+        }
+
+        /*
+        * Build f and g at the requested depth, in RNS notation.
+        *
+        * The result is left in datasrc[] starting at offset 'data', at slen
+        * words per integer.
+        *
+        * Conditions:
+        *   0 <= depth <= logn
+        *
+        * out_ntt is a flag: when non-zero the result is left in NTT
+        * representation.
+        *
+        * Space use in data[]: enough room for any two successive values (f',
+        * g', f and g).
+        */
+        void make_fg(uint[] datasrc, int data, sbyte[] fsrc, int f, sbyte[] gsrc, int g,
+            uint logn, uint depth, int out_ntt)
+        {
+            int n = 1 << (int)logn;
+            FalconSmallPrime[] primes = this.PRIMES;
+            int fBase = data;
+            int gBase = fBase + n;
+            uint firstPrime = primes[0].p;
+            bool wantNtt = out_ntt != 0;
+
+            // Load the small f and g, reduced modulo the first prime.
+            for (int i = 0; i < n; i ++) {
+                datasrc[fBase + i] = modp_set(fsrc[f + i], firstPrime);
+                datasrc[gBase + i] = modp_set(gsrc[g + i], firstPrime);
+            }
+
+            if (depth == 0) {
+                // No reduction step to run; only the optional NTT remains.
+                if (wantNtt) {
+                    uint p = primes[0].p;
+                    uint p0i = modp_ninv31(p);
+                    int gm = gBase + n;
+                    int igm = gm + n;
+
+                    modp_mkgm2(datasrc, gm, datasrc, igm, logn, primes[0].g, p, p0i);
+                    modp_NTT2(datasrc, fBase, datasrc, gm, logn, p, p0i);
+                    modp_NTT2(datasrc, gBase, datasrc, gm, logn, p, p0i);
+                }
+                return;
+            }
+
+            for (uint level = 0; level < depth; level ++) {
+                // Only the very first step starts from non-NTT input.
+                int stepInNtt = (level == 0) ? 0 : 1;
+                // Intermediate steps stay in NTT; the last one obeys out_ntt.
+                int stepOutNtt = ((level + 1) < depth || wantNtt) ? 1 : 0;
+
+                make_fg_step(datasrc, data, logn - level, level,
+                    stepInNtt, stepOutNtt);
+            }
+        }
+
+        /*
+        * Solving the NTRU equation, deepest level: the resultants of f and g
+        * with X^N+1 are computed, then a binary GCD is run on them. The
+        * resulting F and G are left in tmp[].
+        *
+        * Layout of tmpsrc[] from 'tmp', with len = MAX_BL_SMALL[logn_top]:
+        * F, G, the resultant of f, the resultant of g (len words each),
+        * followed by scratch space.
+        *
+        * Returned value: 1 on success, 0 on error.
+        */
+        int solve_NTRU_deepest(uint logn_top,
+            sbyte[] fsrc, int f, sbyte[] gsrc, int g, uint[] tmpsrc, int tmp)
+        {
+            int words = MAX_BL_SMALL[logn_top];
+            FalconSmallPrime[] primes = this.PRIMES;
+            uint modulus = 12289;
+
+            int bigF = tmp;
+            int bigG = bigF + words;
+            int resF = bigG + words;
+            int resG = resF + words;
+            int scratch = resG + words;
+
+            make_fg(tmpsrc, resF, fsrc, f, gsrc, g, logn_top, logn_top, 0);
+
+            /*
+            * The two resultants are rebuilt as big integers with the CRT;
+            * they are always nonnegative.
+            */
+            zint_rebuild_CRT(tmpsrc, resF, words, words, 2, primes, 0, tmpsrc, scratch);
+
+            /*
+            * Binary GCD. zint_bezout() works only if both inputs are odd.
+            * Testing the result is fine: a failure means the NTRU equation
+            * cannot be solved and (f,g) will be abandoned anyway.
+            */
+            if (zint_bezout(tmpsrc, bigG, tmpsrc, bigF, tmpsrc, resF, tmpsrc, resG, words, tmpsrc, scratch) == 0) {
+                return 0;
+            }
+
+            /*
+            * Scale both values by the target q. A non-zero output of
+            * zint_mul_small() means the value no longer fits in the
+            * destination array, which implies rejection of (f,g).
+            */
+            if (zint_mul_small(tmpsrc, bigF, words, modulus) != 0) {
+                return 0;
+            }
+            if (zint_mul_small(tmpsrc, bigG, words, modulus) != 0) {
+                return 0;
+            }
+
+            return 1;
+        }
+
+        /*
+        * Solving the NTRU equation, intermediate level. Upon entry, the F and G
+        * from the previous level should be in the tmp[] array.
+        * This function MAY be invoked for the top-level (in which case depth = 0).
+        *
+        * Returned value: 1 on success, 0 on error.
+        */
+        int solve_NTRU_intermediate(uint logn_top,
+            sbyte[] fsrc, int f, sbyte[] gsrc, int g, uint depth, uint[] tmpsrc, int tmp)
+        {
+            /*
+            * In this function, 'logn' is the log2 of the degree for
+            * this step. If N = 2^logn, then:
+            *  - the F and G values already in fk->tmp (from the deeper
+            *    levels) have degree N/2;
+            *  - this function should return F and G of degree N.
+            */
+            uint logn;
+            int n, hn, slen, dlen, llen, rlen, FGlen, u;
+            int Fd, Gd;
+            int Ft, Gt;
+            int ft, gt, t1;
+            FalconFPR[] rt1; FalconFPR[] rt2; 
+            FalconFPR[] rt3; FalconFPR[] rt4; FalconFPR[] rt5;
+            int scale_fg, minbl_fg, maxbl_fg, maxbl_FG, scale_k;
+            int x, y;
+            int[] k;
+            FalconSmallPrime[] primes;
+
+            logn = logn_top - depth;
+            n = (int)1 << (int)logn;
+            hn = n >> 1;
+
+            /*
+            * slen = size for our input f and g; also size of the reduced
+            *        F and G we return (degree N)
+            *
+            * dlen = size of the F and G obtained from the deeper level
+            *        (degree N/2 or N/3)
+            *
+            * llen = size for intermediary F and G before reduction (degree N)
+            *
+            * We build our non-reduced F and G as two independent halves each,
+            * of degree N/2 (F = F0 + X*F1, G = G0 + X*G1).
+            */
+            slen = MAX_BL_SMALL[depth];
+            dlen = MAX_BL_SMALL[depth + 1];
+            llen = MAX_BL_LARGE[depth];
+            primes = this.PRIMES;
+
+            /*
+            * Fd and Gd are the F and G from the deeper level.
+            */
+            Fd = tmp;
+            Gd = Fd + dlen * hn;
+
+            /*
+            * Compute the input f and g for this level. Note that we get f
+            * and g in RNS + NTT representation.
+            */
+            ft = Gd + dlen * hn;
+            make_fg(tmpsrc, ft, fsrc, f, gsrc, g, logn_top, depth, 1);
+
+            /*
+            * Move the newly computed f and g to make room for our candidate
+            * F and G (unreduced).
+            */
+            Ft = tmp;
+            Gt = Ft + n * llen;
+            t1 = Gt + n * llen;
+            // memmove(t1, ft, 2 * n * slen * sizeof *ft);
+            Array.Copy(tmpsrc, ft, tmpsrc, t1, 2 * n * slen);
+            ft = t1;
+            gt = ft + slen * n;
+            t1 = gt + slen * n;
+
+            /*
+            * Move Fd and Gd _after_ f and g.
+            */
+            // memmove(t1, Fd, 2 * hn * dlen * sizeof *Fd);
+            Array.Copy(tmpsrc, Fd, tmpsrc, t1, 2 * hn * dlen);
+            Fd = t1;
+            Gd = Fd + hn * dlen;
+
+            /*
+            * We reduce Fd and Gd modulo all the small primes we will need,
+            * and store the values in Ft and Gt (only n/2 values in each).
+            */
+            for (u = 0; u < llen; u ++) {
+                uint p, p0i, R2, Rx;
+                int v;
+                int xs, ys, xd, yd;
+
+                p = primes[u].p;
+                p0i = modp_ninv31(p);
+                R2 = modp_R2(p, p0i);
+                Rx = modp_Rx((uint)dlen, p, p0i, R2);
+                for (v = 0, xs = Fd, ys = Gd, xd = Ft + u, yd = Gt + u;
+                    v < hn;
+                    v ++, xs += dlen, ys += dlen, xd += llen, yd += llen)
+                {
+                    tmpsrc[xd] = zint_mod_small_signed(tmpsrc, xs, dlen, p, p0i, R2, Rx);
+                    tmpsrc[yd] = zint_mod_small_signed(tmpsrc, ys, dlen, p, p0i, R2, Rx);
+                }
+            }
+
+            /*
+            * We do not need Fd and Gd after that point.
+            */
+
+            /*
+            * Compute our F and G modulo sufficiently many small primes.
+            */
+            for (u = 0; u < llen; u ++) {
+                uint p, p0i, R2;
+                int gm, igm;
+                int fx, gx;
+                int Fp, Gp;
+                int v;
+
+                /*
+                * All computations are done modulo p.
+                */
+                p = primes[u].p;
+                p0i = modp_ninv31(p);
+                R2 = modp_R2(p, p0i);
+
+                /*
+                * If we processed slen words, then f and g have been
+                * de-NTTized, and are in RNS; we can rebuild them.
+                */
+                if (u == slen) {
+                    zint_rebuild_CRT(tmpsrc, ft, slen, slen, n, primes, 1, tmpsrc, t1);
+                    zint_rebuild_CRT(tmpsrc, gt, slen, slen, n, primes, 1, tmpsrc, t1);
+                }
+
+                gm = t1;
+                igm = gm + n;
+                fx = igm + n;
+                gx = fx + n;
+
+                modp_mkgm2(tmpsrc, gm, tmpsrc, igm, logn, primes[u].g, p, p0i);
+
+                if (u < slen) {
+                    for (v = 0, x = ft + u, y = gt + u;
+                        v < n; v ++, x += slen, y += slen)
+                    {
+                        tmpsrc[fx+v] = tmpsrc[x];
+                        tmpsrc[gx+v] = tmpsrc[y];
+                    }
+                    modp_iNTT2_ext(tmpsrc, ft + u, slen, tmpsrc, igm, logn, p, p0i);
+                    modp_iNTT2_ext(tmpsrc, gt + u, slen, tmpsrc, igm, logn, p, p0i);
+                } else {
+                    uint Rx;
+
+                    Rx = modp_Rx((uint)slen, p, p0i, R2);
+                    for (v = 0, x = ft, y = gt;
+                        v < n; v ++, x += slen, y += slen)
+                    {
+                        tmpsrc[fx+v] = zint_mod_small_signed(tmpsrc, x, slen,
+                            p, p0i, R2, Rx);
+                        tmpsrc[gx+v] = zint_mod_small_signed(tmpsrc, y, slen,
+                            p, p0i, R2, Rx);
+                    }
+                    modp_NTT2(tmpsrc, fx, tmpsrc, gm, logn, p, p0i);
+                    modp_NTT2(tmpsrc, gx, tmpsrc, gm, logn, p, p0i);
+                }
+
+                /*
+                * Get F' and G' modulo p and in NTT representation
+                * (they have degree n/2). These values were computed in
+                * a previous step, and stored in Ft and Gt.
+                */
+                Fp = gx + n;
+                Gp = Fp + hn;
+                for (v = 0, x = Ft + u, y = Gt + u;
+                    v < hn; v ++, x += llen, y += llen)
+                {
+                    tmpsrc[Fp+v] = tmpsrc[x];
+                    tmpsrc[Gp+v] = tmpsrc[y];
+                }
+                modp_NTT2(tmpsrc, Fp, tmpsrc, gm, logn - 1, p, p0i);
+                modp_NTT2(tmpsrc, Gp, tmpsrc, gm, logn - 1, p, p0i);
+
+                /*
+                * Compute our F and G modulo p.
+                *
+                * General case:
+                *
+                *   we divide degree by d = 2 or 3
+                *   f'(x^d) = N(f)(x^d) = f * adj(f)
+                *   g'(x^d) = N(g)(x^d) = g * adj(g)
+                *   f'*G' - g'*F' = q
+                *   F = F'(x^d) * adj(g)
+                *   G = G'(x^d) * adj(f)
+                *
+                * We compute things in the NTT. We group roots of phi
+                * such that all roots x in a group share the same x^d.
+                * If the roots in a group are x_1, x_2... x_d, then:
+                *
+                *   N(f)(x_1^d) = f(x_1)*f(x_2)*...*f(x_d)
+                *
+                * Thus, we have:
+                *
+                *   G(x_1) = f(x_2)*f(x_3)*...*f(x_d)*G'(x_1^d)
+                *   G(x_2) = f(x_1)*f(x_3)*...*f(x_d)*G'(x_1^d)
+                *   ...
+                *   G(x_d) = f(x_1)*f(x_2)*...*f(x_{d-1})*G'(x_1^d)
+                *
+                * In all cases, we can thus compute F and G in NTT
+                * representation by a few simple multiplications.
+                * Moreover, in our chosen NTT representation, roots
+                * from the same group are consecutive in RAM.
+                */
+                for (v = 0, x = Ft + u, y = Gt + u; v < hn;
+                    v ++, x += (llen << 1), y += (llen << 1))
+                {
+                    uint ftA, ftB, gtA, gtB;
+                    uint mFp, mGp;
+
+                    ftA = tmpsrc[fx + (v << 1) + 0];
+                    ftB = tmpsrc[fx + (v << 1) + 1];
+                    gtA = tmpsrc[gx + (v << 1) + 0];
+                    gtB = tmpsrc[gx + (v << 1) + 1];
+                    mFp = modp_montymul(tmpsrc[Fp+v], R2, p, p0i);
+                    mGp = modp_montymul(tmpsrc[Gp+v], R2, p, p0i);
+                    tmpsrc[x+0] = modp_montymul(gtB, mFp, p, p0i);
+                    tmpsrc[x+llen] = modp_montymul(gtA, mFp, p, p0i);
+                    tmpsrc[y+0] = modp_montymul(ftB, mGp, p, p0i);
+                    tmpsrc[y+llen] = modp_montymul(ftA, mGp, p, p0i);
+                }
+                modp_iNTT2_ext(tmpsrc, Ft + u, llen, tmpsrc, igm, logn, p, p0i);
+                modp_iNTT2_ext(tmpsrc, Gt + u, llen, tmpsrc, igm, logn, p, p0i);
+            }
+
+            /*
+            * Rebuild F and G with the CRT.
+            */
+            zint_rebuild_CRT(tmpsrc, Ft, llen, llen, n, primes, 1, tmpsrc, t1);
+            zint_rebuild_CRT(tmpsrc, Gt, llen, llen, n, primes, 1, tmpsrc, t1);
+
+            /*
+            * At that point, Ft, Gt, ft and gt are consecutive in RAM (in that
+            * order).
+            */
+
+            /*
+            * Apply Babai reduction to bring back F and G to size slen.
+            *
+            * We use the FFT to compute successive approximations of the
+            * reduction coefficient. We first isolate the top bits of
+            * the coefficients of f and g, and convert them to floating
+            * point; with the FFT, we compute adj(f), adj(g), and
+            * 1/(f*adj(f)+g*adj(g)).
+            *
+            * Then, we repeatedly apply the following:
+            *
+            *   - Get the top bits of the coefficients of F and G into
+            *     floating point, and use the FFT to compute:
+            *        (F*adj(f)+G*adj(g))/(f*adj(f)+g*adj(g))
+            *
+            *   - Convert back that value into normal representation, and
+            *     round it to the nearest integers, yielding a polynomial k.
+            *     Proper scaling is applied to f, g, F and G so that the
+            *     coefficients fit on 32 bits (signed).
+            *
+            *   - Subtract k*f from F and k*g from G.
+            *
+            * Under normal conditions, this process reduces the size of F
+            * and G by some bits at each iteration. For constant-time
+            * operation, we do not want to measure the actual length of
+            * F and G; instead, we do the following:
+            *
+            *   - f and g are converted to floating-point, with some scaling
+            *     if necessary to keep values in the representable range.
+            *
+            *   - For each iteration, we _assume_ a maximum size for F and G,
+            *     and use the values at that size. If we overreach, then
+            *     we get zeros, which is harmless: the resulting coefficients
+            *     of k will be 0 and the value won't be reduced.
+            *
+            *   - We conservatively assume that F and G will be reduced by
+            *     at least 25 bits at each iteration.
+            *
+            * Even when reaching the bottom of the reduction, reduction
+            * coefficient will remain low. If it goes out-of-range, then
+            * something wrong occurred and the whole NTRU solving fails.
+            */
+
+            /*
+            * Memory layout:
+            *  - We need to compute and keep adj(f), adj(g), and
+            *    1/(f*adj(f)+g*adj(g)) (sizes N, N and N/2 fp numbers,
+            *    respectively).
+            *  - At each iteration we need two extra fp buffers (N fp values),
+            *    and produce a k (N 32-bit words). k will be shared with one
+            *    of the fp buffers.
+            *  - To compute k*f and k*g efficiently (with the NTT), we need
+            *    some extra room; we reuse the space of the temporary buffers.
+            *
+            * Arrays of 'fpr' are obtained from the temporary array itself.
+            * We ensure that the base is at a properly aligned offset (the
+            * source array tmp[] is supposed to be already aligned).
+            */
+
+            // rt3 = align_fpr(tmp, t1);
+            rt1 = new FalconFPR[n];
+            rt2 = new FalconFPR[n];
+            rt3 = new FalconFPR[n];
+            rt4 = new FalconFPR[n];
+            rt5 = new FalconFPR[n >> 1];
+            k = new int[n];
+            /*
+            * Get f and g into rt3 and rt4 as floating-point approximations.
+            *
+            * We need to "scale down" the floating-point representation of
+            * coefficients when they are too big. We want to keep the value
+            * below 2^310 or so. Thus, when values are larger than 10 words,
+            * we consider only the top 10 words. Array lengths have been
+            * computed so that average maximum length will fall in the
+            * middle or the upper half of these top 10 words.
+            */
+            rlen = (slen > 10) ? 10 : slen;
+            poly_big_to_fp(rt3, 0, tmpsrc, ft + slen - rlen, rlen, slen, logn);
+            poly_big_to_fp(rt4, 0, tmpsrc, gt + slen - rlen, rlen, slen, logn);
+
+            /*
+            * Values in rt3 and rt4 are downscaled by 2^(scale_fg).
+            */
+            scale_fg = 31 * (int)(slen - rlen);
+
+            /*
+            * Estimated boundaries for the maximum size (in bits) of the
+            * coefficients of (f,g). We use the measured average, and
+            * allow for a deviation of at most six times the standard
+            * deviation.
+            */
+            minbl_fg = BITLENGTH_avg[depth] - 6 * BITLENGTH_std[depth];
+            maxbl_fg = BITLENGTH_avg[depth] + 6 * BITLENGTH_std[depth];
+
+            /*
+            * Compute 1/(f*adj(f)+g*adj(g)) in rt5. We also keep adj(f)
+            * and adj(g) in rt3 and rt4, respectively.
+            */
+            this.ffte.FFT(rt3, 0, logn);
+            this.ffte.FFT(rt4, 0, logn);
+            this.ffte.poly_invnorm2_fft(rt5, 0, rt3, 0, rt4, 0, logn);
+            this.ffte.poly_adj_fft(rt3, 0, logn);
+            this.ffte.poly_adj_fft(rt4, 0, logn);
+
+            /*
+            * Reduce F and G repeatedly.
+            *
+            * The expected maximum bit length of coefficients of F and G
+            * is kept in maxbl_FG, with the corresponding word length in
+            * FGlen.
+            */
+            FGlen = llen;
+            maxbl_FG = 31 * (int)llen;
+
+            /*
+            * Each reduction operation computes the reduction polynomial
+            * "k". We need that polynomial to have coefficients that fit
+            * on 32-bit signed integers, with some scaling; thus, we use
+            * a descending sequence of scaling values, down to zero.
+            *
+            * The size of the coefficients of k is (roughly) the difference
+            * between the size of the coefficients of (F,G) and the size
+            * of the coefficients of (f,g). Thus, the maximum size of the
+            * coefficients of k is, at the start, maxbl_FG - minbl_fg;
+            * this is our starting scale value for k.
+            *
+            * We need to estimate the size of (F,G) during the execution of
+            * the algorithm; we are allowed some overestimation but not too
+            * much (poly_big_to_fp() uses a 310-bit window). Generally
+            * speaking, after applying a reduction with k scaled to
+            * scale_k, the size of (F,G) will be size(f,g) + scale_k + dd,
+            * where 'dd' is a few bits to account for the fact that the
+            * reduction is never perfect (intuitively, dd is on the order
+            * of sqrt(N), so at most 5 bits; we here allow for 10 extra
+            * bits).
+            *
+            * The size of (f,g) is not known exactly, but maxbl_fg is an
+            * upper bound.
+            */
+            scale_k = maxbl_FG - minbl_fg;
+
+            for (;;) {
+                int scale_FG, dc, new_maxbl_FG;
+                uint scl, sch;
+                FalconFPR pdc, pt;
+
+                /*
+                * Convert current F and G into floating-point. We apply
+                * scaling if the current length is more than 10 words.
+                */
+                rlen = (FGlen > 10) ? 10 : FGlen;
+                scale_FG = 31 * (int)(FGlen - rlen);
+                poly_big_to_fp(rt1, 0, tmpsrc, Ft + FGlen - rlen, rlen, llen, logn);
+                poly_big_to_fp(rt2, 0, tmpsrc, Gt + FGlen - rlen, rlen, llen, logn);
+
+                /*
+                * Compute (F*adj(f)+G*adj(g))/(f*adj(f)+g*adj(g)) in rt2.
+                */
+                this.ffte.FFT(rt1, 0, logn);
+                this.ffte.FFT(rt2, 0, logn);
+                this.ffte.poly_mul_fft(rt1, 0, rt3, 0, logn);
+                this.ffte.poly_mul_fft(rt2, 0, rt4, 0, logn);
+                this.ffte.poly_add(rt2, 0, rt1, 0, logn);
+                this.ffte.poly_mul_autoadj_fft(rt2, 0, rt5, 0, logn);
+                this.ffte.iFFT(rt2, 0, logn);
+
+                /*
+                * (f,g) are scaled by 'scale_fg', meaning that the
+                * numbers in rt3/rt4 should be multiplied by 2^(scale_fg)
+                * to have their true mathematical value.
+                *
+                * (F,G) are similarly scaled by 'scale_FG'. Therefore,
+                * the value we computed in rt2 is scaled by
+                * 'scale_FG-scale_fg'.
+                *
+                * We want that value to be scaled by 'scale_k', hence we
+                * apply a corrective scaling. After scaling, the values
+                * should fit in -2^31-1..+2^31-1.
+                */
+                dc = scale_k - scale_FG + scale_fg;
+
+                /*
+                * We will need to multiply values by 2^(-dc). The value
+                * 'dc' is not secret, so we can compute 2^(-dc) with a
+                * non-constant-time process.
+                * (We could use ldexp(), but we prefer to avoid any
+                * dependency on libm. When using FP emulation, we could
+                * use our this.fpre.fpr_ldexp(), which is constant-time.)
+                */
+                if (dc < 0) {
+                    dc = -dc;
+                    pt = this.fpre.fpr_two;
+                } else {
+                    pt = this.fpre.fpr_onehalf;
+                }
+                pdc = this.fpre.fpr_one;
+                while (dc != 0) {
+                    if ((dc & 1) != 0) {
+                        pdc = this.fpre.fpr_mul(pdc, pt);
+                    }
+                    dc >>= 1;
+                    pt = this.fpre.fpr_sqr(pt);
+                }
+
+                for (u = 0; u < n; u ++) {
+                    FalconFPR xv;
+
+                    xv = this.fpre.fpr_mul(rt2[u], pdc);
+
+                    /*
+                    * Sometimes the values can be out-of-bounds if
+                    * the algorithm fails; we must not call
+                    * this.fpre.fpr_rint() (and cast to int) if the value
+                    * is not in-bounds. Note that the test does not
+                    * break constant-time discipline, since any
+                    * failure here implies that we discard the current
+                    * secret key (f,g).
+                    */
+                    if (!this.fpre.fpr_lt(this.fpre.fpr_mtwo31m1, xv)
+                        || !this.fpre.fpr_lt(xv, this.fpre.fpr_ptwo31m1))
+                    {
+                        return 0;
+                    }
+                    k[u] = (int)this.fpre.fpr_rint(xv);
+                }
+
+                /*
+                * Values in k[] are integers. They really are scaled
+                * down by maxbl_FG - minbl_fg bits.
+                *
+                * If we are at low depth, then we use the NTT to
+                * compute k*f and k*g.
+                */
+                sch = (uint)(scale_k / 31);
+                scl = (uint)(scale_k % 31);
+                if (depth <= DEPTH_INT_FG) {
+                    poly_sub_scaled_ntt(tmpsrc, Ft, FGlen, llen, tmpsrc, ft, slen, slen,
+                        k, 0, sch, scl, logn, tmpsrc, t1);
+                    poly_sub_scaled_ntt(tmpsrc, Gt, FGlen, llen, tmpsrc, gt, slen, slen,
+                        k, 0, sch, scl, logn, tmpsrc, t1);
+                } else {
+                    poly_sub_scaled(tmpsrc, Ft, FGlen, llen, tmpsrc, ft, slen, slen,
+                        k, 0, sch, scl, logn);
+                    poly_sub_scaled(tmpsrc, Gt, FGlen, llen, tmpsrc, gt, slen, slen,
+                        k, 0, sch, scl, logn);
+                }
+
+                /*
+                * We compute the new maximum size of (F,G), assuming that
+                * (f,g) has _maximal_ length (i.e. that reduction is
+                * "late" instead of "early"). We also adjust FGlen
+                * accordingly.
+                */
+                new_maxbl_FG = scale_k + maxbl_fg + 10;
+                if (new_maxbl_FG < maxbl_FG) {
+                    maxbl_FG = new_maxbl_FG;
+                    if ((int)FGlen * 31 >= maxbl_FG + 31) {
+                        FGlen --;
+                    }
+                }
+
+                /*
+                * We suppose that scaling down achieves a reduction by
+                * at least 25 bits per iteration. We stop when we have
+                * done the loop with an unscaled k.
+                */
+                if (scale_k <= 0) {
+                    break;
+                }
+                scale_k -= 25;
+                if (scale_k < 0) {
+                    scale_k = 0;
+                }
+            }
+
+            /*
+            * If (F,G) length was lowered below 'slen', then we must take
+            * care to re-extend the sign.
+            */
+            if (FGlen < slen) {
+                for (u = 0; u < n; u ++, Ft += llen, Gt += llen) {
+                    int v;
+                    uint sw;
+
+                    sw = ((uint)-(tmpsrc[Ft+FGlen - 1] >> 30) >> 1);
+                    for (v = FGlen; v < slen; v ++) {
+                        tmpsrc[Ft+v] = sw;
+                    }
+                    sw = ((uint)-(tmpsrc[Gt+FGlen - 1] >> 30) >> 1);
+                    for (v = FGlen; v < slen; v ++) {
+                        tmpsrc[Gt+v] = sw;
+                    }
+                }
+            }
+
+            /*
+            * Compress encoding of all values to 'slen' words (this is the
+            * expected output format).
+            */
+            for (u = 0, x = tmp, y = tmp;
+                u < (n << 1); u ++, x += slen, y += llen)
+            {
+                // memmove(x, y, slen * sizeof *y);
+                Array.Copy(tmpsrc, y, tmpsrc, x, slen);
+            }
+            return 1;
+        }
+
+        /*
+        * Solving the NTRU equation, binary case, depth = 1. Upon entry, the
+        * F and G from the previous level should be in the tmp[] array.
+        *
+        * Returned value: 1 on success, 0 on error.
+        */
+        int solve_NTRU_binary_depth1(uint logn_top,
+            sbyte[] fsrc, int f, sbyte[] gsrc, int g, uint[] tmpsrc, int tmp)
+        {
+            /*
+            * Rebuilds the unreduced (F,G) for depth 1 from the deeper-level
+            * solution, then reduces it against (f,g) with a single
+            * floating-point Babai round.
+            *
+            * Parameters:
+            *   logn_top     log2 of the top-level degree (n_top = 1 << logn_top);
+            *                this function works at degree n = n_top >> 1.
+            *   fsrc, f      array and start offset of the top-level f
+            *                (n_top entries are read starting at fsrc[f]).
+            *   gsrc, g      array and start offset of the top-level g
+            *                (n_top entries are read starting at gsrc[g]).
+            *   tmpsrc, tmp  work array and base offset. On entry, the deeper
+            *                level's F and G start at tmp (hn values of dlen
+            *                words each, F first, then G). On success, the
+            *                reduced F is written at tmpsrc[tmp + u] and the
+            *                reduced G at tmpsrc[tmp + n + u] for u < n, one
+            *                word per coefficient.
+            *
+            * Returns 1 on success, or 0 if a coefficient of the reduction
+            * polynomial k falls outside the range checked before rounding.
+            *
+            * The first half of this function is a copy of the corresponding
+            * part in solve_NTRU_intermediate(), for the reconstruction of
+            * the unreduced F and G. The second half (Babai reduction) is
+            * done differently, because the unreduced F and G fit in 53 bits
+            * of precision, allowing a much simpler process with lower RAM
+            * usage.
+            */
+            uint depth, logn;
+            int n_top, n, hn, slen, dlen, llen, u;
+            int Fd, Gd, Ft, Gt;
+            int ft, gt, t1;
+            FalconFPR[] rt1; FalconFPR[] rt2; FalconFPR[] rt3; 
+            FalconFPR[] rt4; FalconFPR[] rt5; FalconFPR[] rt6;
+            int x, y;
+
+            depth = 1;
+            n_top = (int)1 << (int)logn_top;
+            logn = logn_top - depth;
+            n = (int)1 << (int)logn;
+            hn = n >> 1;
+
+            /*
+            * Equations are:
+            *
+            *   f' = f0^2 - X^2*f1^2
+            *   g' = g0^2 - X^2*g1^2
+            *   F' and G' are a solution to f'G' - g'F' = q (from deeper levels)
+            *   F = F'*(g0 - X*g1)
+            *   G = G'*(f0 - X*f1)
+            *
+            * f0, f1, g0, g1, f', g', F' and G' are all "compressed" to
+            * degree N/2 (their odd-indexed coefficients are all zero).
+            */
+
+            /*
+            * slen = size for our input f and g; also size of the reduced
+            *        F and G we return (degree N)
+            *
+            * dlen = size of the F and G obtained from the deeper level
+            *        (degree N/2)
+            *
+            * llen = size for intermediary F and G before reduction (degree N)
+            *
+            * All three sizes are counted in words of tmpsrc[] per coefficient.
+            *
+            * We build our non-reduced F and G as two independent halves each,
+            * of degree N/2 (F = F0 + X*F1, G = G0 + X*G1).
+            */
+            slen = MAX_BL_SMALL[depth];
+            dlen = MAX_BL_SMALL[depth + 1];
+            llen = MAX_BL_LARGE[depth];
+
+            /*
+            * Fd and Gd are the F and G from the deeper level. Ft and Gt
+            * are the destination arrays for the unreduced F and G.
+            * All four are offsets into tmpsrc[], laid out back to back
+            * starting at tmp.
+            */
+            Fd = tmp;
+            Gd = Fd + dlen * hn;
+            Ft = Gd + dlen * hn;
+            Gt = Ft + llen * n;
+
+            /*
+            * We reduce Fd and Gd modulo all the small primes we will need,
+            * and store the values in Ft and Gt.
+            * The residue for prime number u of coefficient v is stored at
+            * word u of the v-th llen-word slot; only the first hn slots of
+            * Ft and Gt are filled here.
+            */
+            for (u = 0; u < llen; u ++) {
+                uint p, p0i, R2, Rx;
+                int v;
+                int xs, ys;
+                int xd, yd;
+
+                p = this.PRIMES[u].p;
+                p0i = modp_ninv31(p);
+                R2 = modp_R2(p, p0i);
+                Rx = modp_Rx((uint)dlen, p, p0i, R2);
+                for (v = 0, xs = Fd, ys = Gd, xd = Ft + u, yd = Gt + u;
+                    v < hn;
+                    v ++, xs += dlen, ys += dlen, xd += llen, yd += llen)
+                {
+                    tmpsrc[xd] = zint_mod_small_signed(tmpsrc, xs, dlen, p, p0i, R2, Rx);
+                    tmpsrc[yd] = zint_mod_small_signed(tmpsrc, ys, dlen, p, p0i, R2, Rx);
+                }
+            }
+
+            /*
+            * Now Fd and Gd are not needed anymore; we can squeeze them out.
+            * The copies below move data towards lower offsets within the
+            * same array (source and destination ranges may overlap); they
+            * stand in for the memmove() calls quoted next to them.
+            */
+            // memmove(tmp, Ft, llen * n * sizeof(uint));
+            Array.Copy(tmpsrc, Ft, tmpsrc, tmp, llen * n);
+            Ft = tmp;
+            // memmove(Ft + llen * n, Gt, llen * n * sizeof(uint));
+            Array.Copy(tmpsrc, Gt, tmpsrc, Ft + llen * n, llen * n);
+            Gt = Ft + llen * n;
+            ft = Gt + llen * n;
+            gt = ft + slen * n;
+
+            t1 = gt + slen * n;
+
+            /*
+            * Compute our F and G modulo sufficiently many small primes.
+            */
+            for (u = 0; u < llen; u ++) {
+                uint p, p0i, R2;
+                int gm, igm;
+                int fx, gx;
+                int Fp, Gp;
+                uint e;
+                int v;
+
+                /*
+                * All computations are done modulo p.
+                */
+                p = this.PRIMES[u].p;
+                p0i = modp_ninv31(p);
+                R2 = modp_R2(p, p0i);
+
+                /*
+                * We recompute things from the source f and g, of full
+                * degree. However, we will need only the n first elements
+                * of the inverse NTT table (igm); the call to modp_mkgm2()
+                * below will fill n_top elements in igm[] (thus overflowing
+                * into fx[]) but later code will overwrite these extra
+                * elements.
+                */
+                gm = t1;
+                igm = gm + n_top;
+                fx = igm + n;
+                gx = fx + n_top;
+                modp_mkgm2(tmpsrc, gm, tmpsrc, igm, logn_top, this.PRIMES[u].g, p, p0i);
+
+                /*
+                * Set fx and gx to f and g modulo p, respectively.
+                * (This also overwrites the part of the igm table that
+                * spilled into fx.)
+                */
+                for (v = 0; v < n_top; v ++) {
+                    tmpsrc[fx+v] = modp_set(fsrc[f+v], p);
+                    tmpsrc[gx+v] = modp_set(gsrc[g+v], p);
+                }
+
+                /*
+                * Convert to NTT and compute our f and g.
+                * With depth = 1 the reduction loop below runs exactly once
+                * (e = logn_top only), since logn = logn_top - 1.
+                */
+                modp_NTT2(tmpsrc, fx, tmpsrc, gm, logn_top, p, p0i);
+                modp_NTT2(tmpsrc, gx, tmpsrc, gm, logn_top, p, p0i);
+                for (e = logn_top; e > logn; e --) {
+                    modp_poly_rec_res(tmpsrc, fx, e, p, p0i, R2);
+                    modp_poly_rec_res(tmpsrc, gx, e, p, p0i, R2);
+                }
+
+                /*
+                * From that point onward, we only need tables for
+                * degree n, so we can save some space.
+                * After the three copies the layout from gm is:
+                * gm (n words), igm (n), fx (n), gx (n).
+                */
+                if (depth > 0) { /* always true */
+                    // memmove(gm + n, igm, n * sizeof *igm);
+                    Array.Copy(tmpsrc, igm, tmpsrc, gm + n, n);
+                    igm = gm + n;
+                    // memmove(igm + n, fx, n * sizeof *ft);
+                    Array.Copy(tmpsrc, fx, tmpsrc, igm + n, n);
+                    fx = igm + n;
+                    // memmove(fx + n, gx, n * sizeof *gt);
+                    Array.Copy(tmpsrc, gx, tmpsrc, fx + n, n);
+                    gx = fx + n;
+                }
+
+                /*
+                * Get F' and G' modulo p and in NTT representation
+                * (they have degree n/2). These values were computed
+                * in a previous step, and stored in Ft and Gt.
+                */
+                Fp = gx + n;
+                Gp = Fp + hn;
+                for (v = 0, x = Ft + u, y = Gt + u;
+                    v < hn; v ++, x += llen, y += llen)
+                {
+                    tmpsrc[Fp+v] = tmpsrc[x];
+                    tmpsrc[Gp+v] = tmpsrc[y];
+                }
+                modp_NTT2(tmpsrc, Fp, tmpsrc, gm, logn - 1, p, p0i);
+                modp_NTT2(tmpsrc, Gp, tmpsrc, gm, logn - 1, p, p0i);
+
+                /*
+                * Compute our F and G modulo p.
+                *
+                * Equations are:
+                *
+                *   f'(x^2) = N(f)(x^2) = f * adj(f)
+                *   g'(x^2) = N(g)(x^2) = g * adj(g)
+                *
+                *   f'*G' - g'*F' = q
+                *
+                *   F = F'(x^2) * adj(g)
+                *   G = G'(x^2) * adj(f)
+                *
+                * The NTT representation of f is f(w) for all w which
+                * are roots of phi. In the binary case, as well as in
+                * the ternary case for all depth except the deepest,
+                * these roots can be grouped in pairs (w,-w), and we
+                * then have:
+                *
+                *   f(w) = adj(f)(-w)
+                *   f(-w) = adj(f)(w)
+                *
+                * and w^2 is then a root for phi at the half-degree.
+                *
+                * At the deepest level in the ternary case, this still
+                * holds, in the following sense: the roots of x^2-x+1
+                * are (w,-w^2) (for w^3 = -1, and w != -1), and we
+                * have:
+                *
+                *   f(w) = adj(f)(-w^2)
+                *   f(-w^2) = adj(f)(w)
+                *
+                * In all cases, we can thus compute F and G in NTT
+                * representation by a few simple multiplications.
+                * Moreover, the two roots for each pair are consecutive
+                * in our bit-reversal encoding.
+                *
+                * Each loop iteration below therefore handles one pair:
+                * it reads entries 2v and 2v+1 of fx/gx and writes two
+                * consecutive llen-word slots of Ft and Gt (word u of each),
+                * crossing the A/B entries as the adjoint relation requires.
+                */
+                for (v = 0, x = Ft + u, y = Gt + u;
+                    v < hn; v ++, x += (llen << 1), y += (llen << 1))
+                {
+                    uint ftA, ftB, gtA, gtB;
+                    uint mFp, mGp;
+
+                    ftA = tmpsrc[fx + (v << 1) + 0];
+                    ftB = tmpsrc[fx + (v << 1) + 1];
+                    gtA = tmpsrc[gx + (v << 1) + 0];
+                    gtB = tmpsrc[gx + (v << 1) + 1];
+                    mFp = modp_montymul(tmpsrc[Fp+v], R2, p, p0i);
+                    mGp = modp_montymul(tmpsrc[Gp+v], R2, p, p0i);
+                    tmpsrc[x+0] = modp_montymul(gtB, mFp, p, p0i);
+                    tmpsrc[x+llen] = modp_montymul(gtA, mFp, p, p0i);
+                    tmpsrc[y+0] = modp_montymul(ftB, mGp, p, p0i);
+                    tmpsrc[y+llen] = modp_montymul(ftA, mGp, p, p0i);
+                }
+                modp_iNTT2_ext(tmpsrc, Ft + u, llen, tmpsrc, igm, logn, p, p0i);
+                modp_iNTT2_ext(tmpsrc, Gt + u, llen, tmpsrc, igm, logn, p, p0i);
+
+                /*
+                * Also save ft and gt (only up to size slen).
+                * The residue for this prime goes to word u of each
+                * slen-word coefficient slot.
+                */
+                if (u < slen) {
+                    modp_iNTT2(tmpsrc, fx, tmpsrc, igm, logn, p, p0i);
+                    modp_iNTT2(tmpsrc, gx, tmpsrc, igm, logn, p, p0i);
+                    for (v = 0, x = ft + u, y = gt + u;
+                        v < n; v ++, x += slen, y += slen)
+                    {
+                        tmpsrc[x] = tmpsrc[fx+v];
+                        tmpsrc[y] = tmpsrc[gx+v];
+                    }
+                }
+            }
+
+            /*
+            * Rebuild f, g, F and G with the CRT. Note that the elements of F
+            * and G are consecutive, and thus can be rebuilt in a single
+            * loop; similarly, the elements of f and g are consecutive.
+            * (Hence the count of n << 1 values passed to each call.)
+            */
+            zint_rebuild_CRT(tmpsrc, Ft, llen, llen, n << 1, this.PRIMES, 1, tmpsrc, t1);
+            zint_rebuild_CRT(tmpsrc, ft, slen, slen, n << 1, this.PRIMES, 1, tmpsrc, t1);
+
+            /*
+            * Here starts the Babai reduction, specialized for depth = 1.
+            *
+            * Candidates F and G (from Ft and Gt), and base f and g (ft and gt),
+            * are converted to floating point. There is no scaling, and a
+            * single pass is sufficient.
+            */
+
+            /*
+            * Convert F and G into floating point (rt1 and rt2).
+            * In this port rt1 and rt2 are freshly allocated arrays rather
+            * than regions carved out of the temporary buffer.
+            */
+            rt1 = new FalconFPR[n];
+            rt2 = new FalconFPR[n];
+            poly_big_to_fp(rt1, 0, tmpsrc, Ft, llen, llen, logn);
+            poly_big_to_fp(rt2, 0, tmpsrc, Gt, llen, llen, logn);
+
+            /*
+            * Integer representation of F and G is no longer needed, we
+            * can remove it.
+            * f and g (2 * slen * n words in total) are moved down to the
+            * start of the work area.
+            */
+            // memmove(tmp, ft, 2 * slen * n * sizeof *ft);
+            Array.Copy(tmpsrc, ft, tmpsrc, tmp, 2 * slen * n);
+            ft = tmp;
+            gt = ft + slen * n;
+            rt3 = new FalconFPR[n];
+            rt4 = new FalconFPR[n];
+
+            /*
+            * Convert f and g into floating point (rt3 and rt4).
+            */
+            poly_big_to_fp(rt3, 0, tmpsrc, ft, slen, slen, logn);
+            poly_big_to_fp(rt4, 0, tmpsrc, gt, slen, slen, logn);
+
+            /*
+            * We now have:
+            *   rt1 = F
+            *   rt2 = G
+            *   rt3 = f
+            *   rt4 = g
+            * Each of them is a separate n-element array here (they are not
+            * consecutive in RAM in this port). We convert all of them to FFT.
+            */
+            this.ffte.FFT(rt1, 0, logn);
+            this.ffte.FFT(rt2, 0, logn);
+            this.ffte.FFT(rt3, 0, logn);
+            this.ffte.FFT(rt4, 0, logn);
+
+            /*
+            * Compute:
+            *   rt5 = F*adj(f) + G*adj(g)
+            *   rt6 = 1 / (f*adj(f) + g*adj(g))
+            * (Note that rt6 is half-length.)
+            * NOTE(review): rt6 is nevertheless allocated with n entries
+            * below; presumably only the first n/2 are used - confirm
+            * against poly_invnorm2_fft() and poly_mul_autoadj_fft().
+            */
+            rt5 = new FalconFPR[n];
+            rt6 = new FalconFPR[n];
+            this.ffte.poly_add_muladj_fft(rt5, 0, rt1, 0, rt2, 0, rt3, 0, rt4, 0, logn);
+            this.ffte.poly_invnorm2_fft(rt6, 0, rt3, 0, rt4, 0, logn);
+
+            /*
+            * Compute:
+            *   rt5 = (F*adj(f)+G*adj(g)) / (f*adj(f)+g*adj(g))
+            */
+            this.ffte.poly_mul_autoadj_fft(rt5, 0, rt6, 0, logn);
+
+            /*
+            * Compute k as the rounded version of rt5. Check that none of
+            * the values is larger than 2^63-1 (in absolute value)
+            * because that would make the this.fpre.fpr_rint() do something undefined;
+            * note that any out-of-bounds value here implies a failure and
+            * (f,g) will be discarded, so we can make a simple test.
+            *
+            * rt5 is taken out of FFT representation for the rounding, and
+            * converted back to FFT afterwards so that k can be multiplied
+            * with f and g below.
+            */
+            this.ffte.iFFT(rt5, 0, logn);
+            for (u = 0; u < n; u ++) {
+                FalconFPR z;
+
+                z = rt5[u];
+                if (!this.fpre.fpr_lt(z, this.fpre.fpr_ptwo63m1) || !this.fpre.fpr_lt(this.fpre.fpr_mtwo63m1, z)) {
+                    return 0;
+                }
+                rt5[u] = this.fpre.fpr_of(this.fpre.fpr_rint(z));
+            }
+            this.ffte.FFT(rt5, 0, logn);
+
+            /*
+            * Subtract k*f from F, and k*g from G.
+            * rt3 and rt4 are overwritten with k*f and k*g; the reduced
+            * F and G end up in rt1 and rt2 in normal (non-FFT) form.
+            */
+            this.ffte.poly_mul_fft(rt3, 0, rt5, 0, logn);
+            this.ffte.poly_mul_fft(rt4, 0, rt5, 0, logn);
+            this.ffte.poly_sub(rt1, 0, rt3, 0, logn);
+            this.ffte.poly_sub(rt2, 0, rt4, 0, logn);
+            this.ffte.iFFT(rt1, 0, logn);
+            this.ffte.iFFT(rt2, 0, logn);
+
+            /*
+            * Convert back F and G to integers, and return.
+            * Each coefficient is stored as a single word: F at offset tmp,
+            * G at offset tmp + n. This overwrites the copies of f and g
+            * that were kept at the start of the work area.
+            * NOTE(review): the (uint) cast presumably keeps the low 32 bits
+            * of the rounded value as a two's-complement encoding - confirm
+            * against the return type of fpr_rint() and the caller.
+            */
+            Ft = tmp;
+            Gt = Ft + n;
+            for (u = 0; u < n; u ++) {
+                tmpsrc[Ft+u] = (uint)this.fpre.fpr_rint(rt1[u]);
+                tmpsrc[Gt+u] = (uint)this.fpre.fpr_rint(rt2[u]);
+            }
+
+            return 1;
+        }
+
+        /*
+        * Solving the NTRU equation, top level. Upon entry, the F and G
+        * from the previous level should be in the tmp[] array.
+        *
+        * Returned value: 1 on success, 0 on error.
+        */
+        int solve_NTRU_binary_depth0(uint logn,
+            sbyte[] fsrc, int f, sbyte[] gsrc, int g, uint[] tmpsrc, int tmp)
+        {
+            int n, hn, u;
+            uint p, p0i, R2;
+            int Fp, Gp; 
+            int t1, t2, t3, t4, t5;
+            int gm, igm, ft, gt;
+            int rt2,  rt3;
+
+            n = (int)1 << (int)logn;
+            hn = n >> 1;
+
+            /*
+            * Equations are:
+            *
+            *   f' = f0^2 - X^2*f1^2
+            *   g' = g0^2 - X^2*g1^2
+            *   F' and G' are a solution to f'G' - g'F' = q (from deeper levels)
+            *   F = F'*(g0 - X*g1)
+            *   G = G'*(f0 - X*f1)
+            *
+            * f0, f1, g0, g1, f', g', F' and G' are all "compressed" to
+            * degree N/2 (their odd-indexed coefficients are all zero).
+            *
+            * Everything should fit in 31-bit integers, hence we can just use
+            * the first small prime p = 2147473409.
+            */
+            p = this.PRIMES[0].p;
+            p0i = modp_ninv31(p);
+            R2 = modp_R2(p, p0i);
+
+            Fp = tmp;
+            Gp = Fp + hn;
+            ft = Gp + hn;
+            gt = ft + n;
+            gm = gt + n;
+            igm = gm + n;
+
+            modp_mkgm2(tmpsrc, gm, tmpsrc, igm, logn, PRIMES[0].g, p, p0i);
+
+            /*
+            * Convert F' and G' in NTT representation.
+            */
+            for (u = 0; u < hn; u ++) {
+                tmpsrc[Fp+u] = modp_set(zint_one_to_plain(tmpsrc, Fp + u), p);
+                tmpsrc[Gp+u] = modp_set(zint_one_to_plain(tmpsrc, Gp + u), p);
+            }
+            modp_NTT2(tmpsrc, Fp, tmpsrc, gm, logn - 1, p, p0i);
+            modp_NTT2(tmpsrc, Gp, tmpsrc, gm, logn - 1, p, p0i);
+
+            /*
+            * Load f and g and convert them to NTT representation.
+            */
+            for (u = 0; u < n; u ++) {
+                tmpsrc[ft+u] = modp_set(fsrc[f+u], p);
+                tmpsrc[gt+u] = modp_set(gsrc[g+u], p);
+            }
+            modp_NTT2(tmpsrc, ft, tmpsrc, gm, logn, p, p0i);
+            modp_NTT2(tmpsrc, gt, tmpsrc, gm, logn, p, p0i);
+
+            /*
+            * Build the unreduced F,G in ft and gt.
+            */
+            for (u = 0; u < n; u += 2) {
+                uint ftA, ftB, gtA, gtB;
+                uint mFp, mGp;
+
+                ftA = tmpsrc[ft + u + 0];
+                ftB = tmpsrc[ft + u + 1];
+                gtA = tmpsrc[gt + u + 0];
+                gtB = tmpsrc[gt + u + 1];
+                mFp = modp_montymul(tmpsrc[Fp + (u >> 1)], R2, p, p0i);
+                mGp = modp_montymul(tmpsrc[Gp + (u >> 1)], R2, p, p0i);
+                tmpsrc[ft + u + 0] = modp_montymul(gtB, mFp, p, p0i);
+                tmpsrc[ft + u + 1] = modp_montymul(gtA, mFp, p, p0i);
+                tmpsrc[gt + u + 0] = modp_montymul(ftB, mGp, p, p0i);
+                tmpsrc[gt + u + 1] = modp_montymul(ftA, mGp, p, p0i);
+            }
+            modp_iNTT2(tmpsrc, ft, tmpsrc, igm, logn, p, p0i);
+            modp_iNTT2(tmpsrc, gt, tmpsrc, igm, logn, p, p0i);
+
+            Gp = Fp + n;
+            t1 = Gp + n;
+            // memmove(Fp, ft, 2 * n * sizeof *ft);
+            Array.Copy(tmpsrc, ft, tmpsrc, Fp, 2 * n);
+
+            /*
+            * We now need to apply the Babai reduction. At that point,
+            * we have F and G in two n-word arrays.
+            *
+            * We can compute F*adj(f)+G*adj(g) and f*adj(f)+g*adj(g)
+            * modulo p, using the NTT. We still move memory around in
+            * order to save RAM.
+            */
+            t2 = t1 + n;
+            t3 = t2 + n;
+            t4 = t3 + n;
+            t5 = t4 + n;
+
+            /*
+            * Compute the NTT tables in t1 and t2. We do not keep t2
+            * (we'll recompute it later on).
+            */
+            modp_mkgm2(tmpsrc, t1, tmpsrc, t2, logn, PRIMES[0].g, p, p0i);
+
+            /*
+            * Convert F and G to NTT.
+            */
+            modp_NTT2(tmpsrc, Fp, tmpsrc, t1, logn, p, p0i);
+            modp_NTT2(tmpsrc, Gp, tmpsrc, t1, logn, p, p0i);
+
+            /*
+            * Load f and adj(f) in t4 and t5, and convert them to NTT
+            * representation.
+            */
+            tmpsrc[t4+0] = tmpsrc[t5+0] = modp_set(fsrc[f + 0], p);
+            for (u = 1; u < n; u ++) {
+                tmpsrc[t4+u] = modp_set(fsrc[f + u], p);
+                tmpsrc[t5+n - u] = modp_set(-fsrc[f + u], p);
+            }
+            modp_NTT2(tmpsrc, t4, tmpsrc, t1, logn, p, p0i);
+            modp_NTT2(tmpsrc, t5, tmpsrc, t1, logn, p, p0i);
+
+            /*
+            * Compute F*adj(f) in t2, and f*adj(f) in t3.
+            */
+            for (u = 0; u < n; u ++) {
+                uint w;
+
+                w = modp_montymul(tmpsrc[t5+u], R2, p, p0i);
+                tmpsrc[t2+u] = modp_montymul(w, tmpsrc[Fp+u], p, p0i);
+                tmpsrc[t3+u] = modp_montymul(w, tmpsrc[t4+u], p, p0i);
+            }
+
+            /*
+            * Load g and adj(g) in t4 and t5, and convert them to NTT
+            * representation.
+            */
+            tmpsrc[t4+0] = tmpsrc[t5+0] = modp_set(gsrc[g + 0], p);
+            for (u = 1; u < n; u ++) {
+                tmpsrc[t4+u] = modp_set(gsrc[g + u], p);
+                tmpsrc[t5+n - u] = modp_set(-gsrc[g + u], p);
+            }
+            modp_NTT2(tmpsrc, t4, tmpsrc, t1, logn, p, p0i);
+            modp_NTT2(tmpsrc, t5, tmpsrc, t1, logn, p, p0i);
+
+            /*
+            * Add G*adj(g) to t2, and g*adj(g) to t3.
+            */
+            for (u = 0; u < n; u ++) {
+                uint w;
+
+                w = modp_montymul(tmpsrc[t5+u], R2, p, p0i);
+                tmpsrc[t2+u] = modp_add(tmpsrc[t2+u],
+                    modp_montymul(w, tmpsrc[Gp+u], p, p0i), p);
+                tmpsrc[t3+u] = modp_add(tmpsrc[t3+u],
+                    modp_montymul(w, tmpsrc[t4+u], p, p0i), p);
+            }
+
+            /*
+            * Convert back t2 and t3 to normal representation (normalized
+            * around 0), and then
+            * move them to t1 and t2. We first need to recompute the
+            * inverse table for NTT.
+            */
+            modp_mkgm2(tmpsrc, t1, tmpsrc, t4, logn, this.PRIMES[0].g, p, p0i);
+            modp_iNTT2(tmpsrc, t2, tmpsrc, t4, logn, p, p0i);
+            modp_iNTT2(tmpsrc, t3, tmpsrc, t4, logn, p, p0i);
+            for (u = 0; u < n; u ++) {
+                tmpsrc[t1+u] = (uint)modp_norm(tmpsrc[t2+u], p);
+                tmpsrc[t2+u] = (uint)modp_norm(tmpsrc[t3+u], p);
+            }
+
+            /*
+            * At that point, array contents are:
+            *
+            *   F (NTT representation) (Fp)
+            *   G (NTT representation) (Gp)
+            *   F*adj(f)+G*adj(g) (t1)
+            *   f*adj(f)+g*adj(g) (t2)
+            *
+            * We want to divide t1 by t2. The result is not integral; it
+            * must be rounded. We thus need to use the FFT.
+            */
+
+            /*
+            * Get f*adj(f)+g*adj(g) in FFT representation. Since this
+            * polynomial is auto-adjoint, all its coordinates in FFT
+            * representation are actually real, so we can truncate off
+            * the imaginary parts.
+            */
+            FalconFPR[] rtmp = new FalconFPR[2 * n];
+            rt3 = n;
+            for (u = 0; u < n; u ++) {
+                rtmp[rt3+u] = this.fpre.fpr_of((int)tmpsrc[t2+u]);
+            }
+            this.ffte.FFT(rtmp, rt3, logn);
+            rt2 = 0;
+            // memmove(rt2, rt3, hn * sizeof *rt3);
+            Array.Copy(rtmp, rt3, rtmp, rt2, hn);
+
+            /*
+            * Convert F*adj(f)+G*adj(g) in FFT representation.
+            */
+            rt3 = rt2 + hn;
+            for (u = 0; u < n; u ++) {
+                rtmp[rt3+u] = this.fpre.fpr_of((int)tmpsrc[t1 + u]);
+            }
+            this.ffte.FFT(rtmp, rt3, logn);
+
+            /*
+            * Compute (F*adj(f)+G*adj(g))/(f*adj(f)+g*adj(g)) and get
+            * its rounded normal representation in t1.
+            */
+            this.ffte.poly_div_autoadj_fft(rtmp, rt3, rtmp, rt2, logn);
+            this.ffte.iFFT(rtmp, rt3, logn);
+            for (u = 0; u < n; u ++) {
+                tmpsrc[t1+u] = modp_set((int)this.fpre.fpr_rint(rtmp[rt3+u]), p);
+            }
+
+            /*
+            * RAM contents are now:
+            *
+            *   F (NTT representation) (Fp)
+            *   G (NTT representation) (Gp)
+            *   k (t1)
+            *
+            * We want to compute F-k*f, and G-k*g.
+            */
+            t2 = t1 + n;
+            t3 = t2 + n;
+            t4 = t3 + n;
+            t5 = t4 + n;
+            modp_mkgm2(tmpsrc, t2, tmpsrc, t3, logn, this.PRIMES[0].g, p, p0i);
+            for (u = 0; u < n; u ++) {
+                tmpsrc[t4+u] = modp_set(fsrc[f+u], p);
+                tmpsrc[t5+u] = modp_set(gsrc[g+u], p);
+            }
+            modp_NTT2(tmpsrc, t1, tmpsrc, t2, logn, p, p0i);
+            modp_NTT2(tmpsrc, t4, tmpsrc, t2, logn, p, p0i);
+            modp_NTT2(tmpsrc, t5, tmpsrc, t2, logn, p, p0i);
+            for (u = 0; u < n; u ++) {
+                uint kw;
+
+                kw = modp_montymul(tmpsrc[t1+u], R2, p, p0i);
+                tmpsrc[Fp+u] = modp_sub(tmpsrc[Fp+u],
+                    modp_montymul(kw, tmpsrc[t4+u], p, p0i), p);
+                tmpsrc[Gp+u] = modp_sub(tmpsrc[Gp+u],
+                    modp_montymul(kw, tmpsrc[t5+u], p, p0i), p);
+            }
+            modp_iNTT2(tmpsrc, Fp, tmpsrc, t3, logn, p, p0i);
+            modp_iNTT2(tmpsrc, Gp, tmpsrc, t3, logn, p, p0i);
+            for (u = 0; u < n; u ++) {
+                tmpsrc[Fp+u] = (uint)modp_norm(tmpsrc[Fp+u], p);
+                tmpsrc[Gp+u] = (uint)modp_norm(tmpsrc[Gp+u], p);
+            }
+
+            return 1;
+        }
+
+        /*
+        * Solve the NTRU equation. Returned value is 1 on success, 0 on error.
+        * G can be NULL, in which case that value is computed but not returned.
+        * If any of the coefficients of F and G exceeds lim (in absolute value),
+        * then 0 is returned.
+        *
+        * Parameters (each array is paired with a start offset into it):
+        *   logn        degree exponent; n = 1 << logn coefficients are handled
+        *   Fsrc, F     output for F (n values written by poly_big_to_small())
+        *   Gsrc, G     output for G; when Gsrc is null a local n-element array
+        *               is used instead and dropped on return
+        *   fsrc, f     input polynomial f (n values read)
+        *   gsrc, g     input polynomial g (n values read)
+        *   lim         bound forwarded to poly_big_to_small() for F and G
+        *   tmpsrc, tmp scratch words shared by every solving stage; the final
+        *               check alone addresses five regions spaced n words apart
+        *               starting at tmp
+        *
+        * Any stage that reports 0 makes this method return 0 immediately,
+        * without writing further output.
+        */
+        int solve_NTRU(uint logn, sbyte[] Fsrc, int F, sbyte[] Gsrc, int G,
+            sbyte[] fsrc, int f, sbyte[] gsrc, int g, int lim, uint[] tmpsrc, int tmp)
+        {
+            int n, u;
+            int ft, gt, Ft, Gt, gm;
+            uint p, p0i, r;
+            FalconSmallPrime[] primes;
+
+            n = (int)1 << (int)logn;
+
+            if (solve_NTRU_deepest(logn, fsrc, f, gsrc, g, tmpsrc, tmp) == 0) {
+                return 0;
+            }
+
+            /*
+            * For logn <= 2, we need to use solve_NTRU_intermediate()
+            * directly, because coefficients are a bit too large and
+            * do not fit the hypotheses in solve_NTRU_binary_depth0().
+            *
+            * In both branches the depth counter starts at logn and is
+            * decremented before each call, so the stages run from depth
+            * logn - 1 downwards. For logn > 2 the generic stage stops after
+            * depth 2 and the two specialised stages handle depth 1 and 0.
+            */
+            if (logn <= 2) {
+                uint depth;
+
+                depth = logn;
+                while (depth -- > 0) {
+                    if (solve_NTRU_intermediate(logn, fsrc, f, gsrc, g, depth, tmpsrc, tmp) == 0) {
+                        return 0;
+                    }
+                }
+            } else {
+                uint depth;
+
+                depth = logn;
+                while (depth -- > 2) {
+                    // TODO check what causes this to fail
+                    if (solve_NTRU_intermediate(logn, fsrc, f, gsrc, g, depth, tmpsrc, tmp) == 0) {
+                        return 0;
+                    }
+                }
+                if (solve_NTRU_binary_depth1(logn, fsrc, f, gsrc, g, tmpsrc, tmp) == 0) {
+                    return 0;
+                }
+                if (solve_NTRU_binary_depth0(logn, fsrc, f, gsrc, g, tmpsrc, tmp) == 0) {
+                    return 0;
+                }
+            }
+
+            /*
+            * If no buffer has been provided for G, use a temporary one.
+            * The temporary is a separate array local to this call, so the
+            * caller never sees the computed G in that case.
+            */
+            if (Gsrc == null) {
+                G = 0;
+                Gsrc = new sbyte[n];
+            }
+
+            /*
+            * Final F and G are in fk->tmp, one word per coefficient
+            * (signed value over 31 bits).
+            *
+            * ("fk->tmp" is wording kept from the C original; here it means
+            * tmpsrc starting at tmp.) F is read from tmp and G from tmp + n.
+            * poly_big_to_small() returning 0 is treated as failure.
+            */
+            if (poly_big_to_small(Fsrc, F, tmpsrc, tmp, lim, logn) == 0
+                || poly_big_to_small(Gsrc, G, tmpsrc, tmp + n, lim, logn) == 0)
+            {
+                return 0;
+            }
+
+            /*
+            * Verify that the NTRU equation is fulfilled. Since all elements
+            * have short lengths, verifying modulo a small prime p works, and
+            * allows using the NTT.
+            *
+            * We put Gt[] first in tmp[], and process it first, so that it does
+            * not overlap with G[] in case we allocated it ourselves.
+            *
+            * Scratch layout from here on, each region n words after the
+            * previous one: Gt, ft, gt, Ft, gm. The second table produced by
+            * modp_mkgm2() is written at tmp, i.e. on top of Gt, and is then
+            * overwritten when Gt is loaded from G; it is not used afterwards.
+            */
+            Gt = tmp;
+            ft = Gt + n;
+            gt = ft + n;
+            Ft = gt + n;
+            gm = Ft + n;
+
+            primes = this.PRIMES;
+            p = primes[0].p;
+            p0i = modp_ninv31(p);
+            modp_mkgm2(tmpsrc, gm, tmpsrc, tmp, logn, primes[0].g, p, p0i);
+            for (u = 0; u < n; u ++) {
+                tmpsrc[Gt+u] = modp_set(Gsrc[G+u], p);
+            }
+            for (u = 0; u < n; u ++) {
+                tmpsrc[ft+u] = modp_set(fsrc[f+u], p);
+                tmpsrc[gt+u] = modp_set(gsrc[g+u], p);
+                tmpsrc[Ft+u] = modp_set(Fsrc[F+u], p);
+            }
+            modp_NTT2(tmpsrc, ft, tmpsrc, gm, logn, p, p0i);
+            modp_NTT2(tmpsrc, gt, tmpsrc, gm, logn, p, p0i);
+            modp_NTT2(tmpsrc, Ft, tmpsrc, gm, logn, p, p0i);
+            modp_NTT2(tmpsrc, Gt, tmpsrc, gm, logn, p, p0i);
+            r = modp_montymul(12289, 1, p, p0i); // reference value derived from q = 12289
+            for (u = 0; u < n; u ++) {
+                uint z;
+
+                z = modp_sub(modp_montymul(tmpsrc[ft+u], tmpsrc[Gt+u], p, p0i),
+                    modp_montymul(tmpsrc[gt+u], tmpsrc[Ft+u], p, p0i), p);
+                if (z != r) { // every coefficient of f*G - g*F must match r
+                    return 0;
+                }
+            }
+
+            return 1;
+        }
+
+        /*
+        * Fill fsrc[f .. f + n - 1] (n = 1 << logn) with samples drawn from
+        * mkgauss(), one coefficient at a time. A sample is redrawn when it does
+        * not fit in -127..+127, and the final coefficient is redrawn until the
+        * parity of the sum of all coefficients is odd, so that the resultant of
+        * the polynomial with phi is odd.
+        */
+        void poly_small_mkgauss(SHAKE256 rng, sbyte[] fsrc, int f, uint logn)
+        {
+            int count = 1 << (int)logn;
+            int lastIndex = count - 1;
+
+            /* Running parity (sum modulo 2) of the coefficients accepted so far. */
+            uint parity = 0;
+
+            for (int idx = 0; idx < count; idx++)
+            {
+                while (true)
+                {
+                    int sample = mkgauss(rng, logn);
+
+                    /*
+                    * Out-of-range samples only happen realistically for the
+                    * very low degrees (N = 2 or 4); draw again.
+                    */
+                    if (sample < -127 || sample > 127)
+                    {
+                        continue;
+                    }
+
+                    uint lowBit = (uint)(sample & 1);
+                    if (idx != lastIndex)
+                    {
+                        parity ^= lowBit;
+                    }
+                    else if ((parity ^ lowBit) == 0)
+                    {
+                        /*
+                        * An even coefficient sum would make the resultant with
+                        * X^N+1 even and the binary GCD would fail; draw the
+                        * last coefficient again.
+                        */
+                        continue;
+                    }
+
+                    fsrc[f + idx] = (sbyte)sample;
+                    break;
+                }
+            }
+        }
+
+        internal void keygen(SHAKE256 rng,
+            sbyte[] fsrc, int f, sbyte[] gsrc, int g, sbyte[] Fsrc, int F, sbyte[] Gsrc, int G, ushort[] hsrc, int h,
+            uint logn)
+        {
+            /*
+            * Generate a key pair, retrying until every acceptance test passes.
+            *
+            * Parameters (each array is paired with a start offset into it):
+            *   rng        source of randomness handed to poly_small_mkgauss()
+            *   fsrc, f    output: f (n = 1 << logn coefficients)
+            *   gsrc, g    output: g
+            *   Fsrc, F    output: F, written by solve_NTRU()
+            *   Gsrc, G    output: G, passed through to solve_NTRU() as is
+            *   hsrc, h    output: public key h; may be null, in which case h is
+            *              computed into a scratch array and discarded
+            *   logn       degree exponent
+            *
+            * The method only returns once a full key pair has been produced;
+            * there is no failure return value.
+            *
+            * Algorithm is the following:
+            *
+            *  - Generate f and g with the Gaussian distribution.
+            *
+            *  - If either Res(f,phi) or Res(g,phi) is even, try again.
+            *
+            *  - If ||(f,g)|| is too large, try again.
+            *
+            *  - If ||B~_{f,g}|| is too large, try again.
+            *
+            *  - If f is not invertible mod phi mod q, try again.
+            *
+            *  - Compute h = g/f mod phi mod q.
+            *
+            *  - Solve the NTRU equation fG - gF = q; if the solving fails,
+            *    try again. Usual failure condition is when Res(f,phi)
+            *    and Res(g,phi) are not prime to each other.
+            */
+            int n, u;
+            int h2, tmp2;
+            SHAKE256 rc;
+
+            n = (int)1 << (int)logn;
+            rc = rng; // same object under a second name, not a copy
+
+            /*
+            * We need to generate f and g randomly, until we find values
+            * such that the norm of (g,-f), and of the orthogonalized
+            * vector, are satisfying. The orthogonalized vector is:
+            *   (q*adj(f)/(f*adj(f)+g*adj(g)), q*adj(g)/(f*adj(f)+g*adj(g)))
+            * (it is actually the (N+1)-th row of the Gram-Schmidt basis).
+            *
+            * In the binary case, coefficients of f and g are generated
+            * independently of each other, with a discrete Gaussian
+            * distribution of standard deviation 1.17*sqrt(q/(2*N)). Then,
+            * the two vectors have expected norm 1.17*sqrt(q), which is
+            * also our acceptance bound: we require both vectors to be no
+            * larger than that (this will be satisfied about 1/4th of the
+            * time, thus we expect sampling new (f,g) about 4 times for that
+            * step).
+            *
+            * We require that Res(f,phi) and Res(g,phi) are both odd (the
+            * NTRU equation solver requires it).
+            *
+            * Every "continue" below restarts the whole loop body with a fresh
+            * (f, g); the scratch arrays are allocated anew on each attempt.
+            */
+            for (;;) {
+                int rt1, rt2, rt3;
+                FalconFPR bnorm;
+                uint normf, normg, norm;
+                int lim;
+
+                /*
+                * The poly_small_mkgauss() function makes sure
+                * that the sum of coefficients is 1 modulo 2
+                * (i.e. the resultant of the polynomial with phi
+                * will be odd).
+                */
+                poly_small_mkgauss(rc, fsrc, f, logn);
+                poly_small_mkgauss(rc, gsrc, g, logn);
+
+                /*
+                * Verify that all coefficients are within the bounds
+                * defined in max_fg_bits. This is the case with
+                * overwhelming probability; this guarantees that the
+                * key will be encodable with FALCON_COMP_TRIM.
+                *
+                * lim doubles as the rejection flag: it is set to -1 as soon
+                * as one coefficient falls outside the open range (-lim, lim).
+                */
+                lim = 1 << (this.codec.max_fg_bits[logn] - 1);
+                for (u = 0; u < n; u ++) {
+                    /*
+                    * We can use non-CT tests since on any failure
+                    * we will discard f and g.
+                    */
+                    if (fsrc[f+u] >= lim || fsrc[f+u] <= -lim
+                        || gsrc[g+u] >= lim || gsrc[g+u] <= -lim)
+                    {
+                        lim = -1;
+                        break;
+                    }
+                }
+                if (lim < 0) {
+                    continue;
+                }
+
+                /*
+                * Bound is 1.17*sqrt(q). We compute the squared
+                * norms. With q = 12289, the squared bound is:
+                *   (1.17^2)* 12289 = 16822.4121
+                * Since f and g are integral, the squared norm
+                * of (g,-f) is an integer.
+                *
+                * The OR with the negated top bit forces norm to all ones
+                * whenever bit 31 of normf or normg is set, so such a value
+                * can never pass the comparison below.
+                */
+                normf = poly_small_sqnorm(fsrc, f, logn);
+                normg = poly_small_sqnorm(gsrc, g, logn);
+                norm = (uint)((normf + normg) | -((normf | normg) >> 31));
+                if (norm >= 16823) {
+                    continue;
+                }
+
+                /*
+                * We compute the orthogonalized vector norm.
+                *
+                * rtmp holds three n-element regions: rt1 (from f), rt2
+                * (from g) and rt3 (output of poly_invnorm2_fft()). After the
+                * inverse FFTs, bnorm accumulates the squares of all entries
+                * of rt1 and rt2; the candidate is kept only when bnorm is
+                * strictly below fpr_bnorm_max.
+                */
+                FalconFPR[] rtmp = new FalconFPR[3 * n];
+                rt1 = 0;
+                rt2 = rt1 + n;
+                rt3 = rt2 + n;
+                poly_small_to_fp(rtmp, rt1, fsrc, f, logn);
+                poly_small_to_fp(rtmp, rt2, gsrc, g, logn);
+                this.ffte.FFT(rtmp, rt1, logn);
+                this.ffte.FFT(rtmp, rt2, logn);
+                this.ffte.poly_invnorm2_fft(rtmp, rt3, rtmp, rt1, rtmp, rt2, logn);
+                this.ffte.poly_adj_fft(rtmp, rt1, logn);
+                this.ffte.poly_adj_fft(rtmp, rt2, logn);
+                this.ffte.poly_mulconst(rtmp, rt1, this.fpre.fpr_q, logn);
+                this.ffte.poly_mulconst(rtmp, rt2, this.fpre.fpr_q, logn);
+                this.ffte.poly_mul_autoadj_fft(rtmp, rt1, rtmp, rt3, logn);
+                this.ffte.poly_mul_autoadj_fft(rtmp, rt2, rtmp, rt3, logn);
+                this.ffte.iFFT(rtmp, rt1, logn);
+                this.ffte.iFFT(rtmp, rt2, logn);
+                bnorm = this.fpre.fpr_zero;
+                for (u = 0; u < n; u ++) {
+                    bnorm = this.fpre.fpr_add(bnorm, this.fpre.fpr_sqr(rtmp[rt1+u]));
+                    bnorm = this.fpre.fpr_add(bnorm, this.fpre.fpr_sqr(rtmp[rt2+u]));
+                }
+                if (!this.fpre.fpr_lt(bnorm, this.fpre.fpr_bnorm_max)) {
+                    continue;
+                }
+
+                /*
+                * Compute public key h = g/f mod X^N+1 mod q. If this
+                * fails, we must restart.
+                *
+                * Without a caller buffer, one 2n-element array serves both
+                * as the destination for h (first n entries) and as scratch
+                * (from index n). With a caller buffer, h goes straight into
+                * hsrc at offset h and htmp is scratch only.
+                */
+                ushort[] htmp;
+                ushort[] h2src;
+                if (hsrc == null) {
+                    htmp = new ushort[2 * n];
+                    h2 = 0;
+                    h2src = htmp;
+                    tmp2 = h2 + n;
+                } else {
+                    htmp = new ushort[n];
+                    h2 = h;
+                    h2src = hsrc;
+                    tmp2 = 0;
+                }
+                if (vrfy.compute_public(h2src, h2, fsrc, f, gsrc, g, logn, htmp, tmp2) == 0) {
+                    continue;
+                }
+
+                /*
+                * Solve the NTRU equation to get F and G.
+                *
+                * The scratch array is 28 * n words, tripled for logn <= 2.
+                * lim is now the largest absolute value accepted for the
+                * coefficients of F and G, taken from max_FG_bits.
+                */
+                uint[] itmp = logn > 2 ? new uint[28 * n] : new uint[28 * n * 3];
+                lim = (1 << (this.codec.max_FG_bits[logn] - 1)) - 1;
+                if (solve_NTRU(logn, Fsrc, F, Gsrc, G, fsrc, f, gsrc, g, lim, itmp, 0) == 0) {
+                    continue;
+                }
+
+                /*
+                * Key pair is generated.
+                */
+                break;
+            }
+        }
+    }
+}
diff --git a/crypto/src/pqc/crypto/falcon/FalconNIST.cs b/crypto/src/pqc/crypto/falcon/FalconNIST.cs
new file mode 100644
index 000000000..50459532f
--- /dev/null
+++ b/crypto/src/pqc/crypto/falcon/FalconNIST.cs
@@ -0,0 +1,303 @@
+using System;
+using Org.BouncyCastle.Security;
+using Org.BouncyCastle.Utilities;
+
+namespace Org.BouncyCastle.Pqc.Crypto.Falcon
+{
+    /*
+     * Byte-level Falcon operations (key pair generation, signing and
+     * verification) for one fixed degree exponent and nonce length.
+     *
+     * Encoded sizes are derived from logn in the constructor. Every method
+     * takes arrays paired with a start offset, mirroring the pointer-based
+     * C code this class was ported from.
+     */
+    class FalconNIST
+    {
+        private readonly FalconCodec codec;
+        private readonly FalconVrfy vrfy;
+        private readonly FalconCommon common;
+        private readonly SecureRandom random;
+        private readonly uint logn;
+        private readonly uint noncelen;
+        private readonly int CRYPTO_BYTES;
+        private readonly int CRYPTO_PUBLICKEYBYTES;
+        private readonly int CRYPTO_SECRETKEYBYTES;
+
+        /* Length in bytes of the random nonce placed in each signature. */
+        internal uint GetNonceLength() {
+            return this.noncelen;
+        }
+
+        /* Degree exponent: polynomials have 1 << logn coefficients. */
+        internal uint GetLogn() {
+            return this.logn;
+        }
+
+        /* Signature size budget used to dimension the signature buffer. */
+        internal int GetCryptoBytes() {
+            return this.CRYPTO_BYTES;
+        }
+
+        /*
+         * random   - source for the key generation seed, the signing nonce and
+         *            the signing seed
+         * logn     - degree exponent
+         * noncelen - nonce length in bytes
+         */
+        internal FalconNIST(SecureRandom random, uint logn, uint noncelen) {
+            this.logn = logn;
+            this.codec = new FalconCodec();
+            this.common = new FalconCommon();
+            this.vrfy = new FalconVrfy(this.common);
+            this.random = random;
+            this.noncelen = noncelen;
+            int n = 1 << (int)logn;
+
+            /*
+             * One header byte plus 14 bits per coefficient. The bit count is
+             * rounded up to whole bytes: truncating division would be one
+             * byte short whenever 14 * n is not a multiple of 8 (n < 4).
+             */
+            this.CRYPTO_PUBLICKEYBYTES = 1 + ((14 * n + 7) / 8);
+
+            /*
+             * One header byte, then f and g at 5, 6, 7 or 8 bits per
+             * coefficient depending on the degree, then F at 8 bits per
+             * coefficient.
+             */
+            if (logn == 10)
+            {
+                this.CRYPTO_SECRETKEYBYTES = 2305;
+                this.CRYPTO_BYTES = 1330;
+            }
+            else if (logn == 9 || logn == 8)
+            {
+                this.CRYPTO_SECRETKEYBYTES = 1 + (6 * n * 2 / 8) + n;
+                this.CRYPTO_BYTES = 690; // TODO find what the byte length is here when not at degree 9 or 10
+            }
+            else if (logn == 7 || logn == 6)
+            {
+                this.CRYPTO_SECRETKEYBYTES = 1 + (7 * n * 2 / 8) + n;
+                this.CRYPTO_BYTES = 690;
+            }
+            else
+            {
+                this.CRYPTO_SECRETKEYBYTES = 1 + (n * 2) + n;
+                this.CRYPTO_BYTES = 690;
+            }
+        }
+
+        /*
+         * Generate a key pair and write both encodings.
+         *
+         *   pksrc, pk - destination array and offset for the public key
+         *   sksrc, sk - destination array and offset for the private key
+         *
+         * Returns 0. Throws InvalidOperationException when an encoding step
+         * does not produce the expected number of bytes.
+         */
+        internal int crypto_sign_keypair(byte[] pksrc, int pk, byte[] sksrc, int sk)
+        {
+            int n = 1 << (int)this.logn;
+            SHAKE256 rng = new SHAKE256();
+            sbyte[] f = new sbyte[n],
+                    g = new sbyte[n],
+                    F = new sbyte[n];
+            ushort[] h = new ushort[n];
+            byte[] seed = new byte[48];
+            int u, v;
+            FalconKeygen keygen = new FalconKeygen(this.codec, this.vrfy);
+
+            /*
+             * Generate key pair. G is not requested (null): only f, g and F
+             * are stored in the private key.
+             */
+            this.random.NextBytes(seed);
+            rng.i_shake256_init();
+            rng.i_shake256_inject(seed, 0, seed.Length);
+            rng.i_shake256_flip();
+            keygen.keygen(rng, f, 0, g, 0, F, 0, null, 0, h, 0, this.logn);
+
+            /*
+             * Encode private key: header byte 0x50 + logn, then f, g, F.
+             * u tracks the number of bytes written so far.
+             */
+            sksrc[sk + 0] = (byte)(0x50 + this.logn);
+            u = 1;
+            v = this.codec.trim_i8_encode(sksrc, sk + u, CRYPTO_SECRETKEYBYTES - u,
+                f, 0, this.logn, this.codec.max_fg_bits[this.logn]);
+            if (v == 0) {
+                // TODO check which exception types to use here
+                throw new InvalidOperationException("f encode failed");
+            }
+            u += v;
+            v = this.codec.trim_i8_encode(sksrc, sk + u, CRYPTO_SECRETKEYBYTES - u,
+                g, 0, this.logn, this.codec.max_fg_bits[this.logn]);
+            if (v == 0) {
+                throw new InvalidOperationException("g encode failed");
+            }
+            u += v;
+            v = this.codec.trim_i8_encode(sksrc, sk + u, CRYPTO_SECRETKEYBYTES - u,
+                F, 0, this.logn, this.codec.max_FG_bits[this.logn]);
+            if (v == 0) {
+                throw new InvalidOperationException("F encode failed");
+            }
+            u += v;
+            if (u != CRYPTO_SECRETKEYBYTES) {
+                throw new InvalidOperationException("secret key encoding failed");
+            }
+
+            /*
+             * Encode public key: header byte 0x00 + logn, then h.
+             */
+            pksrc[pk + 0] = (byte)(0x00 + this.logn);
+            v = this.codec.modq_encode(pksrc, pk + 1, CRYPTO_PUBLICKEYBYTES - 1, h, 0, this.logn);
+            if (v != CRYPTO_PUBLICKEYBYTES - 1) {
+                throw new InvalidOperationException("public key encoding failed");
+            }
+
+            return 0;
+        }
+
+        /*
+         * Sign a message.
+         *
+         *   sm           - work buffer receiving header, nonce and signature
+         *   msrc, m, mlen - message array, start offset and length in bytes
+         *   sksrc, sk    - encoded private key array and start offset
+         *
+         * Returns a new array holding the used prefix of sm:
+         *   1 byte          0x30 + logn
+         *   noncelen bytes  nonce
+         *   1 byte          0x20 + logn
+         *   remaining       compressed signature
+         *
+         * Throws ArgumentException when the private key is too short or its
+         * header byte is wrong, and InvalidOperationException when decoding
+         * the key or encoding the signature fails.
+         */
+        internal byte[] crypto_sign(byte[] sm,
+            byte[] msrc, int m, uint mlen,
+            byte[] sksrc, int sk)
+        {
+            int u, v, sig_len;
+            int n = 1 << (int)this.logn;
+            int nonceLen = (int)this.noncelen;
+            sbyte[] f = new sbyte[n],
+                    g = new sbyte[n],
+                    F = new sbyte[n],
+                    G = new sbyte[n];
+            short[] sig = new short[n];
+            ushort[] hm = new ushort[n];
+            byte[] seed = new byte[48],
+                    nonce = new byte[nonceLen];
+            byte[] esig = new byte[this.CRYPTO_BYTES - 2 - nonceLen];
+            SHAKE256 sc = new SHAKE256();
+            FalconSign signer = new FalconSign(this.common);
+
+            /*
+             * Decode the private key. Decoding must consume exactly
+             * CRYPTO_SECRETKEYBYTES bytes, so a shorter buffer can never be
+             * valid; reject it before any element is read.
+             */
+            if (sk < 0 || sksrc.Length - sk < CRYPTO_SECRETKEYBYTES) {
+                throw new ArgumentException("private key too short");
+            }
+            if (sksrc[sk + 0] != 0x50 + this.logn) {
+                throw new ArgumentException("private key header incorrect");
+            }
+            u = 1;
+            v = this.codec.trim_i8_decode(f, 0, this.logn, this.codec.max_fg_bits[this.logn],
+                sksrc, sk + u, CRYPTO_SECRETKEYBYTES - u);
+            if (v == 0) {
+                throw new InvalidOperationException("f decode failed");
+            }
+            u += v;
+            v = this.codec.trim_i8_decode(g, 0, this.logn, this.codec.max_fg_bits[this.logn],
+                sksrc, sk + u, CRYPTO_SECRETKEYBYTES - u);
+            if (v == 0) {
+                throw new InvalidOperationException("g decode failed");
+            }
+            u += v;
+            v = this.codec.trim_i8_decode(F, 0, this.logn, this.codec.max_FG_bits[this.logn],
+                sksrc, sk + u, CRYPTO_SECRETKEYBYTES - u);
+            if (v == 0) {
+                throw new InvalidOperationException("F decode failed");
+            }
+            u += v;
+            if (u != CRYPTO_SECRETKEYBYTES) {
+                throw new InvalidOperationException("full Key not used");
+            }
+
+            /*
+             * G is not stored in the key; rebuild it from f, g and F.
+             */
+            if (this.vrfy.complete_private(G, 0, f, 0, g, 0, F, 0, this.logn, new ushort[2 * n], 0) == 0) {
+                throw new InvalidOperationException("complete private failed");
+            }
+
+            /*
+             * Create a random nonce (noncelen bytes).
+             */
+            this.random.NextBytes(nonce);
+
+            /*
+             * Hash nonce + message into a vector.
+             */
+            sc.i_shake256_init();
+            sc.i_shake256_inject(nonce, 0, nonce.Length);
+            sc.i_shake256_inject(msrc, m, (int)mlen);
+            sc.i_shake256_flip();
+            this.common.hash_to_point_vartime(sc, hm, 0, this.logn);
+
+            /*
+             * Initialize a RNG: the same SHAKE256 object is re-initialised
+             * with a fresh seed and then drives the signer.
+             */
+            this.random.NextBytes(seed);
+            sc.i_shake256_init();
+            sc.i_shake256_inject(seed, 0, seed.Length);
+            sc.i_shake256_flip();
+
+            /*
+             * Compute the signature.
+             */
+            signer.sign_dyn(sig, 0, sc, f, 0, g, 0, F, 0, G, 0, hm, 0, this.logn, new FalconFPR[10 * n], 0);
+
+            /*
+             * Encode the signature body: one header byte followed by the
+             * compressed coefficients. comp_encode() returning 0 means the
+             * encoding did not succeed.
+             */
+            esig[0] = (byte)(0x20 + this.logn);
+            sig_len = this.codec.comp_encode(esig, 1, esig.Length - 1, sig, 0, this.logn);
+            if (sig_len == 0)
+            {
+                throw new InvalidOperationException("signature failed to generate");
+            }
+            sig_len++; // account for the header byte at esig[0]
+
+            /*
+             * Assemble header, nonce and signature body in sm, then return
+             * exactly the bytes that were written.
+             */
+            sm[0] = (byte)(0x30 + this.logn);
+            Array.Copy(nonce, 0, sm, 1, nonceLen);
+            Array.Copy(esig, 0, sm, 1 + nonceLen, sig_len);
+
+            return Arrays.CopyOfRange(sm, 0, 1 + nonceLen + sig_len);
+        }
+
+        /*
+         * Verify a signature.
+         *
+         *   sig_encoded - signature body: byte 0x20 + logn followed by the
+         *                 compressed signature (the whole array is used)
+         *   nonce       - nonce; its first noncelen bytes are hashed
+         *   m           - message (the whole array is hashed)
+         *   pksrc, pk   - encoded public key array and start offset
+         *
+         * Returns 0 when the signature is valid and -1 otherwise, including
+         * when any input is truncated or malformed.
+         */
+        internal int crypto_sign_open(byte[] sig_encoded, byte[] nonce, byte[] m,
+            byte[] pksrc, int pk)
+        {
+            int n = 1 << (int)this.logn;
+            int sig_len = sig_encoded.Length;
+            ushort[] h = new ushort[n],
+                    hm = new ushort[n];
+            short[] sig = new short[n];
+            SHAKE256 sc = new SHAKE256();
+
+            /*
+             * Inputs here are untrusted. A key or nonce shorter than the
+             * expected length can never verify, so report failure up front
+             * instead of indexing past the end of the array.
+             */
+            if (pk < 0 || pksrc.Length - pk < CRYPTO_PUBLICKEYBYTES) {
+                return -1;
+            }
+            if (nonce.Length < this.noncelen) {
+                return -1;
+            }
+
+            /*
+             * Decode public key.
+             */
+            if (pksrc[pk + 0] != 0x00 + this.logn) {
+                return -1;
+            }
+            if (this.codec.modq_decode(h, 0, this.logn, pksrc, pk + 1, CRYPTO_PUBLICKEYBYTES - 1)
+                != CRYPTO_PUBLICKEYBYTES - 1)
+            {
+                return -1;
+            }
+            this.vrfy.to_ntt_monty(h, 0, this.logn);
+
+            /*
+             * Decode signature: the header byte must match and the decoder
+             * must consume every remaining byte.
+             */
+            if (sig_len < 1 || sig_encoded[0] != (byte)(0x20 + this.logn)) {
+                return -1;
+            }
+            if (this.codec.comp_decode(sig, 0, this.logn, sig_encoded,
+                1, sig_len - 1) != sig_len - 1)
+            {
+                return -1;
+            }
+
+            /*
+             * Hash nonce + message into a vector.
+             */
+            sc.i_shake256_init();
+            sc.i_shake256_inject(nonce, 0, (int)this.noncelen);
+            sc.i_shake256_inject(m, 0, m.Length);
+            sc.i_shake256_flip();
+            this.common.hash_to_point_vartime(sc, hm, 0, this.logn);
+
+            /*
+             * Verify signature.
+             */
+            if (!this.vrfy.verify_raw(hm, 0, sig, 0, h, 0, this.logn, new ushort[n], 0)) {
+                return -1;
+            }
+
+            return 0;
+        }
+    }
+}
diff --git a/crypto/src/pqc/crypto/falcon/FalconParameters.cs b/crypto/src/pqc/crypto/falcon/FalconParameters.cs
new file mode 100644
index 000000000..313d25709
--- /dev/null
+++ b/crypto/src/pqc/crypto/falcon/FalconParameters.cs
@@ -0,0 +1,38 @@
+using System;
+using Org.BouncyCastle.Crypto;
+
+namespace Org.BouncyCastle.Pqc.Crypto.Falcon
+{
+    /// <summary>
+    /// Named Falcon parameter set. Bundles a display name, the <c>logn</c> value and the
+    /// nonce length that the rest of the Falcon code reads through the getters below.
+    /// </summary>
+    public class FalconParameters
+        : ICipherParameters
+    {
+        // The predefined sets are shared by every caller, so they are readonly: a plain
+        // public static field could be reassigned by any code to a different parameter set.
+        public static readonly FalconParameters falcon_512 = new FalconParameters("falcon512", 9, 40);
+        public static readonly FalconParameters falcon_1024 = new FalconParameters("falcon1024", 10, 40);
+
+        // Set once in the constructor and never modified afterwards.
+        private readonly String name;
+        private readonly uint logn;
+        private readonly uint nonce_length;
+
+        /// <summary>Creates a parameter set from its three components.</summary>
+        /// <param name="name">Name returned by <see cref="GetName"/>.</param>
+        /// <param name="logn">
+        /// Value returned by <see cref="GetLogN"/>; 9 and 10 for the predefined sets
+        /// (presumably log2 of the polynomial degree - confirm against the callers).
+        /// </param>
+        /// <param name="nonce_length">
+        /// Value returned by <see cref="GetNonceLength"/>; 40 for the predefined sets.
+        /// </param>
+        public FalconParameters(String name, uint logn, uint nonce_length)
+        {
+            this.name = name;
+            this.logn = logn;
+            this.nonce_length = nonce_length;
+        }
+
+        /// <summary>Returns the logn value supplied at construction.</summary>
+        public uint GetLogN()
+        {
+            return logn;
+        }
+
+        /// <summary>Returns the nonce length supplied at construction.</summary>
+        public uint GetNonceLength()
+        {
+            return nonce_length;
+        }
+
+        /// <summary>Returns the name supplied at construction.</summary>
+        public String GetName()
+        {
+            return name;
+        }
+    }
+}
diff --git a/crypto/src/pqc/crypto/falcon/FalconPrivateKeyParameters.cs b/crypto/src/pqc/crypto/falcon/FalconPrivateKeyParameters.cs
new file mode 100644
index 000000000..448ba7275
--- /dev/null
+++ b/crypto/src/pqc/crypto/falcon/FalconPrivateKeyParameters.cs
@@ -0,0 +1,24 @@
+using Org.BouncyCastle.Utilities;
+
+
+namespace Org.BouncyCastle.Pqc.Crypto.Falcon
+{
+    /// <summary>
+    /// Falcon private key parameters: keeps the encoded private key bytes together with the
+    /// parameter set handed to the base class.
+    /// </summary>
+    public class FalconPrivateKeyParameters
+        : FalconKeyParameters
+    {
+        // Internal copy of the encoding; only ever exposed through Arrays.Clone.
+        private byte[] privateKey;
+
+        /// <summary>Stores <c>Arrays.Clone(sk_encoded)</c> as the key encoding.</summary>
+        /// <param name="parameters">Parameter set, forwarded to the base constructor.</param>
+        /// <param name="sk_encoded">Encoded private key.</param>
+        public FalconPrivateKeyParameters(FalconParameters parameters, byte[] sk_encoded)
+            : base(true, parameters)
+        {
+            privateKey = Arrays.Clone(sk_encoded);
+        }
+
+        /// <summary>Returns <c>Arrays.Clone</c> of the stored encoding on every access.</summary>
+        public byte[] PrivateKey
+        {
+            get { return Arrays.Clone(privateKey); }
+        }
+
+        /// <summary>Same result as <see cref="PrivateKey"/>.</summary>
+        public byte[] GetEncoded() => PrivateKey;
+    }
+}
diff --git a/crypto/src/pqc/crypto/falcon/FalconPublicKeyParameters.cs b/crypto/src/pqc/crypto/falcon/FalconPublicKeyParameters.cs
new file mode 100644
index 000000000..dace2e60f
--- /dev/null
+++ b/crypto/src/pqc/crypto/falcon/FalconPublicKeyParameters.cs
@@ -0,0 +1,23 @@
+using Org.BouncyCastle.Utilities;
+
+namespace Org.BouncyCastle.Pqc.Crypto.Falcon
+{
+    /// <summary>
+    /// Falcon public key parameters: keeps the encoded public key bytes together with the
+    /// parameter set handed to the base class.
+    /// </summary>
+    public class FalconPublicKeyParameters
+        : FalconKeyParameters
+    {
+        // Internal copy of the encoding; only ever exposed through Arrays.Clone.
+        private byte[] publicKey;
+
+        /// <summary>Stores <c>Arrays.Clone(pk_encoded)</c> as the key encoding.</summary>
+        /// <param name="parameters">Parameter set, forwarded to the base constructor.</param>
+        /// <param name="pk_encoded">Encoded public key.</param>
+        public FalconPublicKeyParameters(FalconParameters parameters, byte[] pk_encoded)
+            : base(false, parameters)
+        {
+            publicKey = Arrays.Clone(pk_encoded);
+        }
+
+        /// <summary>Returns <c>Arrays.Clone</c> of the stored encoding on every access.</summary>
+        public byte[] PublicKey
+        {
+            get { return Arrays.Clone(publicKey); }
+        }
+
+        /// <summary>Same result as <see cref="PublicKey"/>.</summary>
+        public byte[] GetEncoded() => PublicKey;
+    }
+}
diff --git a/crypto/src/pqc/crypto/falcon/FalconRNG.cs b/crypto/src/pqc/crypto/falcon/FalconRNG.cs
new file mode 100644
index 000000000..31f04d5d7
--- /dev/null
+++ b/crypto/src/pqc/crypto/falcon/FalconRNG.cs
@@ -0,0 +1,261 @@
+using System;
+
+namespace Org.BouncyCastle.Pqc.Crypto.Falcon
+{
+    class FalconRNG
+    {
+        // Output buffer; prng_refill() rewrites all 512 bytes.
+        byte[] bd;
+        // Generator state. prng_refill() loads bytes 0..47 into the ChaCha20 state words
+        // and keeps the 64-bit block counter in bytes 48..55.
+        byte[] sd;
+        // Read position inside bd; reset to 0 by prng_refill().
+        int ptr;
+
+        // Helper used for every byte <-> integer conversion on sd.
+        FalconConversions convertor;
+
+        internal FalconRNG()
+        {
+            bd = new byte[512];
+            sd = new byte[256];
+            convertor = new FalconConversions();
+        }
+
+        /* 
+        * License from the reference C code (the code was copied then modified
+        * to function in C#):
+        * ==========================(LICENSE BEGIN)============================
+        *
+        * Copyright (c) 2017-2019  Falcon Project
+        *
+        * Permission is hereby granted, free of charge, to any person obtaining
+        * a copy of this software and associated documentation files (the
+        * "Software"), to deal in the Software without restriction, including
+        * without limitation the rights to use, copy, modify, merge, publish,
+        * distribute, sublicense, and/or sell copies of the Software, and to
+        * permit persons to whom the Software is furnished to do so, subject to
+        * the following conditions:
+        *
+        * The above copyright notice and this permission notice shall be
+        * included in all copies or substantial portions of the Software.
+        *
+        * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+        * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+        * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+        * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+        * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+        * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+        * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+        *
+        * ===========================(LICENSE END)=============================
+        */
+
+        // Seed the generator: pull 56 bytes out of the SHAKE256 context, store them in sd as
+        // fourteen 32-bit words, then produce the first output buffer.
+        internal void prng_init(SHAKE256 src)
+        {
+            const int seedLength = 56;
+            byte[] seed = new byte[seedLength];
+
+            src.i_shake256_extract(seed, 0, seedLength);
+
+            /*
+            * The seed bytes are read as little-endian words so that a given
+            * seed always yields the same state words.
+            */
+            for (int off = 0; off < seedLength; off += 4)
+            {
+                uint word = (uint)seed[off + 0]
+                    | ((uint)seed[off + 1] << 8)
+                    | ((uint)seed[off + 2] << 16)
+                    | ((uint)seed[off + 3] << 24);
+                Array.Copy(convertor.int_to_bytes((int)word), 0, this.sd, off, 4);
+            }
+
+            // Fuse the last two words (offsets 48 and 52) into one 64-bit value and store
+            // it back at offset 48, where prng_refill() reads its block counter.
+            ulong low = convertor.bytes_to_uint(this.sd, 48);
+            ulong high = convertor.bytes_to_uint(this.sd, 52);
+            byte[] counter = convertor.ulong_to_bytes(low + (high << 32));
+            Array.Copy(counter, 0, this.sd, 48, 8);
+
+            this.prng_refill();
+        }
+
+        /*
+        * PRNG based on ChaCha20.
+        *
+        * State consists in key (32 bytes) then IV (16 bytes) and block counter
+        * (8 bytes). Normally, we should not care about local endianness (this
+        * is for a PRNG), but for the NIST competition we need reproducible KAT
+        * vectors that work across architectures, so we enforce little-endian
+        * interpretation where applicable. Moreover, output words are "spread
+        * out" over the output buffer with the interleaving pattern that is
+        * naturally obtained from the AVX2 implementation that runs eight
+        * ChaCha20 instances in parallel.
+        *
+        * The block counter is XORed into the first 8 bytes of the IV.
+        */
+        // ChaCha20 quarter round applied in place to state[a], state[b], state[c], state[d].
+        private void QROUND(uint[] state, int a, int b, int c, int d)
+        {
+            uint mix;
+
+            // a += b; d = (d ^ a) rotated left by 16
+            state[a] += state[b];
+            mix = state[d] ^ state[a];
+            state[d] = (mix << 16) | (mix >> 16);
+
+            // c += d; b = (b ^ c) rotated left by 12
+            state[c] += state[d];
+            mix = state[b] ^ state[c];
+            state[b] = (mix << 12) | (mix >> 20);
+
+            // a += b; d = (d ^ a) rotated left by 8
+            state[a] += state[b];
+            mix = state[d] ^ state[a];
+            state[d] = (mix << 8) | (mix >> 24);
+
+            // c += d; b = (b ^ c) rotated left by 7
+            state[c] += state[d];
+            mix = state[b] ^ state[c];
+            state[b] = (mix << 7) | (mix >> 25);
+        }
+        // Regenerate the whole output buffer bd: runs eight ChaCha20 blocks keyed from sd,
+        // advances the 64-bit block counter stored at sd[48..55] by eight, and resets ptr to 0.
+        void prng_refill()
+        {
+
+            // ChaCha20 constant words (the ASCII string "expand 32-byte k", little-endian).
+            uint[] CW = {
+                0x61707865, 0x3320646e, 0x79622d32, 0x6b206574
+            };
+
+            ulong cc;
+            int u;
+
+            /*
+            * State uses local endianness. Only the output bytes must be
+            * converted to little endian (if used on a big-endian machine).
+            */
+            //cc = *(ulong *)(this.sd + 48);
+            cc = convertor.bytes_to_ulong(this.sd, 48);
+            // One iteration per ChaCha20 block; each block uses the next counter value.
+            for (u = 0; u < 8; u ++) {
+                uint[] state = new uint[16];
+                int v;
+                int i;
+
+                // Words 0..3: constants. Words 4..15: the first 48 bytes of sd.
+                // memcpy(&state[0], CW, sizeof CW);
+                Array.Copy(CW, 0, state, 0, 4);
+                // memcpy(&state[4], this.sd, 48);
+                Array.Copy(convertor.bytes_to_uint_array(this.sd, 0, 12), 0, state, 4, 12);
+
+                // The block counter is XORed into words 14 and 15.
+                state[14] ^= (uint)cc;
+                state[15] ^= (uint)(cc >> 32);
+                // 10 double rounds: four column quarter rounds, then four diagonal ones.
+                for (i = 0; i < 10; i ++) {
+
+                    QROUND(state, 0,  4,  8, 12);
+                    QROUND(state, 1,  5,  9, 13);
+                    QROUND(state, 2,  6, 10, 14);
+                    QROUND(state, 3,  7, 11, 15);
+                    QROUND(state, 0,  5, 10, 15);
+                    QROUND(state, 1,  6, 11, 12);
+                    QROUND(state, 2,  7,  8, 13);
+                    QROUND(state, 3,  4,  9, 14);
+
+                }
+
+                // Feed-forward: add the initial value of each word back onto the result.
+                for (v = 0; v < 4; v++)
+                {
+                    state[v] += CW[v];
+                }
+                for (v = 4; v < 14; v++)
+                {
+                    //                state[v] += ((uint32_t *)p->state.d)[v - 4];
+                    // we multiply the -4 by 4 to account for 4 bytes per int
+                    state[v] += convertor.bytes_to_uint(sd, (4 * v) - 16);
+                }
+                // Words 14 and 15 started as (sd word ^ counter half), so the same XOR is
+                // rebuilt here; the outer (uint) cast truncates the widened XOR result.
+                //            state[14] += ((uint32_t *)p->state.d)[10]
+                //            ^ (uint32_t)cc;
+                state[14] += (uint)(convertor.bytes_to_uint(sd, 40) ^ ((int)cc));
+                //            state[15] += ((uint32_t *)p->state.d)[11]
+                //            ^ (uint32_t)(cc >> 32);
+                state[15] += (uint)(convertor.bytes_to_uint(sd, 44) ^ ((int)(cc >> 32)));
+                cc ++;
+
+                /*
+                * We mimic the interleaving that is used in the AVX2
+                * implementation.
+                */
+                // Word v of block u lands at byte offset 32*v + 4*u, least significant byte first.
+                for (v = 0; v < 16; v ++) {
+                    this.bd[(u << 2) + (v << 5) + 0] =
+                        (byte)state[v];
+                    this.bd[(u << 2) + (v << 5) + 1] =
+                        (byte)(state[v] >> 8);
+                    this.bd[(u << 2) + (v << 5) + 2] =
+                        (byte)(state[v] >> 16);
+                    this.bd[(u << 2) + (v << 5) + 3] =
+                        (byte)(state[v] >> 24);
+                }
+            }
+            // Persist the advanced block counter for the next refill.
+            //*(ulong *)(this.sd + 48) = cc;
+            Array.Copy(convertor.ulong_to_bytes(cc), 0, sd, 48, 8);
+
+
+            this.ptr = 0;
+        }
+
+        /*
+        * Copy len pseudorandom bytes into dstsrc, starting at index dst.
+        *
+        * Bytes are taken from the output buffer at the current read position;
+        * the buffer is refilled whenever it is exhausted, so the buffer is
+        * never left empty on return.
+        */
+        internal void prng_get_bytes( byte[] dstsrc, int dst, int len)
+        {
+            int buf = dst;
+
+            while (len > 0)
+            {
+                // Take whatever is left in the buffer, capped at the amount still wanted.
+                int clen = this.bd.Length - this.ptr;
+                if (clen > len)
+                {
+                    clen = len;
+                }
+
+                // Read from the current position. The previous code always copied from
+                // index 0, which handed out bytes that had already been returned and
+                // skipped the bytes that ptr marks as consumed below.
+                // NOTE(review): the commented-out C line this was ported from also copied
+                // from the buffer start - confirm no known-answer test relies on that.
+                Array.Copy(this.bd, this.ptr, dstsrc, buf, clen);
+
+                buf += clen;
+                len -= clen;
+                this.ptr += clen;
+                if (this.ptr == this.bd.Length)
+                {
+                    this.prng_refill();
+                }
+            }
+        }
+
+        /*
+         * Get a 64-bit random value from a PRNG.
+         */
+        internal ulong prng_get_u64()
+        {
+            /*
+            * Refill first when at most 9 bytes remain in the buffer. The
+            * remaining bytes are discarded; in exchange the 8-byte read
+            * below never runs off the end and never empties the buffer.
+            */
+            int start = this.ptr;
+            if (start >= this.bd.Length - 9)
+            {
+                this.prng_refill();
+                start = 0;
+            }
+            this.ptr = start + 8;
+
+            // Little-endian decode: bd[start] is the least significant byte, so the
+            // bytes are folded in from the most significant one downwards.
+            ulong value = 0;
+            for (int i = 7; i >= 0; i--)
+            {
+                value = (value << 8) | (ulong)this.bd[start + i];
+            }
+            return value;
+        }
+
+        /*
+        * Get an 8-bit random value from a PRNG.
+        */
+        internal uint prng_get_u8()
+        {
+            // Advance the read position, then fetch the byte it pointed at.
+            int at = this.ptr;
+            this.ptr = at + 1;
+            uint result = this.bd[at];
+
+            // Refill right away once the last byte is consumed, so the buffer is never empty.
+            if (this.ptr == this.bd.Length)
+            {
+                this.prng_refill();
+            }
+            return result;
+        }
+    }
+}
diff --git a/crypto/src/pqc/crypto/falcon/FalconSign.cs b/crypto/src/pqc/crypto/falcon/FalconSign.cs
new file mode 100644
index 000000000..613ef498b
--- /dev/null
+++ b/crypto/src/pqc/crypto/falcon/FalconSign.cs
@@ -0,0 +1,974 @@
+using System;
+
+namespace Org.BouncyCastle.Pqc.Crypto.Falcon
+{
+    class FalconSign
+    {
+
+        // FFT-domain polynomial operations (poly_split_fft, poly_merge_fft, poly_LDL_fft, ...).
+        FalconFFT ffte;
+        // Arithmetic on FalconFPR values (fpr_add, fpr_mul, fpr_sqrt, ...) and its constants.
+        FPREngine fpre;
+        // Shared helper supplied by the caller.
+        FalconCommon common;
+
+        internal FalconSign(FalconCommon common)
+        {
+            this.common = common;
+            ffte = new FalconFFT();
+            fpre = new FPREngine();
+        }
+
+        /* 
+        * License from the reference C code (the code was copied then modified
+        * to function in C#):
+        * ==========================(LICENSE BEGIN)============================
+        *
+        * Copyright (c) 2017-2019  Falcon Project
+        *
+        * Permission is hereby granted, free of charge, to any person obtaining
+        * a copy of this software and associated documentation files (the
+        * "Software"), to deal in the Software without restriction, including
+        * without limitation the rights to use, copy, modify, merge, publish,
+        * distribute, sublicense, and/or sell copies of the Software, and to
+        * permit persons to whom the Software is furnished to do so, subject to
+        * the following conditions:
+        *
+        * The above copyright notice and this permission notice shall be
+        * included in all copies or substantial portions of the Software.
+        *
+        * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+        * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+        * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+        * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+        * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+        * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+        * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+        *
+        * ===========================(LICENSE END)=============================
+        */
+
+        /*
+        * Binary case:
+        *   N = 2^logn
+        *   phi = X^N+1
+        */
+
+        /*
+        * Get the size of the LDL tree for an input with polynomials of size
+        * 2^logn. The size is expressed in the number of elements.
+        */
+        internal uint ffLDL_treesize(uint logn)
+        {
+            /*
+            * A node for size logn holds 2^logn elements plus two child
+            * trees for size logn-1, and a constant polynomial (logn = 0)
+            * is a single element:
+            *
+            *   s(0) = 1
+            *   s(logn) = (2^logn) + 2*s(logn-1)
+            *
+            * The closed form of that recurrence is (logn + 1) * 2^logn.
+            */
+            uint levels = logn + 1;
+            int shift = (int)logn;
+            return levels << shift;
+        }
+
+        /*
+        * Inner function for ffLDL_fft(). It expects the matrix to be both
+        * auto-adjoint and quasicyclic; also, it uses the source operands
+        * as modifiable temporaries.
+        *
+        * tmp[] must have room for at least one polynomial.
+        */
+        internal void ffLDL_fft_inner(FalconFPR[] treesrc, int tree,
+            FalconFPR[] g0src, int g0, FalconFPR[] g1src, int g1, uint logn, FalconFPR[] tmpsrc, int tmp)
+        {
+            int size = 1 << (int)logn;
+
+            // Single-element polynomial: the tree is just that element.
+            if (size == 1)
+            {
+                treesrc[tree] = g0src[g0];
+                return;
+            }
+
+            int half = size >> 1;
+            uint childLogn = logn - 1;
+
+            /*
+            * LDL decomposition: L goes into the tree node and d11 into tmp.
+            * d00 is g0 itself, so it needs no separate output.
+            */
+            ffte.poly_LDLmv_fft(tmpsrc, tmp, treesrc, tree, g0src, g0, g1src, g1, g0src, g0, logn);
+
+            /*
+            * Split both diagonal entries, recycling g0 and g1 as storage:
+            *   d00 (in g0)  -> g1, g1 + half
+            *   d11 (in tmp) -> g0, g0 + half
+            * The first split must read g0 before the second one overwrites it.
+            */
+            ffte.poly_split_fft(g1src, g1, g1src, g1 + half, g0src, g0, logn);
+            ffte.poly_split_fft(g0src, g0, g0src, g0 + half, tmpsrc, tmp, logn);
+
+            /*
+            * Recurse on each half-size pair. The first sub-tree sits right
+            * after this node's 2^logn elements, the second one right after
+            * the first sub-tree.
+            */
+            int firstChild = tree + size;
+            ffLDL_fft_inner(treesrc, firstChild,
+                g1src, g1, g1src, g1 + half, childLogn, tmpsrc, tmp);
+
+            int secondChild = firstChild + (int)ffLDL_treesize(childLogn);
+            ffLDL_fft_inner(treesrc, secondChild,
+                g0src, g0, g0src, g0 + half, childLogn, tmpsrc, tmp);
+        }
+
+        /*
+        * Compute the ffLDL tree of an auto-adjoint matrix G. The matrix
+        * is provided as three polynomials (FFT representation).
+        *
+        * The "tree" array is filled with the computed tree, of size
+        * (logn+1)*(2^logn) elements (see ffLDL_treesize()).
+        *
+        * Input arrays MUST NOT overlap, except possibly the three unmodified
+        * arrays g00, g01 and g11. tmp[] should have room for at least three
+        * polynomials of 2^logn elements each.
+        */
+        internal void ffLDL_fft(FalconFPR[] treesrc, int tree, FalconFPR[] g00src, int g00,
+            FalconFPR[] g01src, int g01, FalconFPR[] g11src, int g11,
+            uint logn, FalconFPR[] tmpsrc, int tmp)
+        {
+            int size = 1 << (int)logn;
+
+            // Single-element polynomial: the tree is just that element.
+            if (size == 1)
+            {
+                treesrc[tree] = g00src[g00];
+                return;
+            }
+
+            int half = size >> 1;
+
+            // Carve the scratch area into three consecutive polynomials.
+            int d00 = tmp;
+            int d11 = d00 + size;
+            int scratch = d11 + size;
+
+            // Work on a copy of g00 so the caller's g00, g01 and g11 stay untouched.
+            Array.Copy(g00src, g00, tmpsrc, d00, size);
+            ffte.poly_LDLmv_fft(tmpsrc, d11, treesrc, tree, g00src, g00, g01src, g01, g11src, g11, logn);
+
+            // Split d00 into scratch and d11 into the d00 slot, then move the
+            // split d00 from scratch into the d11 slot.
+            ffte.poly_split_fft(tmpsrc, scratch, tmpsrc, scratch + half, tmpsrc, d00, logn);
+            ffte.poly_split_fft(tmpsrc, d00, tmpsrc, d00 + half, tmpsrc, d11, logn);
+            Array.Copy(tmpsrc, scratch, tmpsrc, d11, size);
+
+            // First sub-tree from the d11 slot, second sub-tree from the d00 slot.
+            int firstChild = tree + size;
+            ffLDL_fft_inner(treesrc, firstChild,
+                tmpsrc, d11, tmpsrc, d11 + half, logn - 1, tmpsrc, scratch);
+
+            int secondChild = firstChild + (int)ffLDL_treesize(logn - 1);
+            ffLDL_fft_inner(treesrc, secondChild,
+                tmpsrc, d00, tmpsrc, d00 + half, logn - 1, tmpsrc, scratch);
+        }
+
+        /*
+        * Normalize an ffLDL tree: each leaf of value x is replaced with
+        * sigma / sqrt(x).
+        */
+        internal void ffLDL_binary_normalize(FalconFPR[] treesrc, int tree, uint orig_logn, uint logn)
+        {
+            /*
+            * TODO: make an iterative version.
+            */
+            int size = 1 << (int)logn;
+
+            if (size != 1)
+            {
+                // Inner node: skip its 2^logn elements and normalize both sub-trees.
+                uint childLogn = logn - 1;
+                int firstChild = tree + size;
+
+                ffLDL_binary_normalize(treesrc, firstChild, orig_logn, childLogn);
+                ffLDL_binary_normalize(treesrc, firstChild + (int)ffLDL_treesize(childLogn),
+                    orig_logn, childLogn);
+                return;
+            }
+
+            /*
+            * Leaf: store sqrt(x) * inv_sigma, i.e. the inverse of the value
+            * the specification asks for. This avoids a division here and
+            * another one in the sampler.
+            */
+            FalconFPR root = this.fpre.fpr_sqrt(treesrc[tree]);
+            treesrc[tree] = this.fpre.fpr_mul(root, this.fpre.fpr_inv_sigma[orig_logn]);
+        }
+
+        /* =================================================================== */
+
+        /*
+        * Convert an integer polynomial (with small values) into the
+        * representation with complex numbers.
+        */
+        internal void smallints_to_fpr(FalconFPR[] rsrc, int r, sbyte[] tsrc, int t, uint logn)
+        {
+            // Convert the 2^logn coefficients starting at tsrc[t] one by one into rsrc[r..].
+            int count = 1 << (int)logn;
+
+            for (int i = 0; i < count; i++)
+            {
+                rsrc[r + i] = this.fpre.fpr_of(tsrc[t + i]);
+            }
+        }
+
+        /*
+        * The expanded private key contains:
+        *  - The B0 matrix (four elements)
+        *  - The ffLDL tree
+        */
+
+        // Offset of b00 inside the expanded private key: it is the first element.
+        int skoff_b00(uint logn)
+        {
+            const int offset = 0;
+            return offset;
+        }
+
+        // Offset of b01 inside the expanded private key: one polynomial (2^logn elements) in.
+        int skoff_b01(uint logn)
+        {
+            int shift = (int)logn;
+            return 1 << shift;
+        }
+
+        // Offset of b10 inside the expanded private key: two polynomials (2 * 2^logn elements) in.
+        int skoff_b10(uint logn)
+        {
+            int shift = (int)logn;
+            return 2 << shift;
+        }
+
+        // Offset of b11 inside the expanded private key: three polynomials (3 * 2^logn elements) in.
+        int skoff_b11(uint logn)
+        {
+            int shift = (int)logn;
+            return 3 << shift;
+        }
+
+        // Offset of the ffLDL tree inside the expanded private key: after the four B0 polynomials.
+        int skoff_tree(uint logn)
+        {
+            int shift = (int)logn;
+            return 4 << shift;
+        }
+
+        /*
+        * Perform Fast Fourier Sampling for target vector t. The Gram matrix
+        * is provided (G = [[g00, g01], [adj(g01), g11]]). The sampled vector
+        * is written over (t0,t1). The Gram matrix is modified as well. The
+        * tmp[] buffer must have room for four polynomials.
+        */
+        internal void ffSampling_fft_dyntree(SamplerZ samp,
+            FalconFPR[] t0src, int t0, FalconFPR[] t1src, int t1,
+            FalconFPR[] g00src, int g00, FalconFPR[] g01src, int g01, FalconFPR[] g11src, int g11,
+            uint orig_logn, uint logn, FalconFPR[] tmpsrc, int tmp)
+        {
+            /*
+            * Parameters come as (array, offset) pairs:
+            *   samp       integer sampler used at the leaves
+            *   t0, t1     target vector; overwritten with the sampled vector
+            *   g00..g11   Gram matrix; modified in place
+            *   orig_logn  logn of the top-level call, selects fpr_inv_sigma
+            *   logn       size of the current recursion level (2^logn)
+            *   tmp        scratch area, four polynomials at this level
+            *
+            * The target vector is always accessed through t0src/t1src.
+            * Earlier revisions indexed tmpsrc with the t0/t1 offsets on the
+            * non-leaf levels, which is only correct when the caller passes
+            * the same array for t0src, t1src and tmpsrc. The recursive calls
+            * below do pass tmpsrc for both, so their behaviour is unchanged.
+            */
+            int n, hn;
+            int z0, z1;
+
+            /*
+            * Deepest level: the LDL tree leaf value is just g00 (the
+            * array has length only 1 at this point); we normalize it
+            * with regards to sigma, then use it for sampling.
+            */
+            if (logn == 0) {
+                FalconFPR leaf;
+
+                leaf = g00src[g00+0];
+                leaf = this.fpre.fpr_mul(this.fpre.fpr_sqrt(leaf), this.fpre.fpr_inv_sigma[orig_logn]);
+                t0src[t0+0] = this.fpre.fpr_of(samp.Sample(t0src[t0+0], leaf));
+                t1src[t1+0] = this.fpre.fpr_of(samp.Sample(t1src[t1+0], leaf));
+                return;
+            }
+
+            n = (int)1 << (int)logn;
+            hn = n >> 1;
+
+            /*
+            * Decompose G into LDL. We only need d00 (identical to g00),
+            * d11, and l10; we do that in place.
+            */
+            this.ffte.poly_LDL_fft(g00src, g00, g01src, g01, g11src, g11, logn);
+
+            /*
+            * Split d00 and d11 and expand them into half-size quasi-cyclic
+            * Gram matrices. We also save l10 in tmp[].
+            */
+            this.ffte.poly_split_fft(tmpsrc, tmp, tmpsrc, tmp + hn, g00src, g00, logn);
+            Array.Copy(tmpsrc, tmp, g00src, g00, n);
+            this.ffte.poly_split_fft(tmpsrc, tmp, tmpsrc, tmp + hn, g11src, g11, logn);
+            Array.Copy(tmpsrc, tmp, g11src, g11, n);
+            Array.Copy(g01src, g01, tmpsrc, tmp, n);
+            Array.Copy(g00src, g00, g01src, g01, hn);
+            Array.Copy(g11src, g11, g01src, g01 + hn, hn);
+            /*
+            * The half-size Gram matrices for the recursive LDL tree
+            * building are now:
+            *   - left sub-tree: g00, g00+hn, g01
+            *   - right sub-tree: g11, g11+hn, g01+hn
+            * l10 is in tmp[].
+            */
+
+            /*
+            * We split t1 and use the first recursive call on the two
+            * halves, using the right sub-tree. The result is merged
+            * back into tmp + 2*n.
+            */
+            z1 = tmp + n;
+            this.ffte.poly_split_fft(tmpsrc, z1, tmpsrc, z1 + hn, t1src, t1, logn);
+            ffSampling_fft_dyntree(samp, tmpsrc, z1, tmpsrc, z1 + hn,
+                g11src, g11, g11src, g11 + hn, g01src, g01 + hn, orig_logn, logn - 1, tmpsrc, z1 + n);
+            this.ffte.poly_merge_fft(tmpsrc, tmp + (n << 1), tmpsrc, z1, tmpsrc, z1 + hn, logn);
+
+            /*
+            * Compute tb0 = t0 + (t1 - z1) * l10.
+            * At that point, l10 is in tmp, t1 is unmodified, and z1 is
+            * in tmp + (n << 1). The buffer in z1 is free.
+            *
+            * In the end, z1 is written over t1, and tb0 is in t0.
+            */
+            Array.Copy(t1src, t1, tmpsrc, z1, n);
+            this.ffte.poly_sub(tmpsrc, z1, tmpsrc, tmp + (n << 1), logn);
+            Array.Copy(tmpsrc, tmp + (n << 1), t1src, t1, n);
+            this.ffte.poly_mul_fft(tmpsrc, tmp, tmpsrc, z1, logn);
+            this.ffte.poly_add(t0src, t0, tmpsrc, tmp, logn);
+
+            /*
+            * Second recursive invocation, on the split tb0 (currently in t0)
+            * and the left sub-tree.
+            */
+            z0 = tmp;
+            this.ffte.poly_split_fft(tmpsrc, z0, tmpsrc, z0 + hn, t0src, t0, logn);
+            ffSampling_fft_dyntree(samp, tmpsrc, z0, tmpsrc, z0 + hn,
+                g00src, g00, g00src, g00 + hn, g01src, g01, orig_logn, logn - 1, tmpsrc, z0 + n);
+            this.ffte.poly_merge_fft(t0src, t0, tmpsrc, z0, tmpsrc, z0 + hn, logn);
+        }
+
+        /*
+        * Perform Fast Fourier Sampling for target vector t and LDL tree T.
+        * tmp[] must have size for at least two polynomials of size 2^logn.
+        */
+        internal void ffSampling_fft(SamplerZ samp,
+            FalconFPR[] z0src, int z0, FalconFPR[] z1src, int z1,
+            FalconFPR[] treesrc, int tree,
+            FalconFPR[] t0src, int t0, FalconFPR[] t1src, int t1, uint logn,
+            FalconFPR[] tmpsrc, int tmp)
+        {
+            int n, hn;
+            int tree0, tree1;
+
+            /*
+            * When logn == 2, we inline the last two recursion levels.
+            */
+            if (logn == 2) {
+                FalconFPR x0, x1, y0, y1, w0, w1, w2, w3, sigma;
+                FalconFPR a_re, a_im, b_re, b_im, c_re, c_im;
+
+                tree0 = tree + 4;
+                tree1 = tree + 8;
+
+                /*
+                * We split t1 into w*, then do the recursive invocation,
+                * with output in w*. We finally merge back into z1.
+                */
+                a_re = t1src[t1+0];
+                a_im = t1src[t1 + 2];
+                b_re = t1src[t1 + 1];
+                b_im = t1src[t1 + 3];
+                c_re = this.fpre.fpr_add(a_re, b_re);
+                c_im = this.fpre.fpr_add(a_im, b_im);
+                w0 = this.fpre.fpr_half(c_re);
+                w1 = this.fpre.fpr_half(c_im);
+                c_re = this.fpre.fpr_sub(a_re, b_re);
+                c_im = this.fpre.fpr_sub(a_im, b_im);
+                w2 = this.fpre.fpr_mul(this.fpre.fpr_add(c_re, c_im), this.fpre.fpr_invsqrt8);
+                w3 = this.fpre.fpr_mul(this.fpre.fpr_sub(c_im, c_re), this.fpre.fpr_invsqrt8);
+
+                x0 = w2;
+                x1 = w3;
+                sigma = treesrc[tree1 + 3];
+                w2 = this.fpre.fpr_of(samp.Sample(x0, sigma));
+                w3 = this.fpre.fpr_of(samp.Sample(x1, sigma));
+                a_re = this.fpre.fpr_sub(x0, w2);
+                a_im = this.fpre.fpr_sub(x1, w3);
+                b_re = treesrc[tree1 + 0];
+                b_im = treesrc[tree1 + 1];
+                c_re = this.fpre.fpr_sub(this.fpre.fpr_mul(a_re, b_re), this.fpre.fpr_mul(a_im, b_im));
+                c_im = this.fpre.fpr_add(this.fpre.fpr_mul(a_re, b_im), this.fpre.fpr_mul(a_im, b_re));
+                x0 = this.fpre.fpr_add(c_re, w0);
+                x1 = this.fpre.fpr_add(c_im, w1);
+                sigma = treesrc[tree1 + 2];
+                w0 = this.fpre.fpr_of(samp.Sample(x0, sigma));
+                w1 = this.fpre.fpr_of(samp.Sample(x1, sigma));
+
+                a_re = w0;
+                a_im = w1;
+                b_re = w2;
+                b_im = w3;
+                c_re = this.fpre.fpr_mul(this.fpre.fpr_sub(b_re, b_im), this.fpre.fpr_invsqrt2);
+                c_im = this.fpre.fpr_mul(this.fpre.fpr_add(b_re, b_im), this.fpre.fpr_invsqrt2);
+                z1src[z1 + 0] = w0 = this.fpre.fpr_add(a_re, c_re);
+                z1src[z1 + 2] = w2 = this.fpre.fpr_add(a_im, c_im);
+                z1src[z1 + 1] = w1 = this.fpre.fpr_sub(a_re, c_re);
+                z1src[z1 + 3] = w3 = this.fpre.fpr_sub(a_im, c_im);
+
+                /*
+                * Compute tb0 = t0 + (t1 - z1) * L. Value tb0 ends up in w*.
+                */
+                w0 = this.fpre.fpr_sub(t1src[t1+0], w0);
+                w1 = this.fpre.fpr_sub(t1src[t1 + 1], w1);
+                w2 = this.fpre.fpr_sub(t1src[t1 + 2], w2);
+                w3 = this.fpre.fpr_sub(t1src[t1 + 3], w3);
+
+                a_re = w0;
+                a_im = w2;
+                b_re = treesrc[tree+0];
+                b_im = treesrc[tree + 2];
+                w0 = this.fpre.fpr_sub(this.fpre.fpr_mul(a_re, b_re), this.fpre.fpr_mul(a_im, b_im));
+                w2 = this.fpre.fpr_add(this.fpre.fpr_mul(a_re, b_im), this.fpre.fpr_mul(a_im, b_re));
+                a_re = w1;
+                a_im = w3;
+                b_re = treesrc[tree + 1];
+                b_im = treesrc[tree + 3];
+                w1 = this.fpre.fpr_sub(this.fpre.fpr_mul(a_re, b_re), this.fpre.fpr_mul(a_im, b_im));
+                w3 = this.fpre.fpr_add(this.fpre.fpr_mul(a_re, b_im), this.fpre.fpr_mul(a_im, b_re));
+
+                w0 = this.fpre.fpr_add(w0, t0src[t0+0]);
+                w1 = this.fpre.fpr_add(w1, t0src[t0 + 1]);
+                w2 = this.fpre.fpr_add(w2, t0src[t0 + 2]);
+                w3 = this.fpre.fpr_add(w3, t0src[t0 + 3]);
+
+                /*
+                * Second recursive invocation.
+                */
+                a_re = w0;
+                a_im = w2;
+                b_re = w1;
+                b_im = w3;
+                c_re = this.fpre.fpr_add(a_re, b_re);
+                c_im = this.fpre.fpr_add(a_im, b_im);
+                w0 = this.fpre.fpr_half(c_re);
+                w1 = this.fpre.fpr_half(c_im);
+                c_re = this.fpre.fpr_sub(a_re, b_re);
+                c_im = this.fpre.fpr_sub(a_im, b_im);
+                w2 = this.fpre.fpr_mul(this.fpre.fpr_add(c_re, c_im), this.fpre.fpr_invsqrt8);
+                w3 = this.fpre.fpr_mul(this.fpre.fpr_sub(c_im, c_re), this.fpre.fpr_invsqrt8);
+
+                x0 = w2;
+                x1 = w3;
+                sigma = treesrc[tree0 + 3];
+                w2 = y0 = this.fpre.fpr_of(samp.Sample(x0, sigma));
+                w3 = y1 = this.fpre.fpr_of(samp.Sample(x1, sigma));
+                a_re = this.fpre.fpr_sub(x0, y0);
+                a_im = this.fpre.fpr_sub(x1, y1);
+                b_re = treesrc[tree0 + 0];
+                b_im = treesrc[tree0 + 1];
+                c_re = this.fpre.fpr_sub(this.fpre.fpr_mul(a_re, b_re), this.fpre.fpr_mul(a_im, b_im));
+                c_im = this.fpre.fpr_add(this.fpre.fpr_mul(a_re, b_im), this.fpre.fpr_mul(a_im, b_re));
+                x0 = this.fpre.fpr_add(c_re, w0);
+                x1 = this.fpre.fpr_add(c_im, w1);
+                sigma = treesrc[tree0 + 2];
+                w0 = this.fpre.fpr_of(samp.Sample(x0, sigma));
+                w1 = this.fpre.fpr_of(samp.Sample(x1, sigma));
+
+                a_re = w0;
+                a_im = w1;
+                b_re = w2;
+                b_im = w3;
+                c_re = this.fpre.fpr_mul(this.fpre.fpr_sub(b_re, b_im), this.fpre.fpr_invsqrt2);
+                c_im = this.fpre.fpr_mul(this.fpre.fpr_add(b_re, b_im), this.fpre.fpr_invsqrt2);
+                z0src[z0 + 0] = this.fpre.fpr_add(a_re, c_re);
+                z0src[z0 + 2] = this.fpre.fpr_add(a_im, c_im);
+                z0src[z0 + 1] = this.fpre.fpr_sub(a_re, c_re);
+                z0src[z0 + 3] = this.fpre.fpr_sub(a_im, c_im);
+
+                return;
+            }
+
+            /*
+            * Case logn == 1 is reachable only when using Falcon-2 (the
+            * smallest size for which Falcon is mathematically defined, but
+            * of course way too insecure to be of any use).
+            */
+            if (logn == 1) {
+                FalconFPR x0, x1, y0, y1, sigma;
+                FalconFPR a_re, a_im, b_re, b_im, c_re, c_im;
+
+                x0 = t1src[t1+0];
+                x1 = t1src[t1 + 1];
+                sigma = treesrc[tree + 3];
+                z1src[z1 + 0] = y0 = this.fpre.fpr_of(samp.Sample(x0, sigma));
+                z1src[z1 + 1] = y1 = this.fpre.fpr_of(samp.Sample(x1, sigma));
+                a_re = this.fpre.fpr_sub(x0, y0);
+                a_im = this.fpre.fpr_sub(x1, y1);
+                b_re = treesrc[tree+0];
+                b_im = treesrc[tree + 1];
+                c_re = this.fpre.fpr_sub(this.fpre.fpr_mul(a_re, b_re), this.fpre.fpr_mul(a_im, b_im));
+                c_im = this.fpre.fpr_add(this.fpre.fpr_mul(a_re, b_im), this.fpre.fpr_mul(a_im, b_re));
+                x0 = this.fpre.fpr_add(c_re, t0src[t0+0]);
+                x1 = this.fpre.fpr_add(c_im, t0src[t0 + 1]);
+                sigma = treesrc[tree + 2];
+                z0src[z0 + 0] = this.fpre.fpr_of(samp.Sample(x0, sigma));
+                z0src[z0 + 1] = this.fpre.fpr_of(samp.Sample(x1, sigma));
+
+                return;
+            }
+
+            /*
+            * Normal end of recursion is for logn == 0. Since the last
+            * steps of the recursions were inlined in the blocks above
+            * (when logn == 1 or 2), this case is not reachable, and is
+            * retained here only for documentation purposes.
+
+            if (logn == 0) {
+                fpr x0, x1, sigma;
+
+                x0 = t0src[t0+0];
+                x1 = t1src[t1+0];
+                sigma = treesrc[tree+0];
+                z0[0] = this.fpre.fpr_of(samp.sample(x0, sigma));
+                z1src[z1 + 0] = this.fpre.fpr_of(samp.sample(x1, sigma));
+                return;
+            }
+
+            */
+
+            /*
+            * General recursive case (logn >= 3).
+            */
+
+            n = (int)1 << (int)logn;
+            hn = n >> 1;
+            tree0 = tree + n;
+            tree1 = tree + n + (int)ffLDL_treesize(logn - 1);
+
+            /*
+            * We split t1 into z1 (reused as temporary storage), then do
+            * the recursive invocation, with output in tmp. We finally
+            * merge back into z1.
+            */
+            this.ffte.poly_split_fft(z1src, z1, z1src, z1 + hn, t1src, t1, logn);
+            ffSampling_fft(samp, tmpsrc, tmp, tmpsrc, tmp + hn,
+                treesrc, tree1, z1src, z1, z1src, z1 + hn, logn - 1, tmpsrc, tmp + n);
+            this.ffte.poly_merge_fft(z1src, z1, tmpsrc, tmp, tmpsrc, tmp + hn, logn);
+
+            /*
+            * Compute tb0 = t0 + (t1 - z1) * L. Value tb0 ends up in tmp[].
+            */
+            // memcpy(tmp, t1, n * sizeof *t1);
+            Array.Copy(t1src, t1, tmpsrc, tmp, n);
+            this.ffte.poly_sub(tmpsrc, tmp, z1src, z1, logn);
+            this.ffte.poly_mul_fft(tmpsrc, tmp, treesrc, tree, logn);
+            this.ffte.poly_add(tmpsrc, tmp, t0src, t0, logn);
+
+            /*
+            * Second recursive invocation.
+            */
+            this.ffte.poly_split_fft(z0src, z0, z0src, z0 + hn, tmpsrc, tmp, logn);
+            ffSampling_fft(samp, tmpsrc, tmp, tmpsrc, tmp + hn,
+                treesrc, tree0, z0src, z0, z0src, z0 + hn, logn - 1, tmpsrc, tmp + n);
+            this.ffte.poly_merge_fft(z0src, z0, tmpsrc, tmp, tmpsrc, tmp + hn, logn);
+        }
+
+        /*
+        * Compute a signature: the signature contains two vectors, s1 and s2.
+        * The s1 vector is not returned. The squared norm of (s1,s2) is
+        * computed, and if it is short enough, then s2 is returned into the
+        * s2[] buffer, and 1 is returned; otherwise, s2[] is untouched and 0 is
+        * returned; the caller should then try again. This function uses an
+        * expanded key.
+        *
+        * tmp[] must have room for at least six polynomials.
+        */
+        /*
+        * Parameters (each array/offset pair addresses a region of a larger buffer):
+        *   samp                     sampler handed to ffSampling_fft
+        *   s2src, s2                receives the n coefficients of s2 on success
+        *   ex_keysrc, expanded_key  expanded key; b00, b01, b10, b11 and the tree
+        *                            are located through the skoff_* helpers
+        *   hmsrc, hm                hashed message (n values), only read
+        *   logn                     base-2 logarithm of the degree (n = 1 << logn)
+        *   tmpsrc, tmp              scratch area, overwritten
+        *
+        * Returns 1 when s2 was written, 0 when the caller must retry.
+        */
+        internal int do_sign_tree(SamplerZ samp, short[] s2src, int s2,
+            FalconFPR[] ex_keysrc, int expanded_key,
+            ushort[] hmsrc, int hm,
+            uint logn, FalconFPR[] tmpsrc, int tmp)
+        {
+            int n, u;
+            int t0, t1, tx, ty;
+            int b00, b01, b10, b11, tree;
+            FalconFPR ni;
+            uint sqn, ng;
+            short[] s2tmp;
+
+            n = (int)1 << (int)logn;
+            t0 = tmp;
+            t1 = t0 + n;
+            b00 = expanded_key + skoff_b00(logn);
+            b01 = expanded_key + skoff_b01(logn);
+            b10 = expanded_key + skoff_b10(logn);
+            b11 = expanded_key + skoff_b11(logn);
+            tree = expanded_key + skoff_tree(logn);
+
+            /*
+            * Set the target vector to [hm, 0] (hm is the hashed message).
+            * The second half is implicit: t1 is overwritten below before
+            * it is ever read.
+            */
+            for (u = 0; u < n; u ++) {
+                tmpsrc[t0+u] = this.fpre.fpr_of(hmsrc[hm + u]);
+            }
+
+            /*
+            * Apply the lattice basis to obtain the real target
+            * vector (after normalization with regards to modulus).
+            */
+            this.ffte.FFT(tmpsrc, t0, logn);
+            ni = this.fpre.fpr_inverse_of_q;
+            // memcpy(t1, t0, n * sizeof *t0);
+            Array.Copy(tmpsrc, t0, tmpsrc, t1, n);
+            this.ffte.poly_mul_fft(tmpsrc, t1, ex_keysrc, b01, logn);
+            this.ffte.poly_mulconst(tmpsrc, t1, this.fpre.fpr_neg(ni), logn);
+            this.ffte.poly_mul_fft(tmpsrc, t0, ex_keysrc, b11, logn);
+            this.ffte.poly_mulconst(tmpsrc, t0, ni, logn);
+
+            tx = t1 + n;
+            ty = tx + n;
+
+            /*
+            * Apply sampling. Output is written back in [tx, ty]; the
+            * sampler's own scratch space starts right after ty.
+            */
+            ffSampling_fft(samp, tmpsrc, tx, tmpsrc, ty, ex_keysrc, tree, tmpsrc, t0, tmpsrc, t1, logn, tmpsrc, ty + n);
+
+            /*
+            * Get the lattice point corresponding to that tiny vector.
+            */
+            // memcpy(t0, tx, n * sizeof *tx);
+            Array.Copy(tmpsrc, tx, tmpsrc, t0, n);
+            // memcpy(t1, ty, n * sizeof *ty);
+            Array.Copy(tmpsrc, ty, tmpsrc, t1, n);
+            this.ffte.poly_mul_fft(tmpsrc, tx, ex_keysrc, b00, logn);
+            this.ffte.poly_mul_fft(tmpsrc, ty, ex_keysrc, b10, logn);
+            this.ffte.poly_add(tmpsrc, tx, tmpsrc, ty, logn);
+            // memcpy(ty, t0, n * sizeof *t0);
+            Array.Copy(tmpsrc, t0, tmpsrc, ty, n);
+            this.ffte.poly_mul_fft(tmpsrc, ty, ex_keysrc, b01, logn);
+
+            // memcpy(t0, tx, n * sizeof *tx);
+            Array.Copy(tmpsrc, tx, tmpsrc, t0, n);
+            this.ffte.poly_mul_fft(tmpsrc, t1, ex_keysrc, b11, logn);
+            this.ffte.poly_add(tmpsrc, t1, tmpsrc, ty, logn);
+
+            this.ffte.iFFT(tmpsrc, t0, logn);
+            this.ffte.iFFT(tmpsrc, t1, logn);
+
+            /*
+            * Compute the signature. Only the squared norm of s1 is
+            * needed here; its coefficients are not kept. ng collects
+            * every intermediate sum so that a sum reaching bit 31 at
+            * any point saturates sqn to 0xFFFFFFFF below.
+            */
+            sqn = 0;
+            ng = 0;
+            for (u = 0; u < n; u ++) {
+                int z;
+
+                z = (int)hmsrc[hm + u] - (int)this.fpre.fpr_rint(tmpsrc[t0+u]);
+                sqn += (uint)(z * z);
+                ng |= sqn;
+            }
+            sqn |= (uint)(-(ng >> 31));
+
+            /*
+            * With "normal" degrees (e.g. 512 or 1024), it is very
+            * improbable that the computed vector is not short enough;
+            * however, it may happen in practice for the very reduced
+            * versions (e.g. degree 16 or below). In that case, the caller
+            * will loop, and we must not write anything into s2[] because
+            * s2[] may overlap with the hashed message hm[] and we need
+            * hm[] for the next iteration.
+            */
+            s2tmp = new short[n];
+            for (u = 0; u < n; u ++) {
+                s2tmp[u] = (short)-this.fpre.fpr_rint(tmpsrc[t1 + u]);
+            }
+            if (this.common.is_short_half(sqn, s2tmp, 0, logn)) {
+                // memcpy(s2, s2tmp, n * sizeof *s2);
+                Array.Copy(s2tmp, 0, s2src, s2, n);
+                /*
+                * The C code additionally does memcpy(tmp, s1tmp, ...),
+                * reinterpreting the scratch bytes as 16-bit integers.
+                * That has no equivalent here: Array.Copy from a short[]
+                * into a FalconFPR[] throws ArrayTypeMismatchException.
+                * Since s1 is not part of the result, it is not stored.
+                */
+                return 1;
+            }
+            return 0;
+        }
+
+        /*
+        * Compute a signature: the signature contains two vectors, s1 and s2.
+        * The s1 vector is not returned. The squared norm of (s1,s2) is
+        * computed, and if it is short enough, then s2 is returned into the
+        * s2[] buffer, and 1 is returned; otherwise, s2[] is untouched and 0 is
+        * returned; the caller should then try again.
+        *
+        * tmp[] must have room for at least nine polynomials.
+        */
+        /*
+        * Parameters (each array/offset pair addresses a region of a larger buffer):
+        *   samp                 sampler handed to ffSampling_fft_dyntree
+        *   s2src, s2            receives the n coefficients of s2 on success
+        *   fsrc/f, gsrc/g,
+        *   Fsrc/F, Gsrc/G       private key polynomials as small integers
+        *   hmsrc, hm            hashed message (n values), only read
+        *   logn                 base-2 logarithm of the degree (n = 1 << logn)
+        *   tmpsrc, tmp          scratch area, overwritten
+        *
+        * Returns 1 when s2 was written, 0 when the caller must retry.
+        */
+        internal int do_sign_dyn(SamplerZ samp, short[] s2src, int s2,
+            sbyte[] fsrc, int f, sbyte[] gsrc, int g,
+            sbyte[] Fsrc, int F, sbyte[] Gsrc, int G,
+            ushort[] hmsrc, int hm, uint logn, FalconFPR[] tmpsrc, int tmp)
+        {
+            int n, u;
+            int t0, t1, tx, ty;
+            int b00, b01, b10, b11;
+            int g00, g01, g11;
+            FalconFPR ni;
+            uint sqn, ng;
+            short[] s2tmp;
+
+            n = (int)1 << (int)logn;
+
+            /*
+            * Lattice basis is B = [[g, -f], [G, -F]]. We convert it to FFT.
+            */
+            b00 = tmp;
+            b01 = b00 + n;
+            b10 = b01 + n;
+            b11 = b10 + n;
+            smallints_to_fpr(tmpsrc, b01, fsrc, f, logn);
+            smallints_to_fpr(tmpsrc, b00, gsrc, g, logn);
+            smallints_to_fpr(tmpsrc, b11, Fsrc, F, logn);
+            smallints_to_fpr(tmpsrc, b10, Gsrc, G, logn);
+            this.ffte.FFT(tmpsrc, b01, logn);
+            this.ffte.FFT(tmpsrc, b00, logn);
+            this.ffte.FFT(tmpsrc, b11, logn);
+            this.ffte.FFT(tmpsrc, b10, logn);
+            this.ffte.poly_neg(tmpsrc, b01, logn);
+            this.ffte.poly_neg(tmpsrc, b11, logn);
+
+            /*
+            * Compute the Gram matrix G = B·B*. Formulas are:
+            *   g00 = b00*adj(b00) + b01*adj(b01)
+            *   g01 = b00*adj(b10) + b01*adj(b11)
+            *   g10 = b10*adj(b00) + b11*adj(b01)
+            *   g11 = b10*adj(b10) + b11*adj(b11)
+            *
+            * For historical reasons, this implementation uses
+            * g00, g01 and g11 (upper triangle). g10 is not kept
+            * since it is equal to adj(g01).
+            *
+            * We _replace_ the matrix B with the Gram matrix, but we
+            * must keep b01 and b11 for computing the target vector.
+            */
+            t0 = b11 + n;
+            t1 = t0 + n;
+
+            // memcpy(t0, b01, n * sizeof *b01);
+            Array.Copy(tmpsrc, b01, tmpsrc, t0, n);
+            this.ffte.poly_mulselfadj_fft(tmpsrc, t0, logn);    // t0 <- b01*adj(b01)
+
+            // memcpy(t1, b00, n * sizeof *b00);
+            Array.Copy(tmpsrc, b00, tmpsrc, t1, n);
+            this.ffte.poly_muladj_fft(tmpsrc, t1, tmpsrc, b10, logn);   // t1 <- b00*adj(b10)
+            this.ffte.poly_mulselfadj_fft(tmpsrc, b00, logn);   // b00 <- b00*adj(b00)
+            this.ffte.poly_add(tmpsrc, b00, tmpsrc, t0, logn);      // b00 <- g00
+            // memcpy(t0, b01, n * sizeof *b01);
+            Array.Copy(tmpsrc, b01, tmpsrc, t0, n);                 // t0 keeps the original b01
+            this.ffte.poly_muladj_fft(tmpsrc, b01, tmpsrc, b11, logn);  // b01 <- b01*adj(b11)
+            this.ffte.poly_add(tmpsrc, b01, tmpsrc, t1, logn);      // b01 <- g01
+
+            this.ffte.poly_mulselfadj_fft(tmpsrc, b10, logn);   // b10 <- b10*adj(b10)
+            // memcpy(t1, b11, n * sizeof *b11);
+            Array.Copy(tmpsrc, b11, tmpsrc, t1, n);
+            this.ffte.poly_mulselfadj_fft(tmpsrc, t1, logn);    // t1 <- b11*adj(b11)
+            this.ffte.poly_add(tmpsrc, b10, tmpsrc, t1, logn);      // b10 <- g11
+
+            /*
+            * We rename variables to make things clearer. The three elements
+            * of the Gram matrix uses the first 3*n slots of tmp[], followed
+            * by b11 and b01 (in that order).
+            */
+            g00 = b00;
+            g01 = b01;
+            g11 = b10;
+            b01 = t0;
+            t0 = b01 + n;
+            t1 = t0 + n;
+
+            /*
+            * Memory layout at that point:
+            *   g00 g01 g11 b11 b01 t0 t1
+            */
+
+            /*
+            * Set the target vector to [hm, 0] (hm is the hashed message).
+            * The second half is implicit: t1 is overwritten below before
+            * it is ever read.
+            */
+            for (u = 0; u < n; u ++) {
+                tmpsrc[t0+u] = this.fpre.fpr_of((short)hmsrc[hm + u]);
+            }
+
+            /*
+            * Apply the lattice basis to obtain the real target
+            * vector (after normalization with regards to modulus).
+            */
+            this.ffte.FFT(tmpsrc, t0, logn);
+            ni = this.fpre.fpr_inverse_of_q;
+            // memcpy(t1, t0, n * sizeof *t0);
+            Array.Copy(tmpsrc, t0, tmpsrc, t1, n);
+            this.ffte.poly_mul_fft(tmpsrc, t1, tmpsrc, b01, logn);
+            this.ffte.poly_mulconst(tmpsrc, t1, this.fpre.fpr_neg(ni), logn);
+            this.ffte.poly_mul_fft(tmpsrc, t0, tmpsrc, b11, logn);
+            this.ffte.poly_mulconst(tmpsrc, t0, ni, logn);
+
+            /*
+            * b01 and b11 can be discarded, so we move back (t0,t1).
+            * Memory layout is now:
+            *      g00 g01 g11 t0 t1
+            */
+            // memcpy(b11, t0, n * 2 * sizeof *t0);
+            Array.Copy(tmpsrc, t0, tmpsrc, b11, n * 2);
+            t0 = g11 + n;
+            t1 = t0 + n;
+
+            /*
+            * Apply sampling; result is written over (t0,t1).
+            */
+            ffSampling_fft_dyntree(samp,
+                tmpsrc, t0, tmpsrc, t1, tmpsrc, g00, tmpsrc, g01, tmpsrc, g11, logn, logn, tmpsrc, t1 + n);
+
+            /*
+            * We arrange the layout back to:
+            *     b00 b01 b10 b11 t0 t1
+            *
+            * We did not conserve the matrix basis, so we must recompute
+            * it now.
+            */
+            b00 = tmp;
+            b01 = b00 + n;
+            b10 = b01 + n;
+            b11 = b10 + n;
+            // memmove(b11 + n, t0, n * 2 * sizeof *t0);
+            // Source and destination overlap here; Array.Copy within a
+            // single array behaves like memmove.
+            Array.Copy(tmpsrc, t0, tmpsrc, b11 + n, n * 2);
+            t0 = b11 + n;
+            t1 = t0 + n;
+            smallints_to_fpr(tmpsrc, b01, fsrc, f, logn);
+            smallints_to_fpr(tmpsrc, b00, gsrc, g, logn);
+            smallints_to_fpr(tmpsrc, b11, Fsrc, F, logn);
+            smallints_to_fpr(tmpsrc, b10, Gsrc, G, logn);
+            this.ffte.FFT(tmpsrc, b01, logn);
+            this.ffte.FFT(tmpsrc, b00, logn);
+            this.ffte.FFT(tmpsrc, b11, logn);
+            this.ffte.FFT(tmpsrc, b10, logn);
+            this.ffte.poly_neg(tmpsrc, b01, logn);
+            this.ffte.poly_neg(tmpsrc, b11, logn);
+            tx = t1 + n;
+            ty = tx + n;
+
+            /*
+            * Get the lattice point corresponding to that tiny vector.
+            */
+            // memcpy(tx, t0, n * sizeof *t0);
+            Array.Copy(tmpsrc, t0, tmpsrc, tx, n);
+            // memcpy(ty, t1, n * sizeof *t1);
+            Array.Copy(tmpsrc, t1, tmpsrc, ty, n);
+            this.ffte.poly_mul_fft(tmpsrc, tx, tmpsrc, b00, logn);
+            this.ffte.poly_mul_fft(tmpsrc, ty, tmpsrc, b10, logn);
+            this.ffte.poly_add(tmpsrc, tx, tmpsrc, ty, logn);
+            // memcpy(ty, t0, n * sizeof *t0);
+            Array.Copy(tmpsrc, t0, tmpsrc, ty, n);
+            this.ffte.poly_mul_fft(tmpsrc, ty, tmpsrc, b01, logn);
+
+            // memcpy(t0, tx, n * sizeof *tx);
+            Array.Copy(tmpsrc, tx, tmpsrc, t0, n);
+            this.ffte.poly_mul_fft(tmpsrc, t1, tmpsrc, b11, logn);
+            this.ffte.poly_add(tmpsrc, t1, tmpsrc, ty, logn);
+            this.ffte.iFFT(tmpsrc, t0, logn);
+            this.ffte.iFFT(tmpsrc, t1, logn);
+
+            /*
+            * Only the squared norm of s1 is needed; its coefficients
+            * are not returned, so they are not stored. ng collects every
+            * intermediate sum so that a sum reaching bit 31 at any point
+            * saturates sqn to 0xFFFFFFFF below.
+            */
+            sqn = 0;
+            ng = 0;
+            for (u = 0; u < n; u ++) {
+                int z;
+
+                z = (int)hmsrc[hm + u] - (int)this.fpre.fpr_rint(tmpsrc[t0+u]);
+                sqn += (uint)(z * z);
+                ng |= sqn;
+            }
+            sqn |= (uint)(-(ng >> 31));
+
+            /*
+            * With "normal" degrees (e.g. 512 or 1024), it is very
+            * improbable that the computed vector is not short enough;
+            * however, it may happen in practice for the very reduced
+            * versions (e.g. degree 16 or below). In that case, the caller
+            * will loop, and we must not write anything into s2[] because
+            * s2[] may overlap with the hashed message hm[] and we need
+            * hm[] for the next iteration.
+            */
+            s2tmp = new short[n];
+            for (u = 0; u < n; u ++) {
+                s2tmp[u] = (short)-this.fpre.fpr_rint(tmpsrc[t1 + u]);
+            }
+            if (this.common.is_short_half(sqn, s2tmp, 0, logn)) {
+                // memcpy(s2, s2tmp, n * sizeof *s2);
+                Array.Copy(s2tmp, 0, s2src, s2, n);
+                return 1;
+            }
+            return 0;
+        }
+
+        /*
+        * Sign with an expanded key, retrying until do_sign_tree accepts.
+        *
+        * Signature produces short vectors s1 and s2. The signature is
+        * acceptable only if the aggregate vector s1,s2 is short; we must
+        * use the same bound as the verifier. Only s2 is returned (the
+        * verifier recomputes s1 from s2, the hashed message, and the
+        * public key).
+        */
+        internal void sign_tree(short[] sigsrc, int sig, SHAKE256 rng,
+            FalconFPR[] ex_keysrc, int expanded_key,
+            ushort[] hmsrc, int hm, uint logn, FalconFPR[] tmpsrc, int tmp)
+        {
+            bool accepted;
+
+            do
+            {
+                /*
+                * Normal sampling: every attempt gets a new fast PRNG
+                * seeded from the caller's SHAKE context ('rng'), and a
+                * new sampler built on top of it.
+                */
+                FalconRNG generator = new FalconRNG();
+                generator.prng_init(rng);
+                SamplerZ sampler = new SamplerZ(generator, this.fpre.fpr_sigma_min[logn], this.fpre);
+
+                /*
+                * Do the actual signature; a zero status means the
+                * candidate was rejected and we go around again.
+                */
+                int status = do_sign_tree(sampler, sigsrc, sig,
+                    ex_keysrc, expanded_key, hmsrc, hm, logn, tmpsrc, tmp);
+                accepted = status != 0;
+            }
+            while (!accepted);
+        }
+
+        /*
+        * Sign directly from the private key polynomials (f, g, F, G),
+        * retrying until do_sign_dyn accepts.
+        *
+        * Signature produces short vectors s1 and s2. The signature is
+        * acceptable only if the aggregate vector s1,s2 is short; we must
+        * use the same bound as the verifier. Only s2 is returned (the
+        * verifier recomputes s1 from s2, the hashed message, and the
+        * public key).
+        */
+        internal void sign_dyn(short[] sigsrc, int sig, SHAKE256 rng,
+            sbyte[] fsrc, int f, sbyte[] gsrc, int g,
+            sbyte[] Fsrc, int F, sbyte[] Gsrc, int G,
+            ushort[] hmsrc, int hm, uint logn, FalconFPR[] tmpsrc, int tmp)
+        {
+            while (true)
+            {
+                /*
+                * Normal sampling: every attempt gets a new fast PRNG
+                * seeded from the caller's SHAKE context ('rng'), and a
+                * new sampler built on top of it.
+                */
+                FalconRNG generator = new FalconRNG();
+                generator.prng_init(rng);
+                SamplerZ sampler = new SamplerZ(generator, this.fpre.fpr_sigma_min[logn], this.fpre);
+
+                /*
+                * Do the actual signature; stop at the first candidate
+                * that is accepted.
+                */
+                int status = do_sign_dyn(sampler, sigsrc, sig,
+                    fsrc, f, gsrc, g, Fsrc, F, Gsrc, G, hmsrc, hm, logn, tmpsrc, tmp);
+                if (status != 0)
+                {
+                    return;
+                }
+            }
+        }
+    }
+}
diff --git a/crypto/src/pqc/crypto/falcon/FalconSigner.cs b/crypto/src/pqc/crypto/falcon/FalconSigner.cs
new file mode 100644
index 000000000..8af2f4c93
--- /dev/null
+++ b/crypto/src/pqc/crypto/falcon/FalconSigner.cs
@@ -0,0 +1,76 @@
+using System;
+using Org.BouncyCastle.Crypto;
+using Org.BouncyCastle.Pqc.Crypto;
+using Org.BouncyCastle.Security;
+using Org.BouncyCastle.Crypto.Parameters;
+using Org.BouncyCastle.Utilities;
+
+namespace Org.BouncyCastle.Pqc.Crypto.Falcon
+{
+    /// <summary>
+    /// <see cref="IMessageSigner"/> front end for Falcon: captures the encoded key
+    /// in <see cref="Init"/> and delegates signing/verification to FalconNIST.
+    /// </summary>
+    public class FalconSigner
+        : IMessageSigner
+    {
+        // Encoded form of the key supplied to Init (private when signing, public when verifying).
+        private byte[] encodedkey;
+        // Engine configured with the key's logn and nonce length.
+        private FalconNIST nist;
+
+        /// <summary>
+        /// Prepares the signer for signing or verification.
+        /// </summary>
+        /// <param name="forSigning">true to sign (expects a FalconPrivateKeyParameters, optionally
+        /// wrapped in a ParametersWithRandom); false to verify (expects a FalconPublicKeyParameters).</param>
+        /// <param name="param">The key parameters.</param>
+        /// <exception cref="InvalidCastException">If <paramref name="param"/> is not of the expected key type.</exception>
+        public void Init(bool forSigning, ICipherParameters param)
+        {
+            if (forSigning)
+            {
+                // Unwrap the optional caller-supplied randomness source.
+                ParametersWithRandom withRandom = param as ParametersWithRandom;
+                ICipherParameters keyParam = (withRandom != null) ? withRandom.Parameters : param;
+                FalconPrivateKeyParameters skparam = (FalconPrivateKeyParameters)keyParam;
+
+                // TODO when CryptoServicesRegistrar has been implemented, use
+                // CryptoServicesRegistrar.GetSecureRandom() instead of new SecureRandom()
+                SecureRandom random = (withRandom != null) ? withRandom.Random : new SecureRandom();
+
+                encodedkey = skparam.GetEncoded();
+                nist = new FalconNIST(
+                    random,
+                    skparam.GetParameters().GetLogN(),
+                    skparam.GetParameters().GetNonceLength());
+            }
+            else
+            {
+                FalconPublicKeyParameters pkparam = (FalconPublicKeyParameters)param;
+                encodedkey = pkparam.GetEncoded();
+                // The FalconNIST constructor needs a random source even for verification.
+                // TODO when CryptoServicesRegistrar has been implemented, use
+                // CryptoServicesRegistrar.GetSecureRandom() instead of new SecureRandom()
+                nist = new FalconNIST(
+                    new SecureRandom(),
+                    pkparam.GetParameters().GetLogN(),
+                    pkparam.GetParameters().GetNonceLength());
+            }
+        }
+
+        /// <summary>
+        /// Signs <paramref name="message"/> with the private key given to <see cref="Init"/>.
+        /// </summary>
+        /// <returns>The value returned by FalconNIST.crypto_sign.</returns>
+        public byte[] GenerateSignature(byte[] message)
+        {
+            byte[] sm = new byte[nist.GetCryptoBytes()];
+
+            return nist.crypto_sign(sm, message, 0, (uint)message.Length, encodedkey, 0);
+        }
+
+        /// <summary>
+        /// Verifies <paramref name="signature"/> over <paramref name="message"/> with the public key
+        /// given to <see cref="Init"/>. Expected layout: one header byte (0x30 + logn), the nonce,
+        /// then the signature body.
+        /// </summary>
+        /// <returns>true if the signature verifies; false otherwise, including for a null,
+        /// truncated or wrongly-tagged signature.</returns>
+        public bool VerifySignature(byte[] message, byte[] signature)
+        {
+            var nonceLength = nist.GetNonceLength();
+
+            // Reject malformed input up front instead of failing with an index or
+            // overflow exception: we need the header byte, the full nonce and a
+            // non-empty signature body.
+            if (signature == null || signature.Length <= nonceLength + 1)
+            {
+                return false;
+            }
+            if (signature[0] != (byte)(0x30 + nist.GetLogn()))
+            {
+                return false;
+            }
+
+            byte[] nonce = new byte[nonceLength];
+            byte[] sig = new byte[signature.Length - nonceLength - 1];
+            Array.Copy(signature, 1, nonce, 0, nonceLength);
+            Array.Copy(signature, nonceLength + 1, sig, 0, sig.Length);
+            return nist.crypto_sign_open(sig, nonce, message, encodedkey, 0) == 0;
+        }
+    }
+}
diff --git a/crypto/src/pqc/crypto/falcon/FalconSmallPrime.cs b/crypto/src/pqc/crypto/falcon/FalconSmallPrime.cs
new file mode 100644
index 000000000..83a7cdfaf
--- /dev/null
+++ b/crypto/src/pqc/crypto/falcon/FalconSmallPrime.cs
@@ -0,0 +1,46 @@
+using System;
+
+namespace Org.BouncyCastle.Pqc.Crypto.Falcon
+{
+    // Plain holder for one (p, g, s) triple of unsigned 32-bit values.
+    class FalconSmallPrime
+    {
+        /* 
+        * License from the reference C code (the code was copied then modified
+        * to function in C#, this file corresponds to the small_prime type defined
+        * in keygen.c):
+        * ==========================(LICENSE BEGIN)============================
+        *
+        * Copyright (c) 2017-2019  Falcon Project
+        *
+        * Permission is hereby granted, free of charge, to any person obtaining
+        * a copy of this software and associated documentation files (the
+        * "Software"), to deal in the Software without restriction, including
+        * without limitation the rights to use, copy, modify, merge, publish,
+        * distribute, sublicense, and/or sell copies of the Software, and to
+        * permit persons to whom the Software is furnished to do so, subject to
+        * the following conditions:
+        *
+        * The above copyright notice and this permission notice shall be
+        * included in all copies or substantial portions of the Software.
+        *
+        * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+        * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+        * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+        * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+        * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+        * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+        * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+        *
+        * ===========================(LICENSE END)=============================
+        */
+
+        // NOTE(review): field meanings are presumably those of small_prime in
+        // the reference keygen.c (p being the prime itself) - confirm there.
+        internal uint p, g, s;
+
+        // Stores the three values as given; no validation is performed.
+        internal FalconSmallPrime(uint p, uint g, uint s)
+        {
+            this.s = s;
+            this.g = g;
+            this.p = p;
+        }
+    }
+}
diff --git a/crypto/src/pqc/crypto/falcon/FalconSmallPrimes.cs b/crypto/src/pqc/crypto/falcon/FalconSmallPrimes.cs
new file mode 100644
index 000000000..dcefa7f05
--- /dev/null
+++ b/crypto/src/pqc/crypto/falcon/FalconSmallPrimes.cs
@@ -0,0 +1,536 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Threading.Tasks;
+
+namespace Org.BouncyCastle.Pqc.Crypto.Falcon
+{
+    class FalconSmallPrimes
+    {
+        // Nothing to set up here: the PRIMES table is populated by its own field initializer.
+        internal FalconSmallPrimes()
+        {
+        }
+        internal FalconSmallPrime[] PRIMES = {
+            new FalconSmallPrime( 2147473409,  383167813,      10239 ),
+            new FalconSmallPrime( 2147389441,  211808905,  471403745 ),
+            new FalconSmallPrime( 2147387393,   37672282, 1329335065 ),
+            new FalconSmallPrime( 2147377153, 1977035326,  968223422 ),
+            new FalconSmallPrime( 2147358721, 1067163706,  132460015 ),
+            new FalconSmallPrime( 2147352577, 1606082042,  598693809 ),
+            new FalconSmallPrime( 2147346433, 2033915641, 1056257184 ),
+            new FalconSmallPrime( 2147338241, 1653770625,  421286710 ),
+            new FalconSmallPrime( 2147309569,  631200819, 1111201074 ),
+            new FalconSmallPrime( 2147297281, 2038364663, 1042003613 ),
+            new FalconSmallPrime( 2147295233, 1962540515,   19440033 ),
+            new FalconSmallPrime( 2147239937, 2100082663,  353296760 ),
+            new FalconSmallPrime( 2147235841, 1991153006, 1703918027 ),
+            new FalconSmallPrime( 2147217409,  516405114, 1258919613 ),
+            new FalconSmallPrime( 2147205121,  409347988, 1089726929 ),
+            new FalconSmallPrime( 2147196929,  927788991, 1946238668 ),
+            new FalconSmallPrime( 2147178497, 1136922411, 1347028164 ),
+            new FalconSmallPrime( 2147100673,  868626236,  701164723 ),
+            new FalconSmallPrime( 2147082241, 1897279176,  617820870 ),
+            new FalconSmallPrime( 2147074049, 1888819123,  158382189 ),
+            new FalconSmallPrime( 2147051521,   25006327,  522758543 ),
+            new FalconSmallPrime( 2147043329,  327546255,   37227845 ),
+            new FalconSmallPrime( 2147039233,  766324424, 1133356428 ),
+            new FalconSmallPrime( 2146988033, 1862817362,   73861329 ),
+            new FalconSmallPrime( 2146963457,  404622040,  653019435 ),
+            new FalconSmallPrime( 2146959361, 1936581214,  995143093 ),
+            new FalconSmallPrime( 2146938881, 1559770096,  634921513 ),
+            new FalconSmallPrime( 2146908161,  422623708, 1985060172 ),
+            new FalconSmallPrime( 2146885633, 1751189170,  298238186 ),
+            new FalconSmallPrime( 2146871297,  578919515,  291810829 ),
+            new FalconSmallPrime( 2146846721, 1114060353,  915902322 ),
+            new FalconSmallPrime( 2146834433, 2069565474,   47859524 ),
+            new FalconSmallPrime( 2146818049, 1552824584,  646281055 ),
+            new FalconSmallPrime( 2146775041, 1906267847, 1597832891 ),
+            new FalconSmallPrime( 2146756609, 1847414714, 1228090888 ),
+            new FalconSmallPrime( 2146744321, 1818792070, 1176377637 ),
+            new FalconSmallPrime( 2146738177, 1118066398, 1054971214 ),
+            new FalconSmallPrime( 2146736129,   52057278,  933422153 ),
+            new FalconSmallPrime( 2146713601,  592259376, 1406621510 ),
+            new FalconSmallPrime( 2146695169,  263161877, 1514178701 ),
+            new FalconSmallPrime( 2146656257,  685363115,  384505091 ),
+            new FalconSmallPrime( 2146650113,  927727032,  537575289 ),
+            new FalconSmallPrime( 2146646017,   52575506, 1799464037 ),
+            new FalconSmallPrime( 2146643969, 1276803876, 1348954416 ),
+            new FalconSmallPrime( 2146603009,  814028633, 1521547704 ),
+            new FalconSmallPrime( 2146572289, 1846678872, 1310832121 ),
+            new FalconSmallPrime( 2146547713,  919368090, 1019041349 ),
+            new FalconSmallPrime( 2146508801,  671847612,   38582496 ),
+            new FalconSmallPrime( 2146492417,  283911680,  532424562 ),
+            new FalconSmallPrime( 2146490369, 1780044827,  896447978 ),
+            new FalconSmallPrime( 2146459649,  327980850, 1327906900 ),
+            new FalconSmallPrime( 2146447361, 1310561493,  958645253 ),
+            new FalconSmallPrime( 2146441217,  412148926,  287271128 ),
+            new FalconSmallPrime( 2146437121,  293186449, 2009822534 ),
+            new FalconSmallPrime( 2146430977,  179034356, 1359155584 ),
+            new FalconSmallPrime( 2146418689, 1517345488, 1790248672 ),
+            new FalconSmallPrime( 2146406401, 1615820390, 1584833571 ),
+            new FalconSmallPrime( 2146404353,  826651445,  607120498 ),
+            new FalconSmallPrime( 2146379777,    3816988, 1897049071 ),
+            new FalconSmallPrime( 2146363393, 1221409784, 1986921567 ),
+            new FalconSmallPrime( 2146355201, 1388081168,  849968120 ),
+            new FalconSmallPrime( 2146336769, 1803473237, 1655544036 ),
+            new FalconSmallPrime( 2146312193, 1023484977,  273671831 ),
+            new FalconSmallPrime( 2146293761, 1074591448,  467406983 ),
+            new FalconSmallPrime( 2146283521,  831604668, 1523950494 ),
+            new FalconSmallPrime( 2146203649,  712865423, 1170834574 ),
+            new FalconSmallPrime( 2146154497, 1764991362, 1064856763 ),
+            new FalconSmallPrime( 2146142209,  627386213, 1406840151 ),
+            new FalconSmallPrime( 2146127873, 1638674429, 2088393537 ),
+            new FalconSmallPrime( 2146099201, 1516001018,  690673370 ),
+            new FalconSmallPrime( 2146093057, 1294931393,  315136610 ),
+            new FalconSmallPrime( 2146091009, 1942399533,  973539425 ),
+            new FalconSmallPrime( 2146078721, 1843461814, 2132275436 ),
+            new FalconSmallPrime( 2146060289, 1098740778,  360423481 ),
+            new FalconSmallPrime( 2146048001, 1617213232, 1951981294 ),
+            new FalconSmallPrime( 2146041857, 1805783169, 2075683489 ),
+            new FalconSmallPrime( 2146019329,  272027909, 1753219918 ),
+            new FalconSmallPrime( 2145986561, 1206530344, 2034028118 ),
+            new FalconSmallPrime( 2145976321, 1243769360, 1173377644 ),
+            new FalconSmallPrime( 2145964033,  887200839, 1281344586 ),
+            new FalconSmallPrime( 2145906689, 1651026455,  906178216 ),
+            new FalconSmallPrime( 2145875969, 1673238256, 1043521212 ),
+            new FalconSmallPrime( 2145871873, 1226591210, 1399796492 ),
+            new FalconSmallPrime( 2145841153, 1465353397, 1324527802 ),
+            new FalconSmallPrime( 2145832961, 1150638905,  554084759 ),
+            new FalconSmallPrime( 2145816577,  221601706,  427340863 ),
+            new FalconSmallPrime( 2145785857,  608896761,  316590738 ),
+            new FalconSmallPrime( 2145755137, 1712054942, 1684294304 ),
+            new FalconSmallPrime( 2145742849, 1302302867,  724873116 ),
+            new FalconSmallPrime( 2145728513,  516717693,  431671476 ),
+            new FalconSmallPrime( 2145699841,  524575579, 1619722537 ),
+            new FalconSmallPrime( 2145691649, 1925625239,  982974435 ),
+            new FalconSmallPrime( 2145687553,  463795662, 1293154300 ),
+            new FalconSmallPrime( 2145673217,  771716636,  881778029 ),
+            new FalconSmallPrime( 2145630209, 1509556977,  837364988 ),
+            new FalconSmallPrime( 2145595393,  229091856,  851648427 ),
+            new FalconSmallPrime( 2145587201, 1796903241,  635342424 ),
+            new FalconSmallPrime( 2145525761,  715310882, 1677228081 ),
+            new FalconSmallPrime( 2145495041, 1040930522,  200685896 ),
+            new FalconSmallPrime( 2145466369,  949804237, 1809146322 ),
+            new FalconSmallPrime( 2145445889, 1673903706,   95316881 ),
+            new FalconSmallPrime( 2145390593,  806941852, 1428671135 ),
+            new FalconSmallPrime( 2145372161, 1402525292,  159350694 ),
+            new FalconSmallPrime( 2145361921, 2124760298, 1589134749 ),
+            new FalconSmallPrime( 2145359873, 1217503067, 1561543010 ),
+            new FalconSmallPrime( 2145355777,  338341402,   83865711 ),
+            new FalconSmallPrime( 2145343489, 1381532164,  641430002 ),
+            new FalconSmallPrime( 2145325057, 1883895478, 1528469895 ),
+            new FalconSmallPrime( 2145318913, 1335370424,   65809740 ),
+            new FalconSmallPrime( 2145312769, 2000008042, 1919775760 ),
+            new FalconSmallPrime( 2145300481,  961450962, 1229540578 ),
+            new FalconSmallPrime( 2145282049,  910466767, 1964062701 ),
+            new FalconSmallPrime( 2145232897,  816527501,  450152063 ),
+            new FalconSmallPrime( 2145218561, 1435128058, 1794509700 ),
+            new FalconSmallPrime( 2145187841,   33505311, 1272467582 ),
+            new FalconSmallPrime( 2145181697,  269767433, 1380363849 ),
+            new FalconSmallPrime( 2145175553,   56386299, 1316870546 ),
+            new FalconSmallPrime( 2145079297, 2106880293, 1391797340 ),
+            new FalconSmallPrime( 2145021953, 1347906152,  720510798 ),
+            new FalconSmallPrime( 2145015809,  206769262, 1651459955 ),
+            new FalconSmallPrime( 2145003521, 1885513236, 1393381284 ),
+            new FalconSmallPrime( 2144960513, 1810381315,   31937275 ),
+            new FalconSmallPrime( 2144944129, 1306487838, 2019419520 ),
+            new FalconSmallPrime( 2144935937,   37304730, 1841489054 ),
+            new FalconSmallPrime( 2144894977, 1601434616,  157985831 ),
+            new FalconSmallPrime( 2144888833,   98749330, 2128592228 ),
+            new FalconSmallPrime( 2144880641, 1772327002, 2076128344 ),
+            new FalconSmallPrime( 2144864257, 1404514762, 2029969964 ),
+            new FalconSmallPrime( 2144827393,  801236594,  406627220 ),
+            new FalconSmallPrime( 2144806913,  349217443, 1501080290 ),
+            new FalconSmallPrime( 2144796673, 1542656776, 2084736519 ),
+            new FalconSmallPrime( 2144778241, 1210734884, 1746416203 ),
+            new FalconSmallPrime( 2144759809, 1146598851,  716464489 ),
+            new FalconSmallPrime( 2144757761,  286328400, 1823728177 ),
+            new FalconSmallPrime( 2144729089, 1347555695, 1836644881 ),
+            new FalconSmallPrime( 2144727041, 1795703790,  520296412 ),
+            new FalconSmallPrime( 2144696321, 1302475157,  852964281 ),
+            new FalconSmallPrime( 2144667649, 1075877614,  504992927 ),
+            new FalconSmallPrime( 2144573441,  198765808, 1617144982 ),
+            new FalconSmallPrime( 2144555009,  321528767,  155821259 ),
+            new FalconSmallPrime( 2144550913,  814139516, 1819937644 ),
+            new FalconSmallPrime( 2144536577,  571143206,  962942255 ),
+            new FalconSmallPrime( 2144524289, 1746733766,    2471321 ),
+            new FalconSmallPrime( 2144512001, 1821415077,  124190939 ),
+            new FalconSmallPrime( 2144468993,  917871546, 1260072806 ),
+            new FalconSmallPrime( 2144458753,  378417981, 1569240563 ),
+            new FalconSmallPrime( 2144421889,  175229668, 1825620763 ),
+            new FalconSmallPrime( 2144409601, 1699216963,  351648117 ),
+            new FalconSmallPrime( 2144370689, 1071885991,  958186029 ),
+            new FalconSmallPrime( 2144348161, 1763151227,  540353574 ),
+            new FalconSmallPrime( 2144335873, 1060214804,  919598847 ),
+            new FalconSmallPrime( 2144329729,  663515846, 1448552668 ),
+            new FalconSmallPrime( 2144327681, 1057776305,  590222840 ),
+            new FalconSmallPrime( 2144309249, 1705149168, 1459294624 ),
+            new FalconSmallPrime( 2144296961,  325823721, 1649016934 ),
+            new FalconSmallPrime( 2144290817,  738775789,  447427206 ),
+            new FalconSmallPrime( 2144243713,  962347618,  893050215 ),
+            new FalconSmallPrime( 2144237569, 1655257077,  900860862 ),
+            new FalconSmallPrime( 2144161793,  242206694, 1567868672 ),
+            new FalconSmallPrime( 2144155649,  769415308, 1247993134 ),
+            new FalconSmallPrime( 2144137217,  320492023,  515841070 ),
+            new FalconSmallPrime( 2144120833, 1639388522,  770877302 ),
+            new FalconSmallPrime( 2144071681, 1761785233,  964296120 ),
+            new FalconSmallPrime( 2144065537,  419817825,  204564472 ),
+            new FalconSmallPrime( 2144028673,  666050597, 2091019760 ),
+            new FalconSmallPrime( 2144010241, 1413657615, 1518702610 ),
+            new FalconSmallPrime( 2143952897, 1238327946,  475672271 ),
+            new FalconSmallPrime( 2143940609,  307063413, 1176750846 ),
+            new FalconSmallPrime( 2143918081, 2062905559,  786785803 ),
+            new FalconSmallPrime( 2143899649, 1338112849, 1562292083 ),
+            new FalconSmallPrime( 2143891457,   68149545,   87166451 ),
+            new FalconSmallPrime( 2143885313,  921750778,  394460854 ),
+            new FalconSmallPrime( 2143854593,  719766593,  133877196 ),
+            new FalconSmallPrime( 2143836161, 1149399850, 1861591875 ),
+            new FalconSmallPrime( 2143762433, 1848739366, 1335934145 ),
+            new FalconSmallPrime( 2143756289, 1326674710,  102999236 ),
+            new FalconSmallPrime( 2143713281,  808061791, 1156900308 ),
+            new FalconSmallPrime( 2143690753,  388399459, 1926468019 ),
+            new FalconSmallPrime( 2143670273, 1427891374, 1756689401 ),
+            new FalconSmallPrime( 2143666177, 1912173949,  986629565 ),
+            new FalconSmallPrime( 2143645697, 2041160111,  371842865 ),
+            new FalconSmallPrime( 2143641601, 1279906897, 2023974350 ),
+            new FalconSmallPrime( 2143635457,  720473174, 1389027526 ),
+            new FalconSmallPrime( 2143621121, 1298309455, 1732632006 ),
+            new FalconSmallPrime( 2143598593, 1548762216, 1825417506 ),
+            new FalconSmallPrime( 2143567873,  620475784, 1073787233 ),
+            new FalconSmallPrime( 2143561729, 1932954575,  949167309 ),
+            new FalconSmallPrime( 2143553537,  354315656, 1652037534 ),
+            new FalconSmallPrime( 2143541249,  577424288, 1097027618 ),
+            new FalconSmallPrime( 2143531009,  357862822,  478640055 ),
+            new FalconSmallPrime( 2143522817, 2017706025, 1550531668 ),
+            new FalconSmallPrime( 2143506433, 2078127419, 1824320165 ),
+            new FalconSmallPrime( 2143488001,  613475285, 1604011510 ),
+            new FalconSmallPrime( 2143469569, 1466594987,  502095196 ),
+            new FalconSmallPrime( 2143426561, 1115430331, 1044637111 ),
+            new FalconSmallPrime( 2143383553,    9778045, 1902463734 ),
+            new FalconSmallPrime( 2143377409, 1557401276, 2056861771 ),
+            new FalconSmallPrime( 2143363073,  652036455, 1965915971 ),
+            new FalconSmallPrime( 2143260673, 1464581171, 1523257541 ),
+            new FalconSmallPrime( 2143246337, 1876119649,  764541916 ),
+            new FalconSmallPrime( 2143209473, 1614992673, 1920672844 ),
+            new FalconSmallPrime( 2143203329,  981052047, 2049774209 ),
+            new FalconSmallPrime( 2143160321, 1847355533,  728535665 ),
+            new FalconSmallPrime( 2143129601,  965558457,  603052992 ),
+            new FalconSmallPrime( 2143123457, 2140817191,    8348679 ),
+            new FalconSmallPrime( 2143100929, 1547263683,  694209023 ),
+            new FalconSmallPrime( 2143092737,  643459066, 1979934533 ),
+            new FalconSmallPrime( 2143082497,  188603778, 2026175670 ),
+            new FalconSmallPrime( 2143062017, 1657329695,  377451099 ),
+            new FalconSmallPrime( 2143051777,  114967950,  979255473 ),
+            new FalconSmallPrime( 2143025153, 1698431342, 1449196896 ),
+            new FalconSmallPrime( 2143006721, 1862741675, 1739650365 ),
+            new FalconSmallPrime( 2142996481,  756660457,  996160050 ),
+            new FalconSmallPrime( 2142976001,  927864010, 1166847574 ),
+            new FalconSmallPrime( 2142965761,  905070557,  661974566 ),
+            new FalconSmallPrime( 2142916609,   40932754, 1787161127 ),
+            new FalconSmallPrime( 2142892033, 1987985648,  675335382 ),
+            new FalconSmallPrime( 2142885889,  797497211, 1323096997 ),
+            new FalconSmallPrime( 2142871553, 2068025830, 1411877159 ),
+            new FalconSmallPrime( 2142861313, 1217177090, 1438410687 ),
+            new FalconSmallPrime( 2142830593,  409906375, 1767860634 ),
+            new FalconSmallPrime( 2142803969, 1197788993,  359782919 ),
+            new FalconSmallPrime( 2142785537,  643817365,  513932862 ),
+            new FalconSmallPrime( 2142779393, 1717046338,  218943121 ),
+            new FalconSmallPrime( 2142724097,   89336830,  416687049 ),
+            new FalconSmallPrime( 2142707713,    5944581, 1356813523 ),
+            new FalconSmallPrime( 2142658561,  887942135, 2074011722 ),
+            new FalconSmallPrime( 2142638081,  151851972, 1647339939 ),
+            new FalconSmallPrime( 2142564353, 1691505537, 1483107336 ),
+            new FalconSmallPrime( 2142533633, 1989920200, 1135938817 ),
+            new FalconSmallPrime( 2142529537,  959263126, 1531961857 ),
+            new FalconSmallPrime( 2142527489,  453251129, 1725566162 ),
+            new FalconSmallPrime( 2142502913, 1536028102,  182053257 ),
+            new FalconSmallPrime( 2142498817,  570138730,  701443447 ),
+            new FalconSmallPrime( 2142416897,  326965800,  411931819 ),
+            new FalconSmallPrime( 2142363649, 1675665410, 1517191733 ),
+            new FalconSmallPrime( 2142351361,  968529566, 1575712703 ),
+            new FalconSmallPrime( 2142330881, 1384953238, 1769087884 ),
+            new FalconSmallPrime( 2142314497, 1977173242, 1833745524 ),
+            new FalconSmallPrime( 2142289921,   95082313, 1714775493 ),
+            new FalconSmallPrime( 2142283777,  109377615, 1070584533 ),
+            new FalconSmallPrime( 2142277633,   16960510,  702157145 ),
+            new FalconSmallPrime( 2142263297,  553850819,  431364395 ),
+            new FalconSmallPrime( 2142208001,  241466367, 2053967982 ),
+            new FalconSmallPrime( 2142164993, 1795661326, 1031836848 ),
+            new FalconSmallPrime( 2142097409, 1212530046,  712772031 ),
+            new FalconSmallPrime( 2142087169, 1763869720,  822276067 ),
+            new FalconSmallPrime( 2142078977,  644065713, 1765268066 ),
+            new FalconSmallPrime( 2142074881,  112671944,  643204925 ),
+            new FalconSmallPrime( 2142044161, 1387785471, 1297890174 ),
+            new FalconSmallPrime( 2142025729,  783885537, 1000425730 ),
+            new FalconSmallPrime( 2142011393,  905662232, 1679401033 ),
+            new FalconSmallPrime( 2141974529,  799788433,  468119557 ),
+            new FalconSmallPrime( 2141943809, 1932544124,  449305555 ),
+            new FalconSmallPrime( 2141933569, 1527403256,  841867925 ),
+            new FalconSmallPrime( 2141931521, 1247076451,  743823916 ),
+            new FalconSmallPrime( 2141902849, 1199660531,  401687910 ),
+            new FalconSmallPrime( 2141890561,  150132350, 1720336972 ),
+            new FalconSmallPrime( 2141857793, 1287438162,  663880489 ),
+            new FalconSmallPrime( 2141833217,  618017731, 1819208266 ),
+            new FalconSmallPrime( 2141820929,  999578638, 1403090096 ),
+            new FalconSmallPrime( 2141786113,   81834325, 1523542501 ),
+            new FalconSmallPrime( 2141771777,  120001928,  463556492 ),
+            new FalconSmallPrime( 2141759489,  122455485, 2124928282 ),
+            new FalconSmallPrime( 2141749249,  141986041,  940339153 ),
+            new FalconSmallPrime( 2141685761,  889088734,  477141499 ),
+            new FalconSmallPrime( 2141673473,  324212681, 1122558298 ),
+            new FalconSmallPrime( 2141669377, 1175806187, 1373818177 ),
+            new FalconSmallPrime( 2141655041, 1113654822,  296887082 ),
+            new FalconSmallPrime( 2141587457,  991103258, 1585913875 ),
+            new FalconSmallPrime( 2141583361, 1401451409, 1802457360 ),
+            new FalconSmallPrime( 2141575169, 1571977166,  712760980 ),
+            new FalconSmallPrime( 2141546497, 1107849376, 1250270109 ),
+            new FalconSmallPrime( 2141515777,  196544219,  356001130 ),
+            new FalconSmallPrime( 2141495297, 1733571506, 1060744866 ),
+            new FalconSmallPrime( 2141483009,  321552363, 1168297026 ),
+            new FalconSmallPrime( 2141458433,  505818251,  733225819 ),
+            new FalconSmallPrime( 2141360129, 1026840098,  948342276 ),
+            new FalconSmallPrime( 2141325313,  945133744, 2129965998 ),
+            new FalconSmallPrime( 2141317121, 1871100260, 1843844634 ),
+            new FalconSmallPrime( 2141286401, 1790639498, 1750465696 ),
+            new FalconSmallPrime( 2141267969, 1376858592,  186160720 ),
+            new FalconSmallPrime( 2141255681, 2129698296, 1876677959 ),
+            new FalconSmallPrime( 2141243393, 2138900688, 1340009628 ),
+            new FalconSmallPrime( 2141214721, 1933049835, 1087819477 ),
+            new FalconSmallPrime( 2141212673, 1898664939, 1786328049 ),
+            new FalconSmallPrime( 2141202433,  990234828,  940682169 ),
+            new FalconSmallPrime( 2141175809, 1406392421,  993089586 ),
+            new FalconSmallPrime( 2141165569, 1263518371,  289019479 ),
+            new FalconSmallPrime( 2141073409, 1485624211,  507864514 ),
+            new FalconSmallPrime( 2141052929, 1885134788,  311252465 ),
+            new FalconSmallPrime( 2141040641, 1285021247,  280941862 ),
+            new FalconSmallPrime( 2141028353, 1527610374,  375035110 ),
+            new FalconSmallPrime( 2141011969, 1400626168,  164696620 ),
+            new FalconSmallPrime( 2140999681,  632959608,  966175067 ),
+            new FalconSmallPrime( 2140997633, 2045628978, 1290889438 ),
+            new FalconSmallPrime( 2140993537, 1412755491,  375366253 ),
+            new FalconSmallPrime( 2140942337,  719477232,  785367828 ),
+            new FalconSmallPrime( 2140925953,   45224252,  836552317 ),
+            new FalconSmallPrime( 2140917761, 1157376588, 1001839569 ),
+            new FalconSmallPrime( 2140887041,  278480752, 2098732796 ),
+            new FalconSmallPrime( 2140837889, 1663139953,  924094810 ),
+            new FalconSmallPrime( 2140788737,  802501511, 2045368990 ),
+            new FalconSmallPrime( 2140766209, 1820083885, 1800295504 ),
+            new FalconSmallPrime( 2140764161, 1169561905, 2106792035 ),
+            new FalconSmallPrime( 2140696577,  127781498, 1885987531 ),
+            new FalconSmallPrime( 2140684289,   16014477, 1098116827 ),
+            new FalconSmallPrime( 2140653569,  665960598, 1796728247 ),
+            new FalconSmallPrime( 2140594177, 1043085491,  377310938 ),
+            new FalconSmallPrime( 2140579841, 1732838211, 1504505945 ),
+            new FalconSmallPrime( 2140569601,  302071939,  358291016 ),
+            new FalconSmallPrime( 2140567553,  192393733, 1909137143 ),
+            new FalconSmallPrime( 2140557313,  406595731, 1175330270 ),
+            new FalconSmallPrime( 2140549121, 1748850918,  525007007 ),
+            new FalconSmallPrime( 2140477441,  499436566, 1031159814 ),
+            new FalconSmallPrime( 2140469249, 1886004401, 1029951320 ),
+            new FalconSmallPrime( 2140426241, 1483168100, 1676273461 ),
+            new FalconSmallPrime( 2140420097, 1779917297,  846024476 ),
+            new FalconSmallPrime( 2140413953,  522948893, 1816354149 ),
+            new FalconSmallPrime( 2140383233, 1931364473, 1296921241 ),
+            new FalconSmallPrime( 2140366849, 1917356555,  147196204 ),
+            new FalconSmallPrime( 2140354561,   16466177, 1349052107 ),
+            new FalconSmallPrime( 2140348417, 1875366972, 1860485634 ),
+            new FalconSmallPrime( 2140323841,  456498717, 1790256483 ),
+            new FalconSmallPrime( 2140321793, 1629493973,  150031888 ),
+            new FalconSmallPrime( 2140315649, 1904063898,  395510935 ),
+            new FalconSmallPrime( 2140280833, 1784104328,  831417909 ),
+            new FalconSmallPrime( 2140250113,  256087139,  697349101 ),
+            new FalconSmallPrime( 2140229633,  388553070,  243875754 ),
+            new FalconSmallPrime( 2140223489,  747459608, 1396270850 ),
+            new FalconSmallPrime( 2140200961,  507423743, 1895572209 ),
+            new FalconSmallPrime( 2140162049,  580106016, 2045297469 ),
+            new FalconSmallPrime( 2140149761,  712426444,  785217995 ),
+            new FalconSmallPrime( 2140137473, 1441607584,  536866543 ),
+            new FalconSmallPrime( 2140119041,  346538902, 1740434653 ),
+            new FalconSmallPrime( 2140090369,  282642885,   21051094 ),
+            new FalconSmallPrime( 2140076033, 1407456228,  319910029 ),
+            new FalconSmallPrime( 2140047361, 1619330500, 1488632070 ),
+            new FalconSmallPrime( 2140041217, 2089408064, 2012026134 ),
+            new FalconSmallPrime( 2140008449, 1705524800, 1613440760 ),
+            new FalconSmallPrime( 2139924481, 1846208233, 1280649481 ),
+            new FalconSmallPrime( 2139906049,  989438755, 1185646076 ),
+            new FalconSmallPrime( 2139867137, 1522314850,  372783595 ),
+            new FalconSmallPrime( 2139842561, 1681587377,  216848235 ),
+            new FalconSmallPrime( 2139826177, 2066284988, 1784999464 ),
+            new FalconSmallPrime( 2139824129,  480888214, 1513323027 ),
+            new FalconSmallPrime( 2139789313,  847937200,  858192859 ),
+            new FalconSmallPrime( 2139783169, 1642000434, 1583261448 ),
+            new FalconSmallPrime( 2139770881,  940699589,  179702100 ),
+            new FalconSmallPrime( 2139768833,  315623242,  964612676 ),
+            new FalconSmallPrime( 2139666433,  331649203,  764666914 ),
+            new FalconSmallPrime( 2139641857, 2118730799, 1313764644 ),
+            new FalconSmallPrime( 2139635713,  519149027,  519212449 ),
+            new FalconSmallPrime( 2139598849, 1526413634, 1769667104 ),
+            new FalconSmallPrime( 2139574273,  551148610,  820739925 ),
+            new FalconSmallPrime( 2139568129, 1386800242,  472447405 ),
+            new FalconSmallPrime( 2139549697,  813760130, 1412328531 ),
+            new FalconSmallPrime( 2139537409, 1615286260, 1609362979 ),
+            new FalconSmallPrime( 2139475969, 1352559299, 1696720421 ),
+            new FalconSmallPrime( 2139455489, 1048691649, 1584935400 ),
+            new FalconSmallPrime( 2139432961,  836025845,  950121150 ),
+            new FalconSmallPrime( 2139424769, 1558281165, 1635486858 ),
+            new FalconSmallPrime( 2139406337, 1728402143, 1674423301 ),
+            new FalconSmallPrime( 2139396097, 1727715782, 1483470544 ),
+            new FalconSmallPrime( 2139383809, 1092853491, 1741699084 ),
+            new FalconSmallPrime( 2139369473,  690776899, 1242798709 ),
+            new FalconSmallPrime( 2139351041, 1768782380, 2120712049 ),
+            new FalconSmallPrime( 2139334657, 1739968247, 1427249225 ),
+            new FalconSmallPrime( 2139332609, 1547189119,  623011170 ),
+            new FalconSmallPrime( 2139310081, 1346827917, 1605466350 ),
+            new FalconSmallPrime( 2139303937,  369317948,  828392831 ),
+            new FalconSmallPrime( 2139301889, 1560417239, 1788073219 ),
+            new FalconSmallPrime( 2139283457, 1303121623,  595079358 ),
+            new FalconSmallPrime( 2139248641, 1354555286,  573424177 ),
+            new FalconSmallPrime( 2139240449,   60974056,  885781403 ),
+            new FalconSmallPrime( 2139222017,  355573421, 1221054839 ),
+            new FalconSmallPrime( 2139215873,  566477826, 1724006500 ),
+            new FalconSmallPrime( 2139150337,  871437673, 1609133294 ),
+            new FalconSmallPrime( 2139144193, 1478130914, 1137491905 ),
+            new FalconSmallPrime( 2139117569, 1854880922,  964728507 ),
+            new FalconSmallPrime( 2139076609,  202405335,  756508944 ),
+            new FalconSmallPrime( 2139062273, 1399715741,  884826059 ),
+            new FalconSmallPrime( 2139045889, 1051045798, 1202295476 ),
+            new FalconSmallPrime( 2139033601, 1707715206,  632234634 ),
+            new FalconSmallPrime( 2139006977, 2035853139,  231626690 ),
+            new FalconSmallPrime( 2138951681,  183867876,  838350879 ),
+            new FalconSmallPrime( 2138945537, 1403254661,  404460202 ),
+            new FalconSmallPrime( 2138920961,  310865011, 1282911681 ),
+            new FalconSmallPrime( 2138910721, 1328496553,  103472415 ),
+            new FalconSmallPrime( 2138904577,   78831681,  993513549 ),
+            new FalconSmallPrime( 2138902529, 1319697451, 1055904361 ),
+            new FalconSmallPrime( 2138816513,  384338872, 1706202469 ),
+            new FalconSmallPrime( 2138810369, 1084868275,  405677177 ),
+            new FalconSmallPrime( 2138787841,  401181788, 1964773901 ),
+            new FalconSmallPrime( 2138775553, 1850532988, 1247087473 ),
+            new FalconSmallPrime( 2138767361,  874261901, 1576073565 ),
+            new FalconSmallPrime( 2138757121, 1187474742,  993541415 ),
+            new FalconSmallPrime( 2138748929, 1782458888, 1043206483 ),
+            new FalconSmallPrime( 2138744833, 1221500487,  800141243 ),
+            new FalconSmallPrime( 2138738689,  413465368, 1450660558 ),
+            new FalconSmallPrime( 2138695681,  739045140,  342611472 ),
+            new FalconSmallPrime( 2138658817, 1355845756,  672674190 ),
+            new FalconSmallPrime( 2138644481,  608379162, 1538874380 ),
+            new FalconSmallPrime( 2138632193, 1444914034,  686911254 ),
+            new FalconSmallPrime( 2138607617,  484707818, 1435142134 ),
+            new FalconSmallPrime( 2138591233,  539460669, 1290458549 ),
+            new FalconSmallPrime( 2138572801, 2093538990, 2011138646 ),
+            new FalconSmallPrime( 2138552321, 1149786988, 1076414907 ),
+            new FalconSmallPrime( 2138546177,  840688206, 2108985273 ),
+            new FalconSmallPrime( 2138533889,  209669619,  198172413 ),
+            new FalconSmallPrime( 2138523649, 1975879426, 1277003968 ),
+            new FalconSmallPrime( 2138490881, 1351891144, 1976858109 ),
+            new FalconSmallPrime( 2138460161, 1817321013, 1979278293 ),
+            new FalconSmallPrime( 2138429441, 1950077177,  203441928 ),
+            new FalconSmallPrime( 2138400769,  908970113,  628395069 ),
+            new FalconSmallPrime( 2138398721,  219890864,  758486760 ),
+            new FalconSmallPrime( 2138376193, 1306654379,  977554090 ),
+            new FalconSmallPrime( 2138351617,  298822498, 2004708503 ),
+            new FalconSmallPrime( 2138337281,  441457816, 1049002108 ),
+            new FalconSmallPrime( 2138320897, 1517731724, 1442269609 ),
+            new FalconSmallPrime( 2138290177, 1355911197, 1647139103 ),
+            new FalconSmallPrime( 2138234881,  531313247, 1746591962 ),
+            new FalconSmallPrime( 2138214401, 1899410930,  781416444 ),
+            new FalconSmallPrime( 2138202113, 1813477173, 1622508515 ),
+            new FalconSmallPrime( 2138191873, 1086458299, 1025408615 ),
+            new FalconSmallPrime( 2138183681, 1998800427,  827063290 ),
+            new FalconSmallPrime( 2138173441, 1921308898,  749670117 ),
+            new FalconSmallPrime( 2138103809, 1620902804, 2126787647 ),
+            new FalconSmallPrime( 2138099713,  828647069, 1892961817 ),
+            new FalconSmallPrime( 2138085377,  179405355, 1525506535 ),
+            new FalconSmallPrime( 2138060801,  615683235, 1259580138 ),
+            new FalconSmallPrime( 2138044417, 2030277840, 1731266562 ),
+            new FalconSmallPrime( 2138042369, 2087222316, 1627902259 ),
+            new FalconSmallPrime( 2138032129,  126388712, 1108640984 ),
+            new FalconSmallPrime( 2138011649,  715026550, 1017980050 ),
+            new FalconSmallPrime( 2137993217, 1693714349, 1351778704 ),
+            new FalconSmallPrime( 2137888769, 1289762259, 1053090405 ),
+            new FalconSmallPrime( 2137853953,  199991890, 1254192789 ),
+            new FalconSmallPrime( 2137833473,  941421685,  896995556 ),
+            new FalconSmallPrime( 2137817089,  750416446, 1251031181 ),
+            new FalconSmallPrime( 2137792513,  798075119,  368077456 ),
+            new FalconSmallPrime( 2137786369,  878543495, 1035375025 ),
+            new FalconSmallPrime( 2137767937,    9351178, 1156563902 ),
+            new FalconSmallPrime( 2137755649, 1382297614, 1686559583 ),
+            new FalconSmallPrime( 2137724929, 1345472850, 1681096331 ),
+            new FalconSmallPrime( 2137704449,  834666929,  630551727 ),
+            new FalconSmallPrime( 2137673729, 1646165729, 1892091571 ),
+            new FalconSmallPrime( 2137620481,  778943821,   48456461 ),
+            new FalconSmallPrime( 2137618433, 1730837875, 1713336725 ),
+            new FalconSmallPrime( 2137581569,  805610339, 1378891359 ),
+            new FalconSmallPrime( 2137538561,  204342388, 1950165220 ),
+            new FalconSmallPrime( 2137526273, 1947629754, 1500789441 ),
+            new FalconSmallPrime( 2137516033,  719902645, 1499525372 ),
+            new FalconSmallPrime( 2137491457,  230451261,  556382829 ),
+            new FalconSmallPrime( 2137440257,  979573541,  412760291 ),
+            new FalconSmallPrime( 2137374721,  927841248, 1954137185 ),
+            new FalconSmallPrime( 2137362433, 1243778559,  861024672 ),
+            new FalconSmallPrime( 2137313281, 1341338501,  980638386 ),
+            new FalconSmallPrime( 2137311233,  937415182, 1793212117 ),
+            new FalconSmallPrime( 2137255937,  795331324, 1410253405 ),
+            new FalconSmallPrime( 2137243649,  150756339, 1966999887 ),
+            new FalconSmallPrime( 2137182209,  163346914, 1939301431 ),
+            new FalconSmallPrime( 2137171969, 1952552395,  758913141 ),
+            new FalconSmallPrime( 2137159681,  570788721,  218668666 ),
+            new FalconSmallPrime( 2137147393, 1896656810, 2045670345 ),
+            new FalconSmallPrime( 2137141249,  358493842,  518199643 ),
+            new FalconSmallPrime( 2137139201, 1505023029,  674695848 ),
+            new FalconSmallPrime( 2137133057,   27911103,  830956306 ),
+            new FalconSmallPrime( 2137122817,  439771337, 1555268614 ),
+            new FalconSmallPrime( 2137116673,  790988579, 1871449599 ),
+            new FalconSmallPrime( 2137110529,  432109234,  811805080 ),
+            new FalconSmallPrime( 2137102337, 1357900653, 1184997641 ),
+            new FalconSmallPrime( 2137098241,  515119035, 1715693095 ),
+            new FalconSmallPrime( 2137090049,  408575203, 2085660657 ),
+            new FalconSmallPrime( 2137085953, 2097793407, 1349626963 ),
+            new FalconSmallPrime( 2137055233, 1556739954, 1449960883 ),
+            new FalconSmallPrime( 2137030657, 1545758650, 1369303716 ),
+            new FalconSmallPrime( 2136987649,  332602570,  103875114 ),
+            new FalconSmallPrime( 2136969217, 1499989506, 1662964115 ),
+            new FalconSmallPrime( 2136924161,  857040753,    4738842 ),
+            new FalconSmallPrime( 2136895489, 1948872712,  570436091 ),
+            new FalconSmallPrime( 2136893441,   58969960, 1568349634 ),
+            new FalconSmallPrime( 2136887297, 2127193379,  273612548 ),
+            new FalconSmallPrime( 2136850433,  111208983, 1181257116 ),
+            new FalconSmallPrime( 2136809473, 1627275942, 1680317971 ),
+            new FalconSmallPrime( 2136764417, 1574888217,   14011331 ),
+            new FalconSmallPrime( 2136741889,   14011055, 1129154251 ),
+            new FalconSmallPrime( 2136727553,   35862563, 1838555253 ),
+            new FalconSmallPrime( 2136721409,  310235666, 1363928244 ),
+            new FalconSmallPrime( 2136698881, 1612429202, 1560383828 ),
+            new FalconSmallPrime( 2136649729, 1138540131,  800014364 ),
+            new FalconSmallPrime( 2136606721,  602323503, 1433096652 ),
+            new FalconSmallPrime( 2136563713,  182209265, 1919611038 ),
+            new FalconSmallPrime( 2136555521,  324156477,  165591039 ),
+            new FalconSmallPrime( 2136549377,  195513113,  217165345 ),
+            new FalconSmallPrime( 2136526849, 1050768046,  939647887 ),
+            new FalconSmallPrime( 2136508417, 1886286237, 1619926572 ),
+            new FalconSmallPrime( 2136477697,  609647664,   35065157 ),
+            new FalconSmallPrime( 2136471553,  679352216, 1452259468 ),
+            new FalconSmallPrime( 2136457217,  128630031,  824816521 ),
+            new FalconSmallPrime( 2136422401,   19787464, 1526049830 ),
+            new FalconSmallPrime( 2136420353,  698316836, 1530623527 ),
+            new FalconSmallPrime( 2136371201, 1651862373, 1804812805 ),
+            new FalconSmallPrime( 2136334337,  326596005,  336977082 ),
+            new FalconSmallPrime( 2136322049,   63253370, 1904972151 ),
+            new FalconSmallPrime( 2136297473,  312176076,  172182411 ),
+            new FalconSmallPrime( 2136248321,  381261841,  369032670 ),
+            new FalconSmallPrime( 2136242177,  358688773, 1640007994 ),
+            new FalconSmallPrime( 2136229889,  512677188,   75585225 ),
+            new FalconSmallPrime( 2136219649, 2095003250, 1970086149 ),
+            new FalconSmallPrime( 2136207361, 1909650722,  537760675 ),
+            new FalconSmallPrime( 2136176641, 1334616195, 1533487619 ),
+            new FalconSmallPrime( 2136158209, 2096285632, 1793285210 ),
+            new FalconSmallPrime( 2136143873, 1897347517,  293843959 ),
+            new FalconSmallPrime( 2136133633,  923586222, 1022655978 ),
+            new FalconSmallPrime( 2136096769, 1464868191, 1515074410 ),
+            new FalconSmallPrime( 2136094721, 2020679520, 2061636104 ),
+            new FalconSmallPrime( 2136076289,  290798503, 1814726809 ),
+            new FalconSmallPrime( 2136041473,  156415894, 1250757633 ),
+            new FalconSmallPrime( 2135996417,  297459940, 1132158924 ),
+            new FalconSmallPrime( 2135955457,  538755304, 1688831340 ),
+            new FalconSmallPrime( 0, 0, 0 )
+            };
+    }
+}
diff --git a/crypto/src/pqc/crypto/falcon/FalconVrfy.cs b/crypto/src/pqc/crypto/falcon/FalconVrfy.cs
new file mode 100644
index 000000000..4f28a77d9
--- /dev/null
+++ b/crypto/src/pqc/crypto/falcon/FalconVrfy.cs
@@ -0,0 +1,860 @@
+using System;
+
+namespace Org.BouncyCastle.Pqc.Crypto.Falcon
+{
+    class FalconVrfy
+    {
+        /* FalconCommon instance created by, or handed to, the constructor. */
+        FalconCommon common;
+
+        /* Builds a verifier around a newly constructed FalconCommon. */
+        internal FalconVrfy()
+            : this(new FalconCommon())
+        {
+        }
+
+        /* Builds a verifier that stores the supplied FalconCommon reference. */
+        internal FalconVrfy(FalconCommon common)
+        {
+            this.common = common;
+        }
+
+        /* 
+        * License from the reference C code (the code was copied then modified
+        * to function in C#):
+        * ==========================(LICENSE BEGIN)============================
+        *
+        * Copyright (c) 2017-2019  Falcon Project
+        *
+        * Permission is hereby granted, free of charge, to any person obtaining
+        * a copy of this software and associated documentation files (the
+        * "Software"), to deal in the Software without restriction, including
+        * without limitation the rights to use, copy, modify, merge, publish,
+        * distribute, sublicense, and/or sell copies of the Software, and to
+        * permit persons to whom the Software is furnished to do so, subject to
+        * the following conditions:
+        *
+        * The above copyright notice and this permission notice shall be
+        * included in all copies or substantial portions of the Software.
+        *
+        * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+        * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+        * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+        * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+        * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+        * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+        * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+        *
+        * ===========================(LICENSE END)=============================
+        */
+        /* ===================================================================== */
+        /*
+        * Constants for NTT.
+        *
+        *   n = 2^logn  (2 <= n <= 1024)
+        *   phi = X^n + 1
+        *   q = 12289
+        *   q0i = -1/q mod 2^16
+        *   R = 2^16 mod q
+        *   R2 = 2^32 mod q
+        *
+        * q0i satisfies q * q0i = -1 mod 2^16; mq_montymul() uses it to
+        * pick the multiple of q that clears the low 16 bits of a product.
+        * R2 is R*R mod q; a Montgomery multiplication by R2 converts a
+        * plain value into Montgomery representation.
+        */
+
+        const int Q = 12289;    /* the modulus q */
+        const int Q0I = 12287;  /* -1/q mod 2^16 */
+        const int R = 4091;     /* 2^16 mod q */
+        const int R2 = 10952;   /* 2^32 mod q */
+
+        /*
+        * Table for NTT, binary case:
+        *   GMb[x] = R*(g^rev(x)) mod q
+        * where g = 7 (it is a 2048-th primitive root of 1 modulo q)
+        * and rev() is the bit-reversal function over 10 bits.
+        *
+        * The table has 1024 entries. Each entry carries the factor R
+        * (Montgomery representation), so a mq_montymul() by an entry
+        * multiplies by the corresponding power of g. mq_NTT() reads
+        * entry GMb[m + i] for group i of the stage that has m groups.
+        */
+        internal ushort[] GMb = {
+            4091,  7888, 11060, 11208,  6960,  4342,  6275,  9759,
+            1591,  6399,  9477,  5266,   586,  5825,  7538,  9710,
+            1134,  6407,  1711,   965,  7099,  7674,  3743,  6442,
+            10414,  8100,  1885,  1688,  1364, 10329, 10164,  9180,
+            12210,  6240,   997,   117,  4783,  4407,  1549,  7072,
+            2829,  6458,  4431,  8877,  7144,  2564,  5664,  4042,
+            12189,   432, 10751,  1237,  7610,  1534,  3983,  7863,
+            2181,  6308,  8720,  6570,  4843,  1690,    14,  3872,
+            5569,  9368, 12163,  2019,  7543,  2315,  4673,  7340,
+            1553,  1156,  8401, 11389,  1020,  2967, 10772,  7045,
+            3316, 11236,  5285, 11578, 10637, 10086,  9493,  6180,
+            9277,  6130,  3323,   883, 10469,   489,  1502,  2851,
+            11061,  9729,  2742, 12241,  4970, 10481, 10078,  1195,
+            730,  1762,  3854,  2030,  5892, 10922,  9020,  5274,
+            9179,  3604,  3782, 10206,  3180,  3467,  4668,  2446,
+            7613,  9386,   834,  7703,  6836,  3403,  5351, 12276,
+            3580,  1739, 10820,  9787, 10209,  4070, 12250,  8525,
+            10401,  2749,  7338, 10574,  6040,   943,  9330,  1477,
+            6865,  9668,  3585,  6633, 12145,  4063,  3684,  7680,
+            8188,  6902,  3533,  9807,  6090,   727, 10099,  7003,
+            6945,  1949,  9731, 10559,  6057,   378,  7871,  8763,
+            8901,  9229,  8846,  4551,  9589, 11664,  7630,  8821,
+            5680,  4956,  6251,  8388, 10156,  8723,  2341,  3159,
+            1467,  5460,  8553,  7783,  2649,  2320,  9036,  6188,
+            737,  3698,  4699,  5753,  9046,  3687,    16,   914,
+            5186, 10531,  4552,  1964,  3509,  8436,  7516,  5381,
+            10733,  3281,  7037,  1060,  2895,  7156,  8887,  5357,
+            6409,  8197,  2962,  6375,  5064,  6634,  5625,   278,
+            932, 10229,  8927,  7642,   351,  9298,   237,  5858,
+            7692,  3146, 12126,  7586,  2053, 11285,  3802,  5204,
+            4602,  1748, 11300,   340,  3711,  4614,   300, 10993,
+            5070, 10049, 11616, 12247,  7421, 10707,  5746,  5654,
+            3835,  5553,  1224,  8476,  9237,  3845,   250, 11209,
+            4225,  6326,  9680, 12254,  4136,  2778,   692,  8808,
+            6410,  6718, 10105, 10418,  3759,  7356, 11361,  8433,
+            6437,  3652,  6342,  8978,  5391,  2272,  6476,  7416,
+            8418, 10824, 11986,  5733,   876,  7030,  2167,  2436,
+            3442,  9217,  8206,  4858,  5964,  2746,  7178,  1434,
+            7389,  8879, 10661, 11457,  4220,  1432, 10832,  4328,
+            8557,  1867,  9454,  2416,  3816,  9076,   686,  5393,
+            2523,  4339,  6115,   619,   937,  2834,  7775,  3279,
+            2363,  7488,  6112,  5056,   824, 10204, 11690,  1113,
+            2727,  9848,   896,  2028,  5075,  2654, 10464,  7884,
+            12169,  5434,  3070,  6400,  9132, 11672, 12153,  4520,
+            1273,  9739, 11468,  9937, 10039,  9720,  2262,  9399,
+            11192,   315,  4511,  1158,  6061,  6751, 11865,   357,
+            7367,  4550,   983,  8534,  8352, 10126,  7530,  9253,
+            4367,  5221,  3999,  8777,  3161,  6990,  4130, 11652,
+            3374, 11477,  1753,   292,  8681,  2806, 10378, 12188,
+            5800, 11811,  3181,  1988,  1024,  9340,  2477, 10928,
+            4582,  6750,  3619,  5503,  5233,  2463,  8470,  7650,
+            7964,  6395,  1071,  1272,  3474, 11045,  3291, 11344,
+            8502,  9478,  9837,  1253,  1857,  6233,  4720, 11561,
+            6034,  9817,  3339,  1797,  2879,  6242,  5200,  2114,
+            7962,  9353, 11363,  5475,  6084,  9601,  4108,  7323,
+            10438,  9471,  1271,   408,  6911,  3079,   360,  8276,
+            11535,  9156,  9049, 11539,   850,  8617,   784,  7919,
+            8334, 12170,  1846, 10213, 12184,  7827, 11903,  5600,
+            9779,  1012,   721,  2784,  6676,  6552,  5348,  4424,
+            6816,  8405,  9959,  5150,  2356,  5552,  5267,  1333,
+            8801,  9661,  7308,  5788,  4910,   909, 11613,  4395,
+            8238,  6686,  4302,  3044,  2285, 12249,  1963,  9216,
+            4296, 11918,   695,  4371,  9793,  4884,  2411, 10230,
+            2650,   841,  3890, 10231,  7248,  8505, 11196,  6688,
+            4059,  6060,  3686,  4722, 11853,  5816,  7058,  6868,
+            11137,  7926,  4894, 12284,  4102,  3908,  3610,  6525,
+            7938,  7982, 11977,  6755,   537,  4562,  1623,  8227,
+            11453,  7544,   906, 11816,  9548, 10858,  9703,  2815,
+            11736,  6813,  6979,   819,  8903,  6271, 10843,   348,
+            7514,  8339,  6439,   694,   852,  5659,  2781,  3716,
+            11589,  3024,  1523,  8659,  4114, 10738,  3303,  5885,
+            2978,  7289, 11884,  9123,  9323, 11830,    98,  2526,
+            2116,  4131, 11407,  1844,  3645,  3916,  8133,  2224,
+            10871,  8092,  9651,  5989,  7140,  8480,  1670,   159,
+            10923,  4918,   128,  7312,   725,  9157,  5006,  6393,
+            3494,  6043, 10972,  6181, 11838,  3423, 10514,  7668,
+            3693,  6658,  6905, 11953, 10212, 11922,  9101,  8365,
+            5110,    45,  2400,  1921,  4377,  2720,  1695,    51,
+            2808,   650,  1896,  9997,  9971, 11980,  8098,  4833,
+            4135,  4257,  5838,  4765, 10985, 11532,   590, 12198,
+            482, 12173,  2006,  7064, 10018,  3912, 12016, 10519,
+            11362,  6954,  2210,   284,  5413,  6601,  3865, 10339,
+            11188,  6231,   517,  9564, 11281,  3863,  1210,  4604,
+            8160, 11447,   153,  7204,  5763,  5089,  9248, 12154,
+            11748,  1354,  6672,   179,  5532,  2646,  5941, 12185,
+            862,  3158,   477,  7279,  5678,  7914,  4254,   302,
+            2893, 10114,  6890,  9560,  9647, 11905,  4098,  9824,
+            10269,  1353, 10715,  5325,  6254,  3951,  1807,  6449,
+            5159,  1308,  8315,  3404,  1877,  1231,   112,  6398,
+            11724, 12272,  7286,  1459, 12274,  9896,  3456,   800,
+            1397, 10678,   103,  7420,  7976,   936,   764,   632,
+            7996,  8223,  8445,  7758, 10870,  9571,  2508,  1946,
+            6524, 10158,  1044,  4338,  2457,  3641,  1659,  4139,
+            4688,  9733, 11148,  3946,  2082,  5261,  2036, 11850,
+            7636, 12236,  5366,  2380,  1399,  7720,  2100,  3217,
+            10912,  8898,  7578, 11995,  2791,  1215,  3355,  2711,
+            2267,  2004,  8568, 10176,  3214,  2337,  1750,  4729,
+            4997,  7415,  6315, 12044,  4374,  7157,  4844,   211,
+            8003, 10159,  9290, 11481,  1735,  2336,  5793,  9875,
+            8192,   986,  7527,  1401,   870,  3615,  8465,  2756,
+            9770,  2034, 10168,  3264,  6132,    54,  2880,  4763,
+            11805,  3074,  8286,  9428,  4881,  6933,  1090, 10038,
+            2567,   708,   893,  6465,  4962, 10024,  2090,  5718,
+            10743,   780,  4733,  4623,  2134,  2087,  4802,   884,
+            5372,  5795,  5938,  4333,  6559,  7549,  5269, 10664,
+            4252,  3260,  5917, 10814,  5768,  9983,  8096,  7791,
+            6800,  7491,  6272,  1907, 10947,  6289, 11803,  6032,
+            11449,  1171,  9201,  7933,  2479,  7970, 11337,  7062,
+            8911,  6728,  6542,  8114,  8828,  6595,  3545,  4348,
+            4610,  2205,  6999,  8106,  5560, 10390,  9321,  2499,
+            2413,  7272,  6881, 10582,  9308,  9437,  3554,  3326,
+            5991, 11969,  3415, 12283,  9838, 12063,  4332,  7830,
+            11329,  6605, 12271,  2044, 11611,  7353, 11201, 11582,
+            3733,  8943,  9978,  1627,  7168,  3935,  5050,  2762,
+            7496, 10383,   755,  1654, 12053,  4952, 10134,  4394,
+            6592,  7898,  7497,  8904, 12029,  3581, 10748,  5674,
+            10358,  4901,  7414,  8771,   710,  6764,  8462,  7193,
+            5371,  7274, 11084,   290,  7864,  6827, 11822,  2509,
+            6578,  4026,  5807,  1458,  5721,  5762,  4178,  2105,
+            11621,  4852,  8897,  2856, 11510,  9264,  2520,  8776,
+            7011,  2647,  1898,  7039,  5950, 11163,  5488,  6277,
+            9182, 11456,   633, 10046, 11554,  5633,  9587,  2333,
+            7008,  7084,  5047,  7199,  9865,  8997,   569,  6390,
+            10845,  9679,  8268, 11472,  4203,  1997,     2,  9331,
+            162,  6182,  2000,  3649,  9792,  6363,  7557,  6187,
+            8510,  9935,  5536,  9019,  3706, 12009,  1452,  3067,
+            5494,  9692,  4865,  6019,  7106,  9610,  4588, 10165,
+            6261,  5887,  2652, 10172,  1580, 10379,  4638,  9949
+        };
+
+        /*
+        * Table for inverse NTT, binary case:
+        *   iGMb[x] = R*((1/g)^rev(x)) mod q
+        * Since g = 7, 1/g = 8778 mod 12289.
+        *
+        * The table has 1024 entries, each carrying the factor R
+        * (Montgomery representation). mq_iNTT() reads entry
+        * iGMb[hm + i] for group i of the stage that has hm groups.
+        */
+        internal ushort[] iGMb = {
+            4091,  4401,  1081,  1229,  2530,  6014,  7947,  5329,
+            2579,  4751,  6464, 11703,  7023,  2812,  5890, 10698,
+            3109,  2125,  1960, 10925, 10601, 10404,  4189,  1875,
+            5847,  8546,  4615,  5190, 11324, 10578,  5882, 11155,
+            8417, 12275, 10599,  7446,  5719,  3569,  5981, 10108,
+            4426,  8306, 10755,  4679, 11052,  1538, 11857,   100,
+            8247,  6625,  9725,  5145,  3412,  7858,  5831,  9460,
+            5217, 10740,  7882,  7506, 12172, 11292,  6049,    79,
+            13,  6938,  8886,  5453,  4586, 11455,  2903,  4676,
+            9843,  7621,  8822,  9109,  2083,  8507,  8685,  3110,
+            7015,  3269,  1367,  6397, 10259,  8435, 10527, 11559,
+            11094,  2211,  1808,  7319,    48,  9547,  2560,  1228,
+            9438, 10787, 11800,  1820, 11406,  8966,  6159,  3012,
+            6109,  2796,  2203,  1652,   711,  7004,  1053,  8973,
+            5244,  1517,  9322, 11269,   900,  3888, 11133, 10736,
+            4949,  7616,  9974,  4746, 10270,   126,  2921,  6720,
+            6635,  6543,  1582,  4868,    42,   673,  2240,  7219,
+            1296, 11989,  7675,  8578, 11949,   989, 10541,  7687,
+            7085,  8487,  1004, 10236,  4703,   163,  9143,  4597,
+            6431, 12052,  2991, 11938,  4647,  3362,  2060, 11357,
+            12011,  6664,  5655,  7225,  5914,  9327,  4092,  5880,
+            6932,  3402,  5133,  9394, 11229,  5252,  9008,  1556,
+            6908,  4773,  3853,  8780, 10325,  7737,  1758,  7103,
+            11375, 12273,  8602,  3243,  6536,  7590,  8591, 11552,
+            6101,  3253,  9969,  9640,  4506,  3736,  6829, 10822,
+            9130,  9948,  3566,  2133,  3901,  6038,  7333,  6609,
+            3468,  4659,   625,  2700,  7738,  3443,  3060,  3388,
+            3526,  4418, 11911,  6232,  1730,  2558, 10340,  5344,
+            5286,  2190, 11562,  6199,  2482,  8756,  5387,  4101,
+            4609,  8605,  8226,   144,  5656,  8704,  2621,  5424,
+            10812,  2959, 11346,  6249,  1715,  4951,  9540,  1888,
+            3764,    39,  8219,  2080,  2502,  1469, 10550,  8709,
+            5601,  1093,  3784,  5041,  2058,  8399, 11448,  9639,
+            2059,  9878,  7405,  2496,  7918, 11594,   371,  7993,
+            3073, 10326,    40, 10004,  9245,  7987,  5603,  4051,
+            7894,   676, 11380,  7379,  6501,  4981,  2628,  3488,
+            10956,  7022,  6737,  9933,  7139,  2330,  3884,  5473,
+            7865,  6941,  5737,  5613,  9505, 11568, 11277,  2510,
+            6689,   386,  4462,   105,  2076, 10443,   119,  3955,
+            4370, 11505,  3672, 11439,   750,  3240,  3133,   754,
+            4013, 11929,  9210,  5378, 11881, 11018,  2818,  1851,
+            4966,  8181,  2688,  6205,  6814,   926,  2936,  4327,
+            10175,  7089,  6047,  9410, 10492,  8950,  2472,  6255,
+            728,  7569,  6056, 10432, 11036,  2452,  2811,  3787,
+            945,  8998,  1244,  8815, 11017, 11218,  5894,  4325,
+            4639,  3819,  9826,  7056,  6786,  8670,  5539,  7707,
+            1361,  9812,  2949, 11265, 10301,  9108,   478,  6489,
+            101,  1911,  9483,  3608, 11997, 10536,   812,  8915,
+            637,  8159,  5299,  9128,  3512,  8290,  7068,  7922,
+            3036,  4759,  2163,  3937,  3755, 11306,  7739,  4922,
+            11932,   424,  5538,  6228, 11131,  7778, 11974,  1097,
+            2890, 10027,  2569,  2250,  2352,   821,  2550, 11016,
+            7769,   136,   617,  3157,  5889,  9219,  6855,   120,
+            4405,  1825,  9635,  7214, 10261, 11393,  2441,  9562,
+            11176,   599,  2085, 11465,  7233,  6177,  4801,  9926,
+            9010,  4514,  9455, 11352, 11670,  6174,  7950,  9766,
+            6896, 11603,  3213,  8473,  9873,  2835, 10422,  3732,
+            7961,  1457, 10857,  8069,   832,  1628,  3410,  4900,
+            10855,  5111,  9543,  6325,  7431,  4083,  3072,  8847,
+            9853, 10122,  5259, 11413,  6556,   303,  1465,  3871,
+            4873,  5813, 10017,  6898,  3311,  5947,  8637,  5852,
+            3856,   928,  4933,  8530,  1871,  2184,  5571,  5879,
+            3481, 11597,  9511,  8153,    35,  2609,  5963,  8064,
+            1080, 12039,  8444,  3052,  3813, 11065,  6736,  8454,
+            2340,  7651,  1910, 10709,  2117,  9637,  6402,  6028,
+            2124,  7701,  2679,  5183,  6270,  7424,  2597,  6795,
+            9222, 10837,   280,  8583,  3270,  6753,  2354,  3779,
+            6102,  4732,  5926,  2497,  8640, 10289,  6107, 12127,
+            2958, 12287, 10292,  8086,   817,  4021,  2610,  1444,
+            5899, 11720,  3292,  2424,  5090,  7242,  5205,  5281,
+            9956,  2702,  6656,   735,  2243, 11656,   833,  3107,
+            6012,  6801,  1126,  6339,  5250, 10391,  9642,  5278,
+            3513,  9769,  3025,   779,  9433,  3392,  7437,   668,
+            10184,  8111,  6527,  6568, 10831,  6482,  8263,  5711,
+            9780,   467,  5462,  4425, 11999,  1205,  5015,  6918,
+            5096,  3827,  5525, 11579,  3518,  4875,  7388,  1931,
+            6615,  1541,  8708,   260,  3385,  4792,  4391,  5697,
+            7895,  2155,  7337,   236, 10635, 11534,  1906,  4793,
+            9527,  7239,  8354,  5121, 10662,  2311,  3346,  8556,
+            707,  1088,  4936,   678, 10245,    18,  5684,   960,
+            4459,  7957,   226,  2451,     6,  8874,   320,  6298,
+            8963,  8735,  2852,  2981,  1707,  5408,  5017,  9876,
+            9790,  2968,  1899,  6729,  4183,  5290, 10084,  7679,
+            7941,  8744,  5694,  3461,  4175,  5747,  5561,  3378,
+            5227,   952,  4319,  9810,  4356,  3088, 11118,   840,
+            6257,   486,  6000,  1342, 10382,  6017,  4798,  5489,
+            4498,  4193,  2306,  6521,  1475,  6372,  9029,  8037,
+            1625,  7020,  4740,  5730,  7956,  6351,  6494,  6917,
+            11405,  7487, 10202, 10155,  7666,  7556, 11509,  1546,
+            6571, 10199,  2265,  7327,  5824, 11396, 11581,  9722,
+            2251, 11199,  5356,  7408,  2861,  4003,  9215,   484,
+            7526,  9409, 12235,  6157,  9025,  2121, 10255,  2519,
+            9533,  3824,  8674, 11419, 10888,  4762, 11303,  4097,
+            2414,  6496,  9953, 10554,   808,  2999,  2130,  4286,
+            12078,  7445,  5132,  7915,   245,  5974,  4874,  7292,
+            7560, 10539,  9952,  9075,  2113,  3721, 10285, 10022,
+            9578,  8934, 11074,  9498,   294,  4711,  3391,  1377,
+            9072, 10189,  4569, 10890,  9909,  6923,    53,  4653,
+            439, 10253,  7028, 10207,  8343,  1141,  2556,  7601,
+            8150, 10630,  8648,  9832,  7951, 11245,  2131,  5765,
+            10343,  9781,  2718,  1419,  4531,  3844,  4066,  4293,
+            11657, 11525, 11353,  4313,  4869, 12186,  1611, 10892,
+            11489,  8833,  2393,    15, 10830,  5003,    17,   565,
+            5891, 12177, 11058, 10412,  8885,  3974, 10981,  7130,
+            5840, 10482,  8338,  6035,  6964,  1574, 10936,  2020,
+            2465,  8191,   384,  2642,  2729,  5399,  2175,  9396,
+            11987,  8035,  4375,  6611,  5010, 11812,  9131, 11427,
+            104,  6348,  9643,  6757, 12110,  5617, 10935,   541,
+            135,  3041,  7200,  6526,  5085, 12136,   842,  4129,
+            7685, 11079,  8426,  1008,  2725, 11772,  6058,  1101,
+            1950,  8424,  5688,  6876, 12005, 10079,  5335,   927,
+            1770,   273,  8377,  2271,  5225, 10283,   116, 11807,
+            91, 11699,   757,  1304,  7524,  6451,  8032,  8154,
+            7456,  4191,   309,  2318,  2292, 10393, 11639,  9481,
+            12238, 10594,  9569,  7912, 10368,  9889, 12244,  7179,
+            3924,  3188,   367,  2077,   336,  5384,  5631,  8596,
+            4621,  1775,  8866,   451,  6108,  1317,  6246,  8795,
+            5896,  7283,  3132, 11564,  4977, 12161,  7371,  1366,
+            12130, 10619,  3809,  5149,  6300,  2638,  4197,  1418,
+            10065,  4156,  8373,  8644, 10445,   882,  8158, 10173,
+            9763, 12191,   459,  2966,  3166,   405,  5000,  9311,
+            6404,  8986,  1551,  8175,  3630, 10766,  9265,   700,
+            8573,  9508,  6630, 11437, 11595,  5850,  3950,  4775,
+            11941,  1446,  6018,  3386, 11470,  5310,  5476,   553,
+            9474,  2586,  1431,  2741,   473, 11383,  4745,   836,
+            4062, 10666,  7727, 11752,  5534,   312,  4307,  4351,
+            5764,  8679,  8381,  8187,     5,  7395,  4363,  1152,
+            5421,  5231,  6473,   436,  7567,  8603,  6229,  8230
+        };
+
+        /*
+        * Reduce a small signed integer modulo q. The source integer MUST
+        * be between -q/2 and +q/2.
+        *
+        * x: signed value to reduce.
+        * Returns x unchanged when x >= 0, and x + q when x < 0.
+        */
+        internal uint mq_conv_small(int x)
+        {
+            /*
+            * The int -> uint reinterpretation of a negative x and the
+            * addition that follows both rely on 32-bit wrap-around. They
+            * are pinned to an unchecked context so that a build with
+            * overflow checking enabled does not throw OverflowException.
+            */
+            unchecked
+            {
+                /*
+                * If x < 0, the cast to uint will set the high bit to 1.
+                */
+                uint y = (uint)x;
+
+                /*
+                * -(y >> 31) is 0 or -1 (all ones), so q is added exactly
+                * when x was negative, without a branch.
+                */
+                y += (uint)(Q & -(y >> 31));
+                return y;
+            }
+        }
+
+        /*
+        * Addition modulo q. Operands must be in the 0..q-1 range.
+        *
+        * x, y: the two addends.
+        * Returns (x + y) mod q, in the 0..q-1 range.
+        */
+        internal uint mq_add(uint x, uint y)
+        {
+            /*
+            * x + y - q deliberately wraps below zero when x + y < q, so
+            * the arithmetic is pinned to an unchecked context; with
+            * overflow checking enabled it would otherwise throw.
+            */
+            unchecked
+            {
+                /*
+                * We compute x + y - q. If the result is negative, then the
+                * high bit will be set, and 'd >> 31' will be equal to 1;
+                * thus '-(d >> 31)' will be an all-one pattern. Otherwise,
+                * it will be an all-zero pattern. In other words, this
+                * implements a conditional addition of q.
+                */
+                uint d = x + y - Q;
+                d += (uint)(Q & -(d >> 31));
+                return d;
+            }
+        }
+
+        /*
+        * Subtraction modulo q. Operands must be in the 0..q-1 range.
+        *
+        * x: minuend; y: subtrahend.
+        * Returns (x - y) mod q, in the 0..q-1 range.
+        */
+        internal uint mq_sub(uint x, uint y)
+        {
+            /*
+            * x - y deliberately wraps below zero when x < y, so the
+            * arithmetic is pinned to an unchecked context; with overflow
+            * checking enabled it would otherwise throw.
+            */
+            unchecked
+            {
+                /*
+                * As in mq_add(), a wrapped (negative) difference has its
+                * high bit set, which turns into an all-one mask and adds
+                * q back; a non-negative difference is left untouched.
+                */
+                uint d = x - y;
+                d += (uint)(Q & -(d >> 31));
+                return d;
+            }
+        }
+
+        /*
+        * Halve a value modulo q. The operand must be in the 0..q-1 range.
+        *
+        * An odd operand is first made even by adding q (which is odd),
+        * selected through a 0 / all-ones mask rather than a branch; the
+        * even value is then shifted right by one bit.
+        */
+        internal uint mq_rshift1(uint x)
+        {
+            uint lowBit = x & 1;
+            uint even = x + (uint)(Q & -lowBit);
+
+            return even >> 1;
+        }
+
+        /*
+        * Montgomery multiplication modulo q. If we set R = 2^16 mod q, then
+        * this function computes: x * y / R mod q
+        * Operands must be in the 0..q-1 range.
+        *
+        * x, y: the two factors.
+        * Returns x * y / R mod q, in the 0..q-1 range.
+        */
+        internal uint mq_montymul(uint x, uint y)
+        {
+            /*
+            * z * Q0I is meant to be taken modulo 2^32 and 'z -= Q' is
+            * meant to wrap below zero, so the whole computation is pinned
+            * to an unchecked context; with overflow checking enabled it
+            * would otherwise throw OverflowException.
+            */
+            unchecked
+            {
+                /*
+                * We compute x*y + k*q with a value of k chosen so that the
+                * 16 low bits of the result are 0: since q * Q0I = -1 mod
+                * 2^16, taking k = (x*y * Q0I) mod 2^16 cancels them.
+                */
+                uint z = x * y;
+                uint w = ((z * Q0I) & 0xFFFF) * Q;
+
+                /*
+                * Since x and y are lower than q, the sum is no more than
+                * (2^16 - 1) * q + (q - 1)^2, which is below 2^30 and thus
+                * cannot overflow 32 bits. Its low 16 bits are 0, so the
+                * shift is an exact division by 2^16.
+                */
+                z = (z + w) >> 16;
+
+                /*
+                * After the shift the value is below 2^14, hence below 2q.
+                * We subtract q, then conditionally add q back (when the
+                * subtraction wrapped) to land in the 0..q-1 range.
+                */
+                z -= Q;
+                z += (uint)(Q & -(z >> 31));
+                return z;
+            }
+        }
+
+        /*
+        * Montgomery squaring: returns (x * x) / R mod q, obtained by
+        * handing x to mq_montymul() as both factors.
+        */
+        internal uint mq_montysqr(uint x)
+        {
+            uint squared = mq_montymul(x, x);
+
+            return squared;
+        }
+
+        /*
+        * Divide x by y modulo q = 12289.
+        *
+        * y is inverted by raising it to the power q - 2 = 12287, and the
+        * inverse is then multiplied by x. Both operands and the result
+        * are plain (non-Montgomery) values.
+        */
+        internal uint mq_div_12289(uint x, uint y)
+        {
+            /*
+            * Each local pN below holds y^N in Montgomery representation.
+            * A Montgomery squaring doubles the exponent and a Montgomery
+            * multiplication adds the exponents of its two factors, which
+            * gives the following addition chain up to 12287:
+            *
+            *   1, 2, 3, 5, 10, 20, 40, 80, 160, 163, 323, 646, 1292,
+            *   1455, 2910, 5820, 6143, 12286, 12287
+            */
+
+            /* Multiplying by R2 moves y into Montgomery representation. */
+            uint p1 = mq_montymul(y, R2);
+
+            uint p2 = mq_montysqr(p1);
+            uint p3 = mq_montymul(p2, p1);          /* 2 + 1 */
+            uint p5 = mq_montymul(p3, p2);          /* 3 + 2 */
+            uint p10 = mq_montysqr(p5);
+            uint p20 = mq_montysqr(p10);
+            uint p40 = mq_montysqr(p20);
+            uint p80 = mq_montysqr(p40);
+            uint p160 = mq_montysqr(p80);
+            uint p163 = mq_montymul(p160, p3);      /* 160 + 3 */
+            uint p323 = mq_montymul(p163, p160);    /* 163 + 160 */
+            uint p646 = mq_montysqr(p323);
+            uint p1292 = mq_montysqr(p646);
+            uint p1455 = mq_montymul(p1292, p163);  /* 1292 + 163 */
+            uint p2910 = mq_montysqr(p1455);
+            uint p5820 = mq_montysqr(p2910);
+            uint p6143 = mq_montymul(p5820, p323);  /* 5820 + 323 */
+            uint p12286 = mq_montysqr(p6143);
+            uint p12287 = mq_montymul(p12286, p1);  /* 12286 + 1 */
+
+            /*
+            * x is a plain value, so this last Montgomery multiplication
+            * strips the factor R carried by p12287 and yields x / y
+            * directly.
+            */
+            return mq_montymul(p12287, x);
+        }
+
+        /*
+        * Compute NTT on a ring element.
+        */
+        internal void mq_NTT(ushort[] asrc, int a, uint logn)
+        {
+            /*
+            * In-place transform of the 2^logn values starting at asrc[a].
+            * Each pass doubles the number of groups and halves their
+            * length; every group uses one multiplier read from GMb.
+            */
+            int size = 1 << (int)logn;
+            int span = size;
+            for (int groups = 1; groups < size; groups <<= 1) {
+                int half = span >> 1;
+                int lo = 0;
+                for (int g = 0; g < groups; g++) {
+                    uint s = GMb[groups + g];
+                    int mid = lo + half;
+                    for (int j = lo; j < mid; j++) {
+                        /* Butterfly on the pair (j, j + half). */
+                        uint u = asrc[a + j];
+                        uint v = mq_montymul(asrc[a + j + half], s);
+                        asrc[a + j] = (ushort)mq_add(u, v);
+                        asrc[a + j + half] = (ushort)mq_sub(u, v);
+                    }
+                    lo += span;
+                }
+                span = half;
+            }
+        }
+
+        /*
+        * Compute the inverse NTT on a ring element, binary case.
+        */
+        internal void mq_iNTT(ushort[] asrc, int a, uint logn)
+        {
+            /*
+            * In-place inverse transform of the 2^logn values starting at
+            * asrc[a]. Each pass halves the number of groups and doubles
+            * their length; every group uses one multiplier from iGMb.
+            */
+            int size = 1 << (int)logn;
+            int span = 1;
+            int groups = size;
+            while (groups > 1) {
+                int half = groups >> 1;
+                int step = span << 1;
+                int lo = 0;
+                for (int g = 0; g < half; g++) {
+                    int mid = lo + span;
+                    uint s = iGMb[half + g];
+                    for (int j = lo; j < mid; j++) {
+                        uint u = asrc[a + j];
+                        uint v = asrc[a + j + span];
+                        asrc[a + j] = (ushort)mq_add(u, v);
+                        uint diff = mq_sub(u, v);
+                        asrc[a + j + span] = (ushort)mq_montymul(diff, s);
+                    }
+                    lo += step;
+                }
+                span = step;
+                groups = half;
+            }
+
+            /*
+            * The values must still be divided by the vector size. The
+            * factor is built by starting from R and halving it (mod q)
+            * logn times, which gives 1/n in Montgomery representation;
+            * the loop is generic and its cost is negligible.
+            */
+            uint ni = R;
+            for (int k = size; k > 1; k >>= 1) {
+                ni = mq_rshift1(ni);
+            }
+            for (int k = 0; k < size; k++) {
+                asrc[a + k] = (ushort)mq_montymul(asrc[a + k], ni);
+            }
+        }
+
+        /*
+        * Convert a polynomial (mod q) to Montgomery representation.
+        */
+        internal void mq_poly_tomonty(ushort[] fsrc, int f, uint logn)
+        {
+            /* Montgomery-multiply each of the 2^logn values by R2, in place. */
+            int count = 1 << (int)logn;
+            int idx = 0;
+            while (idx < count) {
+                fsrc[f + idx] = (ushort)mq_montymul(fsrc[f + idx], R2);
+                idx++;
+            }
+        }
+
+        /*
+        * Multiply two polynomials together (NTT representation, and using
+        * a Montgomery multiplication). Result f*g is written over f.
+        */
+        internal void mq_poly_montymul_ntt(ushort[] fsrc, int f, ushort[] gsrc, int g, uint logn)
+        {
+            /* Coefficient-wise Montgomery product; f receives the result. */
+            int count = 1 << (int)logn;
+            int idx = 0;
+            while (idx < count) {
+                fsrc[f + idx] = (ushort)mq_montymul(fsrc[f + idx], gsrc[g + idx]);
+                idx++;
+            }
+        }
+
+        /*
+        * Subtract polynomial g from polynomial f.
+        */
+        internal void mq_poly_sub(ushort[] fsrc, int f, ushort[] gsrc, int g, uint logn)
+        {
+            /* Coefficient-wise f - g through mq_sub; f receives the result. */
+            int count = 1 << (int)logn;
+            int idx = 0;
+            while (idx < count) {
+                fsrc[f + idx] = (ushort)mq_sub(fsrc[f + idx], gsrc[g + idx]);
+                idx++;
+            }
+        }
+
+        /* ===================================================================== */
+
+        /*
+        * Convert the 2^logn values at hsrc[h] in place: forward NTT first,
+        * then conversion to Montgomery representation.
+        */
+        internal void to_ntt_monty(ushort[] hsrc, int h, uint logn)
+        {
+            this.mq_NTT(hsrc, h, logn);
+            this.mq_poly_tomonty(hsrc, h, logn);
+        }
+
+        /*
+        * Verify a raw signature: recompute -s1 = s2*h - c0 (mod phi, mod q)
+        * in the scratch area tmpsrc[tmp..], then report whether the pair
+        * (-s1, s2) is short enough according to common.is_short().
+        *
+        * NOTE(review): h is presumably expected in NTT + Montgomery form,
+        * as produced by to_ntt_monty() -- confirm against the callers.
+        */
+        internal bool verify_raw(ushort[] c0src, int c0, short[] s2src, int s2,
+            ushort[] hsrc, int h, uint logn, ushort[] tmpsrc, int tmp)
+        {
+            int count = 1 << (int)logn;
+
+            /*
+            * Bring every s2 coefficient into the [0..q-1] range: q is
+            * added when the sign bit is set, without branching.
+            */
+            for (int i = 0; i < count; i++) {
+                uint w = (uint)s2src[s2 + i];
+                w += (uint)(Q & -(w >> 31));
+                tmpsrc[tmp + i] = (ushort)w;
+            }
+
+            /*
+            * Scratch area <- s2*h - c0.
+            */
+            mq_NTT(tmpsrc, tmp, logn);
+            mq_poly_montymul_ntt(tmpsrc, tmp, hsrc, h, logn);
+            mq_iNTT(tmpsrc, tmp, logn);
+            mq_poly_sub(tmpsrc, tmp, c0src, c0, logn);
+
+            /*
+            * Move the coefficients of -s1 into the [-q/2..q/2] range. The
+            * value is written back to the scratch area and also copied,
+            * as a signed 16-bit value, into a fresh array for is_short().
+            */
+            short[] minusS1 = new short[count];
+            for (int i = 0; i < count; i++) {
+                int w = (int)tmpsrc[tmp + i];
+                w -= (int)(Q & -(((Q >> 1) - (uint)w) >> 31));
+                ushort wrapped = (ushort)w;
+                tmpsrc[tmp + i] = wrapped;
+                minusS1[i] = (short)wrapped;
+            }
+
+            /*
+            * The signature is accepted exactly when the aggregate
+            * (-s1, s2) vector is short enough.
+            */
+            return this.common.is_short(minusS1, 0, s2src, s2, logn);
+        }
+
+        /*
+        * Compute h = g/f (mod phi, mod q) into hsrc[h..], using
+        * tmpsrc[tmp..] as scratch space for f. Returns 1 on success, or 0
+        * as soon as a zero coefficient is met in the NTT of f (f is then
+        * not invertible).
+        */
+        internal int compute_public(ushort[] hsrc, int h,
+            sbyte[] fsrc, int f, sbyte[] gsrc, int g, uint logn, ushort[] tmpsrc, int tmp)
+        {
+            int count = 1 << (int)logn;
+
+            /* Load f into the scratch area and g into h[]. */
+            for (int i = 0; i < count; i++) {
+                tmpsrc[tmp + i] = (ushort)mq_conv_small(fsrc[f + i]);
+                hsrc[h + i] = (ushort)mq_conv_small(gsrc[g + i]);
+            }
+            mq_NTT(hsrc, h, logn);
+            mq_NTT(tmpsrc, tmp, logn);
+
+            /* Coefficient-wise division in the NTT domain. */
+            for (int i = 0; i < count; i++) {
+                ushort denom = tmpsrc[tmp + i];
+                if (denom == 0) {
+                    return 0;
+                }
+                hsrc[h + i] = (ushort)mq_div_12289(hsrc[h + i], denom);
+            }
+            mq_iNTT(hsrc, h, logn);
+            return 1;
+        }
+
+        /*
+        * Compute G = g*F/f (mod phi, mod q) into Gsrc[G..]. The scratch
+        * area tmpsrc[tmp..] must hold 2*2^logn values. Returns 1 on
+        * success; returns 0 if a coefficient of the NTT of f is zero, or
+        * if a coefficient of the result lies outside [-127..+127].
+        */
+        internal int complete_private(sbyte[] Gsrc, int G,
+            sbyte[] fsrc, int f, sbyte[] gsrc, int g, sbyte[] Fsrc, int F,
+            uint logn, ushort[] tmpsrc, int tmp)
+        {
+            int count = 1 << (int)logn;
+            int num = tmp;
+            int den = num + count;
+
+            /* num <- g*F (NTT domain); den is used for F, then for f. */
+            for (int i = 0; i < count; i++) {
+                tmpsrc[num + i] = (ushort)mq_conv_small(gsrc[g + i]);
+                tmpsrc[den + i] = (ushort)mq_conv_small(Fsrc[F + i]);
+            }
+            mq_NTT(tmpsrc, num, logn);
+            mq_NTT(tmpsrc, den, logn);
+            mq_poly_tomonty(tmpsrc, num, logn);
+            mq_poly_montymul_ntt(tmpsrc, num, tmpsrc, den, logn);
+
+            for (int i = 0; i < count; i++) {
+                tmpsrc[den + i] = (ushort)mq_conv_small(fsrc[f + i]);
+            }
+            mq_NTT(tmpsrc, den, logn);
+
+            /* num <- num / f, coefficient by coefficient. */
+            for (int i = 0; i < count; i++) {
+                ushort denom = tmpsrc[den + i];
+                if (denom == 0) {
+                    return 0;
+                }
+                tmpsrc[num + i] = (ushort)mq_div_12289(tmpsrc[num + i], denom);
+            }
+            mq_iNTT(tmpsrc, num, logn);
+
+            /*
+            * Centre each coefficient around zero (q is subtracted when
+            * the value is at least q/2), then reinterpret the unsigned
+            * word as a signed integer and check that it fits.
+            */
+            for (int i = 0; i < count; i++) {
+                uint w = tmpsrc[num + i];
+                w -= (uint)(Q & ~-((w - (Q >> 1)) >> 31));
+                int gi = (int)w;
+                if (gi < -127 || gi > 127) {
+                    return 0;
+                }
+                Gsrc[G + i] = (sbyte)gi;
+            }
+            return 1;
+        }
+
+        /*
+        * Returns 1 if no coefficient of the NTT of s2 is zero, 0 otherwise.
+        * tmpsrc[tmp..] is used as scratch space for 2^logn values.
+        */
+        internal int is_invertible(
+            short[] s2src, int s2, uint logn, ushort[] tmpsrc, int tmp)
+        {
+            int count = 1 << (int)logn;
+
+            /* Reduce s2 into [0..q-1] and transform it. */
+            for (int i = 0; i < count; i++) {
+                uint w = (uint)s2src[s2 + i];
+                w += (uint)(Q & -(w >> 31));
+                tmpsrc[tmp + i] = (ushort)w;
+            }
+            mq_NTT(tmpsrc, tmp, logn);
+
+            /*
+            * (value - 1) is negative only for a zero value, so the top bit
+            * of the accumulator ends up set iff some coefficient is zero.
+            */
+            uint zeroFlag = 0;
+            for (int i = 0; i < count; i++) {
+                zeroFlag |= (uint)(tmpsrc[tmp + i] - 1);
+            }
+            return (int)(1u - (zeroFlag >> 31));
+        }
+
+        /*
+        * Rebuild h = (c0 - s1) / s2 (mod phi, mod q) into hsrc[h..], using
+        * tmpsrc[tmp..] as scratch space. Returns 1 when (s1, s2) is short
+        * enough and no NTT coefficient of s2 is zero, 0 otherwise.
+        */
+        internal int verify_recover(ushort[] hsrc, int h,
+            ushort[] c0src, int c0, short[] s1src, int s1, short[] s2src, int s2,
+            uint logn, ushort[] tmpsrc, int tmp)
+        {
+            int count = 1 << (int)logn;
+
+            /*
+            * Reduce the coefficients of s1 and s2 into [0..q-1]; s2 goes
+            * to the scratch area and c0 - s1 goes to h[].
+            */
+            for (int i = 0; i < count; i++) {
+                uint w2 = (uint)s2src[s2 + i];
+                w2 += (uint)(Q & -(w2 >> 31));
+                tmpsrc[tmp + i] = (ushort)w2;
+
+                uint w1 = (uint)s1src[s1 + i];
+                w1 += (uint)(Q & -(w1 >> 31));
+                uint diff = mq_sub(c0src[c0 + i], w1);
+                hsrc[h + i] = (ushort)diff;
+            }
+
+            /*
+            * Divide in the NTT domain. A zero coefficient in s2 makes the
+            * operation fail, but this is only recorded in a flag (its top
+            * bit gets set) so that processing stays constant-time.
+            */
+            mq_NTT(tmpsrc, tmp, logn);
+            mq_NTT(hsrc, h, logn);
+            uint r = 0;
+            for (int i = 0; i < count; i++) {
+                ushort denom = tmpsrc[tmp + i];
+                r |= (uint)(denom - 1);
+                hsrc[h + i] = (ushort)mq_div_12289(hsrc[h + i], denom);
+            }
+            mq_iNTT(hsrc, h, logn);
+
+            /*
+            * Accept only if the signature is short enough and s2 was
+            * invertible. The caller must still check that the rebuilt
+            * public key matches the expected value (e.g. through a hash).
+            */
+            bool shortEnough = this.common.is_short(s1src, s1, s2src, s2, logn);
+            r = ~r & (uint)-(shortEnough ? 1 : 0);
+            return (int)(r >> 31);
+        }
+
+        /*
+        * Count the zero coefficients in the NTT of the 2^logn values at
+        * sigsrc[sig..]; tmpsrc[tmp..] is used as scratch space.
+        */
+        internal int count_nttzero(short[] sigsrc, int sig, uint logn, ushort[] tmpsrc, int tmp)
+        {
+            int count = 1 << (int)logn;
+
+            /* Reduce the input into [0..q-1] and transform it. */
+            for (int i = 0; i < count; i++) {
+                uint w = (uint)sigsrc[sig + i];
+                w += (uint)(Q & -(w >> 31));
+                tmpsrc[tmp + i] = (ushort)w;
+            }
+            mq_NTT(tmpsrc, tmp, logn);
+
+            /* (value - 1) has its top bit set only when value is zero. */
+            uint zeros = 0;
+            for (int i = 0; i < count; i++) {
+                uint w = (uint)tmpsrc[tmp + i] - 1u;
+                zeros += (w >> 31);
+            }
+            return (int)zeros;
+        }
+    }
+}
diff --git a/crypto/src/pqc/crypto/falcon/SHAKE256.cs b/crypto/src/pqc/crypto/falcon/SHAKE256.cs
new file mode 100644
index 000000000..eb7c77e09
--- /dev/null
+++ b/crypto/src/pqc/crypto/falcon/SHAKE256.cs
@@ -0,0 +1,569 @@
+using System;
+
+namespace Org.BouncyCastle.Pqc.Crypto.Falcon
+{   
+    class SHAKE256
+    {
+
+        /* 
+        * License from the reference C code (the code was copied then modified
+        * to function in C#):
+        * ==========================(LICENSE BEGIN)============================
+        *
+        * Copyright (c) 2017-2019  Falcon Project
+        *
+        * Permission is hereby granted, free of charge, to any person obtaining
+        * a copy of this software and associated documentation files (the
+        * "Software"), to deal in the Software without restriction, including
+        * without limitation the rights to use, copy, modify, merge, publish,
+        * distribute, sublicense, and/or sell copies of the Software, and to
+        * permit persons to whom the Software is furnished to do so, subject to
+        * the following conditions:
+        *
+        * The above copyright notice and this permission notice shall be
+        * included in all copies or substantial portions of the Software.
+        *
+        * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+        * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+        * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+        * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+        * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+        * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+        * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+        *
+        * ===========================(LICENSE END)=============================
+        */
+        
+        /* Sponge state: 25 64-bit words, allocated by i_shake256_init(). */
+        ulong[] A;
+        /* 200-byte buffer allocated by i_shake256_init(); not read anywhere in this class. */
+        byte[] dubf;
+        /* Byte position inside the current 136-byte block. */
+        ulong dptr;
+
+        /*
+        * Round constants, one per round (24 entries). The table never
+        * changes, so it is shared by all instances instead of being
+        * re-allocated for each of them.
+        */
+        private static readonly ulong[] RC = {
+            0x0000000000000001, 0x0000000000008082,
+            0x800000000000808A, 0x8000000080008000,
+            0x000000000000808B, 0x0000000080000001,
+            0x8000000080008081, 0x8000000000008009,
+            0x000000000000008A, 0x0000000000000088,
+            0x0000000080008009, 0x000000008000000A,
+            0x000000008000808B, 0x800000000000008B,
+            0x8000000000008089, 0x8000000000008003,
+            0x8000000000008002, 0x8000000000000080,
+            0x000000000000800A, 0x800000008000000A,
+            0x8000000080008081, 0x8000000000008080,
+            0x0000000080000001, 0x8000000080008008
+        };
+
+        /*
+        * Apply the 24-round permutation, in place, to the 25-word state A.
+        * The parameter A hides the field of the same name; callers in this
+        * class pass this.A.
+        */
+        void process_block(ulong[] A) {
+            ulong t0, t1, t2, t3, t4;
+            ulong tt0, tt1, tt2, tt3;
+            ulong t, kt;
+            ulong c0, c1, c2, c3, c4, bnn;
+            int j;
+
+            /*
+            * Invert some words (alternate internal representation, which
+            * saves some operations).
+            */
+            A[ 1] = ~A[ 1];
+            A[ 2] = ~A[ 2];
+            A[ 8] = ~A[ 8];
+            A[12] = ~A[12];
+            A[17] = ~A[17];
+            A[20] = ~A[20];
+
+            /*
+            * Compute the 24 rounds. This loop is partially unrolled (each
+            * iteration computes two rounds).
+            */
+            for (j = 0; j < 24; j += 2) {
+
+                /*
+                * First round of the pair. Compute the five mixing values
+                * t0..t4: each one is the XOR of five words, combined with
+                * the XOR of five other words rotated left by one bit.
+                */
+                tt0 = A[ 1] ^ A[ 6];
+                tt1 = A[11] ^ A[16];
+                tt0 ^= A[21] ^ tt1;
+                tt0 = (tt0 << 1) | (tt0 >> 63);
+                tt2 = A[ 4] ^ A[ 9];
+                tt3 = A[14] ^ A[19];
+                tt0 ^= A[24];
+                tt2 ^= tt3;
+                t0 = tt0 ^ tt2;
+
+                tt0 = A[ 2] ^ A[ 7];
+                tt1 = A[12] ^ A[17];
+                tt0 ^= A[22] ^ tt1;
+                tt0 = (tt0 << 1) | (tt0 >> 63);
+                tt2 = A[ 0] ^ A[ 5];
+                tt3 = A[10] ^ A[15];
+                tt0 ^= A[20];
+                tt2 ^= tt3;
+                t1 = tt0 ^ tt2;
+
+                tt0 = A[ 3] ^ A[ 8];
+                tt1 = A[13] ^ A[18];
+                tt0 ^= A[23] ^ tt1;
+                tt0 = (tt0 << 1) | (tt0 >> 63);
+                tt2 = A[ 1] ^ A[ 6];
+                tt3 = A[11] ^ A[16];
+                tt0 ^= A[21];
+                tt2 ^= tt3;
+                t2 = tt0 ^ tt2;
+
+                tt0 = A[ 4] ^ A[ 9];
+                tt1 = A[14] ^ A[19];
+                tt0 ^= A[24] ^ tt1;
+                tt0 = (tt0 << 1) | (tt0 >> 63);
+                tt2 = A[ 2] ^ A[ 7];
+                tt3 = A[12] ^ A[17];
+                tt0 ^= A[22];
+                tt2 ^= tt3;
+                t3 = tt0 ^ tt2;
+
+                tt0 = A[ 0] ^ A[ 5];
+                tt1 = A[10] ^ A[15];
+                tt0 ^= A[20] ^ tt1;
+                tt0 = (tt0 << 1) | (tt0 >> 63);
+                tt2 = A[ 3] ^ A[ 8];
+                tt3 = A[13] ^ A[18];
+                tt0 ^= A[23];
+                tt2 ^= tt3;
+                t4 = tt0 ^ tt2;
+
+                /* XOR each mixing value into its group of five words. */
+                A[ 0] = A[ 0] ^ t0;
+                A[ 5] = A[ 5] ^ t0;
+                A[10] = A[10] ^ t0;
+                A[15] = A[15] ^ t0;
+                A[20] = A[20] ^ t0;
+                A[ 1] = A[ 1] ^ t1;
+                A[ 6] = A[ 6] ^ t1;
+                A[11] = A[11] ^ t1;
+                A[16] = A[16] ^ t1;
+                A[21] = A[21] ^ t1;
+                A[ 2] = A[ 2] ^ t2;
+                A[ 7] = A[ 7] ^ t2;
+                A[12] = A[12] ^ t2;
+                A[17] = A[17] ^ t2;
+                A[22] = A[22] ^ t2;
+                A[ 3] = A[ 3] ^ t3;
+                A[ 8] = A[ 8] ^ t3;
+                A[13] = A[13] ^ t3;
+                A[18] = A[18] ^ t3;
+                A[23] = A[23] ^ t3;
+                A[ 4] = A[ 4] ^ t4;
+                A[ 9] = A[ 9] ^ t4;
+                A[14] = A[14] ^ t4;
+                A[19] = A[19] ^ t4;
+                A[24] = A[24] ^ t4;
+                /*
+                * Rotate every word except A[0] to the left by its own
+                * fixed amount.
+                */
+                A[ 5] = (A[ 5] << 36) | (A[ 5] >> (64 - 36));
+                A[10] = (A[10] <<  3) | (A[10] >> (64 -  3));
+                A[15] = (A[15] << 41) | (A[15] >> (64 - 41));
+                A[20] = (A[20] << 18) | (A[20] >> (64 - 18));
+                A[ 1] = (A[ 1] <<  1) | (A[ 1] >> (64 -  1));
+                A[ 6] = (A[ 6] << 44) | (A[ 6] >> (64 - 44));
+                A[11] = (A[11] << 10) | (A[11] >> (64 - 10));
+                A[16] = (A[16] << 45) | (A[16] >> (64 - 45));
+                A[21] = (A[21] <<  2) | (A[21] >> (64 - 2));
+                A[ 2] = (A[ 2] << 62) | (A[ 2] >> (64 - 62));
+                A[ 7] = (A[ 7] <<  6) | (A[ 7] >> (64 -  6));
+                A[12] = (A[12] << 43) | (A[12] >> (64 - 43));
+                A[17] = (A[17] << 15) | (A[17] >> (64 - 15));
+                A[22] = (A[22] << 61) | (A[22] >> (64 - 61));
+                A[ 3] = (A[ 3] << 28) | (A[ 3] >> (64 - 28));
+                A[ 8] = (A[ 8] << 55) | (A[ 8] >> (64 - 55));
+                A[13] = (A[13] << 25) | (A[13] >> (64 - 25));
+                A[18] = (A[18] << 21) | (A[18] >> (64 - 21));
+                A[23] = (A[23] << 56) | (A[23] >> (64 - 56));
+                A[ 4] = (A[ 4] << 27) | (A[ 4] >> (64 - 27));
+                A[ 9] = (A[ 9] << 20) | (A[ 9] >> (64 - 20));
+                A[14] = (A[14] << 39) | (A[14] >> (64 - 39));
+                A[19] = (A[19] <<  8) | (A[19] >> (64 -  8));
+                A[24] = (A[24] << 14) | (A[24] >> (64 - 14));
+
+                /*
+                * Non-linear step: the words are processed in groups of
+                * five. Each group's new values c0..c4 are computed from
+                * the old ones with AND, OR and NOT, then stored back.
+                * NOTE(review): the mix of AND/OR forms presumably follows
+                * from the inverted-word representation set up above --
+                * confirm against the reference implementation.
+                */
+                bnn = ~A[12];
+                kt = A[ 6] | A[12];
+                c0 = A[ 0] ^ kt;
+                kt = bnn | A[18];
+                c1 = A[ 6] ^ kt;
+                kt = A[18] & A[24];
+                c2 = A[12] ^ kt;
+                kt = A[24] | A[ 0];
+                c3 = A[18] ^ kt;
+                kt = A[ 0] & A[ 6];
+                c4 = A[24] ^ kt;
+                A[ 0] = c0;
+                A[ 6] = c1;
+                A[12] = c2;
+                A[18] = c3;
+                A[24] = c4;
+                bnn = ~A[22];
+                kt = A[ 9] | A[10];
+                c0 = A[ 3] ^ kt;
+                kt = A[10] & A[16];
+                c1 = A[ 9] ^ kt;
+                kt = A[16] | bnn;
+                c2 = A[10] ^ kt;
+                kt = A[22] | A[ 3];
+                c3 = A[16] ^ kt;
+                kt = A[ 3] & A[ 9];
+                c4 = A[22] ^ kt;
+                A[ 3] = c0;
+                A[ 9] = c1;
+                A[10] = c2;
+                A[16] = c3;
+                A[22] = c4;
+                bnn = ~A[19];
+                kt = A[ 7] | A[13];
+                c0 = A[ 1] ^ kt;
+                kt = A[13] & A[19];
+                c1 = A[ 7] ^ kt;
+                kt = bnn & A[20];
+                c2 = A[13] ^ kt;
+                kt = A[20] | A[ 1];
+                c3 = bnn ^ kt;
+                kt = A[ 1] & A[ 7];
+                c4 = A[20] ^ kt;
+                A[ 1] = c0;
+                A[ 7] = c1;
+                A[13] = c2;
+                A[19] = c3;
+                A[20] = c4;
+                bnn = ~A[17];
+                kt = A[ 5] & A[11];
+                c0 = A[ 4] ^ kt;
+                kt = A[11] | A[17];
+                c1 = A[ 5] ^ kt;
+                kt = bnn | A[23];
+                c2 = A[11] ^ kt;
+                kt = A[23] & A[ 4];
+                c3 = bnn ^ kt;
+                kt = A[ 4] | A[ 5];
+                c4 = A[23] ^ kt;
+                A[ 4] = c0;
+                A[ 5] = c1;
+                A[11] = c2;
+                A[17] = c3;
+                A[23] = c4;
+                bnn = ~A[ 8];
+                kt = bnn & A[14];
+                c0 = A[ 2] ^ kt;
+                kt = A[14] | A[15];
+                c1 = bnn ^ kt;
+                kt = A[15] & A[21];
+                c2 = A[14] ^ kt;
+                kt = A[21] | A[ 2];
+                c3 = A[15] ^ kt;
+                kt = A[ 2] & A[ 8];
+                c4 = A[21] ^ kt;
+                A[ 2] = c0;
+                A[ 8] = c1;
+                A[14] = c2;
+                A[15] = c3;
+                A[21] = c4;
+                /* XOR the constant of round j into the first word. */
+                A[ 0] = A[ 0] ^ RC[j + 0];
+
+                /*
+                * Second round of the pair: same sequence of steps, but
+                * the words are addressed at different (permuted) indices.
+                */
+                tt0 = A[ 6] ^ A[ 9];
+                tt1 = A[ 7] ^ A[ 5];
+                tt0 ^= A[ 8] ^ tt1;
+                tt0 = (tt0 << 1) | (tt0 >> 63);
+                tt2 = A[24] ^ A[22];
+                tt3 = A[20] ^ A[23];
+                tt0 ^= A[21];
+                tt2 ^= tt3;
+                t0 = tt0 ^ tt2;
+
+                tt0 = A[12] ^ A[10];
+                tt1 = A[13] ^ A[11];
+                tt0 ^= A[14] ^ tt1;
+                tt0 = (tt0 << 1) | (tt0 >> 63);
+                tt2 = A[ 0] ^ A[ 3];
+                tt3 = A[ 1] ^ A[ 4];
+                tt0 ^= A[ 2];
+                tt2 ^= tt3;
+                t1 = tt0 ^ tt2;
+
+                tt0 = A[18] ^ A[16];
+                tt1 = A[19] ^ A[17];
+                tt0 ^= A[15] ^ tt1;
+                tt0 = (tt0 << 1) | (tt0 >> 63);
+                tt2 = A[ 6] ^ A[ 9];
+                tt3 = A[ 7] ^ A[ 5];
+                tt0 ^= A[ 8];
+                tt2 ^= tt3;
+                t2 = tt0 ^ tt2;
+
+                tt0 = A[24] ^ A[22];
+                tt1 = A[20] ^ A[23];
+                tt0 ^= A[21] ^ tt1;
+                tt0 = (tt0 << 1) | (tt0 >> 63);
+                tt2 = A[12] ^ A[10];
+                tt3 = A[13] ^ A[11];
+                tt0 ^= A[14];
+                tt2 ^= tt3;
+                t3 = tt0 ^ tt2;
+
+                tt0 = A[ 0] ^ A[ 3];
+                tt1 = A[ 1] ^ A[ 4];
+                tt0 ^= A[ 2] ^ tt1;
+                tt0 = (tt0 << 1) | (tt0 >> 63);
+                tt2 = A[18] ^ A[16];
+                tt3 = A[19] ^ A[17];
+                tt0 ^= A[15];
+                tt2 ^= tt3;
+                t4 = tt0 ^ tt2;
+
+                /* XOR each mixing value into its group of five words. */
+                A[ 0] = A[ 0] ^ t0;
+                A[ 3] = A[ 3] ^ t0;
+                A[ 1] = A[ 1] ^ t0;
+                A[ 4] = A[ 4] ^ t0;
+                A[ 2] = A[ 2] ^ t0;
+                A[ 6] = A[ 6] ^ t1;
+                A[ 9] = A[ 9] ^ t1;
+                A[ 7] = A[ 7] ^ t1;
+                A[ 5] = A[ 5] ^ t1;
+                A[ 8] = A[ 8] ^ t1;
+                A[12] = A[12] ^ t2;
+                A[10] = A[10] ^ t2;
+                A[13] = A[13] ^ t2;
+                A[11] = A[11] ^ t2;
+                A[14] = A[14] ^ t2;
+                A[18] = A[18] ^ t3;
+                A[16] = A[16] ^ t3;
+                A[19] = A[19] ^ t3;
+                A[17] = A[17] ^ t3;
+                A[15] = A[15] ^ t3;
+                A[24] = A[24] ^ t4;
+                A[22] = A[22] ^ t4;
+                A[20] = A[20] ^ t4;
+                A[23] = A[23] ^ t4;
+                A[21] = A[21] ^ t4;
+                /* Rotate every word except A[0] by its fixed amount. */
+                A[ 3] = (A[ 3] << 36) | (A[ 3] >> (64 - 36));
+                A[ 1] = (A[ 1] <<  3) | (A[ 1] >> (64 -  3));
+                A[ 4] = (A[ 4] << 41) | (A[ 4] >> (64 - 41));
+                A[ 2] = (A[ 2] << 18) | (A[ 2] >> (64 - 18));
+                A[ 6] = (A[ 6] <<  1) | (A[ 6] >> (64 -  1));
+                A[ 9] = (A[ 9] << 44) | (A[ 9] >> (64 - 44));
+                A[ 7] = (A[ 7] << 10) | (A[ 7] >> (64 - 10));
+                A[ 5] = (A[ 5] << 45) | (A[ 5] >> (64 - 45));
+                A[ 8] = (A[ 8] <<  2) | (A[ 8] >> (64 - 2));
+                A[12] = (A[12] << 62) | (A[12] >> (64 - 62));
+                A[10] = (A[10] <<  6) | (A[10] >> (64 -  6));
+                A[13] = (A[13] << 43) | (A[13] >> (64 - 43));
+                A[11] = (A[11] << 15) | (A[11] >> (64 - 15));
+                A[14] = (A[14] << 61) | (A[14] >> (64 - 61));
+                A[18] = (A[18] << 28) | (A[18] >> (64 - 28));
+                A[16] = (A[16] << 55) | (A[16] >> (64 - 55));
+                A[19] = (A[19] << 25) | (A[19] >> (64 - 25));
+                A[17] = (A[17] << 21) | (A[17] >> (64 - 21));
+                A[15] = (A[15] << 56) | (A[15] >> (64 - 56));
+                A[24] = (A[24] << 27) | (A[24] >> (64 - 27));
+                A[22] = (A[22] << 20) | (A[22] >> (64 - 20));
+                A[20] = (A[20] << 39) | (A[20] >> (64 - 39));
+                A[23] = (A[23] <<  8) | (A[23] >> (64 -  8));
+                A[21] = (A[21] << 14) | (A[21] >> (64 - 14));
+
+                /* Non-linear step, again in groups of five words. */
+                bnn = ~A[13];
+                kt = A[ 9] | A[13];
+                c0 = A[ 0] ^ kt;
+                kt = bnn | A[17];
+                c1 = A[ 9] ^ kt;
+                kt = A[17] & A[21];
+                c2 = A[13] ^ kt;
+                kt = A[21] | A[ 0];
+                c3 = A[17] ^ kt;
+                kt = A[ 0] & A[ 9];
+                c4 = A[21] ^ kt;
+                A[ 0] = c0;
+                A[ 9] = c1;
+                A[13] = c2;
+                A[17] = c3;
+                A[21] = c4;
+                bnn = ~A[14];
+                kt = A[22] | A[ 1];
+                c0 = A[18] ^ kt;
+                kt = A[ 1] & A[ 5];
+                c1 = A[22] ^ kt;
+                kt = A[ 5] | bnn;
+                c2 = A[ 1] ^ kt;
+                kt = A[14] | A[18];
+                c3 = A[ 5] ^ kt;
+                kt = A[18] & A[22];
+                c4 = A[14] ^ kt;
+                A[18] = c0;
+                A[22] = c1;
+                A[ 1] = c2;
+                A[ 5] = c3;
+                A[14] = c4;
+                bnn = ~A[23];
+                kt = A[10] | A[19];
+                c0 = A[ 6] ^ kt;
+                kt = A[19] & A[23];
+                c1 = A[10] ^ kt;
+                kt = bnn & A[ 2];
+                c2 = A[19] ^ kt;
+                kt = A[ 2] | A[ 6];
+                c3 = bnn ^ kt;
+                kt = A[ 6] & A[10];
+                c4 = A[ 2] ^ kt;
+                A[ 6] = c0;
+                A[10] = c1;
+                A[19] = c2;
+                A[23] = c3;
+                A[ 2] = c4;
+                bnn = ~A[11];
+                kt = A[ 3] & A[ 7];
+                c0 = A[24] ^ kt;
+                kt = A[ 7] | A[11];
+                c1 = A[ 3] ^ kt;
+                kt = bnn | A[15];
+                c2 = A[ 7] ^ kt;
+                kt = A[15] & A[24];
+                c3 = bnn ^ kt;
+                kt = A[24] | A[ 3];
+                c4 = A[15] ^ kt;
+                A[24] = c0;
+                A[ 3] = c1;
+                A[ 7] = c2;
+                A[11] = c3;
+                A[15] = c4;
+                bnn = ~A[16];
+                kt = bnn & A[20];
+                c0 = A[12] ^ kt;
+                kt = A[20] | A[ 4];
+                c1 = bnn ^ kt;
+                kt = A[ 4] & A[ 8];
+                c2 = A[20] ^ kt;
+                kt = A[ 8] | A[12];
+                c3 = A[ 4] ^ kt;
+                kt = A[12] & A[16];
+                c4 = A[ 8] ^ kt;
+                A[12] = c0;
+                A[16] = c1;
+                A[20] = c2;
+                A[ 4] = c3;
+                A[ 8] = c4;
+                /* XOR the constant of round j + 1 into the first word. */
+                A[ 0] = A[ 0] ^ RC[j + 1];
+                /*
+                * Move the words around so that the next iteration can use
+                * the same indices again: two cycles of twelve words each,
+                * with t as the spare. A[0] stays where it is.
+                */
+                t = A[ 5];
+                A[ 5] = A[18];
+                A[18] = A[11];
+                A[11] = A[10];
+                A[10] = A[ 6];
+                A[ 6] = A[22];
+                A[22] = A[20];
+                A[20] = A[12];
+                A[12] = A[19];
+                A[19] = A[15];
+                A[15] = A[24];
+                A[24] = A[ 8];
+                A[ 8] = t;
+                t = A[ 1];
+                A[ 1] = A[ 9];
+                A[ 9] = A[14];
+                A[14] = A[ 2];
+                A[ 2] = A[13];
+                A[13] = A[23];
+                A[23] = A[ 4];
+                A[ 4] = A[21];
+                A[21] = A[16];
+                A[16] = A[ 3];
+                A[ 3] = A[17];
+                A[17] = A[ 7];
+                A[ 7] = t;
+            }
+
+                /*
+                * Invert some words back to normal representation.
+                */
+                A[ 1] = ~A[ 1];
+                A[ 2] = ~A[ 2];
+                A[ 8] = ~A[ 8];
+                A[12] = ~A[12];
+                A[17] = ~A[17];
+                A[20] = ~A[20];
+        }
+        
+        /*
+        * Reset the context: the block position goes back to zero and the
+        * state is replaced with a fresh all-zero 25-word array. Must be
+        * called before any inject/flip/extract call, since those methods
+        * access the state array allocated here.
+        */
+        internal void i_shake256_init()
+        {
+            this.dptr = 0;
+
+            /*
+            * A newly created array is zero-filled by the runtime, so the
+            * state needs no explicit clearing after allocation.
+            */
+            this.A = new ulong[25];
+            this.dubf = new byte[200];
+        }
+
+        /*
+        * Absorb len bytes taken from insrc, starting at index inarray.
+        * Bytes are XORed into the state at the current block position;
+        * process_block() runs each time 136 bytes have been accumulated.
+        */
+        internal void i_shake256_inject(byte[] insrc, int inarray, int len)
+        {
+            ulong pos = this.dptr;
+            int src = inarray;
+            int remaining = len;
+
+            while (remaining > 0) {
+                /* Take as much as fits in the rest of the current block. */
+                int room = 136 - (int)pos;
+                int chunk = (room > remaining) ? remaining : room;
+
+                for (int k = 0; k < chunk; k++) {
+                    /* Byte 'at' of the block is byte (at & 7) of word (at >> 3). */
+                    int at = k + (int)pos;
+                    this.A[at >> 3] ^= (ulong)insrc[src + k] << ((at & 7) << 3);
+                }
+
+                pos += (ulong)chunk;
+                src += chunk;
+                remaining -= chunk;
+                if (pos == 136) {
+                    process_block(this.A);
+                    pos = 0;
+                }
+            }
+            this.dptr = pos;
+        }
+
+        /*
+        * Switch the context from absorbing to producing output.
+        */
+        internal void i_shake256_flip()
+        {
+            /*
+            * The padding is XORed straight into the state: 0x1F at the
+            * current position, and 0x80 into the top byte of word 16,
+            * which is byte 135, the last byte of the 136-byte block.
+            * The position is then set to the end of the block so that the
+            * first extraction call runs process_block().
+            */
+            uint offset = (uint)this.dptr;
+            int shift = (int)((offset & 7) << 3);
+
+            this.A[offset >> 3] ^= (ulong)0x1F << shift;
+            this.A[16] ^= 0x8000000000000000UL;
+            this.dptr = 136;
+        }
+
+        /*
+        * Write len output bytes into outsrc, starting at index outarray.
+        * process_block() runs whenever the current 136-byte block has been
+        * fully consumed.
+        */
+        internal void i_shake256_extract(byte[] outsrc, int outarray, int len)
+        {
+            ulong pos = this.dptr;
+            int dst = outarray;
+            int remaining = len;
+
+            while (remaining > 0) {
+                if (pos == 136) {
+                    process_block(this.A);
+                    pos = 0;
+                }
+
+                /* Copy what is left of the current block, at most. */
+                int chunk = 136 - (int)pos;
+                if (chunk > remaining) {
+                    chunk = remaining;
+                }
+                remaining -= chunk;
+
+                for (int k = 0; k < chunk; k++) {
+                    /* Byte 'pos' of the block is byte (pos & 7) of word (pos >> 3). */
+                    outsrc[dst++] = (byte)(this.A[pos >> 3] >> (int)((pos & 7) << 3));
+                    pos++;
+                }
+            }
+            this.dptr = pos;
+        }
+        
+    }
+}
diff --git a/crypto/src/pqc/crypto/falcon/SamplerZ.cs b/crypto/src/pqc/crypto/falcon/SamplerZ.cs
new file mode 100644
index 000000000..b43cd2c38
--- /dev/null
+++ b/crypto/src/pqc/crypto/falcon/SamplerZ.cs
@@ -0,0 +1,229 @@
+using System;
+
+namespace Org.BouncyCastle.Pqc.Crypto.Falcon
+{
+    class SamplerZ
+    {
+        /*
+        * Threshold table used by gaussian0_sampler(). Each row is one
+        * 72-bit value split into three 24-bit limbs, most significant
+        * limb first. The table is never modified, so it is built once
+        * here instead of being reallocated on every sampling call.
+        */
+        static readonly uint[] gaussian0_dist = {
+            10745844u,  3068844u,  3741698u,
+             5559083u,  1580863u,  8248194u,
+             2260429u, 13669192u,  2736639u,
+              708981u,  4421575u, 10046180u,
+              169348u,  7122675u,  4136815u,
+               30538u, 13063405u,  7650655u,
+                4132u, 14505003u,  7826148u,
+                 417u, 16768101u, 11363290u,
+                  31u,  8444042u,  8086568u,
+                   1u, 12844466u,   265321u,
+                   0u,  1232676u, 13644283u,
+                   0u,    38047u,  9111839u,
+                   0u,      870u,  6138264u,
+                   0u,       14u, 12545723u,
+                   0u,        0u,  3104126u,
+                   0u,        0u,    28824u,
+                   0u,        0u,      198u,
+                   0u,        0u,        1u
+        };
+
+        /*
+        * Randomness source, sigma_min constant and floating-point
+        * engine; all three are fixed at construction time.
+        */
+        readonly FalconRNG p;
+        readonly FalconFPR sigma_min;
+        readonly FPREngine fpre;
+
+        /*
+        * Build a sampler drawing its randomness from p, using sigma_min
+        * as the scaling constant and fpre for all floating-point work.
+        */
+        internal SamplerZ(FalconRNG p, FalconFPR sigma_min, FPREngine fpre) {
+            this.p = p;
+            this.sigma_min = sigma_min;
+            this.fpre = fpre;
+        }
+
+        /*
+        * Entry point: sample an integer around center mu, with isigma
+        * being the inverse of the standard deviation (see sampler()).
+        */
+        internal int Sample(FalconFPR mu, FalconFPR isigma) {
+            return this.sampler(mu, isigma);
+        }
+
+        /*
+        * Sample an integer value along a half-gaussian distribution centered
+        * on zero and standard deviation 1.8205, with a precision of 72 bits.
+        */
+        int gaussian0_sampler(FalconRNG p)
+        {
+            uint v0, v1, v2, hi;
+            ulong lo;
+            int u;
+            int z;
+
+            /*
+            * Get a random 72-bit value, into three 24-bit limbs v0..v2.
+            */
+            lo = p.prng_get_u64();
+            hi = p.prng_get_u8();
+            v0 = (uint)lo & 0xFFFFFF;
+            v1 = (uint)(lo >> 24) & 0xFFFFFF;
+            v2 = (uint)(lo >> 48) | (hi << 16);
+
+            /*
+            * Sampled value is z, such that v0..v2 is lower than the first
+            * z elements of the table.
+            *
+            * Every row is visited and compared without branching: the
+            * limbs are subtracted from least to most significant, the
+            * borrow (bit 31 of each difference, since all limbs fit on
+            * 24 bits) is carried into the next limb, and the final
+            * borrow is 1 exactly when v0..v2 is lower than the row.
+            */
+            z = 0;
+            for (u = 0; u < gaussian0_dist.Length; u += 3) {
+                uint w0, w1, w2, cc;
+
+                w0 = gaussian0_dist[u + 2];
+                w1 = gaussian0_dist[u + 1];
+                w2 = gaussian0_dist[u + 0];
+                cc = (v0 - w0) >> 31;
+                cc = (v1 - w1 - cc) >> 31;
+                cc = (v2 - w2 - cc) >> 31;
+                z += (int)cc;
+            }
+            return z;
+        }
+
+        /*
+        * Sample a bit with probability exp(-x) for some x >= 0.
+        * The ccs value is handed unchanged to fpr_expm_p63(); the
+        * returned value is 0 or 1.
+        */
+        int BerExp(FalconRNG p, FalconFPR x, FalconFPR ccs)
+        {
+            int s, i;
+            FalconFPR r;
+            uint sw, w;
+            ulong z;
+
+            /*
+            * Reduce x modulo log(2): x = s*log(2) + r, with s an integer,
+            * and 0 <= r < log(2). Since x >= 0, we can use this.fpre.fpr_trunc().
+            */
+            s = (int)this.fpre.fpr_trunc(this.fpre.fpr_mul(x, this.fpre.fpr_inv_log2));
+            r = this.fpre.fpr_sub(x, this.fpre.fpr_mul(this.fpre.fpr_of(s), this.fpre.fpr_log2));
+
+            /*
+            * It may happen (quite rarely) that s >= 64; if sigma = 1.2
+            * (the minimum value for sigma), r = 0 and b = 1, then we get
+            * s >= 64 if the half-Gaussian produced a z >= 13, which happens
+            * with probability about 0.000000000230383991, which is
+            * approximately equal to 2^(-32). In any case, if s >= 64,
+            * then BerExp will be non-zero with probability less than
+            * 2^(-64), so we can simply saturate s at 63.
+            *
+            * The saturation is done without a branch: (63 - sw) >> 31 is
+            * 1 only when sw > 63, and its negation is then an all-ones
+            * mask that replaces sw with 63.
+            */
+            sw = (uint)s;
+            sw ^= (uint)((sw ^ 63) & -((63 - sw) >> 31));
+            s = (int)sw;
+
+            /*
+            * Compute exp(-r); we know that 0 <= r < log(2) at this point, so
+            * we can use this.fpre.fpr_expm_p63(), which yields a result scaled to 2^63.
+            * We scale it up to 2^64, then right-shift it by s bits because
+            * we really want exp(-x) = 2^(-s)*exp(-r).
+            *
+            * The "-1" operation makes sure that the value fits on 64 bits
+            * (i.e. if r = 0, we may get 2^64, and we prefer 2^64-1 in that
+            * case). The bias is negligible since this.fpre.fpr_expm_p63() only computes
+            * with 51 bits of precision or so.
+            */
+            z = ((this.fpre.fpr_expm_p63(r, ccs) << 1) - 1) >> s;
+
+            /*
+            * Sample a bit with probability exp(-x). Since x = s*log(2) + r,
+            * exp(-x) = 2^-s * exp(-r), we compare lazily exp(-x) with the
+            * PRNG output to limit its consumption, the sign of the difference
+            * yields the expected result.
+            *
+            * z is consumed one byte at a time, most significant byte
+            * first; the loop stops at the first byte that differs from
+            * the random byte, or after all eight bytes.
+            */
+            i = 64;
+            do {
+                i -= 8;
+                w = p.prng_get_u8() - ((uint)(z >> i) & 0xFF);
+            } while (w == 0 && i > 0);
+            return (int)(w >> 31);
+        }
+
+        /*
+        * The sampler produces a random integer that follows a discrete Gaussian
+        * distribution, centered on mu, and with standard deviation sigma. The
+        * provided parameter isigma is equal to 1/sigma.
+        *
+        * The value of sigma MUST lie between 1 and 2 (i.e. isigma lies between
+        * 0.5 and 1); in Falcon, sigma should always be between 1.2 and 1.9.
+        */
+        int sampler(FalconFPR mu, FalconFPR isigma)
+        {
+            int s;
+            FalconFPR r, dss, ccs;
+
+            /*
+            * Center is mu. We compute mu = s + r where s is an integer
+            * and 0 <= r < 1.
+            */
+            s = (int)this.fpre.fpr_floor(mu);
+            r = this.fpre.fpr_sub(mu, this.fpre.fpr_of(s));
+
+            /*
+            * dss = 1/(2*sigma^2) = 0.5*(isigma^2).
+            */
+            dss = this.fpre.fpr_half(this.fpre.fpr_sqr(isigma));
+
+            /*
+            * ccs = sigma_min / sigma = sigma_min * isigma.
+            */
+            ccs = this.fpre.fpr_mul(isigma, this.sigma_min);
+
+            /*
+            * We now need to sample on center r.
+            */
+            for (;;) {
+                int z0, z, b;
+                FalconFPR x;
+
+                /*
+                * Sample z for a Gaussian distribution. Then get a
+                * random bit b to turn the sampling into a bimodal
+                * distribution: if b = 1, we use z+1, otherwise we
+                * use -z. We thus have two situations:
+                *
+                *  - b = 1: z >= 1 and sampled against a Gaussian
+                *    centered on 1.
+                *  - b = 0: z <= 0 and sampled against a Gaussian
+                *    centered on 0.
+                */
+                z0 = gaussian0_sampler(this.p);
+                b = (int)this.p.prng_get_u8() & 1;
+                z = b + ((b << 1) - 1) * z0;
+
+                /*
+                * Rejection sampling. We want a Gaussian centered on r;
+                * but we sampled against a Gaussian centered on b (0 or
+                * 1). But we know that z is always in the range where
+                * our sampling distribution is greater than the Gaussian
+                * distribution, so rejection works.
+                *
+                * We got z with distribution:
+                *    G(z) = exp(-((z-b)^2)/(2*sigma0^2))
+                * We target distribution:
+                *    S(z) = exp(-((z-r)^2)/(2*sigma^2))
+                * Rejection sampling works by keeping the value z with
+                * probability S(z)/G(z), and starting again otherwise.
+                * This requires S(z) <= G(z), which is the case here.
+                * Thus, we simply need to keep our z with probability:
+                *    P = exp(-x)
+                * where:
+                *    x = ((z-r)^2)/(2*sigma^2) - ((z-b)^2)/(2*sigma0^2)
+                *
+                * Here, we scale up the Bernoulli distribution, which
+                * makes rejection more probable, but makes rejection
+                * rate sufficiently decorrelated from the Gaussian
+                * center and standard deviation that the whole sampler
+                * can be said to be constant-time.
+                */
+                x = this.fpre.fpr_mul(this.fpre.fpr_sqr(this.fpre.fpr_sub(this.fpre.fpr_of(z), r)), dss);
+                x = this.fpre.fpr_sub(x, this.fpre.fpr_mul(this.fpre.fpr_of(z0 * z0), this.fpre.fpr_inv_2sqrsigma0));
+                if (BerExp(this.p, x, ccs) != 0) {
+                    /*
+                    * Rejection sampling was centered on r, but the
+                    * actual center is mu = s + r.
+                    */
+                    return s + z;
+                }
+            }
+        }
+    }
+}