From ad341610afdfc3d9e8e34f552b05d3d0d94660fe Mon Sep 17 00:00:00 2001
From: Peter Dettman <peter.dettman@gmail.com>
Date: Thu, 2 Aug 2018 16:41:37 +0700
Subject: [PATCH 1/8] Signed-digit multi-comb for ecmult_gen

- see section 3.3 of https://eprint.iacr.org/2012/309
---
 src/ecmult_gen.h       |  64 +++++++++++++++++
 src/ecmult_gen_impl.h  | 157 +++++++++++++++++++++++++++++++++++++++--
 src/scalar.h           |   2 +
 src/scalar_4x64_impl.h |  35 +++++++++
 src/scalar_8x32_impl.h |  39 ++++++++++
 src/scalar_low_impl.h  |  24 +++++++
 6 files changed, 317 insertions(+), 4 deletions(-)

diff --git a/src/ecmult_gen.h b/src/ecmult_gen.h
index 539618dcbb..6b456ddda8 100644
--- a/src/ecmult_gen.h
+++ b/src/ecmult_gen.h
@@ -10,6 +10,57 @@
 #include "scalar.h"
 #include "group.h"
 
+#if !defined(USE_ECMULT_STATIC_PRECOMPUTATION) && !defined(USE_BASIC_CONFIG)
+#define USE_COMB 1
+#endif
+
+#ifdef USE_COMB
+
+#if defined(EXHAUSTIVE_TEST_ORDER)
+
+  /* We need to control these values for exhaustive tests because
+   * the tables cannot have infinities in them (secp256k1_ge_storage
+   * doesn't support infinities) */
+#  if EXHAUSTIVE_TEST_ORDER > 32
+#    define COMB_BLOCKS 52
+#    define COMB_TEETH 5
+#  elif EXHAUSTIVE_TEST_ORDER > 16
+#    define COMB_BLOCKS 64
+#    define COMB_TEETH 4
+#  elif EXHAUSTIVE_TEST_ORDER > 8
+#    define COMB_BLOCKS 86
+#    define COMB_TEETH 3
+#  elif EXHAUSTIVE_TEST_ORDER > 4
+#    define COMB_BLOCKS 128
+#    define COMB_TEETH 2
+#  else
+#    define COMB_BLOCKS 256
+#    define COMB_TEETH 1
+#  endif
+
+#  define COMB_SPACING 1
+
+#else
+
+  /* COMB_BLOCKS, COMB_TEETH, COMB_SPACING must all be positive and the product of the three (COMB_BITS)
+   * must evaluate to a value in the range [256, 288]. The resulting memory usage for precomputation
+   * will be COMB_POINTS_TOTAL * sizeof(secp256k1_ge_storage). */
+  #define COMB_BLOCKS 4
+  #define COMB_TEETH 5
+  #define COMB_SPACING 13
+
+#endif
+
+/* The remaining COMB_* parameters are derived values, don't modify these. */
+#define COMB_BITS (COMB_BLOCKS * COMB_TEETH * COMB_SPACING)
+#define COMB_GROUPED ((COMB_SPACING == 1) && ((32 % COMB_TEETH) == 0))
+#define COMB_OFFSET (COMB_BITS == 256)
+#define COMB_POINTS (1 << (COMB_TEETH - 1))
+#define COMB_POINTS_TOTAL (COMB_BLOCKS * COMB_POINTS)
+#define COMB_MASK (COMB_POINTS - 1)
+
+#else
+
 #if ECMULT_GEN_PREC_BITS != 2 && ECMULT_GEN_PREC_BITS != 4 && ECMULT_GEN_PREC_BITS != 8
 #  error "Set ECMULT_GEN_PREC_BITS to 2, 4 or 8."
 #endif
@@ -17,7 +68,19 @@
 #define ECMULT_GEN_PREC_G (1 << ECMULT_GEN_PREC_B)
 #define ECMULT_GEN_PREC_N (256 / ECMULT_GEN_PREC_B)
 
+#endif
+
 typedef struct {
+#ifdef USE_COMB
+    /* Precomputation data for the signed-digit multi-comb algorithm as described in section 3.3 of:
+     *     "Fast and compact elliptic-curve cryptography", Mike Hamburg
+     *         (https://eprint.iacr.org/2012/309)
+     */
+    secp256k1_ge_storage (*prec)[COMB_BLOCKS][COMB_POINTS];
+#if COMB_OFFSET
+    secp256k1_ge offset;
+#endif
+#else
     /* For accelerating the computation of a*G:
      * To harden against timing attacks, use the following mechanism:
      * * Break up the multiplicand into groups of PREC_B bits, called n_0, n_1, n_2, ..., n_(PREC_N-1).
@@ -31,6 +94,7 @@ typedef struct {
      * the intermediate sums while computing a*G.
      */
     secp256k1_ge_storage (*prec)[ECMULT_GEN_PREC_N][ECMULT_GEN_PREC_G]; /* prec[j][i] = (PREC_G)^j * i * G + U_i */
+#endif
     secp256k1_scalar blind;
     secp256k1_gej initial;
 } secp256k1_ecmult_gen_context;
diff --git a/src/ecmult_gen_impl.h b/src/ecmult_gen_impl.h
index 384a67faed..ba8cf07985 100644
--- a/src/ecmult_gen_impl.h
+++ b/src/ecmult_gen_impl.h
@@ -28,10 +28,16 @@ static void secp256k1_ecmult_gen_context_init(secp256k1_ecmult_gen_context *ctx)
 
 static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context *ctx, void **prealloc) {
 #ifndef USE_ECMULT_STATIC_PRECOMPUTATION
+#ifdef USE_COMB
+    secp256k1_ge prec[COMB_POINTS_TOTAL + COMB_OFFSET];
+    secp256k1_gej u, sum;
+    int block, index, spacing, stride, tooth;
+#else
     secp256k1_ge prec[ECMULT_GEN_PREC_N * ECMULT_GEN_PREC_G];
     secp256k1_gej gj;
     secp256k1_gej nums_gej;
     int i, j;
+#endif
     size_t const prealloc_size = SECP256K1_ECMULT_GEN_CONTEXT_PREALLOCATED_SIZE;
     void* const base = *prealloc;
 #endif
@@ -40,6 +46,54 @@ static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context *ctx
         return;
     }
 #ifndef USE_ECMULT_STATIC_PRECOMPUTATION
+#ifdef USE_COMB
+    ctx->prec = (secp256k1_ge_storage (*)[COMB_BLOCKS][COMB_POINTS])manual_alloc(prealloc, prealloc_size, base, prealloc_size);
+
+    /* get the generator */
+    secp256k1_gej_set_ge(&u, &secp256k1_ge_const_g);
+
+    /* compute prec. */
+    {
+        secp256k1_gej ds[COMB_TEETH];
+        secp256k1_gej vs[COMB_POINTS_TOTAL + COMB_OFFSET];
+        int vs_pos = 0;
+
+        for (block = 0; block < COMB_BLOCKS; ++block) {
+            secp256k1_gej_set_infinity(&sum);
+            for (tooth = 0; tooth < COMB_TEETH; ++tooth) {
+                secp256k1_gej_add_var(&sum, &sum, &u, NULL);
+                secp256k1_gej_double_var(&u, &u, NULL);
+                ds[tooth] = u;
+                for (spacing = 1; spacing < COMB_SPACING; ++spacing) {
+                    secp256k1_gej_double_var(&u, &u, NULL);
+                }
+            }
+            secp256k1_gej_neg(&vs[vs_pos++], &sum);
+            for (tooth = 0; tooth < (COMB_TEETH - 1); ++tooth) {
+                stride = 1 << tooth;
+                for (index = 0; index < stride; ++index, ++vs_pos) {
+                    secp256k1_gej_add_var(&vs[vs_pos], &vs[vs_pos - stride], &ds[tooth], NULL);
+                }
+            }
+        }
+        VERIFY_CHECK(vs_pos == COMB_POINTS_TOTAL);
+#if COMB_OFFSET
+        vs[COMB_POINTS_TOTAL] = ds[COMB_TEETH - 1];
+#endif
+        secp256k1_ge_set_all_gej_var(prec, vs, COMB_POINTS_TOTAL + COMB_OFFSET);
+    }
+
+    for (block = 0; block < COMB_BLOCKS; ++block) {
+        for (index = 0; index < COMB_POINTS; ++index) {
+            secp256k1_ge_to_storage(&(*ctx->prec)[block][index], &prec[block * COMB_POINTS + index]);
+        }
+    }
+
+#if COMB_OFFSET
+    ctx->offset = prec[COMB_POINTS_TOTAL];
+#endif
+
+#else
     ctx->prec = (secp256k1_ge_storage (*)[ECMULT_GEN_PREC_N][ECMULT_GEN_PREC_G])manual_alloc(prealloc, prealloc_size, base, prealloc_size);
 
     /* get the generator */
@@ -94,6 +148,7 @@ static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context *ctx
             secp256k1_ge_to_storage(&(*ctx->prec)[j][i], &prec[j*ECMULT_GEN_PREC_G + i]);
         }
     }
+#endif
 #else
     (void)prealloc;
     ctx->prec = (secp256k1_ge_storage (*)[ECMULT_GEN_PREC_N][ECMULT_GEN_PREC_G])secp256k1_ecmult_static_context;
@@ -107,16 +162,26 @@ static int secp256k1_ecmult_gen_context_is_built(const secp256k1_ecmult_gen_cont
 
 static void secp256k1_ecmult_gen_context_finalize_memcpy(secp256k1_ecmult_gen_context *dst, const secp256k1_ecmult_gen_context *src) {
 #ifndef USE_ECMULT_STATIC_PRECOMPUTATION
+#ifdef USE_COMB
     if (src->prec != NULL) {
         /* We cast to void* first to suppress a -Wcast-align warning. */
-        dst->prec = (secp256k1_ge_storage (*)[ECMULT_GEN_PREC_N][ECMULT_GEN_PREC_G])(void*)((unsigned char*)dst + ((unsigned char*)src->prec - (unsigned char*)src));
+        dst->prec = (secp256k1_ge_storage (*)[COMB_BLOCKS][COMB_POINTS])(void*)((unsigned char*)dst + ((unsigned char*)src->prec - (unsigned char*)src));
     }
 #else
-    (void)dst, (void)src;
+    if (src->prec != NULL) {
+        dst->prec = (secp256k1_ge_storage (*)[ECMULT_GEN_PREC_N][ECMULT_GEN_PREC_G])(void*)((unsigned char*)dst + ((unsigned char*)src->prec - (unsigned char*)src));
+    }
+#endif
 #endif
+    (void)dst, (void)src;
 }
 
 static void secp256k1_ecmult_gen_context_clear(secp256k1_ecmult_gen_context *ctx) {
+#ifdef USE_COMB
+#if COMB_OFFSET
+    secp256k1_ge_clear(&ctx->offset);
+#endif
+#endif
     secp256k1_scalar_clear(&ctx->blind);
     secp256k1_gej_clear(&ctx->initial);
     ctx->prec = NULL;
@@ -126,7 +191,70 @@ static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp25
     secp256k1_ge add;
     secp256k1_ge_storage adds;
     secp256k1_scalar gnb;
-    int bits;
+    uint32_t bits;
+
+#ifdef USE_COMB
+
+    uint32_t abs, bit_pos, block, comb_off, index, sign;
+#if !COMB_GROUPED
+    uint32_t bit, tooth;
+#endif
+    uint32_t recoded[9];
+    secp256k1_fe neg;
+
+    memset(&adds, 0, sizeof(adds));
+    *r = ctx->initial;
+
+    /* Blind scalar/point multiplication by computing (n-b)G + bG instead of nG. */
+    secp256k1_scalar_add(&gnb, gn, &ctx->blind);
+    secp256k1_scalar_signed_recoding(recoded, &gnb, COMB_BITS + COMB_OFFSET);
+
+    comb_off = COMB_SPACING - 1;
+    for (;;) {
+        bit_pos = comb_off;
+        for (block = 0; block < COMB_BLOCKS; ++block) {
+#if COMB_GROUPED
+            bits = (recoded[bit_pos >> 5] >> (bit_pos & 0x1F)) & ((1 << COMB_TEETH) - 1);
+            bit_pos += COMB_SPACING * COMB_TEETH;
+#else
+            bits = 0;
+            for (tooth = 0; tooth < COMB_TEETH; ++tooth) {
+                bit = (recoded[bit_pos >> 5] >> (bit_pos & 0x1F)) & 1;
+                bits |= bit << tooth;
+                bit_pos += COMB_SPACING;
+            }
+#endif
+
+            sign = (bits >> (COMB_TEETH - 1)) & 1;
+            abs = (bits ^ -sign) & COMB_MASK;
+
+            VERIFY_CHECK(sign == 0 || sign == 1);
+            VERIFY_CHECK(abs < COMB_POINTS);
+
+            for (index = 0; index < COMB_POINTS; ++index) {
+                secp256k1_ge_storage_cmov(&adds, &(*ctx->prec)[block][index], index == abs);
+            }
+
+            secp256k1_ge_from_storage(&add, &adds);
+            secp256k1_fe_negate(&neg, &add.y, 1);
+            secp256k1_fe_cmov(&add.y, &neg, sign);
+
+            secp256k1_gej_add_ge(r, r, &add);
+        }
+
+        if (comb_off-- == 0) {
+            break;
+        }
+
+        secp256k1_gej_double(r, r);
+    }
+
+    secp256k1_fe_clear(&neg);
+    memset(recoded, 0, sizeof(recoded));
+    abs = 0;
+    sign = 0;
+
+#else
     int i, j;
     memset(&adds, 0, sizeof(adds));
     *r = ctx->initial;
@@ -146,18 +274,23 @@ static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp25
              *    by Dag Arne Osvik, Adi Shamir, and Eran Tromer
              *    (https://www.tau.ac.il/~tromer/papers/cache.pdf)
              */
-            secp256k1_ge_storage_cmov(&adds, &(*ctx->prec)[j][i], i == bits);
+            secp256k1_ge_storage_cmov(&adds, &(*ctx->prec)[j][i], (uint32_t)i == bits);
         }
         secp256k1_ge_from_storage(&add, &adds);
         secp256k1_gej_add_ge(r, r, &add);
     }
+#endif
     bits = 0;
     secp256k1_ge_clear(&add);
+    memset(&adds, 0, sizeof(adds));
     secp256k1_scalar_clear(&gnb);
 }
 
 /* Setup blinding values for secp256k1_ecmult_gen. */
 static void secp256k1_ecmult_gen_blind(secp256k1_ecmult_gen_context *ctx, const unsigned char *seed32) {
+#ifdef USE_COMB
+    int spacing;
+#endif
     secp256k1_scalar b;
     secp256k1_gej gb;
     secp256k1_fe s;
@@ -170,6 +303,14 @@ static void secp256k1_ecmult_gen_blind(secp256k1_ecmult_gen_context *ctx, const
         secp256k1_gej_set_ge(&ctx->initial, &secp256k1_ge_const_g);
         secp256k1_gej_neg(&ctx->initial, &ctx->initial);
         secp256k1_scalar_set_int(&ctx->blind, 1);
+#ifdef USE_COMB
+        for (spacing = 1; spacing < COMB_SPACING; ++spacing) {
+            secp256k1_scalar_add(&ctx->blind, &ctx->blind, &ctx->blind);
+        }
+#if COMB_OFFSET
+        secp256k1_gej_add_ge(&ctx->initial, &ctx->initial, &ctx->offset);
+#endif
+#endif
     }
     /* The prior blinding value (if not reset) is chained forward by including it in the hash. */
     secp256k1_scalar_get_b32(nonce32, &ctx->blind);
@@ -201,6 +342,14 @@ static void secp256k1_ecmult_gen_blind(secp256k1_ecmult_gen_context *ctx, const
     secp256k1_scalar_negate(&b, &b);
     ctx->blind = b;
     ctx->initial = gb;
+#ifdef USE_COMB
+    for (spacing = 1; spacing < COMB_SPACING; ++spacing) {
+        secp256k1_scalar_add(&ctx->blind, &ctx->blind, &ctx->blind);
+    }
+#if COMB_OFFSET
+    secp256k1_gej_add_ge(&ctx->initial, &ctx->initial, &ctx->offset);
+#endif
+#endif
     secp256k1_scalar_clear(&b);
     secp256k1_gej_clear(&gb);
 }
diff --git a/src/scalar.h b/src/scalar.h
index aaaa3d8827..a9dd5ca139 100644
--- a/src/scalar.h
+++ b/src/scalar.h
@@ -102,4 +102,6 @@ static void secp256k1_scalar_mul_shift_var(secp256k1_scalar *r, const secp256k1_
 /** If flag is true, set *r equal to *a; otherwise leave it. Constant-time.  Both *r and *a must be initialized.*/
 static void secp256k1_scalar_cmov(secp256k1_scalar *r, const secp256k1_scalar *a, int flag);
 
+static void secp256k1_scalar_signed_recoding(uint32_t r[9], const secp256k1_scalar *a, int bits);
+
 #endif /* SECP256K1_SCALAR_H */
diff --git a/src/scalar_4x64_impl.h b/src/scalar_4x64_impl.h
index a1def26fca..88cd85a9d1 100644
--- a/src/scalar_4x64_impl.h
+++ b/src/scalar_4x64_impl.h
@@ -865,4 +865,39 @@ SECP256K1_INLINE static int secp256k1_scalar_is_even(const secp256k1_scalar *a)
     return !(a->d[0] & 1);
 }
 
+SECP256K1_INLINE static void secp256k1_scalar_signed_recoding(uint32_t r[9], const secp256k1_scalar *a, int bits) { /* Fill r[0..8] with a recoding of *a: an odd-adjusted copy of a shifted right by one bit, plus a marker bit at overall bit index (bits - 1). */
+    uint64_t a0 = a->d[0], a1 = a->d[1], a2 = a->d[2], a3 = a->d[3], a4; /* local copies of the four 64-bit limbs; a4 receives the final carry */
+    uint64_t mask = (a0 & 1ULL) - 1ULL; /* all ones when a is even (low bit clear), zero when a is odd */
+    uint128_t c; /* running sum / carry for the limb-wise addition below */
+
+    VERIFY_CHECK(256 < bits && bits <= 288); /* keeps the shift amount (bits - 257) used for r[8] within 0..31 */
+
+    c  = (uint128_t)a0 + (SECP256K1_N_0 & mask); /* add SECP256K1_N_0..3 limb by limb, but only when a is even; NOTE(review): presumably the group order limbs, defined outside this hunk -- confirm */
+    a0 = (uint64_t)c; c >>= 64; /* keep the low 64 bits, carry the rest into the next limb */
+    c += (uint128_t)a1 + (SECP256K1_N_1 & mask);
+    a1 = (uint64_t)c; c >>= 64;
+    c += (uint128_t)a2 + (SECP256K1_N_2 & mask);
+    a2 = (uint64_t)c; c >>= 64;
+    c += (uint128_t)a3 + (SECP256K1_N_3 & mask);
+    a3 = (uint64_t)c; c >>= 64;
+    a4 = (uint64_t)c; /* carry out of the top limb */
+    VERIFY_CHECK(a0 & 1ULL); /* the (conditionally adjusted) value is expected to be odd */
+    VERIFY_CHECK(a4 < 2ULL); /* at most one carry bit is expected */
+
+    a0 = (a0 >> 1) | (a1 << 63); /* shift the multi-limb value (a4:a3:a2:a1:a0) right by one bit, dropping the low bit */
+    a1 = (a1 >> 1) | (a2 << 63);
+    a2 = (a2 >> 1) | (a3 << 63);
+    a3 = (a3 >> 1) | (a4 << 63); /* the carry bit becomes the top bit of the highest limb */
+
+    r[0] = (uint32_t)a0; /* emit the shifted value as eight 32-bit words, least significant word first */
+    r[1] = (uint32_t)(a0 >> 32);
+    r[2] = (uint32_t)a1;
+    r[3] = (uint32_t)(a1 >> 32);
+    r[4] = (uint32_t)a2;
+    r[5] = (uint32_t)(a2 >> 32);
+    r[6] = (uint32_t)a3;
+    r[7] = (uint32_t)(a3 >> 32);
+    r[8] = 1UL << (bits - 257); /* word 8 starts at bit 256, so this sets exactly bit (bits - 1) of the recoding */
+}
+
 #endif /* SECP256K1_SCALAR_REPR_IMPL_H */
diff --git a/src/scalar_8x32_impl.h b/src/scalar_8x32_impl.h
index 62c7ae7156..6dcdcbc7d1 100644
--- a/src/scalar_8x32_impl.h
+++ b/src/scalar_8x32_impl.h
@@ -732,4 +732,43 @@ SECP256K1_INLINE static int secp256k1_scalar_is_even(const secp256k1_scalar *a)
     return !(a->d[0] & 1);
 }
 
+SECP256K1_INLINE static void secp256k1_scalar_signed_recoding(uint32_t r[9], const secp256k1_scalar *a, int bits) { /* Fill r[0..8] with a recoding of *a: an odd-adjusted copy of a shifted right by one bit, plus a marker bit at overall bit index (bits - 1). */
+    uint32_t a0 = a->d[0], a1 = a->d[1], a2 = a->d[2], a3 = a->d[3], /* local copies of the eight 32-bit limbs */
+             a4 = a->d[4], a5 = a->d[5], a6 = a->d[6], a7 = a->d[7], a8; /* a8 receives the final carry */
+    uint32_t mask = (a0 & 1UL) - 1UL; /* all ones when a is even (low bit clear), zero when a is odd */
+    uint64_t c; /* running sum / carry for the limb-wise addition below */
+
+    VERIFY_CHECK(256 < bits && bits <= 288); /* keeps the shift amount (bits - 257) used for r[8] within 0..31 */
+
+    c  = (uint64_t)a0 + (SECP256K1_N_0 & mask); /* add SECP256K1_N_0..7 limb by limb, but only when a is even; NOTE(review): presumably the group order limbs, defined outside this hunk -- confirm */
+    a0 = (uint32_t)c; c >>= 32; /* keep the low 32 bits, carry the rest into the next limb */
+    c += (uint64_t)a1 + (SECP256K1_N_1 & mask);
+    a1 = (uint32_t)c; c >>= 32;
+    c += (uint64_t)a2 + (SECP256K1_N_2 & mask);
+    a2 = (uint32_t)c; c >>= 32;
+    c += (uint64_t)a3 + (SECP256K1_N_3 & mask);
+    a3 = (uint32_t)c; c >>= 32;
+    c += (uint64_t)a4 + (SECP256K1_N_4 & mask);
+    a4 = (uint32_t)c; c >>= 32;
+    c += (uint64_t)a5 + (SECP256K1_N_5 & mask);
+    a5 = (uint32_t)c; c >>= 32;
+    c += (uint64_t)a6 + (SECP256K1_N_6 & mask);
+    a6 = (uint32_t)c; c >>= 32;
+    c += (uint64_t)a7 + (SECP256K1_N_7 & mask);
+    a7 = (uint32_t)c; c >>= 32;
+    a8 = (uint32_t)c; /* carry out of the top limb */
+    VERIFY_CHECK(a0 & 1UL); /* the (conditionally adjusted) value is expected to be odd */
+    VERIFY_CHECK(a8 < 2UL); /* at most one carry bit is expected */
+
+    r[0] = (a0 >> 1) | (a1 << 31); /* shift the multi-limb value right by one bit while writing it out, least significant word first */
+    r[1] = (a1 >> 1) | (a2 << 31);
+    r[2] = (a2 >> 1) | (a3 << 31);
+    r[3] = (a3 >> 1) | (a4 << 31);
+    r[4] = (a4 >> 1) | (a5 << 31);
+    r[5] = (a5 >> 1) | (a6 << 31);
+    r[6] = (a6 >> 1) | (a7 << 31);
+    r[7] = (a7 >> 1) | (a8 << 31); /* the carry bit becomes the top bit of the highest value word */
+    r[8] = 1UL << (bits - 257); /* word 8 starts at bit 256, so this sets exactly bit (bits - 1) of the recoding */
+}
+
 #endif /* SECP256K1_SCALAR_REPR_IMPL_H */
diff --git a/src/scalar_low_impl.h b/src/scalar_low_impl.h
index 7176f0b2ca..d2f2ae81ee 100644
--- a/src/scalar_low_impl.h
+++ b/src/scalar_low_impl.h
@@ -136,4 +136,28 @@ static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_sc
     secp256k1_scalar_inverse(r, x);
 }
 
+SECP256K1_INLINE static void secp256k1_scalar_signed_recoding(uint32_t r[9], const secp256k1_scalar *a, int bits) { /* Fill r[0..8] with a recoding of *a: an odd-adjusted copy of a shifted right by one bit, plus a marker bit at overall bit index (bits - 1). */
+    uint32_t a0 = *a, a1; /* in this implementation the scalar is read directly as an integer; a1 receives the carry */
+    uint32_t mask = (a0 & 1UL) - 1UL; /* all ones when a is even (low bit clear), zero when a is odd */
+    uint64_t c; /* wide sum so the carry out of 32 bits is not lost */
+
+    VERIFY_CHECK(256 < bits && bits <= 288); /* keeps the shift amount (bits - 257) used for r[8] within 0..31 */
+
+    c  = (uint64_t)a0 + (EXHAUSTIVE_TEST_ORDER & mask); /* add EXHAUSTIVE_TEST_ORDER only when a is even */
+    a0 = (uint32_t)c; c >>= 32; /* low 32 bits of the sum */
+    a1 = (uint32_t)c; /* carry out of the low word */
+    VERIFY_CHECK(a0 & 1UL); /* the (conditionally adjusted) value is expected to be odd */
+    VERIFY_CHECK(a1 < 2UL); /* at most one carry bit is expected */
+
+    r[0] = (a0 >> 1) | (a1 << 31); /* shift the 33-bit sum right by one bit; the result fits in a single word */
+    r[1] = 0; /* the remaining value words are zero because the shifted sum fits entirely in r[0] */
+    r[2] = 0;
+    r[3] = 0;
+    r[4] = 0;
+    r[5] = 0;
+    r[6] = 0;
+    r[7] = 0;
+    r[8] = 1UL << (bits - 257); /* word 8 starts at bit 256, so this sets exactly bit (bits - 1) of the recoding */
+}
+
 #endif /* SECP256K1_SCALAR_REPR_IMPL_H */

From e0d7b1229e733b0b608d947ff915586d8c7df7ca Mon Sep 17 00:00:00 2001
From: Peter Dettman <peter.dettman@gmail.com>
Date: Sun, 5 Aug 2018 13:08:07 +0700
Subject: [PATCH 2/8] Support static precomputation with multi-comb

---
 src/ecmult_gen.h      |  2 --
 src/ecmult_gen_impl.h | 10 ++++++++++
 src/gen_context.c     | 43 +++++++++++++++++++++++++++++++++++++------
 3 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/src/ecmult_gen.h b/src/ecmult_gen.h
index 6b456ddda8..b0252574c4 100644
--- a/src/ecmult_gen.h
+++ b/src/ecmult_gen.h
@@ -10,9 +10,7 @@
 #include "scalar.h"
 #include "group.h"
 
-#if !defined(USE_ECMULT_STATIC_PRECOMPUTATION) && !defined(USE_BASIC_CONFIG)
 #define USE_COMB 1
-#endif
 
 #ifdef USE_COMB
 
diff --git a/src/ecmult_gen_impl.h b/src/ecmult_gen_impl.h
index ba8cf07985..c70f5c478a 100644
--- a/src/ecmult_gen_impl.h
+++ b/src/ecmult_gen_impl.h
@@ -151,7 +151,14 @@ static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context *ctx
 #endif
 #else
     (void)prealloc;
+#if USE_COMB
+    ctx->prec = (secp256k1_ge_storage (*)[COMB_BLOCKS][COMB_POINTS])secp256k1_ecmult_gen_ctx_prec;
+#if COMB_OFFSET
+    secp256k1_ge_from_storage(&ctx->offset, &secp256k1_ecmult_gen_ctx_offset);
+#endif
+#else
     ctx->prec = (secp256k1_ge_storage (*)[ECMULT_GEN_PREC_N][ECMULT_GEN_PREC_G])secp256k1_ecmult_static_context;
+#endif
 #endif
     secp256k1_ecmult_gen_blind(ctx, NULL);
 }
@@ -167,6 +174,9 @@ static void secp256k1_ecmult_gen_context_finalize_memcpy(secp256k1_ecmult_gen_co
         /* We cast to void* first to suppress a -Wcast-align warning. */
         dst->prec = (secp256k1_ge_storage (*)[COMB_BLOCKS][COMB_POINTS])(void*)((unsigned char*)dst + ((unsigned char*)src->prec - (unsigned char*)src));
     }
+#if COMB_OFFSET
+    dst->offset = src->offset;
+#endif
 #else
     if (src->prec != NULL) {
         dst->prec = (secp256k1_ge_storage (*)[ECMULT_GEN_PREC_N][ECMULT_GEN_PREC_G])(void*)((unsigned char*)dst + ((unsigned char*)src->prec - (unsigned char*)src));
diff --git a/src/gen_context.c b/src/gen_context.c
index 05e7dee175..403e615f73 100644
--- a/src/gen_context.c
+++ b/src/gen_context.c
@@ -37,6 +37,18 @@ int main(int argc, char **argv) {
     int inner;
     int outer;
     FILE* fp;
+    const char *SC_FORMAT = "    SC(%uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu)";
+
+#if USE_COMB
+    const int blocks = COMB_BLOCKS;
+    const int points = COMB_POINTS;
+#if COMB_OFFSET
+    secp256k1_ge_storage offset;
+#endif
+#else
+    const int blocks = ECMULT_GEN_PREC_N;
+    const int points = ECMULT_GEN_PREC_G;
+#endif
 
     (void)argc;
     (void)argv;
@@ -51,26 +63,45 @@ int main(int argc, char **argv) {
     fprintf(fp, "#define SECP256K1_ECMULT_STATIC_CONTEXT_H\n");
     fprintf(fp, "#include \"src/group.h\"\n");
     fprintf(fp, "#define SC SECP256K1_GE_STORAGE_CONST\n");
+    fprintf(fp, "#if USE_COMB != %i\n", USE_COMB);
+    fprintf(fp, "   #error configuration mismatch, invalid USE_COMB. Try deleting ecmult_static_context.h before the build.\n");
+    fprintf(fp, "#endif\n");
+#if USE_COMB
+    fprintf(fp, "#if COMB_BLOCKS != %i || COMB_TEETH != %i || COMB_SPACING != %i\n", COMB_BLOCKS, COMB_TEETH, COMB_SPACING);
+    fprintf(fp, "   #error configuration mismatch, invalid COMB_BLOCKS, COMB_TEETH, or COMB_SPACING. Try deleting ecmult_static_context.h before the build.\n");
+    fprintf(fp, "#endif\n");
+#else
     fprintf(fp, "#if ECMULT_GEN_PREC_N != %d || ECMULT_GEN_PREC_G != %d\n", ECMULT_GEN_PREC_N, ECMULT_GEN_PREC_G);
     fprintf(fp, "   #error configuration mismatch, invalid ECMULT_GEN_PREC_N, ECMULT_GEN_PREC_G. Try deleting ecmult_static_context.h before the build.\n");
     fprintf(fp, "#endif\n");
-    fprintf(fp, "static const secp256k1_ge_storage secp256k1_ecmult_static_context[ECMULT_GEN_PREC_N][ECMULT_GEN_PREC_G] = {\n");
+#endif
 
     base = checked_malloc(&default_error_callback, SECP256K1_ECMULT_GEN_CONTEXT_PREALLOCATED_SIZE);
     prealloc = base;
     secp256k1_ecmult_gen_context_init(&ctx);
     secp256k1_ecmult_gen_context_build(&ctx, &prealloc);
-    for(outer = 0; outer != ECMULT_GEN_PREC_N; outer++) {
+
+#if USE_COMB
+#if COMB_OFFSET
+    secp256k1_ge_to_storage(&offset, &ctx.offset);
+    fprintf(fp, "static const secp256k1_ge_storage secp256k1_ecmult_gen_ctx_offset =\n");
+    fprintf(fp, SC_FORMAT, SECP256K1_GE_STORAGE_CONST_GET(offset));
+    fprintf(fp, ";\n");
+#endif
+#endif
+
+    fprintf(fp, "static const secp256k1_ge_storage secp256k1_ecmult_gen_ctx_prec[%i][%i] = {\n", blocks, points);
+    for(outer = 0; outer != blocks; outer++) {
         fprintf(fp,"{\n");
-        for(inner = 0; inner != ECMULT_GEN_PREC_G; inner++) {
-            fprintf(fp,"    SC(%uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu)", SECP256K1_GE_STORAGE_CONST_GET((*ctx.prec)[outer][inner]));
-            if (inner != ECMULT_GEN_PREC_G - 1) {
+        for(inner = 0; inner != points; inner++) {
+            fprintf(fp, SC_FORMAT, SECP256K1_GE_STORAGE_CONST_GET((*ctx.prec)[outer][inner]));
+            if (inner != (points - 1)) {
                 fprintf(fp,",\n");
             } else {
                 fprintf(fp,"\n");
             }
         }
-        if (outer != ECMULT_GEN_PREC_N - 1) {
+        if (outer != (blocks - 1)) {
             fprintf(fp,"},\n");
         } else {
             fprintf(fp,"}\n");

From e81a78eba8248a87889fe4fada15b7e589f01d4c Mon Sep 17 00:00:00 2001
From: Peter Dettman <peter.dettman@gmail.com>
Date: Sun, 23 Sep 2018 12:10:19 +0700
Subject: [PATCH 3/8] Add comments for context offset

---
 src/ecmult_gen.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/ecmult_gen.h b/src/ecmult_gen.h
index b0252574c4..4066c831d4 100644
--- a/src/ecmult_gen.h
+++ b/src/ecmult_gen.h
@@ -76,6 +76,11 @@ typedef struct {
      */
     secp256k1_ge_storage (*prec)[COMB_BLOCKS][COMB_POINTS];
 #if COMB_OFFSET
+    /* Signed recoding of a 256-bit scalar must be at least 257 bits, with the top bit always 1. We
+     * support a 256-bit comb over a 257-bit recoding by pre-adding an 'offset' value to the context's
+     * 'initial' value, to account for the high 1 bit. Note that the 'offset' is calculated to allow
+     * for the (COMB_SPACING - 1) doublings in the _ecmult_gen ladder.
+     */
     secp256k1_ge offset;
 #endif
 #else

From 0f67709e07e9f24a6c498489c432f6e3607a8501 Mon Sep 17 00:00:00 2001
From: Peter Dettman <peter.dettman@gmail.com>
Date: Mon, 24 Sep 2018 20:06:34 +0700
Subject: [PATCH 4/8] Reduce side-channels from single-bit reads

---
 src/ecmult_gen_impl.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/ecmult_gen_impl.h b/src/ecmult_gen_impl.h
index c70f5c478a..49af00e010 100644
--- a/src/ecmult_gen_impl.h
+++ b/src/ecmult_gen_impl.h
@@ -224,13 +224,14 @@ static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp25
         bit_pos = comb_off;
         for (block = 0; block < COMB_BLOCKS; ++block) {
 #if COMB_GROUPED
-            bits = (recoded[bit_pos >> 5] >> (bit_pos & 0x1F)) & ((1 << COMB_TEETH) - 1);
-            bit_pos += COMB_SPACING * COMB_TEETH;
+            bits = recoded[bit_pos >> 5] >> (bit_pos & 0x1F);
+            bit_pos += COMB_TEETH;
 #else
             bits = 0;
             for (tooth = 0; tooth < COMB_TEETH; ++tooth) {
-                bit = (recoded[bit_pos >> 5] >> (bit_pos & 0x1F)) & 1;
-                bits |= bit << tooth;
+                bit = recoded[bit_pos >> 5] >> (bit_pos & 0x1F);
+                bits &= ~(1 << tooth);
+                bits ^= bit << tooth;
                 bit_pos += COMB_SPACING;
             }
 #endif

From ad91bd9bd4996346ef96eec671f5b0b6b047d65f Mon Sep 17 00:00:00 2001
From: Peter Dettman <peter.dettman@gmail.com>
Date: Mon, 24 Sep 2018 20:18:55 +0700
Subject: [PATCH 5/8] Avoid unnecessary doublings in precomputation

---
 src/ecmult_gen_impl.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/ecmult_gen_impl.h b/src/ecmult_gen_impl.h
index 49af00e010..c473774ac5 100644
--- a/src/ecmult_gen_impl.h
+++ b/src/ecmult_gen_impl.h
@@ -64,8 +64,10 @@ static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context *ctx
                 secp256k1_gej_add_var(&sum, &sum, &u, NULL);
                 secp256k1_gej_double_var(&u, &u, NULL);
                 ds[tooth] = u;
-                for (spacing = 1; spacing < COMB_SPACING; ++spacing) {
-                    secp256k1_gej_double_var(&u, &u, NULL);
+                if (block + tooth != COMB_BLOCKS + COMB_TEETH - 2) {
+                    for (spacing = 1; spacing < COMB_SPACING; ++spacing) {
+                        secp256k1_gej_double_var(&u, &u, NULL);
+                    }
                 }
             }
             secp256k1_gej_neg(&vs[vs_pos++], &sum);

From 1b45383449c32bb3d37e49c5667f5e5d272c2615 Mon Sep 17 00:00:00 2001
From: Peter Dettman <peter.dettman@gmail.com>
Date: Mon, 24 Sep 2018 20:35:31 +0700
Subject: [PATCH 6/8] Add preprocessor guards on comb constants

---
 src/ecmult_gen.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/src/ecmult_gen.h b/src/ecmult_gen.h
index 4066c831d4..890cfce0c6 100644
--- a/src/ecmult_gen.h
+++ b/src/ecmult_gen.h
@@ -49,6 +49,16 @@
 
 #endif
 
+#if !(1 <= COMB_BLOCKS && COMB_BLOCKS <= 256)
+#  error "COMB_BLOCKS must be in the range [1, 256]"
+#endif
+#if !(1 <= COMB_TEETH && COMB_TEETH <= 8)
+#  error "COMB_TEETH must be in the range [1, 8]"
+#endif
+#if !(1 <= COMB_SPACING && COMB_SPACING <= 256)
+#  error "COMB_SPACING must be in the range [1, 256]"
+#endif
+
 /* The remaining COMB_* parameters are derived values, don't modify these. */
 #define COMB_BITS (COMB_BLOCKS * COMB_TEETH * COMB_SPACING)
 #define COMB_GROUPED ((COMB_SPACING == 1) && ((32 % COMB_TEETH) == 0))
@@ -57,6 +67,10 @@
 #define COMB_POINTS_TOTAL (COMB_BLOCKS * COMB_POINTS)
 #define COMB_MASK (COMB_POINTS - 1)
 
+#if !(256 <= COMB_BITS && COMB_BITS <= 288)
+#  error "COMB_BITS must be in the range [256, 288]"
+#endif
+
 #else
 
 #if ECMULT_GEN_PREC_BITS != 2 && ECMULT_GEN_PREC_BITS != 4 && ECMULT_GEN_PREC_BITS != 8

From 63d2cf53f02de4b692b2ee96d289492559471f8f Mon Sep 17 00:00:00 2001
From: Pieter Wuille <pieter@wuille.net>
Date: Fri, 29 Jan 2021 19:45:17 -0800
Subject: [PATCH 7/8] Remove old ecmult_gen code and always use multi-comb

---
 README.md             |   4 +-
 src/ecmult_gen.h      |  31 -----------
 src/ecmult_gen_impl.h | 121 ++++--------------------------------------
 src/gen_context.c     |  16 ------
 4 files changed, 11 insertions(+), 161 deletions(-)

diff --git a/README.md b/README.md
index 197a56fff8..2acc4f9f96 100644
--- a/README.md
+++ b/README.md
@@ -49,13 +49,11 @@ Implementation details
   * Use a much larger window for multiples of G, using precomputed multiples.
   * Use Shamir's trick to do the multiplication with the public key and the generator simultaneously.
   * Use secp256k1's efficiently-computable endomorphism to split the P multiplicand into 2 half-sized ones.
-* Point multiplication for signing
-  * Use a precomputed table of multiples of powers of 16 multiplied with the generator, so general multiplication becomes a series of additions.
+* Point multiplication for signing using Mike Hamburg's signed-digit multi-comb method (see https://eprint.iacr.org/2012/309, section 3.3)
   * Intended to be completely free of timing sidechannels for secret-key operations (on reasonable hardware/toolchains)
     * Access the table with branch-free conditional moves so memory access is uniform.
     * No data-dependent branches
   * Optional runtime blinding which attempts to frustrate differential power analysis.
-  * The precomputed tables add and eventually subtract points for which no known scalar (secret key) is known, preventing even an attacker with control over the secret key used to control the data internally.
 
 Build steps
 -----------
diff --git a/src/ecmult_gen.h b/src/ecmult_gen.h
index 890cfce0c6..e9c60ba928 100644
--- a/src/ecmult_gen.h
+++ b/src/ecmult_gen.h
@@ -10,10 +10,6 @@
 #include "scalar.h"
 #include "group.h"
 
-#define USE_COMB 1
-
-#ifdef USE_COMB
-
 #if defined(EXHAUSTIVE_TEST_ORDER)
 
   /* We need to control these values for exhaustive tests because
@@ -71,19 +67,7 @@
 #  error "COMB_BITS must be in the range [256, 288]"
 #endif
 
-#else
-
-#if ECMULT_GEN_PREC_BITS != 2 && ECMULT_GEN_PREC_BITS != 4 && ECMULT_GEN_PREC_BITS != 8
-#  error "Set ECMULT_GEN_PREC_BITS to 2, 4 or 8."
-#endif
-#define ECMULT_GEN_PREC_B ECMULT_GEN_PREC_BITS
-#define ECMULT_GEN_PREC_G (1 << ECMULT_GEN_PREC_B)
-#define ECMULT_GEN_PREC_N (256 / ECMULT_GEN_PREC_B)
-
-#endif
-
 typedef struct {
-#ifdef USE_COMB
     /* Precomputation data for the signed-digit multi-comb algorithm as described in section 3.3 of:
      *     "Fast and compact elliptic-curve cryptography", Mike Hamburg
      *         (https://eprint.iacr.org/2012/309)
@@ -96,21 +80,6 @@ typedef struct {
      * for the (COMB_SPACING - 1) doublings in the _ecmult_gen ladder.
      */
     secp256k1_ge offset;
-#endif
-#else
-    /* For accelerating the computation of a*G:
-     * To harden against timing attacks, use the following mechanism:
-     * * Break up the multiplicand into groups of PREC_B bits, called n_0, n_1, n_2, ..., n_(PREC_N-1).
-     * * Compute sum(n_i * (PREC_G)^i * G + U_i, i=0 ... PREC_N-1), where:
-     *   * U_i = U * 2^i, for i=0 ... PREC_N-2
-     *   * U_i = U * (1-2^(PREC_N-1)), for i=PREC_N-1
-     *   where U is a point with no known corresponding scalar. Note that sum(U_i, i=0 ... PREC_N-1) = 0.
-     * For each i, and each of the PREC_G possible values of n_i, (n_i * (PREC_G)^i * G + U_i) is
-     * precomputed (call it prec(i, n_i)). The formula now becomes sum(prec(i, n_i), i=0 ... PREC_N-1).
-     * None of the resulting prec group elements have a known scalar, and neither do any of
-     * the intermediate sums while computing a*G.
-     */
-    secp256k1_ge_storage (*prec)[ECMULT_GEN_PREC_N][ECMULT_GEN_PREC_G]; /* prec[j][i] = (PREC_G)^j * i * G + U_i */
 #endif
     secp256k1_scalar blind;
     secp256k1_gej initial;
diff --git a/src/ecmult_gen_impl.h b/src/ecmult_gen_impl.h
index c473774ac5..728a8945ea 100644
--- a/src/ecmult_gen_impl.h
+++ b/src/ecmult_gen_impl.h
@@ -28,16 +28,9 @@ static void secp256k1_ecmult_gen_context_init(secp256k1_ecmult_gen_context *ctx)
 
 static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context *ctx, void **prealloc) {
 #ifndef USE_ECMULT_STATIC_PRECOMPUTATION
-#ifdef USE_COMB
     secp256k1_ge prec[COMB_POINTS_TOTAL + COMB_OFFSET];
     secp256k1_gej u, sum;
     int block, index, spacing, stride, tooth;
-#else
-    secp256k1_ge prec[ECMULT_GEN_PREC_N * ECMULT_GEN_PREC_G];
-    secp256k1_gej gj;
-    secp256k1_gej nums_gej;
-    int i, j;
-#endif
     size_t const prealloc_size = SECP256K1_ECMULT_GEN_CONTEXT_PREALLOCATED_SIZE;
     void* const base = *prealloc;
 #endif
@@ -46,7 +39,6 @@ static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context *ctx
         return;
     }
 #ifndef USE_ECMULT_STATIC_PRECOMPUTATION
-#ifdef USE_COMB
     ctx->prec = (secp256k1_ge_storage (*)[COMB_BLOCKS][COMB_POINTS])manual_alloc(prealloc, prealloc_size, base, prealloc_size);
 
     /* get the generator */
@@ -95,72 +87,12 @@ static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context *ctx
     ctx->offset = prec[COMB_POINTS_TOTAL];
 #endif
 
-#else
-    ctx->prec = (secp256k1_ge_storage (*)[ECMULT_GEN_PREC_N][ECMULT_GEN_PREC_G])manual_alloc(prealloc, prealloc_size, base, prealloc_size);
-
-    /* get the generator */
-    secp256k1_gej_set_ge(&gj, &secp256k1_ge_const_g);
-
-    /* Construct a group element with no known corresponding scalar (nothing up my sleeve). */
-    {
-        static const unsigned char nums_b32[33] = "The scalar for this x is unknown";
-        secp256k1_fe nums_x;
-        secp256k1_ge nums_ge;
-        int r;
-        r = secp256k1_fe_set_b32(&nums_x, nums_b32);
-        (void)r;
-        VERIFY_CHECK(r);
-        r = secp256k1_ge_set_xo_var(&nums_ge, &nums_x, 0);
-        (void)r;
-        VERIFY_CHECK(r);
-        secp256k1_gej_set_ge(&nums_gej, &nums_ge);
-        /* Add G to make the bits in x uniformly distributed. */
-        secp256k1_gej_add_ge_var(&nums_gej, &nums_gej, &secp256k1_ge_const_g, NULL);
-    }
-
-    /* compute prec. */
-    {
-        secp256k1_gej precj[ECMULT_GEN_PREC_N * ECMULT_GEN_PREC_G]; /* Jacobian versions of prec. */
-        secp256k1_gej gbase;
-        secp256k1_gej numsbase;
-        gbase = gj; /* PREC_G^j * G */
-        numsbase = nums_gej; /* 2^j * nums. */
-        for (j = 0; j < ECMULT_GEN_PREC_N; j++) {
-            /* Set precj[j*PREC_G .. j*PREC_G+(PREC_G-1)] to (numsbase, numsbase + gbase, ..., numsbase + (PREC_G-1)*gbase). */
-            precj[j*ECMULT_GEN_PREC_G] = numsbase;
-            for (i = 1; i < ECMULT_GEN_PREC_G; i++) {
-                secp256k1_gej_add_var(&precj[j*ECMULT_GEN_PREC_G + i], &precj[j*ECMULT_GEN_PREC_G + i - 1], &gbase, NULL);
-            }
-            /* Multiply gbase by PREC_G. */
-            for (i = 0; i < ECMULT_GEN_PREC_B; i++) {
-                secp256k1_gej_double_var(&gbase, &gbase, NULL);
-            }
-            /* Multiply numbase by 2. */
-            secp256k1_gej_double_var(&numsbase, &numsbase, NULL);
-            if (j == ECMULT_GEN_PREC_N - 2) {
-                /* In the last iteration, numsbase is (1 - 2^j) * nums instead. */
-                secp256k1_gej_neg(&numsbase, &numsbase);
-                secp256k1_gej_add_var(&numsbase, &numsbase, &nums_gej, NULL);
-            }
-        }
-        secp256k1_ge_set_all_gej_var(prec, precj, ECMULT_GEN_PREC_N * ECMULT_GEN_PREC_G);
-    }
-    for (j = 0; j < ECMULT_GEN_PREC_N; j++) {
-        for (i = 0; i < ECMULT_GEN_PREC_G; i++) {
-            secp256k1_ge_to_storage(&(*ctx->prec)[j][i], &prec[j*ECMULT_GEN_PREC_G + i]);
-        }
-    }
-#endif
 #else
     (void)prealloc;
-#if USE_COMB
     ctx->prec = (secp256k1_ge_storage (*)[COMB_BLOCKS][COMB_POINTS])secp256k1_ecmult_gen_ctx_prec;
 #if COMB_OFFSET
     secp256k1_ge_from_storage(&ctx->offset, &secp256k1_ecmult_gen_ctx_offset);
 #endif
-#else
-    ctx->prec = (secp256k1_ge_storage (*)[ECMULT_GEN_PREC_N][ECMULT_GEN_PREC_G])secp256k1_ecmult_static_context;
-#endif
 #endif
     secp256k1_ecmult_gen_blind(ctx, NULL);
 }
@@ -171,7 +103,6 @@ static int secp256k1_ecmult_gen_context_is_built(const secp256k1_ecmult_gen_cont
 
 static void secp256k1_ecmult_gen_context_finalize_memcpy(secp256k1_ecmult_gen_context *dst, const secp256k1_ecmult_gen_context *src) {
 #ifndef USE_ECMULT_STATIC_PRECOMPUTATION
-#ifdef USE_COMB
     if (src->prec != NULL) {
         /* We cast to void* first to suppress a -Wcast-align warning. */
         dst->prec = (secp256k1_ge_storage (*)[COMB_BLOCKS][COMB_POINTS])(void*)((unsigned char*)dst + ((unsigned char*)src->prec - (unsigned char*)src));
@@ -179,20 +110,13 @@ static void secp256k1_ecmult_gen_context_finalize_memcpy(secp256k1_ecmult_gen_co
 #if COMB_OFFSET
     dst->offset = src->offset;
 #endif
-#else
-    if (src->prec != NULL) {
-        dst->prec = (secp256k1_ge_storage (*)[ECMULT_GEN_PREC_N][ECMULT_GEN_PREC_G])(void*)((unsigned char*)dst + ((unsigned char*)src->prec - (unsigned char*)src));
-    }
-#endif
 #endif
     (void)dst, (void)src;
 }
 
 static void secp256k1_ecmult_gen_context_clear(secp256k1_ecmult_gen_context *ctx) {
-#ifdef USE_COMB
 #if COMB_OFFSET
     secp256k1_ge_clear(&ctx->offset);
-#endif
 #endif
     secp256k1_scalar_clear(&ctx->blind);
     secp256k1_gej_clear(&ctx->initial);
@@ -205,8 +129,6 @@ static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp25
     secp256k1_scalar gnb;
     uint32_t bits;
 
-#ifdef USE_COMB
-
     uint32_t abs, bit_pos, block, comb_off, index, sign;
 #if !COMB_GROUPED
     uint32_t bit, tooth;
@@ -245,6 +167,16 @@ static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp25
             VERIFY_CHECK(abs < COMB_POINTS);
 
             for (index = 0; index < COMB_POINTS; ++index) {
+                /** This uses a conditional move to avoid any secret data in array indexes.
+                 *   _Any_ use of secret indexes has been demonstrated to result in timing
+                 *   sidechannels, even when the cache-line access patterns are uniform.
+                 *  See also:
+                 *   "A word of warning", CHES 2013 Rump Session, by Daniel J. Bernstein and Peter Schwabe
+                 *    (https://cryptojedi.org/peter/data/chesrump-20130822.pdf) and
+                 *   "Cache Attacks and Countermeasures: the Case of AES", RSA 2006,
+                 *    by Dag Arne Osvik, Adi Shamir, and Eran Tromer
+                 *    (https://www.tau.ac.il/~tromer/papers/cache.pdf)
+                 */
                 secp256k1_ge_storage_cmov(&adds, &(*ctx->prec)[block][index], index == abs);
             }
 
@@ -266,33 +198,6 @@ static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp25
     memset(recoded, 0, sizeof(recoded));
     abs = 0;
     sign = 0;
-
-#else
-    int i, j;
-    memset(&adds, 0, sizeof(adds));
-    *r = ctx->initial;
-    /* Blind scalar/point multiplication by computing (n-b)G + bG instead of nG. */
-    secp256k1_scalar_add(&gnb, gn, &ctx->blind);
-    add.infinity = 0;
-    for (j = 0; j < ECMULT_GEN_PREC_N; j++) {
-        bits = secp256k1_scalar_get_bits(&gnb, j * ECMULT_GEN_PREC_B, ECMULT_GEN_PREC_B);
-        for (i = 0; i < ECMULT_GEN_PREC_G; i++) {
-            /** This uses a conditional move to avoid any secret data in array indexes.
-             *   _Any_ use of secret indexes has been demonstrated to result in timing
-             *   sidechannels, even when the cache-line access patterns are uniform.
-             *  See also:
-             *   "A word of warning", CHES 2013 Rump Session, by Daniel J. Bernstein and Peter Schwabe
-             *    (https://cryptojedi.org/peter/data/chesrump-20130822.pdf) and
-             *   "Cache Attacks and Countermeasures: the Case of AES", RSA 2006,
-             *    by Dag Arne Osvik, Adi Shamir, and Eran Tromer
-             *    (https://www.tau.ac.il/~tromer/papers/cache.pdf)
-             */
-            secp256k1_ge_storage_cmov(&adds, &(*ctx->prec)[j][i], (uint32_t)i == bits);
-        }
-        secp256k1_ge_from_storage(&add, &adds);
-        secp256k1_gej_add_ge(r, r, &add);
-    }
-#endif
     bits = 0;
     secp256k1_ge_clear(&add);
     memset(&adds, 0, sizeof(adds));
@@ -301,9 +206,7 @@ static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp25
 
 /* Setup blinding values for secp256k1_ecmult_gen. */
 static void secp256k1_ecmult_gen_blind(secp256k1_ecmult_gen_context *ctx, const unsigned char *seed32) {
-#ifdef USE_COMB
     int spacing;
-#endif
     secp256k1_scalar b;
     secp256k1_gej gb;
     secp256k1_fe s;
@@ -316,13 +219,11 @@ static void secp256k1_ecmult_gen_blind(secp256k1_ecmult_gen_context *ctx, const
         secp256k1_gej_set_ge(&ctx->initial, &secp256k1_ge_const_g);
         secp256k1_gej_neg(&ctx->initial, &ctx->initial);
         secp256k1_scalar_set_int(&ctx->blind, 1);
-#ifdef USE_COMB
         for (spacing = 1; spacing < COMB_SPACING; ++spacing) {
             secp256k1_scalar_add(&ctx->blind, &ctx->blind, &ctx->blind);
         }
 #if COMB_OFFSET
         secp256k1_gej_add_ge(&ctx->initial, &ctx->initial, &ctx->offset);
-#endif
 #endif
     }
     /* The prior blinding value (if not reset) is chained forward by including it in the hash. */
@@ -355,13 +256,11 @@ static void secp256k1_ecmult_gen_blind(secp256k1_ecmult_gen_context *ctx, const
     secp256k1_scalar_negate(&b, &b);
     ctx->blind = b;
     ctx->initial = gb;
-#ifdef USE_COMB
     for (spacing = 1; spacing < COMB_SPACING; ++spacing) {
         secp256k1_scalar_add(&ctx->blind, &ctx->blind, &ctx->blind);
     }
 #if COMB_OFFSET
     secp256k1_gej_add_ge(&ctx->initial, &ctx->initial, &ctx->offset);
-#endif
 #endif
     secp256k1_scalar_clear(&b);
     secp256k1_gej_clear(&gb);
diff --git a/src/gen_context.c b/src/gen_context.c
index 403e615f73..6abcc9df2b 100644
--- a/src/gen_context.c
+++ b/src/gen_context.c
@@ -39,16 +39,11 @@ int main(int argc, char **argv) {
     FILE* fp;
     const char *SC_FORMAT = "    SC(%uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu, %uu)";
 
-#if USE_COMB
     const int blocks = COMB_BLOCKS;
     const int points = COMB_POINTS;
 #if COMB_OFFSET
     secp256k1_ge_storage offset;
 #endif
-#else
-    const int blocks = ECMULT_GEN_PREC_N;
-    const int points = ECMULT_GEN_PREC_G;
-#endif
 
     (void)argc;
     (void)argv;
@@ -63,31 +58,20 @@ int main(int argc, char **argv) {
     fprintf(fp, "#define SECP256K1_ECMULT_STATIC_CONTEXT_H\n");
     fprintf(fp, "#include \"src/group.h\"\n");
     fprintf(fp, "#define SC SECP256K1_GE_STORAGE_CONST\n");
-    fprintf(fp, "#if USE_COMB != %i\n", USE_COMB);
-    fprintf(fp, "   #error configuration mismatch, invalid USE_COMB. Try deleting ecmult_static_context.h before the build.\n");
-    fprintf(fp, "#endif\n");
-#if USE_COMB
     fprintf(fp, "#if COMB_BLOCKS != %i || COMB_TEETH != %i || COMB_SPACING != %i\n", COMB_BLOCKS, COMB_TEETH, COMB_SPACING);
     fprintf(fp, "   #error configuration mismatch, invalid COMB_BLOCKS, COMB_TEETH, or COMB_SPACING. Try deleting ecmult_static_context.h before the build.\n");
     fprintf(fp, "#endif\n");
-#else
-    fprintf(fp, "#if ECMULT_GEN_PREC_N != %d || ECMULT_GEN_PREC_G != %d\n", ECMULT_GEN_PREC_N, ECMULT_GEN_PREC_G);
-    fprintf(fp, "   #error configuration mismatch, invalid ECMULT_GEN_PREC_N, ECMULT_GEN_PREC_G. Try deleting ecmult_static_context.h before the build.\n");
-    fprintf(fp, "#endif\n");
-#endif
 
     base = checked_malloc(&default_error_callback, SECP256K1_ECMULT_GEN_CONTEXT_PREALLOCATED_SIZE);
     prealloc = base;
     secp256k1_ecmult_gen_context_init(&ctx);
     secp256k1_ecmult_gen_context_build(&ctx, &prealloc);
 
-#if USE_COMB
 #if COMB_OFFSET
     secp256k1_ge_to_storage(&offset, &ctx.offset);
     fprintf(fp, "static const secp256k1_ge_storage secp256k1_ecmult_gen_ctx_offset =\n");
     fprintf(fp, SC_FORMAT, SECP256K1_GE_STORAGE_CONST_GET(offset));
     fprintf(fp, ";\n");
-#endif
 #endif
 
     fprintf(fp, "static const secp256k1_ge_storage secp256k1_ecmult_gen_ctx_prec[%i][%i] = {\n", blocks, points);

From 937a8c270fd04af91b0a1bdeee8ff1f75ec539a1 Mon Sep 17 00:00:00 2001
From: Pieter Wuille <pieter.wuille@gmail.com>
Date: Mon, 11 Nov 2019 14:35:23 -0800
Subject: [PATCH 8/8] Make COMB_BLOCKS and COMB_TEETH configurable, and test in
 Cirrus

---
 .cirrus.yml      |  8 +++---
 ci/cirrus.sh     |  2 +-
 configure.ac     | 63 +++++++++++++++++++++++++++++++++++++-----------
 src/ecmult_gen.h | 20 ++++++++++++---
 4 files changed, 72 insertions(+), 21 deletions(-)

diff --git a/.cirrus.yml b/.cirrus.yml
index 506a860336..7a16cc698d 100644
--- a/.cirrus.yml
+++ b/.cirrus.yml
@@ -1,7 +1,8 @@
 env:
   WIDEMUL: auto
   STATICPRECOMPUTATION: yes
-  ECMULTGENPRECISION: auto
+  ECMULTGENBLOCKS: auto
+  ECMULTGENTEETH: auto
   ASM: no
   BUILD: check
   WITH_VALGRIND: yes
@@ -73,8 +74,9 @@ task:
         EXPERIMENTAL: yes
         SCHNORRSIG: yes
         CTIMETEST: no
-    - env: { ECMULTGENPRECISION: 2 }
-    - env: { ECMULTGENPRECISION: 8 }
+    - env: { ECMULTGENBLOCKS: 256, ECMULTGENTEETH: 1 }
+    - env: { ECMULTGENBLOCKS: 43, ECMULTGENTEETH: 6, STATICPRECOMPUTATION: no }
+    - env: { ECMULTGENBLOCKS: 1, ECMULTGENTEETH: 1, STATICPRECOMPUTATION: no }
     - env:
         RUN_VALGRIND: yes
         ASM: x86_64
diff --git a/ci/cirrus.sh b/ci/cirrus.sh
index f26ca98d1d..679dbdfebf 100755
--- a/ci/cirrus.sh
+++ b/ci/cirrus.sh
@@ -15,7 +15,7 @@ valgrind --version || true
 ./configure \
     --enable-experimental="$EXPERIMENTAL" \
     --with-test-override-wide-multiply="$WIDEMUL" --with-asm="$ASM" \
-    --enable-ecmult-static-precomputation="$STATICPRECOMPUTATION" --with-ecmult-gen-precision="$ECMULTGENPRECISION" \
+    --enable-ecmult-static-precomputation="$STATICPRECOMPUTATION" --with-ecmult-gen-blocks="$ECMULTGENBLOCKS" --with-ecmult-gen-teeth="$ECMULTGENTEETH" \
     --enable-module-ecdh="$ECDH" --enable-module-recovery="$RECOVERY" \
     --enable-module-schnorrsig="$SCHNORRSIG" \
     --with-valgrind="$WITH_VALGRIND" \
diff --git a/configure.ac b/configure.ac
index e84005edf4..236109fd83 100644
--- a/configure.ac
+++ b/configure.ac
@@ -170,13 +170,21 @@ AC_ARG_WITH([ecmult-window], [AS_HELP_STRING([--with-ecmult-window=SIZE|auto],
 )],
 [req_ecmult_window=$withval], [req_ecmult_window=auto])
 
-AC_ARG_WITH([ecmult-gen-precision], [AS_HELP_STRING([--with-ecmult-gen-precision=2|4|8|auto],
-[Precision bits to tune the precomputed table size for signing.]
-[The size of the table is 32kB for 2 bits, 64kB for 4 bits, 512kB for 8 bits of precision.]
-[A larger table size usually results in possible faster signing.]
+AC_ARG_WITH([ecmult-gen-blocks], [AS_HELP_STRING([--with-ecmult-gen-blocks=BLOCKS|auto],
+[The number of blocks to use in the multi-comb multiplication algorithm, in the range [1..256].]
+[Larger values result in possibly better performance at the cost of a linearly larger precomputed table.]
+[There must exist a multiple of BLOCKS*TEETH that is between 256 and 288, inclusive.]
 ["auto" is a reasonable setting for desktop machines (currently 4). [default=auto]]
 )],
-[req_ecmult_gen_precision=$withval], [req_ecmult_gen_precision=auto])
+[req_ecmult_gen_blocks=$withval], [req_ecmult_gen_blocks=auto])
+
+AC_ARG_WITH([ecmult-gen-teeth], [AS_HELP_STRING([--with-ecmult-gen-teeth=TEETH|auto],
+[The number of teeth to use in the multi-comb multiplication algorithm, in the range [1..8].]
+[Larger values result in possibly better performance at the cost of an exponentially larger precomputed table.]
+[There must exist a multiple of BLOCKS*TEETH that is between 256 and 288, inclusive.]
+["auto" is a reasonable setting for desktop machines (currently 5). [default=auto]]
+)],
+[req_ecmult_gen_teeth=$withval], [req_ecmult_gen_teeth=auto])
 
 AC_ARG_WITH([valgrind], [AS_HELP_STRING([--with-valgrind=yes|no|auto],
 [Build with extra checks for running inside Valgrind [default=auto]]
@@ -296,19 +304,45 @@ case $set_ecmult_window in
   ;;
 esac
 
-# Set ecmult gen precision
-if test x"$req_ecmult_gen_precision" = x"auto"; then
-  set_ecmult_gen_precision=4
+# Set ecmult gen blocks
+if test x"$req_ecmult_gen_blocks" = x"auto"; then
+  set_ecmult_gen_blocks=4
 else
-  set_ecmult_gen_precision=$req_ecmult_gen_precision
+  set_ecmult_gen_blocks=$req_ecmult_gen_blocks
 fi
+error_gen_blocks=['option to --with-ecmult-gen-blocks not an integer in range [1..256] or "auto"']
+case $set_ecmult_gen_blocks in
+''|*[[!0-9]]*)
+  # no valid integer
+  AC_MSG_ERROR($error_gen_blocks)
+  ;;
+*)
+  if test "$set_ecmult_gen_blocks" -lt 1 -o "$set_ecmult_gen_blocks" -gt 256 ; then
+    # not in range
+    AC_MSG_ERROR($error_gen_blocks)
+  fi
+  AC_DEFINE_UNQUOTED(COMB_BLOCKS, $set_ecmult_gen_blocks, [Set number of blocks in ecmult_gen precomputation])
+  ;;
+esac
 
-case $set_ecmult_gen_precision in
-2|4|8)
-  AC_DEFINE_UNQUOTED(ECMULT_GEN_PREC_BITS, $set_ecmult_gen_precision, [Set ecmult gen precision bits])
+# Set ecmult gen teeth
+if test x"$req_ecmult_gen_teeth" = x"auto"; then
+  set_ecmult_gen_teeth=5
+else
+  set_ecmult_gen_teeth=$req_ecmult_gen_teeth
+fi
+error_gen_teeth=['option to --with-ecmult-gen-teeth not an integer in range [1..8] or "auto"']
+case $set_ecmult_gen_teeth in
+''|*[[!0-9]]*)
+  # no valid integer
+  AC_MSG_ERROR($error_gen_teeth)
   ;;
 *)
-  AC_MSG_ERROR(['ecmult gen precision not 2, 4, 8 or "auto"'])
+  if test "$set_ecmult_gen_teeth" -lt 1 -o "$set_ecmult_gen_teeth" -gt 8 ; then
+    # not in range
+    AC_MSG_ERROR($error_gen_teeth)
+  fi
+  AC_DEFINE_UNQUOTED(COMB_TEETH, $set_ecmult_gen_teeth, [Set number of teeth in ecmult_gen precomputation])
   ;;
 esac
 
@@ -515,7 +549,8 @@ echo "  module schnorrsig       = $enable_module_schnorrsig"
 echo
 echo "  asm                     = $set_asm"
 echo "  ecmult window size      = $set_ecmult_window"
-echo "  ecmult gen prec. bits   = $set_ecmult_gen_precision"
+echo "  ecmult gen blocks       = $set_ecmult_gen_blocks"
+echo "  ecmult gen teeth        = $set_ecmult_gen_teeth"
 # Hide test-only options unless they're used.
 if test x"$set_widemul" != xauto; then
 echo "  wide multiplication     = $set_widemul"
diff --git a/src/ecmult_gen.h b/src/ecmult_gen.h
index e9c60ba928..c9445751e3 100644
--- a/src/ecmult_gen.h
+++ b/src/ecmult_gen.h
@@ -10,11 +10,19 @@
 #include "scalar.h"
 #include "group.h"
 
+#if defined HAVE_CONFIG_H
+#include "libsecp256k1-config.h"
+#endif
+
 #if defined(EXHAUSTIVE_TEST_ORDER)
 
   /* We need to control these values for exhaustive tests because
    * the tables cannot have infinities in them (secp256k1_ge_storage
    * doesn't support infinities) */
+#undef COMB_BLOCKS
+#undef COMB_TEETH
+#undef COMB_SPACING
+
 #  if EXHAUSTIVE_TEST_ORDER > 32
 #    define COMB_BLOCKS 52
 #    define COMB_TEETH 5
@@ -39,9 +47,15 @@
   /* COMB_BLOCKS, COMB_TEETH, COMB_SPACING must all be positive and the product of the three (COMB_BITS)
    * must evaluate to a value in the range [256, 288]. The resulting memory usage for precomputation
    * will be COMB_POINTS_TOTAL * sizeof(secp256k1_ge_storage). */
-  #define COMB_BLOCKS 4
-  #define COMB_TEETH 5
-  #define COMB_SPACING 13
+#  ifndef COMB_BLOCKS
+#    define COMB_BLOCKS 4
+#  endif
+#  ifndef COMB_TEETH
+#    define COMB_TEETH 5
+#  endif
+#  ifndef COMB_SPACING
+#    define COMB_SPACING ((COMB_BLOCKS * COMB_TEETH + 255) / (COMB_BLOCKS * COMB_TEETH))
+#  endif
 
 #endif