bitcoin-core · peterdettman · Aug 2, 2018 · Aug 5, 2018 · Sep 23, 2018 · Sep 24, 2018
diff --git a/src/ecmult_gen.h b/src/ecmult_gen.h
@@ -10,7 +10,68 @@
 #include "scalar.h"
 #include "group.h"
 
+#ifndef USE_ECMULT_STATIC_PRECOMPUTATION
+#define USE_COMB 1
+#endif /* otherwise USE_COMB stays undefined, and "#if USE_COMB" evaluates it as 0 */
+
+#if USE_COMB
+
+#if defined(EXHAUSTIVE_TEST_ORDER)
+
+  /* We need to control these values for exhaustive tests because
+   * the tables cannot have infinities in them (secp256k1_ge_storage
+   * doesn't support infinities) */
+#  if EXHAUSTIVE_TEST_ORDER > 32
+#    define COMB_BLOCKS 52
+#    define COMB_TEETH 5 /* COMB_BITS = 52 * 5 = 260 */
+#  elif EXHAUSTIVE_TEST_ORDER > 16
+#    define COMB_BLOCKS 64
+#    define COMB_TEETH 4 /* COMB_BITS = 64 * 4 = 256 */
+#  elif EXHAUSTIVE_TEST_ORDER > 8
+#    define COMB_BLOCKS 86
+#    define COMB_TEETH 3 /* COMB_BITS = 86 * 3 = 258 */
+#  elif EXHAUSTIVE_TEST_ORDER > 4
+#    define COMB_BLOCKS 128
+#    define COMB_TEETH 2 /* COMB_BITS = 128 * 2 = 256 */
+#  else
+#    define COMB_BLOCKS 256
+#    define COMB_TEETH 1 /* COMB_BITS = 256 * 1 = 256 */
+#  endif
+
+#  define COMB_SPACING 1
+
+#else
+
+  /* COMB_BLOCKS, COMB_TEETH, COMB_SPACING must all be positive and the product of the three (COMB_BITS)
+   * must evaluate to a value in the range [256, 288]. The resulting memory usage for precomputation
+   * will be COMB_POINTS_TOTAL * sizeof(secp256k1_ge_storage). */
+  #define COMB_BLOCKS 4
+  #define COMB_TEETH 5
+  #define COMB_SPACING 13 /* COMB_BITS = 4 * 5 * 13 = 260 */
+
+#endif /* defined(EXHAUSTIVE_TEST_ORDER) */
+
+/* The remaining COMB_* parameters are derived from the three above; do not modify them. */
+#define COMB_BITS (COMB_BLOCKS * COMB_TEETH * COMB_SPACING) /* number of scalar bit positions covered by the comb */
+#define COMB_GROUPED ((COMB_SPACING == 1) && ((32 % COMB_TEETH) == 0)) /* a block's teeth are adjacent bits inside one 32-bit word */
+#define COMB_OFFSET (COMB_BITS == 256) /* 1 when an extra offset point is stored and one more bit is recoded */
+#define COMB_POINTS (1 << (COMB_TEETH - 1)) /* precomputed points per block */
+#define COMB_POINTS_TOTAL (COMB_BLOCKS * COMB_POINTS) /* precomputed points over all blocks */
+#define COMB_MASK (COMB_POINTS - 1) /* masks a value down to a per-block table index */
+
+#endif /* USE_COMB */
+
 typedef struct {
+#if USE_COMB
+    /* Precomputation data for the signed-digit multi-comb algorithm as described in section 3.3 of:
+     *     "Fast and compact elliptic-curve cryptography", Mike Hamburg
+     *         (https://eprint.iacr.org/2012/309)
+     */
+    secp256k1_ge_storage (*prec)[COMB_BLOCKS][COMB_POINTS];
+#if COMB_OFFSET
+    secp256k1_ge offset;
+#endif
+#else
     /* For accelerating the computation of a*G:
      * To harden against timing attacks, use the following mechanism:
      * * Break up the multiplicand into groups of 4 bits, called n_0, n_1, n_2, ..., n_63.
@@ -24,6 +85,7 @@ typedef struct {
      * the intermediate sums while computing a*G.
      */
     secp256k1_ge_storage (*prec)[64][16]; /* prec[j][i] = 16^j * i * G + U_i */
+#endif
     secp256k1_scalar blind;
     secp256k1_gej initial;
 } secp256k1_ecmult_gen_context;

diff --git a/src/ecmult_gen_impl.h b/src/ecmult_gen_impl.h
@@ -20,16 +20,70 @@ static void secp256k1_ecmult_gen_context_init(secp256k1_ecmult_gen_context *ctx)
 
 static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context *ctx, const secp256k1_callback* cb) {
 #ifndef USE_ECMULT_STATIC_PRECOMPUTATION
+#if USE_COMB
+    secp256k1_ge prec[COMB_POINTS_TOTAL + COMB_OFFSET];
+    secp256k1_gej u, sum;
+    int block, index, spacing, stride, tooth;
+#else
     secp256k1_ge prec[1024];
     secp256k1_gej gj;
     secp256k1_gej nums_gej;
     int i, j;
+#endif
 #endif
 
     if (ctx->prec != NULL) {
         return;
     }
 #ifndef USE_ECMULT_STATIC_PRECOMPUTATION
+#if USE_COMB
+    ctx->prec = (secp256k1_ge_storage (*)[COMB_BLOCKS][COMB_POINTS])checked_malloc(cb, sizeof(*ctx->prec));
+
+    /* get the generator */
+    secp256k1_gej_set_ge(&u, &secp256k1_ge_const_g);
+
+    /* compute prec. */
+    {
+        secp256k1_gej ds[COMB_TEETH];
+        secp256k1_gej vs[COMB_POINTS_TOTAL + COMB_OFFSET];
+        int vs_pos = 0;
+
+        for (block = 0; block < COMB_BLOCKS; ++block) {
+            secp256k1_gej_set_infinity(&sum);
+            for (tooth = 0; tooth < COMB_TEETH; ++tooth) {
+                secp256k1_gej_add_var(&sum, &sum, &u, NULL);
+                secp256k1_gej_double(&u, &u);
+                ds[tooth] = u;
+                for (spacing = 1; spacing < COMB_SPACING; ++spacing) {
+                    secp256k1_gej_double(&u, &u);
+                }
+            }
+            secp256k1_gej_neg(&vs[vs_pos++], &sum);
+            for (tooth = 0; tooth < (COMB_TEETH - 1); ++tooth) {
+                stride = 1 << tooth;
+                for (index = 0; index < stride; ++index, ++vs_pos) {
+                    secp256k1_gej_add_var(&vs[vs_pos], &vs[vs_pos - stride], &ds[tooth], NULL);
+                }
+            }
+        }
+        VERIFY_CHECK(vs_pos == COMB_POINTS_TOTAL);
+#if COMB_OFFSET
+        vs[COMB_POINTS_TOTAL] = ds[COMB_TEETH - 1];
+#endif
+        secp256k1_ge_set_all_gej_var(prec, vs, COMB_POINTS_TOTAL + COMB_OFFSET, cb);
+    }
+
+    for (block = 0; block < COMB_BLOCKS; ++block) {
+        for (index = 0; index < COMB_POINTS; ++index) {
+            secp256k1_ge_to_storage(&(*ctx->prec)[block][index], &prec[block * COMB_POINTS + index]);
+        }
+    }
+
+#if COMB_OFFSET
+    ctx->offset = prec[COMB_POINTS_TOTAL];
+#endif
+
+#else
     ctx->prec = (secp256k1_ge_storage (*)[64][16])checked_malloc(cb, sizeof(*ctx->prec));
 
     /* get the generator */
@@ -84,6 +138,7 @@ static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context *ctx
             secp256k1_ge_to_storage(&(*ctx->prec)[j][i], &prec[j*16 + i]);
         }
     }
+#endif
 #else
     (void)cb;
     ctx->prec = (secp256k1_ge_storage (*)[64][16])secp256k1_ecmult_static_context;
@@ -101,7 +156,14 @@ static void secp256k1_ecmult_gen_context_clone(secp256k1_ecmult_gen_context *dst
         dst->prec = NULL;
     } else {
 #ifndef USE_ECMULT_STATIC_PRECOMPUTATION
+#if USE_COMB
+        dst->prec = (secp256k1_ge_storage (*)[COMB_BLOCKS][COMB_POINTS])checked_malloc(cb, sizeof(*dst->prec));
+#if COMB_OFFSET
+        dst->offset = src->offset;
+#endif
+#else
         dst->prec = (secp256k1_ge_storage (*)[64][16])checked_malloc(cb, sizeof(*dst->prec));
+#endif
         memcpy(dst->prec, src->prec, sizeof(*dst->prec));
 #else
         (void)cb;
@@ -115,6 +177,11 @@ static void secp256k1_ecmult_gen_context_clone(secp256k1_ecmult_gen_context *dst
 static void secp256k1_ecmult_gen_context_clear(secp256k1_ecmult_gen_context *ctx) {
 #ifndef USE_ECMULT_STATIC_PRECOMPUTATION
     free(ctx->prec);
+#if USE_COMB
+#if COMB_OFFSET
+    secp256k1_ge_clear(&ctx->offset);
+#endif
+#endif
 #endif
     secp256k1_scalar_clear(&ctx->blind);
     secp256k1_gej_clear(&ctx->initial);
@@ -126,6 +193,69 @@ static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp25
     secp256k1_ge_storage adds;
     secp256k1_scalar gnb;
     int bits;
+
+#if USE_COMB
+
+    int abs, bit_pos, block, comb_off, index, sign;
+#if !COMB_GROUPED
+    int bit, tooth;
+#endif
+    uint32_t recoded[9];
+    secp256k1_fe neg;
+
+    memset(&adds, 0, sizeof(adds));
+    *r = ctx->initial;
+
+    /* Blind scalar/point multiplication by computing (n-b)G + bG instead of nG. */
+    secp256k1_scalar_add(&gnb, gn, &ctx->blind);
+    secp256k1_scalar_signed_recoding(recoded, &gnb, COMB_BITS + COMB_OFFSET);
+
+    comb_off = COMB_SPACING - 1;
+    for (;;) {
+        bit_pos = comb_off;
+        for (block = 0; block < COMB_BLOCKS; ++block) {
+#if COMB_GROUPED
+            bits = (recoded[bit_pos >> 5] >> (bit_pos & 0x1F)) & ((1 << COMB_TEETH) - 1);
+            bit_pos += COMB_SPACING * COMB_TEETH;
+#else
+            bits = 0;
+            for (tooth = 0; tooth < COMB_TEETH; ++tooth) {
+                bit = (recoded[bit_pos >> 5] >> (bit_pos & 0x1F)) & 1;
+                bits |= bit << tooth;
+                bit_pos += COMB_SPACING;
+            }
+#endif
+
+            sign = (bits >> (COMB_TEETH - 1)) & 1;
+            abs = (bits ^ -sign) & COMB_MASK;
+
+            VERIFY_CHECK(sign == 0 || sign == 1);
+            VERIFY_CHECK(0 <= abs && abs < COMB_POINTS);
+
+            for (index = 0; index < COMB_POINTS; ++index) {
+                secp256k1_ge_storage_cmov(&adds, &(*ctx->prec)[block][index], index == abs);
+            }
+
+            secp256k1_ge_from_storage(&add, &adds);
+            secp256k1_fe_negate(&neg, &add.y, 1);
+            secp256k1_fe_cmov(&add.y, &neg, sign);
+
+            secp256k1_gej_add_ge(r, r, &add);
+        }
+
+        if (--comb_off < 0) {
+            break;
+        }
+
+        secp256k1_gej_double(r, r);
+    }
+
+    secp256k1_fe_clear(&neg);
+    memset(recoded, 0, sizeof(recoded));
+    abs = 0;
+    sign = 0;
+
+#else
     int i, j;
     memset(&adds, 0, sizeof(adds));
     *r = ctx->initial;
@@ -150,13 +280,18 @@ static void secp256k1_ecmult_gen(const secp256k1_ecmult_gen_context *ctx, secp25
         secp256k1_ge_from_storage(&add, &adds);
         secp256k1_gej_add_ge(r, r, &add);
     }
+#endif
     bits = 0;
     secp256k1_ge_clear(&add);
+    memset(&adds, 0, sizeof(adds));
     secp256k1_scalar_clear(&gnb);
 }
 
 /* Setup blinding values for secp256k1_ecmult_gen. */
 static void secp256k1_ecmult_gen_blind(secp256k1_ecmult_gen_context *ctx, const unsigned char *seed32) {
+#if USE_COMB
+    int spacing;
+#endif
     secp256k1_scalar b;
     secp256k1_gej gb;
     secp256k1_fe s;
@@ -169,6 +304,14 @@ static void secp256k1_ecmult_gen_blind(secp256k1_ecmult_gen_context *ctx, const
         secp256k1_gej_set_ge(&ctx->initial, &secp256k1_ge_const_g);
         secp256k1_gej_neg(&ctx->initial, &ctx->initial);
         secp256k1_scalar_set_int(&ctx->blind, 1);
+#if USE_COMB
+        for (spacing = 1; spacing < COMB_SPACING; ++spacing) {
+            secp256k1_scalar_add(&ctx->blind, &ctx->blind, &ctx->blind);
+        }
+#if COMB_OFFSET
+        secp256k1_gej_add_ge(&ctx->initial, &ctx->initial, &ctx->offset);
+#endif
+#endif
     }
     /* The prior blinding value (if not reset) is chained forward by including it in the hash. */
     secp256k1_scalar_get_b32(nonce32, &ctx->blind);
@@ -203,6 +346,14 @@ static void secp256k1_ecmult_gen_blind(secp256k1_ecmult_gen_context *ctx, const
     secp256k1_scalar_negate(&b, &b);
     ctx->blind = b;
     ctx->initial = gb;
+#if USE_COMB
+    for (spacing = 1; spacing < COMB_SPACING; ++spacing) {
+        secp256k1_scalar_add(&ctx->blind, &ctx->blind, &ctx->blind);
+    }
+#if COMB_OFFSET
+    secp256k1_gej_add_ge(&ctx->initial, &ctx->initial, &ctx->offset);
+#endif
+#endif
     secp256k1_scalar_clear(&b);
     secp256k1_gej_clear(&gb);
 }

diff --git a/src/group.h b/src/group.h
@@ -100,13 +100,16 @@ static int secp256k1_gej_is_infinity(const secp256k1_gej *a);
 /** Check whether a group element's y coordinate is a quadratic residue. */
 static int secp256k1_gej_has_quad_y_var(const secp256k1_gej *a);
 
-/** Set r equal to the double of a. If rzr is not-NULL, r->z = a->z * *rzr (where infinity means an implicit z = 0).
- * a may not be zero. Constant time. */
-static void secp256k1_gej_double_nonzero(secp256k1_gej *r, const secp256k1_gej *a, secp256k1_fe *rzr);
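+/** Set r equal to the double of a. Unlike secp256k1_gej_double_var, infinity gets no special-case branch: r->infinity is copied from a. */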
+static void secp256k1_gej_double(secp256k1_gej *r, const secp256k1_gej *a);
 
 /** Set r equal to the double of a. If rzr is not-NULL, r->z = a->z * *rzr (where infinity means an implicit z = 0). */
 static void secp256k1_gej_double_var(secp256k1_gej *r, const secp256k1_gej *a, secp256k1_fe *rzr);
 
+/** Set r equal to the double of a. If rzr is non-NULL, r->z = a->z * *rzr (where infinity means an implicit z = 0).
+ * a may not be zero. Constant time. */
+static void secp256k1_gej_double_nonzero(secp256k1_gej *r, const secp256k1_gej *a, secp256k1_fe *rzr);
+
 /** Set r equal to the sum of a and b. If rzr is non-NULL, r->z = a->z * *rzr (a cannot be infinity in that case). */
 static void secp256k1_gej_add_var(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_gej *b, secp256k1_fe *rzr);
 

diff --git a/src/group_impl.h b/src/group_impl.h
@@ -304,7 +304,7 @@ static int secp256k1_ge_is_valid_var(const secp256k1_ge *a) {
     return secp256k1_fe_equal_var(&y2, &x3);
 }
 
-static void secp256k1_gej_double_var(secp256k1_gej *r, const secp256k1_gej *a, secp256k1_fe *rzr) {
+static void secp256k1_gej_double(secp256k1_gej *r, const secp256k1_gej *a) {
     /* Operations: 3 mul, 4 sqr, 0 normalize, 12 mul_int/add/negate.
      *
      * Note that there is an implementation described at
@@ -324,19 +324,6 @@ static void secp256k1_gej_double_var(secp256k1_gej *r, const secp256k1_gej *a, s
      *  point will be gibberish (z = 0 but infinity = 0).
      */
     r->infinity = a->infinity;
-    if (r->infinity) {
-        if (rzr != NULL) {
-            secp256k1_fe_set_int(rzr, 1);
-        }
-        return;
-    }
-
-    if (rzr != NULL) {
-        *rzr = a->y;
-        secp256k1_fe_normalize_weak(rzr);
-        secp256k1_fe_mul_int(rzr, 2);
-    }
-
     secp256k1_fe_mul(&r->z, &a->z, &a->y);
     secp256k1_fe_mul_int(&r->z, 2);       /* Z' = 2*Y*Z (2) */
     secp256k1_fe_sqr(&t1, &a->x);
@@ -359,6 +346,22 @@ static void secp256k1_gej_double_var(secp256k1_gej *r, const secp256k1_gej *a, s
     secp256k1_fe_add(&r->y, &t2);         /* Y' = 36*X^3*Y^2 - 27*X^6 - 8*Y^4 (4) */
 }
 
+static SECP256K1_INLINE void secp256k1_gej_double_var(secp256k1_gej *r, const secp256k1_gej *a, secp256k1_fe *rzr) { /* doubles a into r; optionally reports the z-ratio through rzr */
+    if (a->infinity) { /* branches on the input, so this wrapper is not uniform over all inputs */
+        r->infinity = 1; /* only the flag is written here; r's coordinates are left as they were */
+        if (rzr != NULL) {
+            secp256k1_fe_set_int(rzr, 1); /* ratio reported for the infinity case */
+        }
+        return;
+    }
+    if (rzr != NULL) {
+        *rzr = a->y; /* the doubling sets Z' = 2*Y*Z, so the ratio Z'/Z is 2*Y */
+        secp256k1_fe_normalize_weak(rzr);
+        secp256k1_fe_mul_int(rzr, 2);
+    }
+    secp256k1_gej_double(r, a); /* rzr was filled from a->y before this call, which writes r */
+}
+
 static SECP256K1_INLINE void secp256k1_gej_double_nonzero(secp256k1_gej *r, const secp256k1_gej *a, secp256k1_fe *rzr) {
     VERIFY_CHECK(!secp256k1_gej_is_infinity(a));
     secp256k1_gej_double_var(r, a, rzr);

diff --git a/src/scalar.h b/src/scalar.h
@@ -103,4 +103,6 @@ static void secp256k1_scalar_split_lambda(secp256k1_scalar *r1, secp256k1_scalar
 /** Multiply a and b (without taking the modulus!), divide by 2**shift, and round to the nearest integer. Shift must be at least 256. */
 static void secp256k1_scalar_mul_shift_var(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b, unsigned int shift);
 
+static void secp256k1_scalar_signed_recoding(uint32_t r[9], const secp256k1_scalar *a, int bits); /* TODO(review): undocumented; presumably writes a bits-wide signed-digit recoding of a into the words of r - confirm against the implementation */
+
 #endif /* SECP256K1_SCALAR_H */