bitcoin-core · sipa · Dec 25, 2021 · Dec 26, 2021 · Dec 26, 2021 · Dec 29, 2021
diff --git a/.cirrus.yml b/.cirrus.yml
@@ -8,7 +8,7 @@ env:
   BUILD: check
   ### secp256k1 config
   ECMULTWINDOW: auto
-  ECMULTGENPRECISION: auto
+  ECMULTGENKB: auto
   ASM: no
   WIDEMUL: auto
   WITH_VALGRIND: yes
@@ -75,7 +75,7 @@ task:
     - env: {CPPFLAGS: -DDETERMINISTIC}
     - env: {CFLAGS: -O0, CTIMETEST: no}
     - env: { ECMULTGENPRECISION: 2, ECMULTWINDOW: 2 }
-    - env: { ECMULTGENPRECISION: 8, ECMULTWINDOW: 4 }
+    - env: { ECMULTGENPRECISION: 22, ECMULTWINDOW: 4 }
   matrix:
     - env:
         CC: gcc

diff --git a/ci/cirrus.sh b/ci/cirrus.sh
@@ -16,7 +16,7 @@ valgrind --version || true
     --enable-experimental="$EXPERIMENTAL" \
     --with-test-override-wide-multiply="$WIDEMUL" --with-asm="$ASM" \
     --with-ecmult-window="$ECMULTWINDOW" \
-    --with-ecmult-gen-precision="$ECMULTGENPRECISION" \
+    --with-ecmult-gen-kb="$ECMULTGENKB" \
     --enable-module-ecdh="$ECDH" --enable-module-recovery="$RECOVERY" \
     --enable-module-schnorrsig="$SCHNORRSIG" \
     --with-valgrind="$WITH_VALGRIND" \

diff --git a/configure.ac b/configure.ac
@@ -177,13 +177,12 @@ AC_ARG_WITH([ecmult-window], [AS_HELP_STRING([--with-ecmult-window=SIZE|auto],
 )],
 [req_ecmult_window=$withval], [req_ecmult_window=auto])
 
-AC_ARG_WITH([ecmult-gen-precision], [AS_HELP_STRING([--with-ecmult-gen-precision=2|4|8|auto],
-[Precision bits to tune the precomputed table size for signing.]
-[The size of the table is 32kB for 2 bits, 64kB for 4 bits, 512kB for 8 bits of precision.]
-[A larger table size usually results in possible faster signing.]
-["auto" is a reasonable setting for desktop machines (currently 4). [default=auto]]
+AC_ARG_WITH([ecmult-gen-kb], [AS_HELP_STRING([--with-ecmult-gen-kb=2|22|86|auto],
+[The size of the precomputed table for signing in multiples of 1024 bytes (on typical platforms).]
+[Larger values result in possibly better signing/keygeneration performance at the cost of a larger table.]
+["auto" is a reasonable setting for desktop machines (currently 86). [default=auto]]
 )],
-[req_ecmult_gen_precision=$withval], [req_ecmult_gen_precision=auto])
+[req_ecmult_gen_kb=$withval], [req_ecmult_gen_kb=auto])
 
 AC_ARG_WITH([valgrind], [AS_HELP_STRING([--with-valgrind=yes|no|auto],
 [Build with extra checks for running inside Valgrind [default=auto]]
@@ -307,20 +306,31 @@ case $set_ecmult_window in
 esac
 
-# Set ecmult gen precision
+# Set ecmult gen table size (in kB) and the derived comb parameters
-if test x"$req_ecmult_gen_precision" = x"auto"; then
-  set_ecmult_gen_precision=4
+if test x"$req_ecmult_gen_kb" = x"auto"; then
+  set_ecmult_gen_kb=86
 else
-  set_ecmult_gen_precision=$req_ecmult_gen_precision
+  set_ecmult_gen_kb=$req_ecmult_gen_kb
 fi
 
-case $set_ecmult_gen_precision in
-2|4|8)
-  AC_DEFINE_UNQUOTED(ECMULT_GEN_PREC_BITS, $set_ecmult_gen_precision, [Set ecmult gen precision bits])
+case $set_ecmult_gen_kb in
+2)
+  ecmult_gen_blocks=2
+  ecmult_gen_teeth=5
+  ;;
+22)
+  ecmult_gen_blocks=11
+  ecmult_gen_teeth=6
+  ;;
+86)
+  ecmult_gen_blocks=43
+  ecmult_gen_teeth=6
   ;;
 *)
-  AC_MSG_ERROR(['ecmult gen precision not 2, 4, 8 or "auto"'])
+  AC_MSG_ERROR(['ecmult gen table size not 2, 22, 86 or "auto"'])
   ;;
 esac
+AC_DEFINE_UNQUOTED(COMB_BLOCKS, $ecmult_gen_blocks, [Number of blocks for ecmult_gen computation])
+AC_DEFINE_UNQUOTED(COMB_TEETH, $ecmult_gen_teeth, [Number of teeth for ecmult_gen computation])
 
 if test x"$enable_valgrind" = x"yes"; then
   SECP_INCLUDES="$SECP_INCLUDES $VALGRIND_CPPFLAGS"
@@ -424,7 +434,7 @@ echo "  module schnorrsig       = $enable_module_schnorrsig"
 echo
 echo "  asm                     = $set_asm"
 echo "  ecmult window size      = $set_ecmult_window"
-echo "  ecmult gen prec. bits   = $set_ecmult_gen_precision"
+echo "  ecmult gen kB           = $set_ecmult_gen_kb"
 # Hide test-only options unless they're used.
 if test x"$set_widemul" != xauto; then
 echo "  wide multiplication     = $set_widemul"

diff --git a/src/bench_internal.c b/src/bench_internal.c
@@ -89,6 +89,15 @@ void bench_scalar_add(void* arg, int iters) {
     CHECK(j <= iters);
 }
 
+void bench_scalar_half(void* arg, int iters) {
+    int i;
+    bench_inv *data = (bench_inv*)arg;
+
+    for (i = 0; i < iters; i++) {
+        secp256k1_scalar_half(&data->scalar[0], &data->scalar[0]);
+    }
+}
+
 void bench_scalar_negate(void* arg, int iters) {
     int i;
     bench_inv *data = (bench_inv*)arg;
@@ -347,6 +356,7 @@ int main(int argc, char **argv) {
     int d = argc == 1; /* default */
     print_output_table_header_row();
 
+    if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "half")) run_benchmark("scalar_half", bench_scalar_half, bench_setup, NULL, &data, 10, iters*100);
     if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "add")) run_benchmark("scalar_add", bench_scalar_add, bench_setup, NULL, &data, 10, iters*100);
     if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "negate")) run_benchmark("scalar_negate", bench_scalar_negate, bench_setup, NULL, &data, 10, iters*100);
     if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "mul")) run_benchmark("scalar_mul", bench_scalar_mul, bench_setup, NULL, &data, 10, iters*10);

diff --git a/src/ecmult_gen.h b/src/ecmult_gen.h
@@ -1,5 +1,5 @@
 /***********************************************************************
- * Copyright (c) 2013, 2014 Pieter Wuille                              *
+ * Copyright (c) 2013, 2014, 2021 Pieter Wuille, Peter Dettman         *
  * Distributed under the MIT software license, see the accompanying    *
  * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
  ***********************************************************************/
@@ -10,19 +10,90 @@
 #include "scalar.h"
 #include "group.h"
 
-#if ECMULT_GEN_PREC_BITS != 2 && ECMULT_GEN_PREC_BITS != 4 && ECMULT_GEN_PREC_BITS != 8
-#  error "Set ECMULT_GEN_PREC_BITS to 2, 4 or 8."
+/* Configuration parameters for the signed-digit multi-comb algorithm:
+ *
+ * - COMB_BLOCKS is the number of lookup tables.
+ * - COMB_TEETH is the number of bits covered by one table.
+ *
+ * The comb's spacing (COMB_SPACING), or the distance between the teeth,
+ * is defined as ceil(256 / (COMB_BLOCKS * COMB_TEETH)).
+ *
+ * The size of the precomputed table is COMB_BLOCKS * (1 << (COMB_TEETH - 1))
+ * secp256k1_ge_storages.
+ *
+ * The number of point additions equals COMB_BLOCKS * COMB_SPACING. Each point
+ * addition involves a cmov from (1 << (COMB_TEETH - 1)) table entries and a
+ * conditional negation.
+ *
+ * The number of point doublings is COMB_SPACING - 1. */
+
+#if defined(EXHAUSTIVE_TEST_ORDER)
+/* We need to control these values for exhaustive tests because
+ * the tables cannot have infinities in them (secp256k1_ge_storage
+ * doesn't support infinities) */
+#  undef COMB_BLOCKS
+#  undef COMB_TEETH
+#  if EXHAUSTIVE_TEST_ORDER > 32
+#    define COMB_BLOCKS 52
+#    define COMB_TEETH 5
+#  elif EXHAUSTIVE_TEST_ORDER > 16
+#    define COMB_BLOCKS 64
+#    define COMB_TEETH 4
+#  elif EXHAUSTIVE_TEST_ORDER > 8
+#    define COMB_BLOCKS 86
+#    define COMB_TEETH 3
+#  elif EXHAUSTIVE_TEST_ORDER > 4
+#    define COMB_BLOCKS 128
+#    define COMB_TEETH 2
+#  else
+#    define COMB_BLOCKS 256
+#    define COMB_TEETH 1
+#  endif
+#else /* !defined(EXHAUSTIVE_TEST_ORDER) */
+/* Use (11, 6) as default configuration, which results in a 22 kB table. */
+#  ifndef COMB_BLOCKS
+#    define COMB_BLOCKS 11
+#  endif
+#  ifndef COMB_TEETH
+#    define COMB_TEETH 6
+#  endif
+#endif /* defined(EXHAUSTIVE_TEST_ORDER) */
+
+/* Range checks on the parameters. */
+#if !(1 <= COMB_BLOCKS && COMB_BLOCKS <= 256)
+#  error "COMB_BLOCKS must be in the range [1, 256]"
+#endif
+#if !(1 <= COMB_TEETH && COMB_TEETH <= 8)
+#  error "COMB_TEETH must be in the range [1, 8]"
+#endif
+
+/* The remaining COMB_* parameters are derived values, don't modify these. */
+/* - The distance between the teeth of the comb. */
+#define COMB_SPACING ((255 + COMB_BLOCKS * COMB_TEETH) / (COMB_BLOCKS * COMB_TEETH))
+/* - The number of bits covered by all the combs; must be at least 256. */
+#define COMB_BITS (COMB_BLOCKS * COMB_TEETH * COMB_SPACING)
+/* - The number of points per table. */
+#define COMB_POINTS (1 << (COMB_TEETH - 1))
+
+/* Additional sanity checks. */
+#if (COMB_BLOCKS - 1) * COMB_TEETH * COMB_SPACING >= 256
+#  error "COMB_BLOCKS can be reduced"
+#endif
+#if COMB_BLOCKS * (COMB_TEETH - 1) * COMB_SPACING >= 256
+#  error "COMB_TEETH can be reduced"
 #endif
-#define ECMULT_GEN_PREC_G(bits) (1 << bits)
-#define ECMULT_GEN_PREC_N(bits) (256 / bits)
 
 typedef struct {
     /* Whether the context has been built. */
     int built;
 
-    /* Blinding values used when computing (n-b)G + bG. */
-    secp256k1_scalar blind; /* -b */
-    secp256k1_gej initial;  /* bG */
+    /* Blinding values used when computing nG as (n-b)G + bG. */
+    secp256k1_scalar scalar_offset; /* -b */
+    secp256k1_ge final_point_add;  /* bG */
+
+    /* Factor used for projective blinding. This value is used
+     * to rescale the Z coordinate of the first table lookup. */
+    secp256k1_fe proj_blind;
 } secp256k1_ecmult_gen_context;
 
 static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context* ctx);

diff --git a/src/ecmult_gen_compute_table.h b/src/ecmult_gen_compute_table.h
@@ -1,5 +1,5 @@
 /***********************************************************************
- * Copyright (c) 2013, 2014, 2015 Pieter Wuille, Gregory Maxwell       *
+ * Copyright (c) 2013, 2014, 2015, 2021 Pieter Wuille, Gregory Maxwell *
  * Distributed under the MIT software license, see the accompanying    *
  * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
  ***********************************************************************/
@@ -9,6 +9,6 @@
 
 #include "ecmult_gen.h"
 
-static void secp256k1_ecmult_gen_compute_table(secp256k1_ge_storage* table, const secp256k1_ge* gen, int bits);
+static void secp256k1_ecmult_gen_compute_table(secp256k1_ge_storage* table, const secp256k1_ge* gen, int blocks, int teeth);
 
 #endif /* SECP256K1_ECMULT_GEN_COMPUTE_TABLE_H */
diff --git a/src/ecmult_gen_compute_table_impl.h b/src/ecmult_gen_compute_table_impl.h
@@ -1,8 +1,8 @@
-/***********************************************************************
- * Copyright (c) 2013, 2014, 2015 Pieter Wuille, Gregory Maxwell       *
- * Distributed under the MIT software license, see the accompanying    *
- * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
- ***********************************************************************/
+/*******************************************************************************
+ * Copyright (c) 2013-2015, 2021 Pieter Wuille, Gregory Maxwell, Peter Dettman *
+ * Distributed under the MIT software license, see the accompanying            *
+ * file COPYING or https://www.opensource.org/licenses/mit-license.php.        *
+ *******************************************************************************/
 
 #ifndef SECP256K1_ECMULT_GEN_COMPUTE_TABLE_IMPL_H
 #define SECP256K1_ECMULT_GEN_COMPUTE_TABLE_IMPL_H
@@ -13,68 +13,73 @@
 #include "ecmult_gen.h"
 #include "util.h"
 
-static void secp256k1_ecmult_gen_compute_table(secp256k1_ge_storage* table, const secp256k1_ge* gen, int bits) {
-    int g = ECMULT_GEN_PREC_G(bits);
-    int n = ECMULT_GEN_PREC_N(bits);
+static void secp256k1_ecmult_gen_compute_table(secp256k1_ge_storage* table, const secp256k1_ge* gen, int blocks, int teeth) {
+    size_t points = ((size_t)1) << (teeth - 1);
+    size_t points_total = points * blocks;
+    int spacing = (256 + blocks * teeth - 1) / (blocks * teeth);
+    secp256k1_ge* prec = checked_malloc(&default_error_callback, points_total * sizeof(*prec));
+    secp256k1_gej* ds = checked_malloc(&default_error_callback, teeth * sizeof(*ds));
+    secp256k1_gej* vs = checked_malloc(&default_error_callback, points_total * sizeof(*vs));
+    secp256k1_gej u;
+    size_t vs_pos = 0;
+    int block;
 
-    secp256k1_ge* prec = checked_malloc(&default_error_callback, n * g * sizeof(*prec));
-    secp256k1_gej gj;
-    secp256k1_gej nums_gej;
-    int i, j;
-
-    /* get the generator */
-    secp256k1_gej_set_ge(&gj, gen);
-
-    /* Construct a group element with no known corresponding scalar (nothing up my sleeve). */
-    {
-        static const unsigned char nums_b32[33] = "The scalar for this x is unknown";
-        secp256k1_fe nums_x;
-        secp256k1_ge nums_ge;
-        int r;
-        r = secp256k1_fe_set_b32(&nums_x, nums_b32);
-        (void)r;
-        VERIFY_CHECK(r);
-        r = secp256k1_ge_set_xo_var(&nums_ge, &nums_x, 0);
-        (void)r;
-        VERIFY_CHECK(r);
-        secp256k1_gej_set_ge(&nums_gej, &nums_ge);
-        /* Add G to make the bits in x uniformly distributed. */
-        secp256k1_gej_add_ge_var(&nums_gej, &nums_gej, gen, NULL);
-    }
-
-    /* compute prec. */
-    {
-        secp256k1_gej gbase;
-        secp256k1_gej numsbase;
-        secp256k1_gej* precj = checked_malloc(&default_error_callback, n * g * sizeof(*precj));  /* Jacobian versions of prec. */
-        gbase = gj; /* PREC_G^j * G */
-        numsbase = nums_gej; /* 2^j * nums. */
-        for (j = 0; j < n; j++) {
-            /* Set precj[j*PREC_G .. j*PREC_G+(PREC_G-1)] to (numsbase, numsbase + gbase, ..., numsbase + (PREC_G-1)*gbase). */
-            precj[j*g] = numsbase;
-            for (i = 1; i < g; i++) {
-                secp256k1_gej_add_var(&precj[j*g + i], &precj[j*g + i - 1], &gbase, NULL);
-            }
-            /* Multiply gbase by PREC_G. */
-            for (i = 0; i < bits; i++) {
-                secp256k1_gej_double_var(&gbase, &gbase, NULL);
+    /* u is the running power of two times gen we're working with, initially 1*gen. */
+    secp256k1_gej_set_ge(&u, gen);
+    for (block = 0; block < blocks; ++block) {
+        int tooth;
+        /* Here u = 2^(block*teeth*spacing) * gen. */
+        secp256k1_gej sum;
+        secp256k1_gej_set_infinity(&sum);
+        for (tooth = 0; tooth < teeth; ++tooth) {
+            /* Here u = 2^((block*teeth + tooth)*spacing) * gen. */
+            /* Make sum = sum(2^((block*teeth + t)*spacing), t=0..tooth) * gen. */
+            secp256k1_gej_add_var(&sum, &sum, &u, NULL);
+            /* Make u = 2^((block*teeth + tooth)*spacing + 1) * gen. */
+            secp256k1_gej_double_var(&u, &u, NULL);
+            /* Make ds[tooth] = u = 2^((block*teeth + tooth)*spacing + 1) * gen. */
+            ds[tooth] = u;
+            /* Make u = 2^((block*teeth + tooth + 1)*spacing) * gen, unless at the end. */
+            if (block + tooth != blocks + teeth - 2) {
+                int bit_off;
+                for (bit_off = 1; bit_off < spacing; ++bit_off) {
+                    secp256k1_gej_double_var(&u, &u, NULL);
+                }
             }
-            /* Multiply numbase by 2. */
-            secp256k1_gej_double_var(&numsbase, &numsbase, NULL);
-            if (j == n - 2) {
-                /* In the last iteration, numsbase is (1 - 2^j) * nums instead. */
-                secp256k1_gej_neg(&numsbase, &numsbase);
-                secp256k1_gej_add_var(&numsbase, &numsbase, &nums_gej, NULL);
+        }
+        /* Now u = 2^((block + 1)*teeth*spacing) * gen, unless at the end. */
+
+        /* Next, compute the table entries for block block in Jacobian coordinates.
+         * The entries will occupy vs[block*points + i] for i=0..points-1.
+         * We start by computing the first (i=0) value corresponding to all summed
+         * powers of two times G being negative. */
+        secp256k1_gej_neg(&vs[vs_pos++], &sum);
+        /* And then teeth-1 times "double" the range of i values for which the table
+         * is computed: in each iteration, double the table by taking an existing
+         * table entry and adding ds[tooth]. */
+        for (tooth = 0; tooth < teeth - 1; ++tooth) {
+            size_t stride = ((size_t)1) << tooth;
+            size_t index;
+            for (index = 0; index < stride; ++index, ++vs_pos) {
+                secp256k1_gej_add_var(&vs[vs_pos], &vs[vs_pos - stride], &ds[tooth], NULL);
             }
         }
-        secp256k1_ge_set_all_gej_var(prec, precj, n * g);
-        free(precj);
     }
-    for (j = 0; j < n; j++) {
-        for (i = 0; i < g; i++) {
-            secp256k1_ge_to_storage(&table[j*g + i], &prec[j*g + i]);
+    VERIFY_CHECK(vs_pos == points_total);
+
+    /* Convert all points simultaneously from secp256k1_gej to secp256k1_ge. */
+    secp256k1_ge_set_all_gej_var(prec, vs, points_total);
+    /* Convert all points from secp256k1_ge to secp256k1_ge_storage output. */
+    for (block = 0; block < blocks; ++block) {
+        size_t index;
+        for (index = 0; index < points; ++index) {
+            secp256k1_ge_to_storage(&table[block * points + index], &prec[block * points + index]);
         }
     }
+
+    /* Free memory. */
+    free(vs);
+    free(ds);
     free(prec);
 }