Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Signed-digit multi-comb ecmult_gen algorithm #1057

Closed
wants to merge 12 commits into from
Closed
4 changes: 2 additions & 2 deletions .cirrus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ env:
BUILD: check
### secp256k1 config
ECMULTWINDOW: auto
ECMULTGENPRECISION: auto
ECMULTGENKB: auto
ASM: no
WIDEMUL: auto
WITH_VALGRIND: yes
Expand Down Expand Up @@ -75,7 +75,7 @@ task:
- env: {CPPFLAGS: -DDETERMINISTIC}
- env: {CFLAGS: -O0, CTIMETEST: no}
- env: { ECMULTGENPRECISION: 2, ECMULTWINDOW: 2 }
- env: { ECMULTGENPRECISION: 8, ECMULTWINDOW: 4 }
- env: { ECMULTGENPRECISION: 22, ECMULTWINDOW: 4 }
matrix:
- env:
CC: gcc
Expand Down
2 changes: 1 addition & 1 deletion ci/cirrus.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ valgrind --version || true
--enable-experimental="$EXPERIMENTAL" \
--with-test-override-wide-multiply="$WIDEMUL" --with-asm="$ASM" \
--with-ecmult-window="$ECMULTWINDOW" \
--with-ecmult-gen-precision="$ECMULTGENPRECISION" \
--with-ecmult-gen-kb="$ECMULTGENKB" \
--enable-module-ecdh="$ECDH" --enable-module-recovery="$RECOVERY" \
--enable-module-schnorrsig="$SCHNORRSIG" \
--with-valgrind="$WITH_VALGRIND" \
Expand Down
38 changes: 24 additions & 14 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -177,13 +177,12 @@ AC_ARG_WITH([ecmult-window], [AS_HELP_STRING([--with-ecmult-window=SIZE|auto],
)],
[req_ecmult_window=$withval], [req_ecmult_window=auto])

AC_ARG_WITH([ecmult-gen-precision], [AS_HELP_STRING([--with-ecmult-gen-precision=2|4|8|auto],
[Precision bits to tune the precomputed table size for signing.]
[The size of the table is 32kB for 2 bits, 64kB for 4 bits, 512kB for 8 bits of precision.]
[A larger table size usually results in possible faster signing.]
["auto" is a reasonable setting for desktop machines (currently 4). [default=auto]]
AC_ARG_WITH([ecmult-gen-kb], [AS_HELP_STRING([--with-ecmult-gen-kb=2|22|86|auto],
[The size of the precomputed table for signing in multiples of 1024 bytes (on typical platforms).]
[Larger values result in possibly better signing/keygeneration performance at the cost of a larger table.]
["auto" is a reasonable setting for desktop machines (currently 86). [default=auto]]
)],
[req_ecmult_gen_precision=$withval], [req_ecmult_gen_precision=auto])
[req_ecmult_gen_kb=$withval], [req_ecmult_gen_kb=auto])

AC_ARG_WITH([valgrind], [AS_HELP_STRING([--with-valgrind=yes|no|auto],
[Build with extra checks for running inside Valgrind [default=auto]]
Expand Down Expand Up @@ -307,20 +306,31 @@ case $set_ecmult_window in
esac

# Set ecmult gen table size (in kB)
if test x"$req_ecmult_gen_precision" = x"auto"; then
set_ecmult_gen_precision=4
if test x"$req_ecmult_gen_kb" = x"auto"; then
set_ecmult_gen_kb=86
else
set_ecmult_gen_precision=$req_ecmult_gen_precision
set_ecmult_gen_kb=$req_ecmult_gen_kb
fi

case $set_ecmult_gen_precision in
2|4|8)
AC_DEFINE_UNQUOTED(ECMULT_GEN_PREC_BITS, $set_ecmult_gen_precision, [Set ecmult gen precision bits])
case $set_ecmult_gen_kb in
2)
ecmult_gen_blocks=2
ecmult_gen_teeth=5
;;
22)
ecmult_gen_blocks=11
ecmult_gen_teeth=6
;;
86)
ecmult_gen_blocks=43
ecmult_gen_teeth=6
;;
*)
AC_MSG_ERROR(['ecmult gen precision not 2, 4, 8 or "auto"'])
AC_MSG_ERROR(['ecmult gen table size not 2, 22, 86 or "auto"'])
;;
esac
AC_DEFINE_UNQUOTED(COMB_BLOCKS, $ecmult_gen_blocks, [Number of blocks for ecmult_gen computation])
AC_DEFINE_UNQUOTED(COMB_TEETH, $ecmult_gen_teeth, [Number of teeth for ecmult_gen computation])

if test x"$enable_valgrind" = x"yes"; then
SECP_INCLUDES="$SECP_INCLUDES $VALGRIND_CPPFLAGS"
Expand Down Expand Up @@ -424,7 +434,7 @@ echo " module schnorrsig = $enable_module_schnorrsig"
echo
echo " asm = $set_asm"
echo " ecmult window size = $set_ecmult_window"
echo " ecmult gen prec. bits = $set_ecmult_gen_precision"
echo " ecmult gen kB = $set_ecmult_gen_kb"
# Hide test-only options unless they're used.
if test x"$set_widemul" != xauto; then
echo " wide multiplication = $set_widemul"
Expand Down
10 changes: 10 additions & 0 deletions src/bench_internal.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,15 @@ void bench_scalar_add(void* arg, int iters) {
CHECK(j <= iters);
}

/* Benchmark loop for secp256k1_scalar_half.
 *
 * arg is cast to bench_inv*; iters is the number of calls to perform.
 * Each iteration passes &data->scalar[0] as both arguments (presumably
 * output and input, so successive calls chain on the previous result;
 * confirm against the declaration of secp256k1_scalar_half). */
void bench_scalar_half(void* arg, int iters) {
int i;
bench_inv *data = (bench_inv*)arg;

for (i = 0; i < iters; i++) {
secp256k1_scalar_half(&data->scalar[0], &data->scalar[0]);
}
}

void bench_scalar_negate(void* arg, int iters) {
int i;
bench_inv *data = (bench_inv*)arg;
Expand Down Expand Up @@ -347,6 +356,7 @@ int main(int argc, char **argv) {
int d = argc == 1; /* default */
print_output_table_header_row();

if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "half")) run_benchmark("scalar_half", bench_scalar_half, bench_setup, NULL, &data, 10, iters*100);
if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "add")) run_benchmark("scalar_add", bench_scalar_add, bench_setup, NULL, &data, 10, iters*100);
if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "negate")) run_benchmark("scalar_negate", bench_scalar_negate, bench_setup, NULL, &data, 10, iters*100);
if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "mul")) run_benchmark("scalar_mul", bench_scalar_mul, bench_setup, NULL, &data, 10, iters*10);
Expand Down
87 changes: 79 additions & 8 deletions src/ecmult_gen.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/***********************************************************************
* Copyright (c) 2013, 2014 Pieter Wuille *
* Copyright (c) 2013, 2014, 2021 Pieter Wuille, Peter Dettman *
* Distributed under the MIT software license, see the accompanying *
* file COPYING or https://www.opensource.org/licenses/mit-license.php.*
***********************************************************************/
Expand All @@ -10,19 +10,90 @@
#include "scalar.h"
#include "group.h"

#if ECMULT_GEN_PREC_BITS != 2 && ECMULT_GEN_PREC_BITS != 4 && ECMULT_GEN_PREC_BITS != 8
# error "Set ECMULT_GEN_PREC_BITS to 2, 4 or 8."
/* Configuration parameters for the signed-digit multi-comb algorithm:
*
* - COMB_BLOCKS is the number of lookup tables.
* - COMB_TEETH is the number of bits covered by one table.
*
* The comb's spacing (COMB_SPACING), or the distance between the teeth,
* is defined as ceil(256 / (COMB_BLOCKS * COMB_TEETH)).
*
* The size of the precomputed table is COMB_BLOCKS * (1 << (COMB_TEETH - 1))
* secp256k1_ge_storages.
*
* The number of point additions equals COMB_BLOCKS * COMB_SPACING. Each point
* addition involves a cmov from (1 << (COMB_TEETH - 1)) table entries and a
* conditional negation.
*
* The number of point doublings is COMB_SPACING - 1. */

#if defined(EXHAUSTIVE_TEST_ORDER)
/* We need to control these values for exhaustive tests because
* the tables cannot have infinities in them (secp256k1_ge_storage
* doesn't support infinities) */
# undef COMB_BLOCKS
# undef COMB_TEETH
# if EXHAUSTIVE_TEST_ORDER > 32
# define COMB_BLOCKS 52
# define COMB_TEETH 5
# elif EXHAUSTIVE_TEST_ORDER > 16
# define COMB_BLOCKS 64
# define COMB_TEETH 4
# elif EXHAUSTIVE_TEST_ORDER > 8
# define COMB_BLOCKS 86
# define COMB_TEETH 3
# elif EXHAUSTIVE_TEST_ORDER > 4
# define COMB_BLOCKS 128
# define COMB_TEETH 2
# else
# define COMB_BLOCKS 256
# define COMB_TEETH 1
# endif
#else /* !defined(EXHAUSTIVE_TEST_ORDER) */
/* Use (11, 6) as default configuration, which results in a 22 kB table. */
# ifndef COMB_BLOCKS
# define COMB_BLOCKS 11
# endif
# ifndef COMB_TEETH
# define COMB_TEETH 6
# endif
#endif /* defined(EXHAUSTIVE_TEST_ORDER) */

/* Range checks on the parameters. */
#if !(1 <= COMB_BLOCKS && COMB_BLOCKS <= 256)
# error "COMB_BLOCKS must be in the range [1, 256]"
#endif
#if !(1 <= COMB_TEETH && COMB_TEETH <= 8)
# error "COMB_TEETH must be in the range [1, 8]"
#endif

/* The remaining COMB_* parameters are derived values, don't modify these. */
/* - The distance between the teeth of the comb. */
#define COMB_SPACING ((255 + COMB_BLOCKS * COMB_TEETH) / (COMB_BLOCKS * COMB_TEETH))
/* - The number of bits covered by all the combs; must be at least 256. */
#define COMB_BITS (COMB_BLOCKS * COMB_TEETH * COMB_SPACING)
/* - The number of points per table. */
#define COMB_POINTS (1 << (COMB_TEETH - 1))

/* Additional sanity checks. */
#if (COMB_BLOCKS - 1) * COMB_TEETH * COMB_SPACING >= 256
# error "COMB_BLOCKS can be reduced"
#endif
#if COMB_BLOCKS * (COMB_TEETH - 1) * COMB_SPACING >= 256
# error "COMB_TEETH can be reduced"
#endif
#define ECMULT_GEN_PREC_G(bits) (1 << bits)
#define ECMULT_GEN_PREC_N(bits) (256 / bits)

typedef struct {
/* Whether the context has been built. */
int built;

/* Blinding values used when computing (n-b)G + bG. */
secp256k1_scalar blind; /* -b */
secp256k1_gej initial; /* bG */
/* Blinding values used when computing nG as (n-b)G + bG. */
secp256k1_scalar scalar_offset; /* -b */
secp256k1_ge final_point_add; /* bG */

/* Factor used for projective blinding. This value is used
* to rescale the Z coordinate of the first table lookup. */
secp256k1_fe proj_blind;
} secp256k1_ecmult_gen_context;

static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context* ctx);
Expand Down
4 changes: 2 additions & 2 deletions src/ecmult_gen_compute_table.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/***********************************************************************
* Copyright (c) 2013, 2014, 2015 Pieter Wuille, Gregory Maxwell *
* Copyright (c) 2013, 2014, 2015, 2021 Pieter Wuille, Gregory Maxwell *
* Distributed under the MIT software license, see the accompanying *
* file COPYING or https://www.opensource.org/licenses/mit-license.php.*
***********************************************************************/
Expand All @@ -9,6 +9,6 @@

#include "ecmult_gen.h"

static void secp256k1_ecmult_gen_compute_table(secp256k1_ge_storage* table, const secp256k1_ge* gen, int bits);
static void secp256k1_ecmult_gen_compute_table(secp256k1_ge_storage* table, const secp256k1_ge* gen, int blocks, int teeth);

#endif /* SECP256K1_ECMULT_GEN_COMPUTE_TABLE_H */
125 changes: 65 additions & 60 deletions src/ecmult_gen_compute_table_impl.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
/***********************************************************************
* Copyright (c) 2013, 2014, 2015 Pieter Wuille, Gregory Maxwell *
* Distributed under the MIT software license, see the accompanying *
* file COPYING or https://www.opensource.org/licenses/mit-license.php.*
***********************************************************************/
/*******************************************************************************
* Copyright (c) 2013-2015, 2021 Pieter Wuille, Gregory Maxwell, Peter Dettman *
* Distributed under the MIT software license, see the accompanying *
* file COPYING or https://www.opensource.org/licenses/mit-license.php. *
*******************************************************************************/

#ifndef SECP256K1_ECMULT_GEN_COMPUTE_TABLE_IMPL_H
#define SECP256K1_ECMULT_GEN_COMPUTE_TABLE_IMPL_H
Expand All @@ -13,68 +13,73 @@
#include "ecmult_gen.h"
#include "util.h"

static void secp256k1_ecmult_gen_compute_table(secp256k1_ge_storage* table, const secp256k1_ge* gen, int bits) {
int g = ECMULT_GEN_PREC_G(bits);
int n = ECMULT_GEN_PREC_N(bits);
static void secp256k1_ecmult_gen_compute_table(secp256k1_ge_storage* table, const secp256k1_ge* gen, int blocks, int teeth) {
size_t points = ((size_t)1) << (teeth - 1);
size_t points_total = points * blocks;
int spacing = (256 + blocks * teeth - 1) / (blocks * teeth);
secp256k1_ge* prec = checked_malloc(&default_error_callback, points_total * sizeof(*prec));
secp256k1_gej* ds = checked_malloc(&default_error_callback, teeth * sizeof(*ds));
secp256k1_gej* vs = checked_malloc(&default_error_callback, points_total * sizeof(*vs));
secp256k1_gej u;
size_t vs_pos = 0;
int block;

secp256k1_ge* prec = checked_malloc(&default_error_callback, n * g * sizeof(*prec));
secp256k1_gej gj;
secp256k1_gej nums_gej;
int i, j;

/* get the generator */
secp256k1_gej_set_ge(&gj, gen);

/* Construct a group element with no known corresponding scalar (nothing up my sleeve). */
{
static const unsigned char nums_b32[33] = "The scalar for this x is unknown";
secp256k1_fe nums_x;
secp256k1_ge nums_ge;
int r;
r = secp256k1_fe_set_b32(&nums_x, nums_b32);
(void)r;
VERIFY_CHECK(r);
r = secp256k1_ge_set_xo_var(&nums_ge, &nums_x, 0);
(void)r;
VERIFY_CHECK(r);
secp256k1_gej_set_ge(&nums_gej, &nums_ge);
/* Add G to make the bits in x uniformly distributed. */
secp256k1_gej_add_ge_var(&nums_gej, &nums_gej, gen, NULL);
}

/* compute prec. */
{
secp256k1_gej gbase;
secp256k1_gej numsbase;
secp256k1_gej* precj = checked_malloc(&default_error_callback, n * g * sizeof(*precj)); /* Jacobian versions of prec. */
gbase = gj; /* PREC_G^j * G */
numsbase = nums_gej; /* 2^j * nums. */
for (j = 0; j < n; j++) {
/* Set precj[j*PREC_G .. j*PREC_G+(PREC_G-1)] to (numsbase, numsbase + gbase, ..., numsbase + (PREC_G-1)*gbase). */
precj[j*g] = numsbase;
for (i = 1; i < g; i++) {
secp256k1_gej_add_var(&precj[j*g + i], &precj[j*g + i - 1], &gbase, NULL);
}
/* Multiply gbase by PREC_G. */
for (i = 0; i < bits; i++) {
secp256k1_gej_double_var(&gbase, &gbase, NULL);
/* u is the running power of two times gen we're working with, initially 1*gen. */
secp256k1_gej_set_ge(&u, gen);
for (block = 0; block < blocks; ++block) {
int tooth;
/* Here u = 2^(block*teeth*spacing) * gen. */
secp256k1_gej sum;
secp256k1_gej_set_infinity(&sum);
for (tooth = 0; tooth < teeth; ++tooth) {
/* Here u = 2^((block*teeth + tooth)*spacing) * gen. */
/* Make sum = sum(2^((block*teeth + t)*spacing), t=0..tooth) * gen. */
secp256k1_gej_add_var(&sum, &sum, &u, NULL);
/* Make u = 2^((block*teeth + tooth)*spacing + 1) * gen. */
secp256k1_gej_double_var(&u, &u, NULL);
/* Make ds[tooth] = u = 2^((block*teeth + tooth)*spacing + 1) * gen. */
ds[tooth] = u;
/* Make u = 2^((block*teeth + tooth + 1)*spacing) * gen, unless at the end. */
if (block + tooth != blocks + teeth - 2) {
int bit_off;
for (bit_off = 1; bit_off < spacing; ++bit_off) {
secp256k1_gej_double_var(&u, &u, NULL);
}
}
/* Multiply numbase by 2. */
secp256k1_gej_double_var(&numsbase, &numsbase, NULL);
if (j == n - 2) {
/* In the last iteration, numsbase is (1 - 2^j) * nums instead. */
secp256k1_gej_neg(&numsbase, &numsbase);
secp256k1_gej_add_var(&numsbase, &numsbase, &nums_gej, NULL);
}
/* Now u = 2^((block*teeth + teeth)*spacing) * gen = 2^((block + 1)*teeth*spacing) * gen. */

/* Next, compute the table entries for block number block in Jacobian coordinates.
* The entries will occupy vs[block*points + i] for i=0..points-1.
* We start by computing the first (i=0) value corresponding to all summed
* powers of two times G being negative. */
secp256k1_gej_neg(&vs[vs_pos++], &sum);
/* And then teeth-1 times "double" the range of i values for which the table
* is computed: in each iteration, double the table by taking an existing
* table entry and adding ds[tooth]. */
for (tooth = 0; tooth < teeth - 1; ++tooth) {
size_t stride = ((size_t)1) << tooth;
size_t index;
for (index = 0; index < stride; ++index, ++vs_pos) {
secp256k1_gej_add_var(&vs[vs_pos], &vs[vs_pos - stride], &ds[tooth], NULL);
}
}
secp256k1_ge_set_all_gej_var(prec, precj, n * g);
free(precj);
}
for (j = 0; j < n; j++) {
for (i = 0; i < g; i++) {
secp256k1_ge_to_storage(&table[j*g + i], &prec[j*g + i]);
VERIFY_CHECK(vs_pos == points_total);

/* Convert all points simultaneously from secp256k1_gej to secp256k1_ge. */
secp256k1_ge_set_all_gej_var(prec, vs, points_total);
/* Convert all points from secp256k1_ge to secp256k1_ge_storage output. */
for (block = 0; block < blocks; ++block) {
size_t index;
for (index = 0; index < points; ++index) {
secp256k1_ge_to_storage(&table[block * points + index], &prec[block * points + index]);
}
}

/* Free memory. */
free(vs);
free(ds);
free(prec);
}

Expand Down
Loading