Skip to content

Commit 7973576

Browse files
Merge #662: Add ecmult_gen, ecmult_const and ecmult to benchmark
8f879c2 Fix array size in bench_ecmult (Jonas Nick) 2fe1b50 Add ecmult_gen, ecmult_const and ecmult to benchmark (Jonas Nick) 593e6ba Clean up ecmult_bench to make space for more benchmarks (Jonas Nick) Pull request description: I was trying to determine the impact of ecmult_gen in schnorrsig signing and noticed that there is no way to bench this right now. The new benchmarks look like this: ``` $ ./bench_ecmult ecmult_gen: min 20.9us / avg 21.2us / max 21.7us ecmult_const: min 63.9us / avg 64.3us / max 64.8us ecmult 1: min 49.4us / avg 49.7us / max 50.3us ecmult 1g: min 39.8us / avg 40.0us / max 40.3us ecmult 2g: min 27.2us / avg 27.3us / max 27.8us ecmult_multi 1g: min 39.8us / avg 40.0us / max 40.2us ecmult_multi 2g: min 27.2us / avg 27.4us / max 27.7us ecmult_multi 3g: min 22.8us / avg 22.9us / max 23.1us ecmult_multi 4g: min 20.6us / avg 20.8us / max 21.1us ecmult_multi 5g: min 19.3us / avg 19.5us / max 19.7us ``` (Turns out ecmult_gen is 37% of the 55.8us that schnorrsig sign takes) ACKs for top commit: real-or-random: ACK 8f879c2 elichai: tACK 8f879c2 Tree-SHA512: 8a739f5de1e2c0467c8d1c3ceeaf453b396a470ea0e8e5bef15fe1b32f3f9633b6b1c7e2ce1d94d736cf3e9adecd8f4f983ad4ba37450cd5991767f1a95db85c
2 parents 50f3367 + 8f879c2 commit 7973576

File tree

1 file changed

+187
-33
lines changed

1 file changed

+187
-33
lines changed

src/bench_ecmult.c

+187-33
Original file line numberDiff line numberDiff line change
@@ -18,29 +18,174 @@
1818

1919
#define POINTS 32768
2020

21+
/* Print a short description of the program and of its command-line arguments
 * to stdout. argv[0] is used as the program name in the usage line. */
void help(char **argv) {
    const char *program = argv[0];

    printf("Benchmark EC multiplication algorithms\n"
           "\n");
    printf("Usage: %s <help|pippenger_wnaf|strauss_wnaf|simple>\n", program);
    /* Explanation of the output format. */
    printf("The output shows the number of multiplied and summed points right after the\n"
           "function name. The letter 'g' indicates that one of the points is the generator.\n"
           "The benchmarks are divided by the number of points.\n"
           "\n");
    /* One line per selectable ecmult_multi implementation. */
    printf("default (ecmult_multi): picks pippenger_wnaf or strauss_wnaf depending on the\n"
           " batch size\n"
           "pippenger_wnaf: for all batch sizes\n"
           "strauss_wnaf: for all batch sizes\n"
           "simple: multiply and sum each point individually\n");
}
35+
2136
typedef struct {
2237
/* Setup once in advance */
2338
secp256k1_context* ctx;
2439
secp256k1_scratch_space* scratch;
2540
secp256k1_scalar* scalars;
2641
secp256k1_ge* pubkeys;
42+
secp256k1_gej* pubkeys_gej;
2743
secp256k1_scalar* seckeys;
2844
secp256k1_gej* expected_output;
2945
secp256k1_ecmult_multi_func ecmult_multi;
3046

31-
/* Changes per test */
47+
/* Changes per benchmark */
3248
size_t count;
3349
int includes_g;
3450

35-
/* Changes per test iteration */
51+
/* Changes per benchmark iteration, used to pick different scalars and pubkeys
52+
* in each run. */
3653
size_t offset1;
3754
size_t offset2;
3855

39-
/* Test output. */
56+
/* Benchmark output. */
4057
secp256k1_gej* output;
4158
} bench_data;
4259

43-
static int bench_callback(secp256k1_scalar* sc, secp256k1_ge* ge, size_t idx, void* arg) {
60+
/* Hashes x into [0, POINTS) twice and store the result in offset1 and offset2. */
61+
static void hash_into_offset(bench_data* data, size_t x) {
62+
data->offset1 = (x * 0x537b7f6f + 0x8f66a481) % POINTS;
63+
data->offset2 = (x * 0x7f6f537b + 0x6a1a8f49) % POINTS;
64+
}
65+
66+
/* Check correctness of the benchmark by computing
67+
* sum(outputs) ?= (sum(scalars_gen) + sum(seckeys)*sum(scalars))*G */
68+
static void bench_ecmult_teardown_helper(bench_data* data, size_t* seckey_offset, size_t* scalar_offset, size_t* scalar_gen_offset, int iters) {
69+
int i;
70+
secp256k1_gej sum_output, tmp;
71+
secp256k1_scalar sum_scalars;
72+
73+
secp256k1_gej_set_infinity(&sum_output);
74+
secp256k1_scalar_clear(&sum_scalars);
75+
for (i = 0; i < iters; ++i) {
76+
secp256k1_gej_add_var(&sum_output, &sum_output, &data->output[i], NULL);
77+
if (scalar_gen_offset != NULL) {
78+
secp256k1_scalar_add(&sum_scalars, &sum_scalars, &data->scalars[(*scalar_gen_offset+i) % POINTS]);
79+
}
80+
if (seckey_offset != NULL) {
81+
secp256k1_scalar s = data->seckeys[(*seckey_offset+i) % POINTS];
82+
secp256k1_scalar_mul(&s, &s, &data->scalars[(*scalar_offset+i) % POINTS]);
83+
secp256k1_scalar_add(&sum_scalars, &sum_scalars, &s);
84+
}
85+
}
86+
secp256k1_ecmult_gen(&data->ctx->ecmult_gen_ctx, &tmp, &sum_scalars);
87+
secp256k1_gej_neg(&tmp, &tmp);
88+
secp256k1_gej_add_var(&tmp, &tmp, &sum_output, NULL);
89+
CHECK(secp256k1_gej_is_infinity(&tmp));
90+
}
91+
92+
static void bench_ecmult_setup(void* arg) {
93+
bench_data* data = (bench_data*)arg;
94+
/* Re-randomize offset to ensure that we're using different scalars and
95+
* group elements in each run. */
96+
hash_into_offset(data, data->offset1);
97+
}
98+
99+
static void bench_ecmult_gen(void* arg, int iters) {
100+
bench_data* data = (bench_data*)arg;
101+
int i;
102+
103+
for (i = 0; i < iters; ++i) {
104+
secp256k1_ecmult_gen(&data->ctx->ecmult_gen_ctx, &data->output[i], &data->scalars[(data->offset1+i) % POINTS]);
105+
}
106+
}
107+
108+
static void bench_ecmult_gen_teardown(void* arg, int iters) {
109+
bench_data* data = (bench_data*)arg;
110+
bench_ecmult_teardown_helper(data, NULL, NULL, &data->offset1, iters);
111+
}
112+
113+
static void bench_ecmult_const(void* arg, int iters) {
114+
bench_data* data = (bench_data*)arg;
115+
int i;
116+
117+
for (i = 0; i < iters; ++i) {
118+
secp256k1_ecmult_const(&data->output[i], &data->pubkeys[(data->offset1+i) % POINTS], &data->scalars[(data->offset2+i) % POINTS], 256);
119+
}
120+
}
121+
122+
static void bench_ecmult_const_teardown(void* arg, int iters) {
123+
bench_data* data = (bench_data*)arg;
124+
bench_ecmult_teardown_helper(data, &data->offset1, &data->offset2, NULL, iters);
125+
}
126+
127+
static void bench_ecmult_1(void* arg, int iters) {
128+
bench_data* data = (bench_data*)arg;
129+
int i;
130+
131+
for (i = 0; i < iters; ++i) {
132+
secp256k1_ecmult(&data->ctx->ecmult_ctx, &data->output[i], &data->pubkeys_gej[(data->offset1+i) % POINTS], &data->scalars[(data->offset2+i) % POINTS], NULL);
133+
}
134+
}
135+
136+
static void bench_ecmult_1_teardown(void* arg, int iters) {
137+
bench_data* data = (bench_data*)arg;
138+
bench_ecmult_teardown_helper(data, &data->offset1, &data->offset2, NULL, iters);
139+
}
140+
141+
static void bench_ecmult_1g(void* arg, int iters) {
142+
bench_data* data = (bench_data*)arg;
143+
secp256k1_scalar zero;
144+
int i;
145+
146+
secp256k1_scalar_set_int(&zero, 0);
147+
for (i = 0; i < iters; ++i) {
148+
secp256k1_ecmult(&data->ctx->ecmult_ctx, &data->output[i], NULL, &zero, &data->scalars[(data->offset1+i) % POINTS]);
149+
}
150+
}
151+
152+
static void bench_ecmult_1g_teardown(void* arg, int iters) {
153+
bench_data* data = (bench_data*)arg;
154+
bench_ecmult_teardown_helper(data, NULL, NULL, &data->offset1, iters);
155+
}
156+
157+
static void bench_ecmult_2g(void* arg, int iters) {
158+
bench_data* data = (bench_data*)arg;
159+
int i;
160+
161+
for (i = 0; i < iters/2; ++i) {
162+
secp256k1_ecmult(&data->ctx->ecmult_ctx, &data->output[i], &data->pubkeys_gej[(data->offset1+i) % POINTS], &data->scalars[(data->offset2+i) % POINTS], &data->scalars[(data->offset1+i) % POINTS]);
163+
}
164+
}
165+
166+
static void bench_ecmult_2g_teardown(void* arg, int iters) {
167+
bench_data* data = (bench_data*)arg;
168+
bench_ecmult_teardown_helper(data, &data->offset1, &data->offset2, &data->offset1, iters/2);
169+
}
170+
171+
/* Run the single-multiplication benchmarks (ecmult_gen, ecmult_const and the
 * ecmult variants) in a fixed order. Each one uses bench_ecmult_setup, its own
 * teardown check, and a count argument of 10 to run_benchmark. */
static void run_ecmult_bench(bench_data* data, int iters) {
    /* One row per benchmark. iters_factor scales the iteration count handed
     * to run_benchmark. */
    static const struct {
        const char* label;
        void (*run)(void*, int);
        void (*teardown)(void*, int);
        int iters_factor;
    } benches[] = {
        {"ecmult_gen", bench_ecmult_gen, bench_ecmult_gen_teardown, 1},
        {"ecmult_const", bench_ecmult_const, bench_ecmult_const_teardown, 1},
        /* ecmult with non generator point */
        {"ecmult 1", bench_ecmult_1, bench_ecmult_1_teardown, 1},
        /* ecmult with generator point */
        {"ecmult 1g", bench_ecmult_1g, bench_ecmult_1g_teardown, 1},
        /* ecmult with generator and non-generator point. The reported time is per point. */
        {"ecmult 2g", bench_ecmult_2g, bench_ecmult_2g_teardown, 2}
    };
    char str[32];
    size_t n;

    for (n = 0; n < sizeof(benches) / sizeof(benches[0]); ++n) {
        /* The label is copied into a local buffer before being passed on. */
        sprintf(str, "%s", benches[n].label);
        run_benchmark(str, benches[n].run, bench_ecmult_setup, benches[n].teardown, data, 10, benches[n].iters_factor * iters);
    }
}
187+
188+
static int bench_ecmult_multi_callback(secp256k1_scalar* sc, secp256k1_ge* ge, size_t idx, void* arg) {
44189
bench_data* data = (bench_data*)arg;
45190
if (data->includes_g) ++idx;
46191
if (idx == 0) {
@@ -53,7 +198,7 @@ static int bench_callback(secp256k1_scalar* sc, secp256k1_ge* ge, size_t idx, vo
53198
return 1;
54199
}
55200

56-
static void bench_ecmult(void* arg, int iters) {
201+
static void bench_ecmult_multi(void* arg, int iters) {
57202
bench_data* data = (bench_data*)arg;
58203

59204
int includes_g = data->includes_g;
@@ -62,19 +207,18 @@ static void bench_ecmult(void* arg, int iters) {
62207
iters = iters / data->count;
63208

64209
for (iter = 0; iter < iters; ++iter) {
65-
data->ecmult_multi(&data->ctx->error_callback, &data->ctx->ecmult_ctx, data->scratch, &data->output[iter], data->includes_g ? &data->scalars[data->offset1] : NULL, bench_callback, arg, count - includes_g);
210+
data->ecmult_multi(&data->ctx->error_callback, &data->ctx->ecmult_ctx, data->scratch, &data->output[iter], data->includes_g ? &data->scalars[data->offset1] : NULL, bench_ecmult_multi_callback, arg, count - includes_g);
66211
data->offset1 = (data->offset1 + count) % POINTS;
67212
data->offset2 = (data->offset2 + count - 1) % POINTS;
68213
}
69214
}
70215

71-
static void bench_ecmult_setup(void* arg) {
216+
static void bench_ecmult_multi_setup(void* arg) {
72217
bench_data* data = (bench_data*)arg;
73-
data->offset1 = (data->count * 0x537b7f6f + 0x8f66a481) % POINTS;
74-
data->offset2 = (data->count * 0x7f6f537b + 0x6a1a8f49) % POINTS;
218+
hash_into_offset(data, data->count);
75219
}
76220

77-
static void bench_ecmult_teardown(void* arg, int iters) {
221+
static void bench_ecmult_multi_teardown(void* arg, int iters) {
78222
bench_data* data = (bench_data*)arg;
79223
int iter;
80224
iters = iters / data->count;
@@ -88,7 +232,7 @@ static void bench_ecmult_teardown(void* arg, int iters) {
88232

89233
static void generate_scalar(uint32_t num, secp256k1_scalar* scalar) {
90234
secp256k1_sha256 sha256;
91-
unsigned char c[11] = {'e', 'c', 'm', 'u', 'l', 't', 0, 0, 0, 0};
235+
unsigned char c[10] = {'e', 'c', 'm', 'u', 'l', 't', 0, 0, 0, 0};
92236
unsigned char buf[32];
93237
int overflow = 0;
94238
c[6] = num;
@@ -102,7 +246,7 @@ static void generate_scalar(uint32_t num, secp256k1_scalar* scalar) {
102246
CHECK(!overflow);
103247
}
104248

105-
static void run_test(bench_data* data, size_t count, int includes_g, int num_iters) {
249+
static void run_ecmult_multi_bench(bench_data* data, size_t count, int includes_g, int num_iters) {
106250
char str[32];
107251
static const secp256k1_scalar zero = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0);
108252
size_t iters = 1 + num_iters / count;
@@ -112,8 +256,7 @@ static void run_test(bench_data* data, size_t count, int includes_g, int num_ite
112256
data->includes_g = includes_g;
113257

114258
/* Compute (the negation of) the expected results directly. */
115-
data->offset1 = (data->count * 0x537b7f6f + 0x8f66a481) % POINTS;
116-
data->offset2 = (data->count * 0x7f6f537b + 0x6a1a8f49) % POINTS;
259+
hash_into_offset(data, data->count);
117260
for (iter = 0; iter < iters; ++iter) {
118261
secp256k1_scalar tmp;
119262
secp256k1_scalar total = data->scalars[(data->offset1++) % POINTS];
@@ -127,65 +270,75 @@ static void run_test(bench_data* data, size_t count, int includes_g, int num_ite
127270
}
128271

129272
/* Run the benchmark. */
130-
sprintf(str, includes_g ? "ecmult_%ig" : "ecmult_%i", (int)count);
131-
run_benchmark(str, bench_ecmult, bench_ecmult_setup, bench_ecmult_teardown, data, 10, count * iters);
273+
sprintf(str, includes_g ? "ecmult_multi %ig" : "ecmult_multi %i", (int)count);
274+
run_benchmark(str, bench_ecmult_multi, bench_ecmult_multi_setup, bench_ecmult_multi_teardown, data, 10, count * iters);
132275
}
133276

134277
int main(int argc, char **argv) {
135278
bench_data data;
136279
int i, p;
137-
secp256k1_gej* pubkeys_gej;
138280
size_t scratch_size;
139281

140282
int iters = get_iters(10000);
141283

142-
data.ctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN | SECP256K1_CONTEXT_VERIFY);
143-
scratch_size = secp256k1_strauss_scratch_size(POINTS) + STRAUSS_SCRATCH_OBJECTS*16;
144-
data.scratch = secp256k1_scratch_space_create(data.ctx, scratch_size);
145284
data.ecmult_multi = secp256k1_ecmult_multi_var;
146285

147286
if (argc > 1) {
148-
if(have_flag(argc, argv, "pippenger_wnaf")) {
287+
if(have_flag(argc, argv, "-h")
288+
|| have_flag(argc, argv, "--help")
289+
|| have_flag(argc, argv, "help")) {
290+
help(argv);
291+
return 1;
292+
} else if(have_flag(argc, argv, "pippenger_wnaf")) {
149293
printf("Using pippenger_wnaf:\n");
150294
data.ecmult_multi = secp256k1_ecmult_pippenger_batch_single;
151295
} else if(have_flag(argc, argv, "strauss_wnaf")) {
152296
printf("Using strauss_wnaf:\n");
153297
data.ecmult_multi = secp256k1_ecmult_strauss_batch_single;
154298
} else if(have_flag(argc, argv, "simple")) {
155299
printf("Using simple algorithm:\n");
156-
data.ecmult_multi = secp256k1_ecmult_multi_var;
157-
secp256k1_scratch_space_destroy(data.ctx, data.scratch);
158-
data.scratch = NULL;
159300
} else {
160-
fprintf(stderr, "%s: unrecognized argument '%s'.\n", argv[0], argv[1]);
161-
fprintf(stderr, "Use 'pippenger_wnaf', 'strauss_wnaf', 'simple' or no argument to benchmark a combined algorithm.\n");
301+
fprintf(stderr, "%s: unrecognized argument '%s'.\n\n", argv[0], argv[1]);
302+
help(argv);
162303
return 1;
163304
}
164305
}
165306

307+
data.ctx = secp256k1_context_create(SECP256K1_CONTEXT_SIGN | SECP256K1_CONTEXT_VERIFY);
308+
scratch_size = secp256k1_strauss_scratch_size(POINTS) + STRAUSS_SCRATCH_OBJECTS*16;
309+
if (!have_flag(argc, argv, "simple")) {
310+
data.scratch = secp256k1_scratch_space_create(data.ctx, scratch_size);
311+
} else {
312+
data.scratch = NULL;
313+
}
314+
166315
/* Allocate stuff */
167316
data.scalars = malloc(sizeof(secp256k1_scalar) * POINTS);
168317
data.seckeys = malloc(sizeof(secp256k1_scalar) * POINTS);
169318
data.pubkeys = malloc(sizeof(secp256k1_ge) * POINTS);
319+
data.pubkeys_gej = malloc(sizeof(secp256k1_gej) * POINTS);
170320
data.expected_output = malloc(sizeof(secp256k1_gej) * (iters + 1));
171321
data.output = malloc(sizeof(secp256k1_gej) * (iters + 1));
172322

173323
/* Generate a set of scalars, and private/public keypairs. */
174-
pubkeys_gej = malloc(sizeof(secp256k1_gej) * POINTS);
175-
secp256k1_gej_set_ge(&pubkeys_gej[0], &secp256k1_ge_const_g);
324+
secp256k1_gej_set_ge(&data.pubkeys_gej[0], &secp256k1_ge_const_g);
176325
secp256k1_scalar_set_int(&data.seckeys[0], 1);
177326
for (i = 0; i < POINTS; ++i) {
178327
generate_scalar(i, &data.scalars[i]);
179328
if (i) {
180-
secp256k1_gej_double_var(&pubkeys_gej[i], &pubkeys_gej[i - 1], NULL);
329+
secp256k1_gej_double_var(&data.pubkeys_gej[i], &data.pubkeys_gej[i - 1], NULL);
181330
secp256k1_scalar_add(&data.seckeys[i], &data.seckeys[i - 1], &data.seckeys[i - 1]);
182331
}
183332
}
184-
secp256k1_ge_set_all_gej_var(data.pubkeys, pubkeys_gej, POINTS);
185-
free(pubkeys_gej);
333+
secp256k1_ge_set_all_gej_var(data.pubkeys, data.pubkeys_gej, POINTS);
334+
335+
336+
/* Initialize offset1 and offset2 */
337+
hash_into_offset(&data, 0);
338+
run_ecmult_bench(&data, iters);
186339

187340
for (i = 1; i <= 8; ++i) {
188-
run_test(&data, i, 1, iters);
341+
run_ecmult_multi_bench(&data, i, 1, iters);
189342
}
190343

191344
/* This is disabled with low count of iterations because the loop runs 77 times even with iters=1
@@ -194,7 +347,7 @@ int main(int argc, char **argv) {
194347
if (iters > 2) {
195348
for (p = 0; p <= 11; ++p) {
196349
for (i = 9; i <= 16; ++i) {
197-
run_test(&data, i << p, 1, iters);
350+
run_ecmult_multi_bench(&data, i << p, 1, iters);
198351
}
199352
}
200353
}
@@ -205,6 +358,7 @@ int main(int argc, char **argv) {
205358
secp256k1_context_destroy(data.ctx);
206359
free(data.scalars);
207360
free(data.pubkeys);
361+
free(data.pubkeys_gej);
208362
free(data.seckeys);
209363
free(data.output);
210364
free(data.expected_output);

0 commit comments

Comments
 (0)