Skip to content

Commit ebc1af7

Browse files
peterdettman authored and sipa committed
Optimization: track f,g limb count and pass to new variable-time update_fg_var
The magnitude of the f and g variables generally goes down as the algorithm progresses. Make use of this by keeping track of how many limbs are used, and when the number becomes small enough, use it to reduce the complexity of arithmetic on them. Refactored by: Pieter Wuille <[email protected]>
1 parent b306935 commit ebc1af7

File tree

2 files changed

+207
-109
lines changed

2 files changed

+207
-109
lines changed

src/modinv32_impl.h

+104-55
Original file line numberDiff line numberDiff line change
@@ -24,25 +24,25 @@
2424
static const secp256k1_modinv32_signed30 SECP256K1_SIGNED30_ONE = {{1}};
2525

2626
/* Compute a*factor and put it in r. All but the top limb in r will be in range [0,2^30). */
27-
static void secp256k1_modinv32_mul_30(secp256k1_modinv32_signed30 *r, const secp256k1_modinv32_signed30 *a, int32_t factor) {
27+
static void secp256k1_modinv32_mul_30(secp256k1_modinv32_signed30 *r, const secp256k1_modinv32_signed30 *a, int alen, int32_t factor) {
2828
const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
2929
int64_t c = 0;
3030
int i;
3131
for (i = 0; i < 8; ++i) {
32-
c += (int64_t)a->v[i] * factor;
32+
if (i < alen) c += (int64_t)a->v[i] * factor;
3333
r->v[i] = (int32_t)c & M30; c >>= 30;
3434
}
35-
c += (int64_t)a->v[8] * factor;
35+
if (8 < alen) c += (int64_t)a->v[8] * factor;
3636
VERIFY_CHECK(c == (int32_t)c);
3737
r->v[8] = (int32_t)c;
3838
}
3939

40-
/* Return -1 for a<b*factor, 0 for a==b*factor, 1 for a>b*factor. */
41-
static int secp256k1_modinv32_mul_cmp_30(const secp256k1_modinv32_signed30 *a, const secp256k1_modinv32_signed30 *b, int32_t factor) {
40+
/* Return -1 for a<b*factor, 0 for a==b*factor, 1 for a>b*factor. A consists of alen limbs; b has 9. */
41+
static int secp256k1_modinv32_mul_cmp_30(const secp256k1_modinv32_signed30 *a, int alen, const secp256k1_modinv32_signed30 *b, int32_t factor) {
4242
int i;
4343
secp256k1_modinv32_signed30 am, bm;
44-
secp256k1_modinv32_mul_30(&am, a, 1); /* Normalize all but the top limb of a. */
45-
secp256k1_modinv32_mul_30(&bm, b, factor);
44+
secp256k1_modinv32_mul_30(&am, a, alen, 1); /* Normalize all but the top limb of a. */
45+
secp256k1_modinv32_mul_30(&bm, b, 9, factor);
4646
for (i = 0; i < 8; ++i) {
4747
/* Verify that all but the top limb of a and b are normalized. */
4848
VERIFY_CHECK(am.v[i] >> 30 == 0);
@@ -73,8 +73,8 @@ static void secp256k1_modinv32_normalize_30(secp256k1_modinv32_signed30 *r, int3
7373
VERIFY_CHECK(r->v[i] >= -M30);
7474
VERIFY_CHECK(r->v[i] <= M30);
7575
}
76-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, &modinfo->modulus, -2) > 0); /* r > -2*modulus */
77-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, &modinfo->modulus, 1) < 0); /* r < modulus */
76+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, -2) > 0); /* r > -2*modulus */
77+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
7878
#endif
7979

8080
/* In a first step, add the modulus if the input is negative, and then negate if requested.
@@ -154,8 +154,8 @@ static void secp256k1_modinv32_normalize_30(secp256k1_modinv32_signed30 *r, int3
154154
VERIFY_CHECK(r6 >> 30 == 0);
155155
VERIFY_CHECK(r7 >> 30 == 0);
156156
VERIFY_CHECK(r8 >> 30 == 0);
157-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, &modinfo->modulus, 0) >= 0); /* r >= 0 */
158-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, &modinfo->modulus, 1) < 0); /* r < modulus */
157+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 0) >= 0); /* r >= 0 */
158+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
159159
#endif
160160
}
161161

@@ -331,10 +331,10 @@ static void secp256k1_modinv32_update_de_30(secp256k1_modinv32_signed30 *d, secp
331331
int64_t cd, ce;
332332
int i;
333333
#ifdef VERIFY
334-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
335-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, &modinfo->modulus, 1) < 0); /* d < modulus */
336-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
337-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, &modinfo->modulus, 1) < 0); /* e < modulus */
334+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
335+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */
336+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
337+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */
338338
VERIFY_CHECK((labs(u) + labs(v)) >= 0); /* |u|+|v| doesn't overflow */
339339
VERIFY_CHECK((labs(q) + labs(r)) >= 0); /* |q|+|r| doesn't overflow */
340340
VERIFY_CHECK((labs(u) + labs(v)) <= M30 + 1); /* |u|+|v| <= 2^30 */
@@ -375,10 +375,10 @@ static void secp256k1_modinv32_update_de_30(secp256k1_modinv32_signed30 *d, secp
375375
d->v[8] = (int32_t)cd;
376376
e->v[8] = (int32_t)ce;
377377
#ifdef VERIFY
378-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
379-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, &modinfo->modulus, 1) < 0); /* d < modulus */
380-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
381-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, &modinfo->modulus, 1) < 0); /* e < modulus */
378+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
379+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */
380+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
381+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */
382382
#endif
383383
}
384384

@@ -415,6 +415,42 @@ static void secp256k1_modinv32_update_fg_30(secp256k1_modinv32_signed30 *f, secp
415415
g->v[8] = (int32_t)cg;
416416
}
417417

418+
/* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
419+
*
420+
* Version that operates on a variable number of limbs in f and g.
421+
*
422+
* This implements the update_fg function from the explanation in modinv64_impl.h.
423+
*/
424+
static void secp256k1_modinv32_update_fg_30_var(int len, secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t) {
425+
const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
426+
const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
427+
int32_t fi, gi;
428+
int64_t cf, cg;
429+
int i;
430+
VERIFY_CHECK(len > 0);
431+
/* Start computing t*[f,g]. */
432+
fi = f->v[0];
433+
gi = g->v[0];
434+
cf = (int64_t)u * fi + (int64_t)v * gi;
435+
cg = (int64_t)q * fi + (int64_t)r * gi;
436+
/* Verify that the bottom 30 bits of the result are zero, and then throw them away. */
437+
VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30;
438+
VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30;
439+
/* Now iteratively compute limb i=1..len-1 of t*[f,g], and store them in output limb i-1 (shifting
440+
* down by 30 bits). */
441+
for (i = 1; i < len; ++i) {
442+
fi = f->v[i];
443+
gi = g->v[i];
444+
cf += (int64_t)u * fi + (int64_t)v * gi;
445+
cg += (int64_t)q * fi + (int64_t)r * gi;
446+
f->v[i - 1] = (int32_t)cf & M30; cf >>= 30;
447+
g->v[i - 1] = (int32_t)cg & M30; cg >>= 30;
448+
}
449+
/* What remains is limb (len) of t*[f,g]; store it as output limb (len-1). */
450+
f->v[len - 1] = (int32_t)cf;
451+
g->v[len - 1] = (int32_t)cg;
452+
}
453+
418454
/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */
419455
static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) {
420456
/* Start with d=0, e=1, f=modulus, g=x, eta=-1. */
@@ -434,17 +470,17 @@ static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_m
434470
secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
435471
/* Update f,g using that transition matrix. */
436472
#ifdef VERIFY
437-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, &modinfo->modulus, -1) > 0); /* f > -modulus */
438-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, &modinfo->modulus, 1) <= 0); /* f <= modulus */
439-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, &modinfo->modulus, -1) > 0); /* g > -modulus */
440-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, &modinfo->modulus, 1) < 0); /* g < modulus */
473+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
474+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
475+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
476+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */
441477
#endif
442478
secp256k1_modinv32_update_fg_30(&f, &g, &t);
443479
#ifdef VERIFY
444-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, &modinfo->modulus, -1) > 0); /* f > -modulus */
445-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, &modinfo->modulus, 1) <= 0); /* f <= modulus */
446-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, &modinfo->modulus, -1) > 0); /* g > -modulus */
447-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, &modinfo->modulus, 1) < 0); /* g < modulus */
480+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
481+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
482+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
483+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */
448484
#endif
449485
}
450486

@@ -453,14 +489,14 @@ static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_m
453489
* values i.e. +/- 1, and d now contains +/- the modular inverse. */
454490
#ifdef VERIFY
455491
/* g == 0 */
456-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, &SECP256K1_SIGNED30_ONE, 0) == 0);
492+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &SECP256K1_SIGNED30_ONE, 0) == 0);
457493
/* |f| == 1, or (x == 0 and d == 0 and |f|=modulus) */
458-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
459-
secp256k1_modinv32_mul_cmp_30(&f, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
460-
(secp256k1_modinv32_mul_cmp_30(x, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
461-
secp256k1_modinv32_mul_cmp_30(&d, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
462-
(secp256k1_modinv32_mul_cmp_30(&f, &modinfo->modulus, 1) == 0 ||
463-
secp256k1_modinv32_mul_cmp_30(&f, &modinfo->modulus, -1) == 0)));
494+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
495+
secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
496+
(secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
497+
secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
498+
(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) == 0 ||
499+
secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) == 0)));
464500
#endif
465501

466502
/* Optionally negate d, normalize to [0,modulus), and return it. */
@@ -478,9 +514,9 @@ static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256
478514
#ifdef VERIFY
479515
int i = 0;
480516
#endif
481-
int j;
517+
int j, len = 9;
482518
int32_t eta = -1;
483-
int32_t cond;
519+
int32_t cond, fn, gn;
484520

485521
/* Do iterations of 30 divsteps each until g=0. */
486522
while (1) {
@@ -491,47 +527,60 @@ static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256
491527
secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
492528
/* Update f,g using that transition matrix. */
493529
#ifdef VERIFY
494-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, &modinfo->modulus, -1) > 0); /* f > -modulus */
495-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, &modinfo->modulus, 1) <= 0); /* f <= modulus */
496-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, &modinfo->modulus, -1) > 0); /* g > -modulus */
497-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, &modinfo->modulus, 1) < 0); /* g < modulus */
530+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
531+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
532+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
533+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
498534
#endif
499-
secp256k1_modinv32_update_fg_30(&f, &g, &t);
535+
secp256k1_modinv32_update_fg_30_var(len, &f, &g, &t);
500536
/* If the bottom limb of g is 0, there is a chance g=0. */
501537
if (g.v[0] == 0) {
502538
cond = 0;
503-
/* Check if the other limbs are also 0. */
504-
for (j = 1; j < 9; ++j) {
539+
/* Check if all other limbs are also 0. */
540+
for (j = 1; j < len; ++j) {
505541
cond |= g.v[j];
506542
}
507543
/* If so, we're done. */
508544
if (cond == 0) break;
509545
}
546+
547+
/* Determine if len>1 and limb (len-1) of both f and g is 0 or -1. */
548+
fn = f.v[len - 1];
549+
gn = g.v[len - 1];
550+
cond = ((int32_t)len - 2) >> 31;
551+
cond |= fn ^ (fn >> 31);
552+
cond |= gn ^ (gn >> 31);
553+
/* If so, reduce length, propagating the sign of f and g's top limb into the one below. */
554+
if (cond == 0) {
555+
f.v[len - 2] |= (uint32_t)fn << 30;
556+
g.v[len - 2] |= (uint32_t)gn << 30;
557+
--len;
558+
}
510559
#ifdef VERIFY
511560
VERIFY_CHECK(++i < 25); /* We should never need more than 25*30 = 750 divsteps */
512-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, &modinfo->modulus, -1) > 0); /* f > -modulus */
513-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, &modinfo->modulus, 1) <= 0); /* f <= modulus */
514-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, &modinfo->modulus, -1) > 0); /* g > -modulus */
515-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, &modinfo->modulus, 1) < 0); /* g < modulus */
561+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
562+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
563+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
564+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
516565
#endif
517566
}
518567

519568
/* At this point g is 0 and (if g was not originally 0) f must now equal +/- GCD of
520569
* the initial f, g values i.e. +/- 1, and d now contains +/- the modular inverse. */
521570
#ifdef VERIFY
522571
/* g == 0 */
523-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, &SECP256K1_SIGNED30_ONE, 0) == 0);
572+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &SECP256K1_SIGNED30_ONE, 0) == 0);
524573
/* |f| == 1, or (x == 0 and d == 0 and |f|=modulus) */
525-
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
526-
secp256k1_modinv32_mul_cmp_30(&f, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
527-
(secp256k1_modinv32_mul_cmp_30(x, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
528-
secp256k1_modinv32_mul_cmp_30(&d, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
529-
(secp256k1_modinv32_mul_cmp_30(&f, &modinfo->modulus, 1) == 0 ||
530-
secp256k1_modinv32_mul_cmp_30(&f, &modinfo->modulus, -1) == 0)));
574+
VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
575+
secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
576+
(secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
577+
secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
578+
(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) == 0 ||
579+
secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) == 0)));
531580
#endif
532581

533582
/* Optionally negate d, normalize to [0,modulus), and return it. */
534-
secp256k1_modinv32_normalize_30(&d, f.v[8], modinfo);
583+
secp256k1_modinv32_normalize_30(&d, f.v[len - 1], modinfo);
535584
*x = d;
536585
}
537586

0 commit comments

Comments
 (0)