@@ -10,6 +10,7 @@
 #include "modinv64.h"
 
 #include "util.h"
+#include "int128.h"
 
 /* This file implements modular inversion based on the paper "Fast constant-time gcd computation and
  * modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
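
For orientation, the int128.h calls used throughout this diff behave as thin wrappers over a native 128-bit type where one exists. The following is a simplified sketch inferred from the call sites below, assuming __int128 support; the real header also selects a struct-based fallback for compilers without a native 128-bit type:

    #include <stdint.h>

    typedef __int128 secp256k1_int128;

    static void secp256k1_i128_from_i64(secp256k1_int128 *r, int64_t a) { *r = a; }

    /* r = a*b, computed in 128 bits. */
    static void secp256k1_i128_mul(secp256k1_int128 *r, int64_t a, int64_t b) {
        *r = (secp256k1_int128)a * b;
    }

    /* r += a*b. */
    static void secp256k1_i128_accum_mul(secp256k1_int128 *r, int64_t a, int64_t b) {
        *r += (secp256k1_int128)a * b;
    }

    /* Arithmetic (sign-preserving) right shift by n bits. */
    static void secp256k1_i128_rshift(secp256k1_int128 *r, unsigned int n) { *r >>= n; }

    /* The low 64 bits of *a, as a signed value. */
    static int64_t secp256k1_i128_to_i64(const secp256k1_int128 *a) { return (int64_t)*a; }

    static int secp256k1_i128_eq(const secp256k1_int128 *a, const secp256k1_int128 *b) {
        return *a == *b;
    }
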
@@ -32,15 +33,17 @@ static const secp256k1_modinv64_signed62 SECP256K1_SIGNED62_ONE = {{1}};
 /* Compute a*factor and put it in r. All but the top limb in r will be in range [0,2^62). */
 static void secp256k1_modinv64_mul_62(secp256k1_modinv64_signed62 *r, const secp256k1_modinv64_signed62 *a, int alen, int64_t factor) {
     const int64_t M62 = (int64_t)(UINT64_MAX >> 2);
-    int128_t c = 0;
+    secp256k1_int128 c, d;
     int i;
+    secp256k1_i128_from_i64(&c, 0);
     for (i = 0; i < 4; ++i) {
-        if (i < alen) c += (int128_t)a->v[i] * factor;
-        r->v[i] = (int64_t)c & M62; c >>= 62;
+        if (i < alen) secp256k1_i128_accum_mul(&c, a->v[i], factor);
+        r->v[i] = secp256k1_i128_to_i64(&c) & M62; secp256k1_i128_rshift(&c, 62);
     }
-    if (4 < alen) c += (int128_t)a->v[4] * factor;
-    VERIFY_CHECK(c == (int64_t)c);
-    r->v[4] = (int64_t)c;
+    if (4 < alen) secp256k1_i128_accum_mul(&c, a->v[4], factor);
+    secp256k1_i128_from_i64(&d, secp256k1_i128_to_i64(&c));
+    VERIFY_CHECK(secp256k1_i128_eq(&c, &d));
+    r->v[4] = secp256k1_i128_to_i64(&c);
 }
 
 /* Return -1 for a<b*factor, 0 for a==b*factor, 1 for a>b*factor. A has alen limbs; b has 5. */
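
To see the limb/carry pattern in isolation: each loop iteration keeps the low 62 bits of the accumulator as an output limb and shifts the carry down, and the final overflow check (previously c == (int64_t)c) is now phrased as a round-trip through from_i64/eq. A standalone toy check, assuming __int128 support:

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
        const int64_t M62 = (int64_t)(UINT64_MAX >> 2);
        __int128 c = ((__int128)7 << 62) + 9;  /* accumulator holding 7*2^62 + 9 */
        int64_t limb = (int64_t)c & M62;       /* low 62 bits: 9 */
        c >>= 62;                              /* carry shifted down: 7 */
        assert(limb == 9 && (int64_t)c == 7);
        assert(c == (__int128)(int64_t)c);     /* top limb fits in 64 bits */
        return 0;
    }
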
@@ -307,7 +310,7 @@ static void secp256k1_modinv64_update_de_62(secp256k1_modinv64_signed62 *d, secp
     const int64_t e0 = e->v[0], e1 = e->v[1], e2 = e->v[2], e3 = e->v[3], e4 = e->v[4];
     const int64_t u = t->u, v = t->v, q = t->q, r = t->r;
     int64_t md, me, sd, se;
-    int128_t cd, ce;
+    secp256k1_int128 cd, ce;
 #ifdef VERIFY
     VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
     VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, 1) < 0);  /* d < modulus */
@@ -324,54 +327,64 @@ static void secp256k1_modinv64_update_de_62(secp256k1_modinv64_signed62 *d, secp
     md = (u & sd) + (v & se);
     me = (q & sd) + (r & se);
     /* Begin computing t*[d,e]. */
-    cd = (int128_t)u * d0 + (int128_t)v * e0;
-    ce = (int128_t)q * d0 + (int128_t)r * e0;
+    secp256k1_i128_mul(&cd, u, d0);
+    secp256k1_i128_accum_mul(&cd, v, e0);
+    secp256k1_i128_mul(&ce, q, d0);
+    secp256k1_i128_accum_mul(&ce, r, e0);
     /* Correct md,me so that t*[d,e]+modulus*[md,me] has 62 zero bottom bits. */
-    md -= (modinfo->modulus_inv62 * (uint64_t)cd + md) & M62;
-    me -= (modinfo->modulus_inv62 * (uint64_t)ce + me) & M62;
+    md -= (modinfo->modulus_inv62 * (uint64_t)secp256k1_i128_to_i64(&cd) + md) & M62;
+    me -= (modinfo->modulus_inv62 * (uint64_t)secp256k1_i128_to_i64(&ce) + me) & M62;
     /* Update the beginning of computation for t*[d,e]+modulus*[md,me] now md,me are known. */
-    cd += (int128_t)modinfo->modulus.v[0] * md;
-    ce += (int128_t)modinfo->modulus.v[0] * me;
+    secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[0], md);
+    secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[0], me);
     /* Verify that the low 62 bits of the computation are indeed zero, and then throw them away. */
-    VERIFY_CHECK(((int64_t)cd & M62) == 0); cd >>= 62;
-    VERIFY_CHECK(((int64_t)ce & M62) == 0); ce >>= 62;
+    VERIFY_CHECK((secp256k1_i128_to_i64(&cd) & M62) == 0); secp256k1_i128_rshift(&cd, 62);
+    VERIFY_CHECK((secp256k1_i128_to_i64(&ce) & M62) == 0); secp256k1_i128_rshift(&ce, 62);
     /* Compute limb 1 of t*[d,e]+modulus*[md,me], and store it as output limb 0 (= down shift). */
-    cd += (int128_t)u * d1 + (int128_t)v * e1;
-    ce += (int128_t)q * d1 + (int128_t)r * e1;
+    secp256k1_i128_accum_mul(&cd, u, d1);
+    secp256k1_i128_accum_mul(&cd, v, e1);
+    secp256k1_i128_accum_mul(&ce, q, d1);
+    secp256k1_i128_accum_mul(&ce, r, e1);
     if (modinfo->modulus.v[1]) { /* Optimize for the case where limb of modulus is zero. */
-        cd += (int128_t)modinfo->modulus.v[1] * md;
-        ce += (int128_t)modinfo->modulus.v[1] * me;
+        secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[1], md);
+        secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[1], me);
     }
-    d->v[0] = (int64_t)cd & M62; cd >>= 62;
-    e->v[0] = (int64_t)ce & M62; ce >>= 62;
+    d->v[0] = secp256k1_i128_to_i64(&cd) & M62; secp256k1_i128_rshift(&cd, 62);
+    e->v[0] = secp256k1_i128_to_i64(&ce) & M62; secp256k1_i128_rshift(&ce, 62);
     /* Compute limb 2 of t*[d,e]+modulus*[md,me], and store it as output limb 1. */
-    cd += (int128_t)u * d2 + (int128_t)v * e2;
-    ce += (int128_t)q * d2 + (int128_t)r * e2;
+    secp256k1_i128_accum_mul(&cd, u, d2);
+    secp256k1_i128_accum_mul(&cd, v, e2);
+    secp256k1_i128_accum_mul(&ce, q, d2);
+    secp256k1_i128_accum_mul(&ce, r, e2);
     if (modinfo->modulus.v[2]) { /* Optimize for the case where limb of modulus is zero. */
-        cd += (int128_t)modinfo->modulus.v[2] * md;
-        ce += (int128_t)modinfo->modulus.v[2] * me;
+        secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[2], md);
+        secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[2], me);
     }
-    d->v[1] = (int64_t)cd & M62; cd >>= 62;
-    e->v[1] = (int64_t)ce & M62; ce >>= 62;
+    d->v[1] = secp256k1_i128_to_i64(&cd) & M62; secp256k1_i128_rshift(&cd, 62);
+    e->v[1] = secp256k1_i128_to_i64(&ce) & M62; secp256k1_i128_rshift(&ce, 62);
     /* Compute limb 3 of t*[d,e]+modulus*[md,me], and store it as output limb 2. */
-    cd += (int128_t)u * d3 + (int128_t)v * e3;
-    ce += (int128_t)q * d3 + (int128_t)r * e3;
+    secp256k1_i128_accum_mul(&cd, u, d3);
+    secp256k1_i128_accum_mul(&cd, v, e3);
+    secp256k1_i128_accum_mul(&ce, q, d3);
+    secp256k1_i128_accum_mul(&ce, r, e3);
     if (modinfo->modulus.v[3]) { /* Optimize for the case where limb of modulus is zero. */
-        cd += (int128_t)modinfo->modulus.v[3] * md;
-        ce += (int128_t)modinfo->modulus.v[3] * me;
+        secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[3], md);
+        secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[3], me);
     }
-    d->v[2] = (int64_t)cd & M62; cd >>= 62;
-    e->v[2] = (int64_t)ce & M62; ce >>= 62;
+    d->v[2] = secp256k1_i128_to_i64(&cd) & M62; secp256k1_i128_rshift(&cd, 62);
+    e->v[2] = secp256k1_i128_to_i64(&ce) & M62; secp256k1_i128_rshift(&ce, 62);
     /* Compute limb 4 of t*[d,e]+modulus*[md,me], and store it as output limb 3. */
-    cd += (int128_t)u * d4 + (int128_t)v * e4;
-    ce += (int128_t)q * d4 + (int128_t)r * e4;
-    cd += (int128_t)modinfo->modulus.v[4] * md;
-    ce += (int128_t)modinfo->modulus.v[4] * me;
-    d->v[3] = (int64_t)cd & M62; cd >>= 62;
-    e->v[3] = (int64_t)ce & M62; ce >>= 62;
+    secp256k1_i128_accum_mul(&cd, u, d4);
+    secp256k1_i128_accum_mul(&cd, v, e4);
+    secp256k1_i128_accum_mul(&ce, q, d4);
+    secp256k1_i128_accum_mul(&ce, r, e4);
+    secp256k1_i128_accum_mul(&cd, modinfo->modulus.v[4], md);
+    secp256k1_i128_accum_mul(&ce, modinfo->modulus.v[4], me);
+    d->v[3] = secp256k1_i128_to_i64(&cd) & M62; secp256k1_i128_rshift(&cd, 62);
+    e->v[3] = secp256k1_i128_to_i64(&ce) & M62; secp256k1_i128_rshift(&ce, 62);
     /* What remains is limb 5 of t*[d,e]+modulus*[md,me]; store it as output limb 4. */
-    d->v[4] = (int64_t)cd;
-    e->v[4] = (int64_t)ce;
+    d->v[4] = secp256k1_i128_to_i64(&cd);
+    e->v[4] = secp256k1_i128_to_i64(&ce);
 #ifdef VERIFY
     VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
     VERIFY_CHECK(secp256k1_modinv64_mul_cmp_62(d, 5, &modinfo->modulus, 1) < 0);  /* d < modulus */
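
The md/me correction above works because modulus_inv62 is the multiplicative inverse of the modulus modulo 2^62, so subtracting (inv*cd + md) mod 2^62 from md forces cd + modulus*md to be divisible by 2^62. A toy verification of that identity; the constants here are illustrative assumptions, not library values:

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
        const uint64_t M62 = UINT64_MAX >> 2;
        uint64_t modulus = 0x2F0A1B2C3D4E5F7ULL; /* any odd value below 2^62 */
        uint64_t inv = modulus, cd = 0x123456789ABCDEFULL, md = 7;
        int i;
        /* Newton iteration: each step doubles the number of correct low bits. */
        for (i = 0; i < 5; ++i) inv *= 2 - modulus * inv;
        assert(((modulus * inv) & M62) == 1);     /* inv == modulus^-1 mod 2^62 */
        md -= (inv * cd + md) & M62;              /* the correction from update_de_62 */
        assert(((cd + modulus * md) & M62) == 0); /* bottom 62 bits are now zero */
        return 0;
    }
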
@@ -389,36 +402,46 @@ static void secp256k1_modinv64_update_fg_62(secp256k1_modinv64_signed62 *f, secp
     const int64_t f0 = f->v[0], f1 = f->v[1], f2 = f->v[2], f3 = f->v[3], f4 = f->v[4];
     const int64_t g0 = g->v[0], g1 = g->v[1], g2 = g->v[2], g3 = g->v[3], g4 = g->v[4];
     const int64_t u = t->u, v = t->v, q = t->q, r = t->r;
-    int128_t cf, cg;
+    secp256k1_int128 cf, cg;
     /* Start computing t*[f,g]. */
-    cf = (int128_t)u * f0 + (int128_t)v * g0;
-    cg = (int128_t)q * f0 + (int128_t)r * g0;
+    secp256k1_i128_mul(&cf, u, f0);
+    secp256k1_i128_accum_mul(&cf, v, g0);
+    secp256k1_i128_mul(&cg, q, f0);
+    secp256k1_i128_accum_mul(&cg, r, g0);
     /* Verify that the bottom 62 bits of the result are zero, and then throw them away. */
-    VERIFY_CHECK(((int64_t)cf & M62) == 0); cf >>= 62;
-    VERIFY_CHECK(((int64_t)cg & M62) == 0); cg >>= 62;
+    VERIFY_CHECK((secp256k1_i128_to_i64(&cf) & M62) == 0); secp256k1_i128_rshift(&cf, 62);
+    VERIFY_CHECK((secp256k1_i128_to_i64(&cg) & M62) == 0); secp256k1_i128_rshift(&cg, 62);
     /* Compute limb 1 of t*[f,g], and store it as output limb 0 (= down shift). */
-    cf += (int128_t)u * f1 + (int128_t)v * g1;
-    cg += (int128_t)q * f1 + (int128_t)r * g1;
-    f->v[0] = (int64_t)cf & M62; cf >>= 62;
-    g->v[0] = (int64_t)cg & M62; cg >>= 62;
+    secp256k1_i128_accum_mul(&cf, u, f1);
+    secp256k1_i128_accum_mul(&cf, v, g1);
+    secp256k1_i128_accum_mul(&cg, q, f1);
+    secp256k1_i128_accum_mul(&cg, r, g1);
+    f->v[0] = secp256k1_i128_to_i64(&cf) & M62; secp256k1_i128_rshift(&cf, 62);
+    g->v[0] = secp256k1_i128_to_i64(&cg) & M62; secp256k1_i128_rshift(&cg, 62);
     /* Compute limb 2 of t*[f,g], and store it as output limb 1. */
-    cf += (int128_t)u * f2 + (int128_t)v * g2;
-    cg += (int128_t)q * f2 + (int128_t)r * g2;
-    f->v[1] = (int64_t)cf & M62; cf >>= 62;
-    g->v[1] = (int64_t)cg & M62; cg >>= 62;
+    secp256k1_i128_accum_mul(&cf, u, f2);
+    secp256k1_i128_accum_mul(&cf, v, g2);
+    secp256k1_i128_accum_mul(&cg, q, f2);
+    secp256k1_i128_accum_mul(&cg, r, g2);
+    f->v[1] = secp256k1_i128_to_i64(&cf) & M62; secp256k1_i128_rshift(&cf, 62);
+    g->v[1] = secp256k1_i128_to_i64(&cg) & M62; secp256k1_i128_rshift(&cg, 62);
     /* Compute limb 3 of t*[f,g], and store it as output limb 2. */
-    cf += (int128_t)u * f3 + (int128_t)v * g3;
-    cg += (int128_t)q * f3 + (int128_t)r * g3;
-    f->v[2] = (int64_t)cf & M62; cf >>= 62;
-    g->v[2] = (int64_t)cg & M62; cg >>= 62;
+    secp256k1_i128_accum_mul(&cf, u, f3);
+    secp256k1_i128_accum_mul(&cf, v, g3);
+    secp256k1_i128_accum_mul(&cg, q, f3);
+    secp256k1_i128_accum_mul(&cg, r, g3);
+    f->v[2] = secp256k1_i128_to_i64(&cf) & M62; secp256k1_i128_rshift(&cf, 62);
+    g->v[2] = secp256k1_i128_to_i64(&cg) & M62; secp256k1_i128_rshift(&cg, 62);
     /* Compute limb 4 of t*[f,g], and store it as output limb 3. */
-    cf += (int128_t)u * f4 + (int128_t)v * g4;
-    cg += (int128_t)q * f4 + (int128_t)r * g4;
-    f->v[3] = (int64_t)cf & M62; cf >>= 62;
-    g->v[3] = (int64_t)cg & M62; cg >>= 62;
+    secp256k1_i128_accum_mul(&cf, u, f4);
+    secp256k1_i128_accum_mul(&cf, v, g4);
+    secp256k1_i128_accum_mul(&cg, q, f4);
+    secp256k1_i128_accum_mul(&cg, r, g4);
+    f->v[3] = secp256k1_i128_to_i64(&cf) & M62; secp256k1_i128_rshift(&cf, 62);
+    g->v[3] = secp256k1_i128_to_i64(&cg) & M62; secp256k1_i128_rshift(&cg, 62);
     /* What remains is limb 5 of t*[f,g]; store it as output limb 4. */
-    f->v[4] = (int64_t)cf;
-    g->v[4] = (int64_t)cg;
+    f->v[4] = secp256k1_i128_to_i64(&cf);
+    g->v[4] = secp256k1_i128_to_i64(&cg);
 }
 
 /* Compute (t/2^62) * [f, g], where t is a transition matrix for 62 divsteps.
@@ -431,30 +454,34 @@ static void secp256k1_modinv64_update_fg_62_var(int len, secp256k1_modinv64_sign
     const int64_t M62 = (int64_t)(UINT64_MAX >> 2);
     const int64_t u = t->u, v = t->v, q = t->q, r = t->r;
     int64_t fi, gi;
-    int128_t cf, cg;
+    secp256k1_int128 cf, cg;
     int i;
     VERIFY_CHECK(len > 0);
     /* Start computing t*[f,g]. */
     fi = f->v[0];
     gi = g->v[0];
-    cf = (int128_t)u * fi + (int128_t)v * gi;
-    cg = (int128_t)q * fi + (int128_t)r * gi;
+    secp256k1_i128_mul(&cf, u, fi);
+    secp256k1_i128_accum_mul(&cf, v, gi);
+    secp256k1_i128_mul(&cg, q, fi);
+    secp256k1_i128_accum_mul(&cg, r, gi);
     /* Verify that the bottom 62 bits of the result are zero, and then throw them away. */
-    VERIFY_CHECK(((int64_t)cf & M62) == 0); cf >>= 62;
-    VERIFY_CHECK(((int64_t)cg & M62) == 0); cg >>= 62;
+    VERIFY_CHECK((secp256k1_i128_to_i64(&cf) & M62) == 0); secp256k1_i128_rshift(&cf, 62);
+    VERIFY_CHECK((secp256k1_i128_to_i64(&cg) & M62) == 0); secp256k1_i128_rshift(&cg, 62);
     /* Now iteratively compute limb i=1..len of t*[f,g], and store them in output limb i-1 (shifting
      * down by 62 bits). */
     for (i = 1; i < len; ++i) {
         fi = f->v[i];
         gi = g->v[i];
-        cf += (int128_t)u * fi + (int128_t)v * gi;
-        cg += (int128_t)q * fi + (int128_t)r * gi;
-        f->v[i - 1] = (int64_t)cf & M62; cf >>= 62;
-        g->v[i - 1] = (int64_t)cg & M62; cg >>= 62;
+        secp256k1_i128_accum_mul(&cf, u, fi);
+        secp256k1_i128_accum_mul(&cf, v, gi);
+        secp256k1_i128_accum_mul(&cg, q, fi);
+        secp256k1_i128_accum_mul(&cg, r, gi);
+        f->v[i - 1] = secp256k1_i128_to_i64(&cf) & M62; secp256k1_i128_rshift(&cf, 62);
+        g->v[i - 1] = secp256k1_i128_to_i64(&cg) & M62; secp256k1_i128_rshift(&cg, 62);
     }
     /* What remains is limb (len) of t*[f,g]; store it as output limb (len-1). */
-    f->v[len - 1] = (int64_t)cf;
-    g->v[len - 1] = (int64_t)cg;
+    f->v[len - 1] = secp256k1_i128_to_i64(&cf);
+    g->v[len - 1] = secp256k1_i128_to_i64(&cg);
 }
 
 /* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */
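
One property the whole carry chain relies on, both before and after this change: the 62-bit right shift must be arithmetic (rounding toward minus infinity), because the transition-matrix entries and the f/g limbs can be negative. A toy check, again assuming __int128 support:

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
        const int64_t M62 = (int64_t)(UINT64_MAX >> 2);
        __int128 c = -((__int128)5 << 62) + 3; /* negative accumulator: -5*2^62 + 3 */
        int64_t low = (int64_t)c & M62;        /* low 62 bits are still 3 */
        c >>= 62;                              /* arithmetic shift: carry is -5 */
        assert(low == 3 && (int64_t)c == -5);
        return 0;
    }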