Skip to content

Commit 9164a1b

Browse files
committed
Optimization: special-case zero modulus limbs in modinv64
Both the field and scalar moduli can be written in signed30/signed62 notation with one or more zero limbs. Exploit this in the update_de function by skipping the wide multiplications for any modulus limb that is zero. This doesn't appear to be a win in the 32-bit implementation, so it is only done in the 64-bit one.
1 parent 1f233b3 commit 9164a1b

File tree

1 file changed

+12
-6
lines changed

1 file changed

+12
-6
lines changed

src/modinv64_impl.h

+12-6
Original file line numberDiff line numberDiff line change
@@ -338,22 +338,28 @@ static void secp256k1_modinv64_update_de_62(secp256k1_modinv64_signed62 *d, secp
338338
/* Compute limb 1 of t*[d,e]+modulus*[md,me], and store it as output limb 0 (= down shift). */
339339
cd += (int128_t)u * d1 + (int128_t)v * e1;
340340
ce += (int128_t)q * d1 + (int128_t)r * e1;
341-
cd += (int128_t)modinfo->modulus.v[1] * md;
342-
ce += (int128_t)modinfo->modulus.v[1] * me;
341+
if (modinfo->modulus.v[1]) { /* Optimize for the case where limb of modulus is zero. */
342+
cd += (int128_t)modinfo->modulus.v[1] * md;
343+
ce += (int128_t)modinfo->modulus.v[1] * me;
344+
}
343345
d->v[0] = (int64_t)cd & M62; cd >>= 62;
344346
e->v[0] = (int64_t)ce & M62; ce >>= 62;
345347
/* Compute limb 2 of t*[d,e]+modulus*[md,me], and store it as output limb 1. */
346348
cd += (int128_t)u * d2 + (int128_t)v * e2;
347349
ce += (int128_t)q * d2 + (int128_t)r * e2;
348-
cd += (int128_t)modinfo->modulus.v[2] * md;
349-
ce += (int128_t)modinfo->modulus.v[2] * me;
350+
if (modinfo->modulus.v[2]) { /* Optimize for the case where limb of modulus is zero. */
351+
cd += (int128_t)modinfo->modulus.v[2] * md;
352+
ce += (int128_t)modinfo->modulus.v[2] * me;
353+
}
350354
d->v[1] = (int64_t)cd & M62; cd >>= 62;
351355
e->v[1] = (int64_t)ce & M62; ce >>= 62;
352356
/* Compute limb 3 of t*[d,e]+modulus*[md,me], and store it as output limb 2. */
353357
cd += (int128_t)u * d3 + (int128_t)v * e3;
354358
ce += (int128_t)q * d3 + (int128_t)r * e3;
355-
cd += (int128_t)modinfo->modulus.v[3] * md;
356-
ce += (int128_t)modinfo->modulus.v[3] * me;
359+
if (modinfo->modulus.v[3]) { /* Optimize for the case where limb of modulus is zero. */
360+
cd += (int128_t)modinfo->modulus.v[3] * md;
361+
ce += (int128_t)modinfo->modulus.v[3] * me;
362+
}
357363
d->v[2] = (int64_t)cd & M62; cd >>= 62;
358364
e->v[2] = (int64_t)ce & M62; ce >>= 62;
359365
/* Compute limb 4 of t*[d,e]+modulus*[md,me], and store it as output limb 3. */

0 commit comments

Comments
 (0)