Skip to content

Commit b306935

Browse files
peterdettman authored and sipa committed
Optimization: use formulas instead of lookup tables for cancelling g bits
This only seems to be a win on 64-bit platforms, so only do it there. Refactored by: Pieter Wuille <[email protected]>
1 parent 9164a1b commit b306935

File tree

1 file changed

+21
-25
lines changed

1 file changed

+21
-25
lines changed

src/modinv64_impl.h

+21-25
Original file line numberDiff line numberDiff line change
@@ -220,21 +220,6 @@ static int64_t secp256k1_modinv64_divsteps_62(int64_t eta, uint64_t f0, uint64_t
220220
* Implements the divsteps_n_matrix_var function from the explanation.
221221
*/
222222
static int64_t secp256k1_modinv64_divsteps_62_var(int64_t eta, uint64_t f0, uint64_t g0, secp256k1_modinv64_trans2x2 *t) {
223-
/* inv256[i] = -(2*i+1)^-1 (mod 256) */
224-
static const uint8_t inv256[128] = {
225-
0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
226-
0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
227-
0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
228-
0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
229-
0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
230-
0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
231-
0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
232-
0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
233-
0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
234-
0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
235-
0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
236-
};
237-
238223
/* Transformation matrix; see comments in secp256k1_modinv64_divsteps_62. */
239224
uint64_t u = 1, v = 0, q = 0, r = 1;
240225
uint64_t f = f0, g = g0, m;
@@ -265,17 +250,28 @@ static int64_t secp256k1_modinv64_divsteps_62_var(int64_t eta, uint64_t f0, uint
265250
tmp = f; f = g; g = -tmp;
266251
tmp = u; u = q; q = -tmp;
267252
tmp = v; v = r; r = -tmp;
253+
/* Use a formula to cancel out up to 6 bits of g. Also, no more than i can be cancelled
254+
* out (as we'd be done before that point), and no more than eta+1 can be done as its
255+
* sign will flip again once that happens. */
256+
limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
257+
VERIFY_CHECK(limit > 0 && limit <= 62);
258+
/* m is a mask for the bottom min(limit, 6) bits. */
259+
m = (UINT64_MAX >> (64 - limit)) & 63U;
260+
/* Find what multiple of f must be added to g to cancel its bottom min(limit, 6)
261+
* bits. */
262+
w = (f * g * (f * f - 2)) & m;
263+
} else {
264+
/* In this branch, use a simpler formula that only lets us cancel up to 4 bits of g, as
265+
* eta tends to be smaller here. */
266+
limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
267+
VERIFY_CHECK(limit > 0 && limit <= 62);
268+
/* m is a mask for the bottom min(limit, 4) bits. */
269+
m = (UINT64_MAX >> (64 - limit)) & 15U;
270+
/* Find what multiple of f must be added to g to cancel its bottom min(limit, 4)
271+
* bits. */
272+
w = f + (((f + 1) & 4) << 1);
273+
w = (-w * g) & m;
268274
}
269-
/* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
270-
* than i can be cancelled out (as we'd be done before that point), and no more than eta+1
271-
* can be done as its sign will flip once that happens. */
272-
limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
273-
/* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
274-
VERIFY_CHECK(limit > 0 && limit <= 62);
275-
m = (UINT64_MAX >> (64 - limit)) & 255U;
276-
/* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
277-
w = (g * inv256[(f >> 1) & 127]) & m;
278-
/* Do so. */
279275
g += f * w;
280276
q += u * w;
281277
r += v * w;

0 commit comments

Comments
 (0)