diff --git a/doc/safegcd_implementation.md b/doc/safegcd_implementation.md
index 063aa8efae..6e832ed4a2 100644
--- a/doc/safegcd_implementation.md
+++ b/doc/safegcd_implementation.md
@@ -155,14 +155,14 @@ do one division by *2N* as a final step:
```python
def divsteps_n_matrix(delta, f, g):
"""Compute delta and transition matrix t after N divsteps (multiplied by 2^N)."""
- u, v, q, r = 1, 0, 0, 1 # start with identity matrix
+ u, v, q, r = 1< 0 and g & 1:
- delta, f, g, u, v, q, r = 1 - delta, g, (g - f) // 2, 2*q, 2*r, q-u, r-v
+ delta, f, g, u, v, q, r = 1 - delta, g, (g-f)//2, q, r, (q-u)//2, (r-v)//2
elif g & 1:
- delta, f, g, u, v, q, r = 1 + delta, f, (g + f) // 2, 2*u, 2*v, q+u, r+v
+ delta, f, g, u, v, q, r = 1 + delta, f, (g+f)//2, u, v, (q+u)//2, (r+v)//2
else:
- delta, f, g, u, v, q, r = 1 + delta, f, (g ) // 2, 2*u, 2*v, q , r
+ delta, f, g, u, v, q, r = 1 + delta, f, (g )//2, u, v, (q )//2, (r )//2
return delta, (u, v, q, r)
```
@@ -414,9 +414,9 @@ operations (and hope the C compiler isn't smart enough to turn them back into br
divstep can be written instead as (compare to the inner loop of `gcd` in section 1).
```python
- x = -f if delta > 0 else f # set x equal to (input) -f or f
+ x = f if delta > 0 else -f # set x equal to (input) f or -f
if g & 1:
- g += x # set g to (input) g-f or g+f
+ g -= x # set g to (input) g-f or g+f
if delta > 0:
delta = -delta
f += g # set f to (input) g (note that g was set to g-f before)
@@ -433,19 +433,21 @@ that *-v == (v ^ -1) - (-1)*. Thus, if we have a variable *c* that takes on valu
Using this we can write:
```python
- x = -f if delta > 0 else f
+ x = f if delta > 0 else -f
```
in constant-time form as:
```python
- c1 = (-delta) >> 63
+ # Compute c1=0 if delta>0 and c1=-1 if delta<=0.
+ c1 = (delta - 1) >> 63
# Conditionally negate f based on c1:
x = (f ^ c1) - c1
```
-To use that trick, we need a helper mask variable *c1* that resolves the condition *δ>0* to *-1*
-(if true) or *0* (if false). We compute *c1* using right shifting, which is equivalent to dividing by
+To use that trick, we need a helper mask variable *c1* that resolves the condition *δ≤0* to *-1*
+(if true) or *0* (if false). We compute *c1* by first subtracting *1*, which results in a negative value
+if and only if *δ≤0*. That is then right shifted, which is equivalent to dividing by
the specified power of *2* and rounding down (in Python, and also in C under the assumption of a typical two's complement system; see
`assumptions.h` for tests that this is the case). Right shifting by *63* thus maps all
numbers in range *[-263,0)* to *-1*, and numbers in range *[0,263)* to *0*.
@@ -454,7 +456,7 @@ Using the facts that *x&0=0* and *x&(-1)=x* (on two's complement systems again),
```python
if g & 1:
- g += x
+ g -= x
```
as:
@@ -463,7 +465,7 @@ as:
# Compute c2=0 if g is even and c2=-1 if g is odd.
c2 = -(g & 1)
# This masks out x if g is even, and leaves x be if g is odd.
- g += x & c2
+ g -= x & c2
```
Using the conditional negation trick again we can write:
@@ -478,7 +480,7 @@ as:
```python
# Compute c3=-1 if g is odd and delta>0, and 0 otherwise.
- c3 = c1 & c2
+ c3 = ~c1 & c2
# Conditionally negate delta based on c3:
delta = (delta ^ c3) - c3
```
@@ -497,45 +499,61 @@ becomes:
f += g & c3
```
-It turns out that this can be implemented more efficiently by applying the substitution
-*η=-δ*. In this representation, negating *δ* corresponds to negating *η*, and incrementing
-*δ* corresponds to decrementing *η*. This allows us to remove the negation in the *c1*
-computation:
+Putting everything together, extending all operations on f,g (with helper x) to also be applied
+to u,q (with helper y) and v,r (with helper z), gives:
```python
- # Compute a mask c1 for eta < 0, and compute the conditional negation x of f:
- c1 = eta >> 63
- x = (f ^ c1) - c1
- # Compute a mask c2 for odd g, and conditionally add x to g:
- c2 = -(g & 1)
- g += x & c2
- # Compute a mask c for (eta < 0) and odd (input) g, and use it to conditionally negate eta,
- # and add g to f:
- c3 = c1 & c2
- eta = (eta ^ c3) - c3
- f += g & c3
- # Incrementing delta corresponds to decrementing eta.
- eta -= 1
- g >>= 1
+def divsteps_n_matrix(delta, f, g):
+ """Compute delta and transition matrix t after N divsteps (multiplied by 2^N)."""
+ u, v, q, r = 1<> 63
+ # Compute x, y, z as conditionally-negated versions of f, u, v.
+ x, y, z = (f ^ c1) - c1, (u ^ c1) - c1, (v ^ c1) - c1
+ c2 = -(g & 1)
+ # Conditionally subtract x, y, z from g, q, r.
+ g, q, r = g - (x & c2), q - (y & c2), r - (z & c2)
+ c3 = ~c1 & c2
+ # Conditionally negate delta, and then increment it by 1.
+ delta = (delta ^ c3) - c3 + 1
+ # Conditionally add g, q, r to f, u, v.
+ f, u, v = f + (g & c3), u + (q & c3), v + (r & c3)
+ # Shift down g, q, r.
+ g, q, r = g >> 1, u >> 1, v >> 1
+ return delta, (u, v, q, r)
```
-A variant of divsteps with better worst-case performance can be used instead: starting *δ* at
+An interesting optimization is possible here. If we were to drop the *-c1* in the computation
+of *x*, *y*, and *z*, we are making them at worst *1* less than the correct value. That
+translates to *g*, *q*, and *r* further being at worst *1* more than the correct value.
+Now observe that at the start of every iteration of the loop, *u*, *v*, *q*, and *r* are
+all multiples of *2N-i*, with *i* the iteration number, and thus all even.
+In other words, this potential off by one in *g*, *q*, and *r* only affects their bottommost
+bit, which is shifted away at the end of the loop. Thus we can instead write:
+
+```python
+ # Compute x, y, z as conditionally complemented versions of f, u, v.
+ x, y, z = f ^ c1, u ^ c1, v ^ c1
+```
+
+Finally, a variant of divsteps with better worst-case performance can be used instead: starting *δ* at
*1/2* instead of *1*. This reduces the worst case number of iterations to *590* for *256*-bit inputs
-(which can be shown using convex hull analysis). In this case, the substitution *ζ=-(δ+1/2)*
-is used instead to keep the variable integral. Incrementing *δ* by *1* still translates to
-decrementing *ζ* by *1*, but negating *δ* now corresponds to going from *ζ* to *-(ζ+1)*, or
-*~ζ*. Doing that conditionally based on *c3* is simply:
+(which can be shown using [convex hull analysis](https://github.com/sipa/safegcd-bounds)).
+In this case, the substitution *θ=δ-1/2* is used to keep the variable integral.
+*δ≤0* then translates to *θ≤-1/2*, or because *θ* is integral, *θ<0*.
+Thus instead of `c1 = (delta - 1) >> 63` we get `c1 = theta >> 63`.
+Negating *δ* now corresponds to going from *θ* to
+*-θ-1*. Doing that conditionally based on *c3* (and then incrementing by one) gives us:
```python
...
- c3 = c1 & c2
- zeta ^= c3
+ theta = (theta ^ c3) + 1
...
```
By replacing the loop in `divsteps_n_matrix` with a variant of the divstep code above (extended to
also apply all *f* operations to *u*, *v* and all *g* operations to *q*, *r*), a constant-time version of
-`divsteps_n_matrix` is obtained. The full code will be in section 7.
+`divsteps_n_matrix` is obtained. The resulting code will be in section 7.
These bit fiddling tricks can also be used to make the conditional negations and additions in
`update_de` and `normalize` constant-time.
@@ -550,7 +568,7 @@ faster non-constant time `divsteps_n_matrix` function.
To do so, first consider yet another way of writing the inner loop of divstep operations in
`gcd` from section 1. This decomposition is also explained in the paper in section 8.2. We use
-the original version with initial *δ=1* and *η=-δ* here.
+the original version with initial *δ=1*, but make the substitution *η=-δ*.
```python
for _ in range(N):
@@ -651,8 +669,13 @@ Here we need the negated modular inverse, which is a simple transformation of th
have this 6-bit function (based on the 3-bit function above):
- *f(f2 - 2)*
-This loop, again extended to also handle *u*, *v*, *q*, and *r* alongside *f* and *g*, placed in
-`divsteps_n_matrix`, gives a significantly faster, but non-constant time version.
+This loop, extended to also handle *u*, *v*, *q*, and *r* alongside *f* and *g*, placed in
+`divsteps_n_matrix`, gives a significantly faster, but non-constant time version. In order to
+avoid intermediary values that need more than N+1 bits, it is possible to instead start
+*u* and *v* at *1* instead of at *2N*, and then shift up *u* and *v* whenever
+*g* is shifted down (instead of shifting down *q* and *r*). This is effectively making the
+algorithm operate on *i*-bits downshifted versions of all these variables. The resulting
+code is shown in the next section.
## 7. Final Python version
@@ -660,28 +683,27 @@ This loop, again extended to also handle *u*, *v*, *q*, and *r* alongside *f* an
All together we need the following functions:
- A way to compute the transition matrix in constant time, using the `divsteps_n_matrix` function
- from section 2, but with its loop replaced by a variant of the constant-time divstep from
- section 5, extended to handle *u*, *v*, *q*, *r*:
+ from section 5, modified to operate on *θ* instead of *δ*:
```python
-def divsteps_n_matrix(zeta, f, g):
- """Compute zeta and transition matrix t after N divsteps (multiplied by 2^N)."""
- u, v, q, r = 1, 0, 0, 1 # start with identity matrix
+def divsteps_n_matrix(theta, f, g):
+ """Compute theta and transition matrix t after N divsteps (multiplied by 2^N)."""
+ u, v, q, r = 1<> 63
- # Compute x, y, z as conditionally-negated versions of f, u, v.
- x, y, z = (f ^ c1) - c1, (u ^ c1) - c1, (v ^ c1) - c1
+ c1 = theta >> 63
+ # Compute x, y, z as conditionally complemented versions of f, u, v.
+ x, y, z = f ^ c1, u ^ c1, v ^ c1
c2 = -(g & 1)
- # Conditionally add x, y, z to g, q, r.
- g, q, r = g + (x & c2), q + (y & c2), r + (z & c2)
- c1 &= c2 # reusing c1 here for the earlier c3 variable
- zeta = (zeta ^ c1) - 1 # inlining the unconditional zeta decrement here
+ # Conditionally subtract x, y, z from g, q, r.
+ g, q, r = g - (x & c2), q - (y & c2), r - (z & c2)
+ c3 = ~c1 & c2
+ # Conditionally complement theta, and then increment it by 1.
+ theta = (theta ^ c3) + 1
# Conditionally add g, q, r to f, u, v.
- f, u, v = f + (g & c1), u + (q & c1), v + (r & c1)
- # When shifting g down, don't shift q, r, as we construct a transition matrix multiplied
- # by 2^N. Instead, shift f's coefficients u and v up.
- g, u, v = g >> 1, u << 1, v << 1
- return zeta, (u, v, q, r)
+ f, u, v = f + (g & c3), u + (q & c3), v + (r & c3)
+ # Shift down f, q, r.
+ g, q, r = g >> 1, u >> 1, v >> 1
+ return theta, (u, v, q, r)
```
- The functions to update *f* and *g*, and *d* and *e*, from section 2 and section 4, with the constant-time
@@ -723,15 +745,15 @@ def normalize(sign, v, M):
return v
```
-- And finally the `modinv` function too, adapted to use *ζ* instead of *δ*, and using the fixed
+- And finally the `modinv` function too, adapted to use *θ* instead of *δ*, and using the fixed
iteration count from section 5:
```python
def modinv(M, Mi, x):
"""Compute the modular inverse of x mod M, given Mi=1/M mod 2^N."""
- zeta, f, g, d, e = -1, M, x, 0, 1
+ theta, f, g, d, e = 0, M, x, 0, 1
for _ in range((590 + N - 1) // N):
- zeta, t = divsteps_n_matrix(zeta, f % 2**N, g % 2**N)
+ theta, t = divsteps_n_matrix(theta, f % 2**N, g % 2**N)
f, g = update_fg(f, g, t)
d, e = update_de(d, e, t, M, Mi)
return normalize(f, d, M)
@@ -745,7 +767,7 @@ def modinv(M, Mi, x):
NEGINV16 = [15, 5, 3, 9, 7, 13, 11, 1] # NEGINV16[n//2] = (-n)^-1 mod 16, for odd n
def divsteps_n_matrix_var(eta, f, g):
"""Compute eta and transition matrix t after N divsteps (multiplied by 2^N)."""
- u, v, q, r = 1, 0, 0, 1
+ u, v, q, r = 1, 0, 0, 1 # Start with identity matrix (not scaled; shift during run instead).
i = N
while True:
zeros = min(i, count_trailing_zeros(g))
diff --git a/src/modinv32_impl.h b/src/modinv32_impl.h
index 661c5fc04c..13317ecbfb 100644
--- a/src/modinv32_impl.h
+++ b/src/modinv32_impl.h
@@ -168,68 +168,71 @@ typedef struct {
int32_t u, v, q, r;
} secp256k1_modinv32_trans2x2;
-/* Compute the transition matrix and zeta for 30 divsteps.
+/* Compute the transition matrix and theta for 30 divsteps (where theta = delta-1/2).
*
- * Input: zeta: initial zeta
- * f0: bottom limb of initial f
- * g0: bottom limb of initial g
+ * Input: theta: initial theta
+ * f0: bottom limb of initial f
+ * g0: bottom limb of initial g
* Output: t: transition matrix
- * Return: final zeta
+ * Return: final theta
*
* Implements the divsteps_n_matrix function from the explanation.
*/
-static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
- /* u,v,q,r are the elements of the transformation matrix being built up,
- * starting with the identity matrix. Semantically they are signed integers
- * in range [-2^30,2^30], but here represented as unsigned mod 2^32. This
- * permits left shifting (which is UB for negative numbers). The range
- * being inside [-2^31,2^31) means that casting to signed works correctly.
- */
- uint32_t u = 1, v = 0, q = 0, r = 1;
- uint32_t c1, c2, f = f0, g = g0, x, y, z;
+static int32_t secp256k1_modinv32_divsteps_30(int32_t theta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
+ /* The transformation matrix being built up, scaled by 2^30. */
+ int32_t u = (int32_t)1 << 30, v = 0, q = 0, r = (int32_t)1 << 30, y, z;
+ uint32_t c1, c2, f = f0, g = g0, x;
int i;
for (i = 0; i < 30; ++i) {
- VERIFY_CHECK((f & 1) == 1); /* f must always be odd */
- VERIFY_CHECK((u * f0 + v * g0) == f << i);
- VERIFY_CHECK((q * f0 + r * g0) == g << i);
- /* Compute conditional masks for (zeta < 0) and for (g & 1). */
- c1 = zeta >> 31;
+ /* f must always be odd */
+ VERIFY_CHECK((f & 1) == 1);
+ /* Minimum trailing zeros count for matrix elements decreases in each iteration */
+ VERIFY_CHECK(!((u | v | q | r) & (0xFFFFFFFFULL >> (i + 2))));
+ /* Applying the matrix so far to the initial f,g gives current f,g. */
+ VERIFY_CHECK((u >> (30 - i)) * f0 + (v >> (30 - i)) * g0 == f << i);
+ VERIFY_CHECK((q >> (30 - i)) * f0 + (r >> (30 - i)) * g0 == g << i);
+ /* Compute conditional masks for (theta < 0) and for (g & 1). */
+ c1 = theta >> 31;
c2 = -(g & 1);
- /* Compute x,y,z, conditionally negated versions of f,u,v. */
- x = (f ^ c1) - c1;
- y = (u ^ c1) - c1;
- z = (v ^ c1) - c1;
- /* Conditionally add x,y,z to g,q,r. */
- g += x & c2;
- q += y & c2;
- r += z & c2;
- /* In what follows, c1 is a condition mask for (zeta < 0) and (g & 1). */
- c1 &= c2;
- /* Conditionally change zeta into -zeta-2 or zeta-1. */
- zeta = (zeta ^ c1) - 1;
+ /* Compute x,y,z, conditionally complemented versions of f,u,v. */
+ x = f ^ c1;
+ y = u ^ c1;
+ z = v ^ c1;
+ /* Conditionally subtract x,y,z from g,q,r. */
+ g -= x & c2;
+ q -= y & c2;
+ r -= z & c2;
+ /* In what follows, c2 is a condition mask for (theta >= 0) and (g & 1). */
+ c2 &= ~c1;
+ /* Conditionally change theta into -theta or theta+1. */
+ theta = (theta ^ c2) + 1;
/* Conditionally add g,q,r to f,u,v. */
- f += g & c1;
- u += q & c1;
- v += r & c1;
+ f += g & c2;
+ u += q & c2;
+ v += r & c2;
/* Shifts */
g >>= 1;
- u <<= 1;
- v <<= 1;
- /* Bounds on zeta that follow from the bounds on iteration count (max 20*30 divsteps). */
- VERIFY_CHECK(zeta >= -601 && zeta <= 601);
+ q >>= 1;
+ r >>= 1;
+ /* Bounds on theta that follow from the bounds on iteration count (max 20*30 divsteps). */
+ VERIFY_CHECK(theta >= -600 && theta <= 600);
}
/* Return data in t and return value. */
- t->u = (int32_t)u;
- t->v = (int32_t)v;
- t->q = (int32_t)q;
- t->r = (int32_t)r;
+ t->u = u;
+ t->v = v;
+ t->q = q;
+ t->r = r;
+ /* Applying the final matrix to the initial f,g gives final f,g. */
+ VERIFY_CHECK(u * f0 + v * g0 == f << 30);
+ VERIFY_CHECK(q * f0 + r * g0 == g << 30);
/* The determinant of t must be a power of two. This guarantees that multiplication with t
* does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
- * will be divided out again). As each divstep's individual matrix has determinant 2, the
- * aggregate of 30 of them will have determinant 2^30. */
+ * will be divided out again). As each divstep's individual matrix has determinant 2^-1,
+ * the aggregate of 30 of them will have determinant 2^-30. Multiplying with the initial
+ * 2^30*identity (which has determinant 2^60) means the result has determinant 2^30. */
VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
- return zeta;
+ return theta;
}
/* Compute the transition matrix and eta for 30 divsteps (variable time).
@@ -258,7 +261,12 @@ static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint
0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
};
- /* Transformation matrix; see comments in secp256k1_modinv32_divsteps_30. */
+ /* u,v,q,r are the elements of the transformation matrix being built up,
+ * starting with the identity matrix. Semantically they are signed integers
+ * in range [-2^30,2^30], but here represented as unsigned mod 2^32. This
+ * permits left shifting (which is UB for negative numbers). The range
+ * being inside [-2^31,2^31) means that casting to signed works correctly.
+ */
uint32_t u = 1, v = 0, q = 0, r = 1;
uint32_t f = f0, g = g0, m;
uint16_t w;
@@ -453,19 +461,19 @@ static void secp256k1_modinv32_update_fg_30_var(int len, secp256k1_modinv32_sign
/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */
static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) {
- /* Start with d=0, e=1, f=modulus, g=x, zeta=-1. */
+ /* Start with d=0, e=1, f=modulus, g=x, theta=0. */
secp256k1_modinv32_signed30 d = {{0}};
secp256k1_modinv32_signed30 e = {{1}};
secp256k1_modinv32_signed30 f = modinfo->modulus;
secp256k1_modinv32_signed30 g = *x;
int i;
- int32_t zeta = -1; /* zeta = -(delta+1/2); delta is initially 1/2. */
+ int32_t theta = 0; /* theta = delta-1/2; delta is initially 1/2. */
/* Do 20 iterations of 30 divsteps each = 600 divsteps. 590 suffices for 256-bit inputs. */
for (i = 0; i < 20; ++i) {
/* Compute transition matrix and new zeta after 30 divsteps. */
secp256k1_modinv32_trans2x2 t;
- zeta = secp256k1_modinv32_divsteps_30(zeta, f.v[0], g.v[0], &t);
+ theta = secp256k1_modinv32_divsteps_30(theta, f.v[0], g.v[0], &t);
/* Update d,e using that transition matrix. */
secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
/* Update f,g using that transition matrix. */
diff --git a/src/modinv64_impl.h b/src/modinv64_impl.h
index 0743a9c821..8e2de33cb0 100644
--- a/src/modinv64_impl.h
+++ b/src/modinv64_impl.h
@@ -145,72 +145,74 @@ typedef struct {
int64_t u, v, q, r;
} secp256k1_modinv64_trans2x2;
-/* Compute the transition matrix and eta for 59 divsteps (where zeta=-(delta+1/2)).
- * Note that the transformation matrix is scaled by 2^62 and not 2^59.
+/* Compute the transition matrix and theta for 59 divsteps (where theta=delta-1/2)
+ * Although only 59 divsteps are performed, the resulting transformation matrix
+ * is scaled by 2^62 to allow reuse of _update_de_62 and _update_fg_62 between
+ * _modinv64 and _modinv64_var.
*
- * Input: zeta: initial zeta
- * f0: bottom limb of initial f
- * g0: bottom limb of initial g
+ * Input: theta: initial theta
+ * f0: bottom limb of initial f
+ * g0: bottom limb of initial g
* Output: t: transition matrix
- * Return: final zeta
+ * Return: final theta
*
* Implements the divsteps_n_matrix function from the explanation.
*/
-static int64_t secp256k1_modinv64_divsteps_59(int64_t zeta, uint64_t f0, uint64_t g0, secp256k1_modinv64_trans2x2 *t) {
- /* u,v,q,r are the elements of the transformation matrix being built up,
- * starting with the identity matrix times 8 (because the caller expects
- * a result scaled by 2^62). Semantically they are signed integers
- * in range [-2^62,2^62], but here represented as unsigned mod 2^64. This
- * permits left shifting (which is UB for negative numbers). The range
- * being inside [-2^63,2^63) means that casting to signed works correctly.
- */
- uint64_t u = 8, v = 0, q = 0, r = 8;
- uint64_t c1, c2, f = f0, g = g0, x, y, z;
+static int64_t secp256k1_modinv64_divsteps_59(int64_t theta, uint64_t f0, uint64_t g0, secp256k1_modinv64_trans2x2 *t) {
+ /* The transformation matrix being built up, scaled by 2^62. */
+ int64_t u = (int64_t)1 << 62, v = 0, q = 0, r = (int64_t)1 << 62, y, z;
+ uint64_t c1, c2, f = f0, g = g0, x;
int i;
for (i = 3; i < 62; ++i) {
- VERIFY_CHECK((f & 1) == 1); /* f must always be odd */
- VERIFY_CHECK((u * f0 + v * g0) == f << i);
- VERIFY_CHECK((q * f0 + r * g0) == g << i);
- /* Compute conditional masks for (zeta < 0) and for (g & 1). */
- c1 = zeta >> 63;
+ /* f must always be odd */
+ VERIFY_CHECK((f & 1) == 1);
+ /* Minimum trailing zeros count for matrix elements decreases in each iteration */
+ VERIFY_CHECK(!((u | v | q | r) & (0xFFFFFFFFFFFFFFFFULL >> (i - 1))));
+ /* Applying the matrix so far to the initial f,g gives current f,g. */
+ VERIFY_CHECK((u >> (62 - i)) * f0 + (v >> (62 - i)) * g0 == f << i);
+ VERIFY_CHECK((q >> (62 - i)) * f0 + (r >> (62 - i)) * g0 == g << i);
+ /* Compute conditional masks for (theta < 0) and for (g & 1). */
+ c1 = theta >> 63;
c2 = -(g & 1);
- /* Compute x,y,z, conditionally negated versions of f,u,v. */
- x = (f ^ c1) - c1;
- y = (u ^ c1) - c1;
- z = (v ^ c1) - c1;
- /* Conditionally add x,y,z to g,q,r. */
- g += x & c2;
- q += y & c2;
- r += z & c2;
- /* In what follows, c1 is a condition mask for (zeta < 0) and (g & 1). */
- c1 &= c2;
- /* Conditionally change zeta into -zeta-2 or zeta-1. */
- zeta = (zeta ^ c1) - 1;
+ /* Compute x,y,z, conditionally complemented versions of f,u,v. */
+ x = f ^ c1;
+ y = u ^ c1;
+ z = v ^ c1;
+ /* Conditionally subtract x,y,z from g,q,r. */
+ g -= x & c2;
+ q -= y & c2;
+ r -= z & c2;
+ /* In what follows, c2 is a condition mask for (theta >= 0) and (g & 1). */
+ c2 &= ~c1;
+ /* Conditionally change theta into -theta or theta+1. */
+ theta = (theta ^ c2) + 1;
/* Conditionally add g,q,r to f,u,v. */
- f += g & c1;
- u += q & c1;
- v += r & c1;
+ f += g & c2;
+ u += q & c2;
+ v += r & c2;
/* Shifts */
g >>= 1;
- u <<= 1;
- v <<= 1;
- /* Bounds on zeta that follow from the bounds on iteration count (max 10*59 divsteps). */
- VERIFY_CHECK(zeta >= -591 && zeta <= 591);
+ q >>= 1;
+ r >>= 1;
+ /* Bounds on theta that follow from the bounds on iteration count (max 10*59 divsteps). */
+ VERIFY_CHECK(theta >= -590 && theta <= 590);
}
/* Return data in t and return value. */
- t->u = (int64_t)u;
- t->v = (int64_t)v;
- t->q = (int64_t)q;
- t->r = (int64_t)r;
+ t->u = u;
+ t->v = v;
+ t->q = q;
+ t->r = r;
+ /* Applying the final matrix to the initial f,g gives final f,g. */
+ VERIFY_CHECK(u * f0 + v * g0 == f << 62);
+ VERIFY_CHECK(q * f0 + r * g0 == g << 62);
/* The determinant of t must be a power of two. This guarantees that multiplication with t
* does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
- * will be divided out again). As each divstep's individual matrix has determinant 2, the
- * aggregate of 59 of them will have determinant 2^59. Multiplying with the initial
- * 8*identity (which has determinant 2^6) means the overall outputs has determinant
- * 2^65. */
+ * will be divided out again). As each divstep's individual matrix has determinant 2^-1,
+ * the aggregate of 59 of them will have determinant 2^-59. Multiplying with the initial
+ * 2^62*identity (which has determinant 2^124) means the result has determinant 2^65. */
VERIFY_CHECK((int128_t)t->u * t->r - (int128_t)t->v * t->q == ((int128_t)1) << 65);
- return zeta;
+ return theta;
}
/* Compute the transition matrix and eta for 62 divsteps (variable time, eta=-delta).
@@ -224,7 +226,12 @@ static int64_t secp256k1_modinv64_divsteps_59(int64_t zeta, uint64_t f0, uint64_
* Implements the divsteps_n_matrix_var function from the explanation.
*/
static int64_t secp256k1_modinv64_divsteps_62_var(int64_t eta, uint64_t f0, uint64_t g0, secp256k1_modinv64_trans2x2 *t) {
- /* Transformation matrix; see comments in secp256k1_modinv64_divsteps_62. */
+ /* u,v,q,r are the elements of the transformation matrix being built up,
+ * starting with the identity matrix. Semantically they are signed integers
+ * in range [-2^62,2^62], but here represented as unsigned mod 2^64. This
+ * permits left shifting (which is UB for negative numbers). The range
+ * being inside [-2^63,2^63) means that casting to signed works correctly.
+ */
uint64_t u = 1, v = 0, q = 0, r = 1;
uint64_t f = f0, g = g0, m;
uint32_t w;
@@ -459,19 +466,19 @@ static void secp256k1_modinv64_update_fg_62_var(int len, secp256k1_modinv64_sign
/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */
static void secp256k1_modinv64(secp256k1_modinv64_signed62 *x, const secp256k1_modinv64_modinfo *modinfo) {
- /* Start with d=0, e=1, f=modulus, g=x, zeta=-1. */
+ /* Start with d=0, e=1, f=modulus, g=x, theta=0. */
secp256k1_modinv64_signed62 d = {{0, 0, 0, 0, 0}};
secp256k1_modinv64_signed62 e = {{1, 0, 0, 0, 0}};
secp256k1_modinv64_signed62 f = modinfo->modulus;
secp256k1_modinv64_signed62 g = *x;
int i;
- int64_t zeta = -1; /* zeta = -(delta+1/2); delta starts at 1/2. */
+ int64_t theta = 0; /* theta = delta-1/2; delta starts at 1/2. */
/* Do 10 iterations of 59 divsteps each = 590 divsteps. This suffices for 256-bit inputs. */
for (i = 0; i < 10; ++i) {
/* Compute transition matrix and new zeta after 59 divsteps. */
secp256k1_modinv64_trans2x2 t;
- zeta = secp256k1_modinv64_divsteps_59(zeta, f.v[0], g.v[0], &t);
+ theta = secp256k1_modinv64_divsteps_59(theta, f.v[0], g.v[0], &t);
/* Update d,e using that transition matrix. */
secp256k1_modinv64_update_de_62(&d, &e, &t, modinfo);
/* Update f,g using that transition matrix. */