Merge #1029: Simpler and faster ecdh skew fixup

sipa · sipa · commit a1102b12196e · 2021-12-31T14:44:59.000-05:00
e82144e Fixup skew before global Z fixup (Peter Dettman) 40b624c Add tests for _gej_cmov (Peter Dettman) 8c13a9b ECDH skews by 0 or 1 (Peter Dettman) 1515099 Simpler and faster ecdh skew fixup (Peter Dettman) Pull request description: This PR adds a `_gej_cmov` method, with accompanying tests, and uses it to simplify the skew fixup at the end of `_ecmult_const`. In the existing code, `_wnaf_const` chooses a skew of either 1 or 2, and `_ecmult_const` needs a call to `_ge_set_gej` (which does an expensive field inversion internally) and some overly-complicated conversions to/from `_ge_storage` so that `_ge_storage_cmov` can be used to select what value to add for the fixup. This PR uses a simpler scheme where `_wnaf_const` chooses a skew of 0 or 1 and no longer needs special handling for scalars with value negative one. A new `_gej_cmov` method is used at the end of `_ecmult_const` for const-time optional addition to adjust the final result for the skew. Finally, the skew fixup is moved to before the global-Z adjustment, and the precomputed table entries (for 1P, &#955;(1P)) are used for the skew fixup, saving a field multiply and ensuring the fixup is done on the same isomorphism as the ladder. The resulting `_wnaf_const` and `_ecmult_const` are shorter and simpler, and the ECDH benchmark is around 5% faster (64bit, i7). Edit: Updated description once the final scope was clear. ACKs for top commit: apoelstra: ACK e82144e sipa: ACK e82144e real-or-random: ACK e82144e Tree-SHA512: 10d6770f4ef4f8d0c78abbf58d643f25f5daef68896643af0a3f7f877414e23356724b6f20af2027316a4353a35b8cb0a7851e057a3f6483897df02bf033a8a2
diff --git a/src/ecmult_const_impl.h b/src/ecmult_const_impl.h
@@ -56,7 +56,6 @@ static void secp256k1_ecmult_odd_multiples_table_globalz_windowa(secp256k1_ge *p
     secp256k1_fe_cmov(&(r)->y, &neg_y, (n) != abs_n); \
 } while(0)
 
-
 /** Convert a number to WNAF notation.
  *  The number becomes represented by sum(2^{wi} * wnaf[i], i=0..WNAF_SIZE(w)+1) - return_val.
  *  It has the following guarantees:
@@ -72,51 +71,35 @@ static void secp256k1_ecmult_odd_multiples_table_globalz_windowa(secp256k1_ge *p
  */
 static int secp256k1_wnaf_const(int *wnaf, const secp256k1_scalar *scalar, int w, int size) {
     int global_sign;
-    int skew = 0;
+    int skew;
     int word = 0;
 
     /* 1 2 3 */
     int u_last;
     int u;
 
     int flip;
-    int bit;
-    secp256k1_scalar s;
-    int not_neg_one;
+    secp256k1_scalar s = *scalar;
 
     VERIFY_CHECK(w > 0);
     VERIFY_CHECK(size > 0);
 
     /* Note that we cannot handle even numbers by negating them to be odd, as is
      * done in other implementations, since if our scalars were specified to have
      * width < 256 for performance reasons, their negations would have width 256
-     * and we'd lose any performance benefit. Instead, we use a technique from
-     * Section 4.2 of the Okeya/Tagaki paper, which is to add either 1 (for even)
-     * or 2 (for odd) to the number we are encoding, returning a skew value indicating
+     * and we'd lose any performance benefit. Instead, we use a variation of a
+     * technique from Section 4.2 of the Okeya/Tagaki paper, which is to add 1 to the
+     * number we are encoding when it is even, returning a skew value indicating
      * this, and having the caller compensate after doing the multiplication.
      *
      * In fact, we _do_ want to negate numbers to minimize their bit-lengths (and in
      * particular, to ensure that the outputs from the endomorphism-split fit into
-     * 128 bits). If we negate, the parity of our number flips, inverting which of
-     * {1, 2} we want to add to the scalar when ensuring that it's odd. Further
-     * complicating things, -1 interacts badly with `secp256k1_scalar_cadd_bit` and
-     * we need to special-case it in this logic. */
-    flip = secp256k1_scalar_is_high(scalar);
-    /* We add 1 to even numbers, 2 to odd ones, noting that negation flips parity */
-    bit = flip ^ !secp256k1_scalar_is_even(scalar);
-    /* We check for negative one, since adding 2 to it will cause an overflow */
-    secp256k1_scalar_negate(&s, scalar);
-    not_neg_one = !secp256k1_scalar_is_one(&s);
-    s = *scalar;
-    secp256k1_scalar_cadd_bit(&s, bit, not_neg_one);
-    /* If we had negative one, flip == 1, s.d[0] == 0, bit == 1, so caller expects
-     * that we added two to it and flipped it. In fact for -1 these operations are
-     * identical. We only flipped, but since skewing is required (in the sense that
-     * the skew must be 1 or 2, never zero) and flipping is not, we need to change
-     * our flags to claim that we only skewed. */
+     * 128 bits). If we negate, the parity of our number flips, affecting whether
+     * we want to add to the scalar to ensure that it's odd. */
+    flip = secp256k1_scalar_is_high(&s);
+    skew = flip ^ secp256k1_scalar_is_even(&s);
+    secp256k1_scalar_cadd_bit(&s, 0, skew);
     global_sign = secp256k1_scalar_cond_negate(&s, flip);
-    global_sign *= not_neg_one * 2 - 1;
-    skew = 1 << bit;
 
     /* 4 */
     u_last = secp256k1_scalar_shr_int(&s, w);
@@ -230,42 +213,22 @@ static void secp256k1_ecmult_const(secp256k1_gej *r, const secp256k1_ge *a, cons
         }
     }
 
-    secp256k1_fe_mul(&r->z, &r->z, &Z);
-
     {
         /* Correct for wNAF skew */
-        secp256k1_ge correction = *a;
-        secp256k1_ge_storage correction_1_stor;
-        secp256k1_ge_storage correction_lam_stor;
-        secp256k1_ge_storage a2_stor;
         secp256k1_gej tmpj;
-        secp256k1_gej_set_ge(&tmpj, &correction);
-        secp256k1_gej_double_var(&tmpj, &tmpj, NULL);
-        secp256k1_ge_set_gej(&correction, &tmpj);
-        secp256k1_ge_to_storage(&correction_1_stor, a);
-        if (size > 128) {
-            secp256k1_ge_to_storage(&correction_lam_stor, a);
-        }
-        secp256k1_ge_to_storage(&a2_stor, &correction);
 
-        /* For odd numbers this is 2a (so replace it), for even ones a (so no-op) */
-        secp256k1_ge_storage_cmov(&correction_1_stor, &a2_stor, skew_1 == 2);
-        if (size > 128) {
-            secp256k1_ge_storage_cmov(&correction_lam_stor, &a2_stor, skew_lam == 2);
-        }
-
-        /* Apply the correction */
-        secp256k1_ge_from_storage(&correction, &correction_1_stor);
-        secp256k1_ge_neg(&correction, &correction);
-        secp256k1_gej_add_ge(r, r, &correction);
+        secp256k1_ge_neg(&tmpa, &pre_a[0]);
+        secp256k1_gej_add_ge(&tmpj, r, &tmpa);
+        secp256k1_gej_cmov(r, &tmpj, skew_1);
 
         if (size > 128) {
-            secp256k1_ge_from_storage(&correction, &correction_lam_stor);
-            secp256k1_ge_neg(&correction, &correction);
-            secp256k1_ge_mul_lambda(&correction, &correction);
-            secp256k1_gej_add_ge(r, r, &correction);
+            secp256k1_ge_neg(&tmpa, &pre_a_lam[0]);
+            secp256k1_gej_add_ge(&tmpj, r, &tmpa);
+            secp256k1_gej_cmov(r, &tmpj, skew_lam);
         }
     }
+
+    secp256k1_fe_mul(&r->z, &r->z, &Z);
 }
 
 #endif /* SECP256K1_ECMULT_CONST_IMPL_H */
diff --git a/src/group.h b/src/group.h
@@ -124,6 +124,9 @@ static void secp256k1_ge_to_storage(secp256k1_ge_storage *r, const secp256k1_ge
 /** Convert a group element back from the storage type. */
 static void secp256k1_ge_from_storage(secp256k1_ge *r, const secp256k1_ge_storage *a);
 
+/** If flag is true, set *r equal to *a; otherwise leave it. Constant-time.  Both *r and *a must be initialized.*/
+static void secp256k1_gej_cmov(secp256k1_gej *r, const secp256k1_gej *a, int flag);
+
 /** If flag is true, set *r equal to *a; otherwise leave it. Constant-time.  Both *r and *a must be initialized.*/
 static void secp256k1_ge_storage_cmov(secp256k1_ge_storage *r, const secp256k1_ge_storage *a, int flag);
 
diff --git a/src/group_impl.h b/src/group_impl.h
@@ -642,6 +642,14 @@ static void secp256k1_ge_from_storage(secp256k1_ge *r, const secp256k1_ge_storag
     r->infinity = 0;
 }
 
+static SECP256K1_INLINE void secp256k1_gej_cmov(secp256k1_gej *r, const secp256k1_gej *a, int flag) {
+    secp256k1_fe_cmov(&r->x, &a->x, flag);
+    secp256k1_fe_cmov(&r->y, &a->y, flag);
+    secp256k1_fe_cmov(&r->z, &a->z, flag);
+
+    r->infinity ^= (r->infinity ^ a->infinity) & flag;
+}
+
 static SECP256K1_INLINE void secp256k1_ge_storage_cmov(secp256k1_ge_storage *r, const secp256k1_ge_storage *a, int flag) {
     secp256k1_fe_storage_cmov(&r->x, &a->x, flag);
     secp256k1_fe_storage_cmov(&r->y, &a->y, flag);
diff --git a/src/tests.c b/src/tests.c
@@ -100,6 +100,12 @@ void random_group_element_jacobian_test(secp256k1_gej *gej, const secp256k1_ge *
     gej->infinity = ge->infinity;
 }
 
+void random_gej_test(secp256k1_gej *gej) {
+    secp256k1_ge ge;
+    random_group_element_test(&ge);
+    random_group_element_jacobian_test(gej, &ge);
+}
+
 void random_scalar_order_test(secp256k1_scalar *num) {
     do {
         unsigned char b32[32];
@@ -3341,6 +3347,37 @@ void run_ge(void) {
     test_intialized_inf();
 }
 
+void test_gej_cmov(const secp256k1_gej *a, const secp256k1_gej *b) {
+    secp256k1_gej t = *a;
+    secp256k1_gej_cmov(&t, b, 0);
+    CHECK(gej_xyz_equals_gej(&t, a));
+    secp256k1_gej_cmov(&t, b, 1);
+    CHECK(gej_xyz_equals_gej(&t, b));
+}
+
+void run_gej(void) {
+    int i;
+    secp256k1_gej a, b;
+
+    /* Tests for secp256k1_gej_cmov */
+    for (i = 0; i < count; i++) {
+        secp256k1_gej_set_infinity(&a);
+        secp256k1_gej_set_infinity(&b);
+        test_gej_cmov(&a, &b);
+
+        random_gej_test(&a);
+        test_gej_cmov(&a, &b);
+        test_gej_cmov(&b, &a);
+
+        b = a;
+        test_gej_cmov(&a, &b);
+
+        random_gej_test(&b);
+        test_gej_cmov(&a, &b);
+        test_gej_cmov(&b, &a);
+    }
+}
+
 void test_ec_combine(void) {
     secp256k1_scalar sum = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0);
     secp256k1_pubkey data[6];
@@ -4522,7 +4559,7 @@ void test_constant_wnaf(const secp256k1_scalar *number, int w) {
         secp256k1_scalar_add(&x, &x, &t);
     }
     /* Skew num because when encoding numbers as odd we use an offset */
-    secp256k1_scalar_set_int(&scalar_skew, 1 << (skew == 2));
+    secp256k1_scalar_set_int(&scalar_skew, skew);
     secp256k1_scalar_add(&num, &num, &scalar_skew);
     CHECK(secp256k1_scalar_eq(&x, &num));
 }
@@ -6808,6 +6845,7 @@ int main(int argc, char **argv) {
 
     /* group tests */
     run_ge();
+    run_gej();
     run_group_decompress();
 
     /* ecmult tests */