WIP: Eliminate the prej array from ecmult_strauss_wnaf.

roconnor-blockstream · roconnor-blockstream · commit e6f090db8517 · 2021-02-26T15:18:50.000-05:00
diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h
@@ -82,7 +82,7 @@
  *  contain prej[0].z / a.z. The other zr[i] values = prej[i].z / prej[i-1].z.
  *  Prej's Z values are undefined, except for the last value.
  */
-static void secp256k1_ecmult_odd_multiples_table(int n, secp256k1_gej *prej, secp256k1_fe *zr, const secp256k1_gej *a) {
+static void secp256k1_ecmult_odd_multiples_table(int n, secp256k1_ge *pre_a, secp256k1_fe *zr, secp256k1_gej *a) {
     secp256k1_gej d;
     secp256k1_ge a_ge, d_ge;
     int i;
@@ -100,21 +100,19 @@ static void secp256k1_ecmult_odd_multiples_table(int n, secp256k1_gej *prej, sec
     d_ge.infinity = 0;
 
     secp256k1_ge_set_gej_zinv(&a_ge, a, &d.z);
-    prej[0].x = a_ge.x;
-    prej[0].y = a_ge.y;
-    prej[0].z = a->z;
-    prej[0].infinity = 0;
+    pre_a[0].x = a->x = a_ge.x;
+    pre_a[0].y = a->y = a_ge.y;
+    pre_a[0].infinity = 0;
 
     zr[0] = d.z;
     for (i = 1; i < n; i++) {
-        secp256k1_gej_add_ge_var(&prej[i], &prej[i-1], &d_ge, &zr[i]);
+        secp256k1_gej_add_ge_var(a, a, &d_ge, &zr[i]);
+        pre_a[i].x = a->x;
+        pre_a[i].y = a->y;
+        pre_a[i].infinity = 0;
     }
 
-    /*
-     * Each point in 'prej' has a z coordinate too small by a factor of 'd.z'. Only
-     * the final point's z coordinate is actually used though, so just update that.
-     */
-    secp256k1_fe_mul(&prej[n-1].z, &prej[n-1].z, &d.z);
+    secp256k1_fe_mul(&a->z, &a->z, &d.z);
 }
 
 /** Fill a table 'pre' with precomputed odd multiples of a.
@@ -133,13 +131,14 @@ static void secp256k1_ecmult_odd_multiples_table(int n, secp256k1_gej *prej, sec
  *  happen once).
  */
 static void secp256k1_ecmult_odd_multiples_table_globalz_windowa(secp256k1_ge *pre, secp256k1_fe *globalz, const secp256k1_gej *a) {
-    secp256k1_gej prej[ECMULT_TABLE_SIZE(WINDOW_A)];
+    secp256k1_gej tmp = *a;
     secp256k1_fe zr[ECMULT_TABLE_SIZE(WINDOW_A)];
 
     /* Compute the odd multiples in Jacobian form. */
-    secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), prej, zr, a);
+    secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), pre, zr, &tmp);
     /* Bring them to the same Z denominator. */
-    secp256k1_ge_globalz_set_table_gej(ECMULT_TABLE_SIZE(WINDOW_A), pre, globalz, prej, zr);
+    *globalz = tmp.z;
+    secp256k1_ge_globalz_set_table_gej(ECMULT_TABLE_SIZE(WINDOW_A), pre, pre, zr);
 }
 
 static void secp256k1_ecmult_odd_multiples_table_storage_var(const int n, secp256k1_ge_storage *pre, const secp256k1_gej *a) {
@@ -491,18 +490,21 @@ static void secp256k1_ecmult_strauss_wnaf(const secp256k1_ecmult_context *ctx, c
      */
     if (no > 0) {
         /* Compute the odd multiples in Jacobian form. */
-        secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->prej, state->zr, &a[state->ps[0].input_pos]);
+        secp256k1_gej tmp = a[state->ps[0].input_pos];
+        secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->pre_a, state->zr, &tmp);
         for (np = 1; np < no; ++np) {
-            secp256k1_gej tmp = a[state->ps[np].input_pos];
+            secp256k1_fe lastz = tmp.z;
+            tmp = a[state->ps[np].input_pos];
 #ifdef VERIFY
-            secp256k1_fe_normalize_var(&(state->prej[(np - 1) * ECMULT_TABLE_SIZE(WINDOW_A) + ECMULT_TABLE_SIZE(WINDOW_A) - 1].z));
+            secp256k1_fe_normalize_var(&lastz);
 #endif
-            secp256k1_gej_rescale(&tmp, &(state->prej[(np - 1) * ECMULT_TABLE_SIZE(WINDOW_A) + ECMULT_TABLE_SIZE(WINDOW_A) - 1].z));
-            secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->prej + np * ECMULT_TABLE_SIZE(WINDOW_A), state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), &tmp);
+            secp256k1_gej_rescale(&tmp, &lastz);
+            secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->pre_a + np * ECMULT_TABLE_SIZE(WINDOW_A), state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), &tmp);
             secp256k1_fe_mul(state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), &(a[state->ps[np].input_pos].z));
         }
         /* Bring them to the same Z denominator. */
-        secp256k1_ge_globalz_set_table_gej(ECMULT_TABLE_SIZE(WINDOW_A) * no, state->pre_a, &Z, state->prej, state->zr);
+        Z = tmp.z;
+        secp256k1_ge_globalz_set_table_gej(ECMULT_TABLE_SIZE(WINDOW_A) * no, state->pre_a, state->pre_a, state->zr);
     } else {
         secp256k1_fe_set_int(&Z, 1);
     }
diff --git a/src/group.h b/src/group.h
@@ -73,7 +73,7 @@ static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a
  *  that mul(a[i].z, zr[i+1]) == a[i+1].z. zr[0] is ignored. The x and y
  *  coordinates of the result are stored in r, the common z coordinate is
  *  stored in globalz. */
-static void secp256k1_ge_globalz_set_table_gej(size_t len, secp256k1_ge *r, secp256k1_fe *globalz, const secp256k1_gej *a, const secp256k1_fe *zr);
+static void secp256k1_ge_globalz_set_table_gej(size_t len, secp256k1_ge *r, const secp256k1_ge *a, const secp256k1_fe *zr);
 
 /** Set a group element (affine) equal to the point at infinity. */
 static void secp256k1_ge_set_infinity(secp256k1_ge *r);
diff --git a/src/group_impl.h b/src/group_impl.h
@@ -156,7 +156,7 @@ static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a
     }
 }
 
-static void secp256k1_ge_globalz_set_table_gej(size_t len, secp256k1_ge *r, secp256k1_fe *globalz, const secp256k1_gej *a, const secp256k1_fe *zr) {
+static void secp256k1_ge_globalz_set_table_gej(size_t len, secp256k1_ge *r, const secp256k1_ge *a, const secp256k1_fe *zr) {
     size_t i = len - 1;
     secp256k1_fe zs;
 
@@ -166,17 +166,20 @@ static void secp256k1_ge_globalz_set_table_gej(size_t len, secp256k1_ge *r, secp
         r[i].y = a[i].y;
         /* Ensure all y values are in weak normal form for fast negation of points */
         secp256k1_fe_normalize_weak(&r[i].y);
-        *globalz = a[i].z;
         r[i].infinity = 0;
         zs = zr[i];
 
         /* Work our way backwards, using the z-ratios to scale the x/y values. */
         while (i > 0) {
+            secp256k1_gej tmpa;
             if (i != len - 1) {
                 secp256k1_fe_mul(&zs, &zs, &zr[i]);
             }
             i--;
-            secp256k1_ge_set_gej_zinv(&r[i], &a[i], &zs);
+            tmpa.x = a[i].x;
+            tmpa.y = a[i].y;
+            tmpa.infinity = 0;
+            secp256k1_ge_set_gej_zinv(&r[i], &tmpa, &zs);
         }
     }
 }

Original file line number	Diff line number	Diff line change
`@@ -156,7 +156,7 @@ static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a`
`156`	`156`	`}`
`157`	`157`	`}`
`158`	`158`
`159`		`-static void secp256k1_ge_globalz_set_table_gej(size_t len, secp256k1_ge *r, secp256k1_fe *globalz, const secp256k1_gej *a, const secp256k1_fe *zr) {`
	`159`	`+static void secp256k1_ge_globalz_set_table_gej(size_t len, secp256k1_ge *r, const secp256k1_ge *a, const secp256k1_fe *zr) {`
`160`	`160`	`size_t i = len - 1;`
`161`	`161`	`secp256k1_fe zs;`
`162`	`162`
`@@ -166,17 +166,20 @@ static void secp256k1_ge_globalz_set_table_gej(size_t len, secp256k1_ge *r, secp`
`166`	`166`	`r[i].y = a[i].y;`
`167`	`167`	`/* Ensure all y values are in weak normal form for fast negation of points */`
`168`	`168`	`secp256k1_fe_normalize_weak(&r[i].y);`
`169`		`- *globalz = a[i].z;`
`170`	`169`	`r[i].infinity = 0;`
`171`	`170`	`zs = zr[i];`
`172`	`171`
`173`	`172`	`/* Work our way backwards, using the z-ratios to scale the x/y values. */`
`174`	`173`	`while (i > 0) {`
	`174`	`+ secp256k1_gej tmpa;`
`175`	`175`	`if (i != len - 1) {`
`176`	`176`	`secp256k1_fe_mul(&zs, &zs, &zr[i]);`
`177`	`177`	`}`
`178`	`178`	`i--;`
`179`		`- secp256k1_ge_set_gej_zinv(&r[i], &a[i], &zs);`
	`179`	`+ tmpa.x = a[i].x;`
	`180`	`+ tmpa.y = a[i].y;`
	`181`	`+ tmpa.infinity = 0;`
	`182`	`+ secp256k1_ge_set_gej_zinv(&r[i], &tmpa, &zs);`
`180`	`183`	`}`
`181`	`184`	`}`
`182`	`185`	`}`