From 350ceba09167ba828ea9266671db82bbccafa07d Mon Sep 17 00:00:00 2001
From: Russell O'Connor <roconnor@blockstream.io>
Date: Fri, 26 Feb 2021 15:18:50 -0500
Subject: [PATCH 1/8] Eliminate the prej array from ecmult_strauss_wnaf.

---
 src/ecmult_impl.h | 71 +++++++++++++++++++++++++----------------------
 src/group.h       | 33 ++++++++++++++++------
 src/group_impl.h  | 15 +++++-----
 3 files changed, 70 insertions(+), 49 deletions(-)

diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h
index 5bd4d4d23d..de47e7ad1c 100644
--- a/src/ecmult_impl.h
+++ b/src/ecmult_impl.h
@@ -56,14 +56,23 @@
 
 #define ECMULT_MAX_POINTS_PER_BATCH 5000000
 
-/** Fill a table 'prej' with precomputed odd multiples of a. Prej will contain
- *  the values [1*a,3*a,...,(2*n-1)*a], so it space for n values. zr[0] will
- *  contain prej[0].z / a.z. The other zr[i] values = prej[i].z / prej[i-1].z.
- *  Prej's Z values are undefined, except for the last value.
+/** Fill a table 'pre_a' with precomputed odd multiples of a.
+ *  pre_a will contain [1*a,3*a,...,(2*n-1)*a], so it needs space for n group elements.
+ *  zr needs space for n field elements.
+ *
+ *  Although pre_a is an array of _ge rather than _gej, it actually represents elements
+ *  in Jacobian coordinates with their z coordinates omitted. The omitted z-coordinates
+ *  can be recovered using z and zr. Using the notation z(b) to represent the omitted
+ *  z coordinate of b:
+ *  - z(pre_a[n-1]) = 'z'
+ *  - z(pre_a[i-1]) = z(pre_a[i]) / zr[i] for n > i > 0
+ *
+ *  Lastly the zr[0] value, which isn't used above, is set so that:
+ *  - a.z = z(pre_a[0]) / zr[0]
  */
-static void secp256k1_ecmult_odd_multiples_table(int n, secp256k1_gej *prej, secp256k1_fe *zr, const secp256k1_gej *a) {
-    secp256k1_gej d;
-    secp256k1_ge a_ge, d_ge;
+static void secp256k1_ecmult_odd_multiples_table(int n, secp256k1_ge *pre_a, secp256k1_fe *zr, secp256k1_fe *z, const secp256k1_gej *a) {
+    secp256k1_gej d, ai;
+    secp256k1_ge d_ge;
     int i;
 
     VERIFY_CHECK(!a->infinity);
@@ -71,29 +80,27 @@ static void secp256k1_ecmult_odd_multiples_table(int n, secp256k1_gej *prej, sec
     secp256k1_gej_double_var(&d, a, NULL);
 
     /*
-     * Perform the additions on an isomorphism where 'd' is affine: drop the z coordinate
-     * of 'd', and scale the 1P starting value's x/y coordinates without changing its z.
+     * Perform the additions using an isomorphism that divides the z coordinate by the
+     * constant d.z. The group law is the same in the image, and since 'd' maps to an
+     * affine point, addition becomes more efficient (using mixed coordinates).
+     *
+     *     phi(x, y, z) = (x, y, z/d.z)
+     *           phi(d) = (d.x, d.y, 1)
+     *           phi(a) = (a.x, a.y, a.z/d.z) = (a.x*d.z^2, a.y*d.z^3, a.z)
      */
-    d_ge.x = d.x;
-    d_ge.y = d.y;
-    d_ge.infinity = 0;
-
-    secp256k1_ge_set_gej_zinv(&a_ge, a, &d.z);
-    prej[0].x = a_ge.x;
-    prej[0].y = a_ge.y;
-    prej[0].z = a->z;
-    prej[0].infinity = 0;
+    secp256k1_ge_set_xy(&d_ge, &d.x, &d.y);
+    secp256k1_ge_set_gej_zinv(&pre_a[0], a, &d.z);
+    secp256k1_gej_set_ge(&ai, &pre_a[0]);
+    ai.z = a->z;
 
     zr[0] = d.z;
     for (i = 1; i < n; i++) {
-        secp256k1_gej_add_ge_var(&prej[i], &prej[i-1], &d_ge, &zr[i]);
+        secp256k1_gej_add_ge_var(&ai, &ai, &d_ge, &zr[i]);
+        secp256k1_ge_set_xy(&pre_a[i], &ai.x, &ai.y);
     }
 
-    /*
-     * Each point in 'prej' has a z coordinate too small by a factor of 'd.z'. Only
-     * the final point's z coordinate is actually used though, so just update that.
-     */
-    secp256k1_fe_mul(&prej[n-1].z, &prej[n-1].z, &d.z);
+    /* Multiply by d.z to undo the isomorphism that makes 'd' affine. */
+    secp256k1_fe_mul(z, &ai.z, &d.z);
 }
 
 /** Fill a table 'pre' with precomputed odd multiples of a.
@@ -106,13 +113,11 @@ static void secp256k1_ecmult_odd_multiples_table(int n, secp256k1_gej *prej, sec
  *  and use the precomputed table in <ecmult_static_pre_g.h> for G.
  */
 static void secp256k1_ecmult_odd_multiples_table_globalz_windowa(secp256k1_ge *pre, secp256k1_fe *globalz, const secp256k1_gej *a) {
-    secp256k1_gej prej[ECMULT_TABLE_SIZE(WINDOW_A)];
     secp256k1_fe zr[ECMULT_TABLE_SIZE(WINDOW_A)];
 
     /* Compute the odd multiples in Jacobian form. */
-    secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), prej, zr, a);
-    /* Bring them to the same Z denominator. */
-    secp256k1_ge_globalz_set_table_gej(ECMULT_TABLE_SIZE(WINDOW_A), pre, globalz, prej, zr);
+    secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), pre, zr, globalz, a);
+    secp256k1_ge_globalz_fixup_table(ECMULT_TABLE_SIZE(WINDOW_A), pre, zr);
 }
 
 /** The following two macro retrieves a particular odd multiple from a table
@@ -265,18 +270,18 @@ static void secp256k1_ecmult_strauss_wnaf(const struct secp256k1_strauss_state *
      */
     if (no > 0) {
         /* Compute the odd multiples in Jacobian form. */
-        secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->prej, state->zr, &a[state->ps[0].input_pos]);
+        secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->pre_a, state->zr, &Z, &a[state->ps[0].input_pos]);
         for (np = 1; np < no; ++np) {
             secp256k1_gej tmp = a[state->ps[np].input_pos];
 #ifdef VERIFY
-            secp256k1_fe_normalize_var(&(state->prej[(np - 1) * ECMULT_TABLE_SIZE(WINDOW_A) + ECMULT_TABLE_SIZE(WINDOW_A) - 1].z));
+            secp256k1_fe_normalize_var(&Z);
 #endif
-            secp256k1_gej_rescale(&tmp, &(state->prej[(np - 1) * ECMULT_TABLE_SIZE(WINDOW_A) + ECMULT_TABLE_SIZE(WINDOW_A) - 1].z));
-            secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->prej + np * ECMULT_TABLE_SIZE(WINDOW_A), state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), &tmp);
+            secp256k1_gej_rescale(&tmp, &Z);
+            secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->pre_a + np * ECMULT_TABLE_SIZE(WINDOW_A), state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), &Z, &tmp);
             secp256k1_fe_mul(state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), &(a[state->ps[np].input_pos].z));
         }
         /* Bring them to the same Z denominator. */
-        secp256k1_ge_globalz_set_table_gej(ECMULT_TABLE_SIZE(WINDOW_A) * no, state->pre_a, &Z, state->prej, state->zr);
+        secp256k1_ge_globalz_fixup_table(ECMULT_TABLE_SIZE(WINDOW_A) * no, state->pre_a, state->zr);
     } else {
         secp256k1_fe_set_int(&Z, 1);
     }
diff --git a/src/group.h b/src/group.h
index b9cd334dae..d5cc8a04ee 100644
--- a/src/group.h
+++ b/src/group.h
@@ -9,7 +9,10 @@
 
 #include "field.h"
 
-/** A group element of the secp256k1 curve, in affine coordinates. */
+/** A group element in affine coordinates on the secp256k1 curve,
+ *  or occasionally on an isomorphic curve of the form y^2 = x^3 + 7*t^6.
+ *  Note: For exhaustive test mode, secp256k1 is replaced by a small subgroup of a different curve.
+ */
 typedef struct {
     secp256k1_fe x;
     secp256k1_fe y;
@@ -19,7 +22,9 @@ typedef struct {
 #define SECP256K1_GE_CONST(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) {SECP256K1_FE_CONST((a),(b),(c),(d),(e),(f),(g),(h)), SECP256K1_FE_CONST((i),(j),(k),(l),(m),(n),(o),(p)), 0}
 #define SECP256K1_GE_CONST_INFINITY {SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0), SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 0), 1}
 
-/** A group element of the secp256k1 curve, in jacobian coordinates. */
+/** A group element of the secp256k1 curve, in jacobian coordinates.
+ *  Note: For exhaustive test mode, secp256k1 is replaced by a small subgroup of a different curve.
+ */
 typedef struct {
     secp256k1_fe x; /* actual X: x/z^2 */
     secp256k1_fe y; /* actual Y: y/z^3 */
@@ -64,12 +69,24 @@ static void secp256k1_ge_set_gej_var(secp256k1_ge *r, secp256k1_gej *a);
 /** Set a batch of group elements equal to the inputs given in jacobian coordinates */
 static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a, size_t len);
 
-/** Bring a batch inputs given in jacobian coordinates (with known z-ratios) to
- *  the same global z "denominator". zr must contain the known z-ratios such
- *  that mul(a[i].z, zr[i+1]) == a[i+1].z. zr[0] is ignored. The x and y
- *  coordinates of the result are stored in r, the common z coordinate is
- *  stored in globalz. */
-static void secp256k1_ge_globalz_set_table_gej(size_t len, secp256k1_ge *r, secp256k1_fe *globalz, const secp256k1_gej *a, const secp256k1_fe *zr);
+/** Bring a batch of inputs to the same global z "denominator", based on ratios between
+ *  (omitted) z coordinates of adjacent elements.
+ *
+ *  Although the elements a[i] are _ge rather than _gej, they actually represent elements
+ *  in Jacobian coordinates with their z coordinates omitted.
+ *
+ *  Using the notation z(b) to represent the omitted z coordinate of b, the array zr of
+ *  z coordinate ratios must satisfy zr[i] == z(a[i]) / z(a[i-1]) for 0 < 'i' < len.
+ *  The zr[0] value is unused.
+ *
+ *  This function adjusts the coordinates of 'a' in place so that for all 'i', z(a[i]) == z(a[len-1]).
+ *  In other words, the initial value of z(a[len-1]) becomes the global z "denominator". Only the
+ *  a[i].x and a[i].y coordinates are explicitly modified; the adjustment of the omitted z coordinate is
+ *  implicit.
+ *
+ *  The coordinates of the final element a[len-1] are not changed.
+ */
+static void secp256k1_ge_globalz_fixup_table(size_t len, secp256k1_ge *a, const secp256k1_fe *zr);
 
 /** Set a group element (affine) equal to the point at infinity. */
 static void secp256k1_ge_set_infinity(secp256k1_ge *r);
diff --git a/src/group_impl.h b/src/group_impl.h
index bce9fbdad5..24cd2624f7 100644
--- a/src/group_impl.h
+++ b/src/group_impl.h
@@ -161,27 +161,26 @@ static void secp256k1_ge_set_all_gej_var(secp256k1_ge *r, const secp256k1_gej *a
     }
 }
 
-static void secp256k1_ge_globalz_set_table_gej(size_t len, secp256k1_ge *r, secp256k1_fe *globalz, const secp256k1_gej *a, const secp256k1_fe *zr) {
+static void secp256k1_ge_globalz_fixup_table(size_t len, secp256k1_ge *a, const secp256k1_fe *zr) {
     size_t i = len - 1;
     secp256k1_fe zs;
 
     if (len > 0) {
-        /* The z of the final point gives us the "global Z" for the table. */
-        r[i].x = a[i].x;
-        r[i].y = a[i].y;
         /* Ensure all y values are in weak normal form for fast negation of points */
-        secp256k1_fe_normalize_weak(&r[i].y);
-        *globalz = a[i].z;
-        r[i].infinity = 0;
+        secp256k1_fe_normalize_weak(&a[i].y);
         zs = zr[i];
 
         /* Work our way backwards, using the z-ratios to scale the x/y values. */
         while (i > 0) {
+            secp256k1_gej tmpa;
             if (i != len - 1) {
                 secp256k1_fe_mul(&zs, &zs, &zr[i]);
             }
             i--;
-            secp256k1_ge_set_gej_zinv(&r[i], &a[i], &zs);
+            tmpa.x = a[i].x;
+            tmpa.y = a[i].y;
+            tmpa.infinity = 0;
+            secp256k1_ge_set_gej_zinv(&a[i], &tmpa, &zs);
         }
     }
 }

From ee03b344e7c714485c2b5b6a6a3c9b381ff938ce Mon Sep 17 00:00:00 2001
From: Russell O'Connor <roconnor@blockstream.io>
Date: Fri, 26 Feb 2021 16:10:59 -0500
Subject: [PATCH 2/8] Remove the unused prej allocations.

---
 src/ecmult_impl.h | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h
index de47e7ad1c..074a91d3e4 100644
--- a/src/ecmult_impl.h
+++ b/src/ecmult_impl.h
@@ -47,7 +47,7 @@
 
 /* The number of objects allocated on the scratch space for ecmult_multi algorithms */
 #define PIPPENGER_SCRATCH_OBJECTS 6
-#define STRAUSS_SCRATCH_OBJECTS 7
+#define STRAUSS_SCRATCH_OBJECTS 6
 
 #define PIPPENGER_MAX_BUCKET_WINDOW 12
 
@@ -215,7 +215,6 @@ struct secp256k1_strauss_point_state {
 };
 
 struct secp256k1_strauss_state {
-    secp256k1_gej* prej;
     secp256k1_fe* zr;
     secp256k1_ge* pre_a;
     secp256k1_ge* pre_a_lam;
@@ -338,14 +337,12 @@ static void secp256k1_ecmult_strauss_wnaf(const struct secp256k1_strauss_state *
 }
 
 static void secp256k1_ecmult(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng) {
-    secp256k1_gej prej[ECMULT_TABLE_SIZE(WINDOW_A)];
     secp256k1_fe zr[ECMULT_TABLE_SIZE(WINDOW_A)];
     secp256k1_ge pre_a[ECMULT_TABLE_SIZE(WINDOW_A)];
     struct secp256k1_strauss_point_state ps[1];
     secp256k1_ge pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)];
     struct secp256k1_strauss_state state;
 
-    state.prej = prej;
     state.zr = zr;
     state.pre_a = pre_a;
     state.pre_a_lam = pre_a_lam;
@@ -354,7 +351,7 @@ static void secp256k1_ecmult(secp256k1_gej *r, const secp256k1_gej *a, const sec
 }
 
 static size_t secp256k1_strauss_scratch_size(size_t n_points) {
-    static const size_t point_size = (2 * sizeof(secp256k1_ge) + sizeof(secp256k1_gej) + sizeof(secp256k1_fe)) * ECMULT_TABLE_SIZE(WINDOW_A) + sizeof(struct secp256k1_strauss_point_state) + sizeof(secp256k1_gej) + sizeof(secp256k1_scalar);
+    static const size_t point_size = (2 * sizeof(secp256k1_ge) + sizeof(secp256k1_fe)) * ECMULT_TABLE_SIZE(WINDOW_A) + sizeof(struct secp256k1_strauss_point_state) + sizeof(secp256k1_gej) + sizeof(secp256k1_scalar);
     return n_points*point_size;
 }
 
@@ -375,13 +372,12 @@ static int secp256k1_ecmult_strauss_batch(const secp256k1_callback* error_callba
      * constant and strauss_scratch_size accordingly. */
     points = (secp256k1_gej*)secp256k1_scratch_alloc(error_callback, scratch, n_points * sizeof(secp256k1_gej));
     scalars = (secp256k1_scalar*)secp256k1_scratch_alloc(error_callback, scratch, n_points * sizeof(secp256k1_scalar));
-    state.prej = (secp256k1_gej*)secp256k1_scratch_alloc(error_callback, scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_gej));
     state.zr = (secp256k1_fe*)secp256k1_scratch_alloc(error_callback, scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_fe));
     state.pre_a = (secp256k1_ge*)secp256k1_scratch_alloc(error_callback, scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge));
     state.pre_a_lam = (secp256k1_ge*)secp256k1_scratch_alloc(error_callback, scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge));
     state.ps = (struct secp256k1_strauss_point_state*)secp256k1_scratch_alloc(error_callback, scratch, n_points * sizeof(struct secp256k1_strauss_point_state));
 
-    if (points == NULL || scalars == NULL || state.prej == NULL || state.zr == NULL || state.pre_a == NULL || state.pre_a_lam == NULL || state.ps == NULL) {
+    if (points == NULL || scalars == NULL || state.zr == NULL || state.pre_a == NULL || state.pre_a_lam == NULL || state.ps == NULL) {
         secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint);
         return 0;
     }

From c48a9f00562ad46e537b6a08c33adbf0c5434e2a Mon Sep 17 00:00:00 2001
From: Russell O'Connor <roconnor@blockstream.io>
Date: Sat, 27 Feb 2021 07:57:16 -0500
Subject: [PATCH 3/8] Eliminate the pre_a_lam array from ecmult_strauss_wnaf.

---
 src/ecmult_impl.h | 37 ++++++++++++++++++++++++++-----------
 src/field_impl.h  |  4 ++++
 src/group_impl.h  |  6 +-----
 3 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h
index 074a91d3e4..6de0872ab3 100644
--- a/src/ecmult_impl.h
+++ b/src/ecmult_impl.h
@@ -134,6 +134,20 @@ static void secp256k1_ecmult_odd_multiples_table_globalz_windowa(secp256k1_ge *p
     } \
 } while(0)
 
+#define ECMULT_TABLE_GET_GE_LAMBDA(r,pre,aux,n,w) do { \
+    VERIFY_CHECK(((n) & 1) == 1); \
+    VERIFY_CHECK((n) >= -((1 << ((w)-1)) - 1)); \
+    VERIFY_CHECK((n) <=  ((1 << ((w)-1)) - 1)); \
+    if ((n) > 0) { \
+        (r)->x = (aux)[((n)-1)/2]; \
+        (r)->y = (pre)[((n)-1)/2].y; \
+    } else { \
+        (r)->x = (aux)[(-(n)-1)/2]; \
+        secp256k1_fe_negate(&((r)->y), &((pre)[(-(n)-1)/2].y), 1); \
+    } \
+    (r)->infinity = 0; \
+} while(0)
+
 #define ECMULT_TABLE_GET_GE_STORAGE(r,pre,n,w) do { \
     VERIFY_CHECK(((n) & 1) == 1); \
     VERIFY_CHECK((n) >= -((1 << ((w)-1)) - 1)); \
@@ -215,7 +229,8 @@ struct secp256k1_strauss_point_state {
 };
 
 struct secp256k1_strauss_state {
-    secp256k1_fe* zr;
+    /* aux is used to hold z-ratios, and then used to hold pre_a[i].x * BETA values. */
+    secp256k1_fe* aux;
     secp256k1_ge* pre_a;
     secp256k1_ge* pre_a_lam;
     struct secp256k1_strauss_point_state* ps;
@@ -269,25 +284,25 @@ static void secp256k1_ecmult_strauss_wnaf(const struct secp256k1_strauss_state *
      */
     if (no > 0) {
         /* Compute the odd multiples in Jacobian form. */
-        secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->pre_a, state->zr, &Z, &a[state->ps[0].input_pos]);
+        secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->pre_a, state->aux, &Z, &a[state->ps[0].input_pos]);
         for (np = 1; np < no; ++np) {
             secp256k1_gej tmp = a[state->ps[np].input_pos];
 #ifdef VERIFY
             secp256k1_fe_normalize_var(&Z);
 #endif
             secp256k1_gej_rescale(&tmp, &Z);
-            secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->pre_a + np * ECMULT_TABLE_SIZE(WINDOW_A), state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), &Z, &tmp);
-            secp256k1_fe_mul(state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), state->zr + np * ECMULT_TABLE_SIZE(WINDOW_A), &(a[state->ps[np].input_pos].z));
+            secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->pre_a + np * ECMULT_TABLE_SIZE(WINDOW_A), state->aux + np * ECMULT_TABLE_SIZE(WINDOW_A), &Z, &tmp);
+            secp256k1_fe_mul(state->aux + np * ECMULT_TABLE_SIZE(WINDOW_A), state->aux + np * ECMULT_TABLE_SIZE(WINDOW_A), &(a[state->ps[np].input_pos].z));
         }
         /* Bring them to the same Z denominator. */
-        secp256k1_ge_globalz_fixup_table(ECMULT_TABLE_SIZE(WINDOW_A) * no, state->pre_a, state->zr);
+        secp256k1_ge_globalz_fixup_table(ECMULT_TABLE_SIZE(WINDOW_A) * no, state->pre_a, state->aux);
     } else {
         secp256k1_fe_set_int(&Z, 1);
     }
 
     for (np = 0; np < no; ++np) {
         for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) {
-            secp256k1_ge_mul_lambda(&state->pre_a_lam[np * ECMULT_TABLE_SIZE(WINDOW_A) + i], &state->pre_a[np * ECMULT_TABLE_SIZE(WINDOW_A) + i]);
+            secp256k1_fe_mul(&state->aux[np * ECMULT_TABLE_SIZE(WINDOW_A) + i], &state->pre_a[np * ECMULT_TABLE_SIZE(WINDOW_A) + i].x, &secp256k1_const_beta);
         }
     }
 
@@ -317,7 +332,7 @@ static void secp256k1_ecmult_strauss_wnaf(const struct secp256k1_strauss_state *
                 secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
             }
             if (i < state->ps[np].bits_na_lam && (n = state->ps[np].wnaf_na_lam[i])) {
-                ECMULT_TABLE_GET_GE(&tmpa, state->pre_a_lam + np * ECMULT_TABLE_SIZE(WINDOW_A), n, WINDOW_A);
+                ECMULT_TABLE_GET_GE_LAMBDA(&tmpa, state->pre_a + np * ECMULT_TABLE_SIZE(WINDOW_A), state->aux + np * ECMULT_TABLE_SIZE(WINDOW_A), n, WINDOW_A);
                 secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
             }
         }
@@ -337,13 +352,13 @@ static void secp256k1_ecmult_strauss_wnaf(const struct secp256k1_strauss_state *
 }
 
 static void secp256k1_ecmult(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng) {
-    secp256k1_fe zr[ECMULT_TABLE_SIZE(WINDOW_A)];
+    secp256k1_fe aux[ECMULT_TABLE_SIZE(WINDOW_A)];
     secp256k1_ge pre_a[ECMULT_TABLE_SIZE(WINDOW_A)];
     struct secp256k1_strauss_point_state ps[1];
     secp256k1_ge pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)];
     struct secp256k1_strauss_state state;
 
-    state.zr = zr;
+    state.aux = aux;
     state.pre_a = pre_a;
     state.pre_a_lam = pre_a_lam;
     state.ps = ps;
@@ -372,12 +387,12 @@ static int secp256k1_ecmult_strauss_batch(const secp256k1_callback* error_callba
      * constant and strauss_scratch_size accordingly. */
     points = (secp256k1_gej*)secp256k1_scratch_alloc(error_callback, scratch, n_points * sizeof(secp256k1_gej));
     scalars = (secp256k1_scalar*)secp256k1_scratch_alloc(error_callback, scratch, n_points * sizeof(secp256k1_scalar));
-    state.zr = (secp256k1_fe*)secp256k1_scratch_alloc(error_callback, scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_fe));
+    state.aux = (secp256k1_fe*)secp256k1_scratch_alloc(error_callback, scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_fe));
     state.pre_a = (secp256k1_ge*)secp256k1_scratch_alloc(error_callback, scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge));
     state.pre_a_lam = (secp256k1_ge*)secp256k1_scratch_alloc(error_callback, scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge));
     state.ps = (struct secp256k1_strauss_point_state*)secp256k1_scratch_alloc(error_callback, scratch, n_points * sizeof(struct secp256k1_strauss_point_state));
 
-    if (points == NULL || scalars == NULL || state.zr == NULL || state.pre_a == NULL || state.pre_a_lam == NULL || state.ps == NULL) {
+    if (points == NULL || scalars == NULL || state.aux == NULL || state.pre_a == NULL || state.pre_a_lam == NULL || state.ps == NULL) {
         secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint);
         return 0;
     }
diff --git a/src/field_impl.h b/src/field_impl.h
index 374284a1f4..17807c3168 100644
--- a/src/field_impl.h
+++ b/src/field_impl.h
@@ -136,5 +136,9 @@ static int secp256k1_fe_sqrt(secp256k1_fe *r, const secp256k1_fe *a) {
 }
 
 static const secp256k1_fe secp256k1_fe_one = SECP256K1_FE_CONST(0, 0, 0, 0, 0, 0, 0, 1);
+static const secp256k1_fe secp256k1_const_beta = SECP256K1_FE_CONST(
+    0x7ae96a2bul, 0x657c0710ul, 0x6e64479eul, 0xac3434e9ul,
+    0x9cf04975ul, 0x12f58995ul, 0xc1396c28ul, 0x719501eeul
+);
 
 #endif /* SECP256K1_FIELD_IMPL_H */
diff --git a/src/group_impl.h b/src/group_impl.h
index 24cd2624f7..f157f5de4f 100644
--- a/src/group_impl.h
+++ b/src/group_impl.h
@@ -647,12 +647,8 @@ static SECP256K1_INLINE void secp256k1_ge_storage_cmov(secp256k1_ge_storage *r,
 }
 
 static void secp256k1_ge_mul_lambda(secp256k1_ge *r, const secp256k1_ge *a) {
-    static const secp256k1_fe beta = SECP256K1_FE_CONST(
-        0x7ae96a2bul, 0x657c0710ul, 0x6e64479eul, 0xac3434e9ul,
-        0x9cf04975ul, 0x12f58995ul, 0xc1396c28ul, 0x719501eeul
-    );
     *r = *a;
-    secp256k1_fe_mul(&r->x, &r->x, &beta);
+    secp256k1_fe_mul(&r->x, &r->x, &secp256k1_const_beta);
 }
 
 static int secp256k1_ge_is_in_correct_subgroup(const secp256k1_ge* ge) {

From 508f659a625b5cc812ae4eaa8137b68b1bba20ab Mon Sep 17 00:00:00 2001
From: Russell O'Connor <roconnor@blockstream.io>
Date: Sat, 27 Feb 2021 08:00:55 -0500
Subject: [PATCH 4/8] Remove the unused pre_a_lam allocations.

---
 src/ecmult_impl.h | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h
index 6de0872ab3..0e806f41a3 100644
--- a/src/ecmult_impl.h
+++ b/src/ecmult_impl.h
@@ -47,7 +47,7 @@
 
 /* The number of objects allocated on the scratch space for ecmult_multi algorithms */
 #define PIPPENGER_SCRATCH_OBJECTS 6
-#define STRAUSS_SCRATCH_OBJECTS 6
+#define STRAUSS_SCRATCH_OBJECTS 5
 
 #define PIPPENGER_MAX_BUCKET_WINDOW 12
 
@@ -232,7 +232,6 @@ struct secp256k1_strauss_state {
     /* aux is used to hold z-ratios, and then used to hold pre_a[i].x * BETA values. */
     secp256k1_fe* aux;
     secp256k1_ge* pre_a;
-    secp256k1_ge* pre_a_lam;
     struct secp256k1_strauss_point_state* ps;
 };
 
@@ -355,18 +354,16 @@ static void secp256k1_ecmult(secp256k1_gej *r, const secp256k1_gej *a, const sec
     secp256k1_fe aux[ECMULT_TABLE_SIZE(WINDOW_A)];
     secp256k1_ge pre_a[ECMULT_TABLE_SIZE(WINDOW_A)];
     struct secp256k1_strauss_point_state ps[1];
-    secp256k1_ge pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)];
     struct secp256k1_strauss_state state;
 
     state.aux = aux;
     state.pre_a = pre_a;
-    state.pre_a_lam = pre_a_lam;
     state.ps = ps;
     secp256k1_ecmult_strauss_wnaf(&state, r, 1, a, na, ng);
 }
 
 static size_t secp256k1_strauss_scratch_size(size_t n_points) {
-    static const size_t point_size = (2 * sizeof(secp256k1_ge) + sizeof(secp256k1_fe)) * ECMULT_TABLE_SIZE(WINDOW_A) + sizeof(struct secp256k1_strauss_point_state) + sizeof(secp256k1_gej) + sizeof(secp256k1_scalar);
+    static const size_t point_size = (sizeof(secp256k1_ge) + sizeof(secp256k1_fe)) * ECMULT_TABLE_SIZE(WINDOW_A) + sizeof(struct secp256k1_strauss_point_state) + sizeof(secp256k1_gej) + sizeof(secp256k1_scalar);
     return n_points*point_size;
 }
 
@@ -389,10 +386,9 @@ static int secp256k1_ecmult_strauss_batch(const secp256k1_callback* error_callba
     scalars = (secp256k1_scalar*)secp256k1_scratch_alloc(error_callback, scratch, n_points * sizeof(secp256k1_scalar));
     state.aux = (secp256k1_fe*)secp256k1_scratch_alloc(error_callback, scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_fe));
     state.pre_a = (secp256k1_ge*)secp256k1_scratch_alloc(error_callback, scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge));
-    state.pre_a_lam = (secp256k1_ge*)secp256k1_scratch_alloc(error_callback, scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge));
     state.ps = (struct secp256k1_strauss_point_state*)secp256k1_scratch_alloc(error_callback, scratch, n_points * sizeof(struct secp256k1_strauss_point_state));
 
-    if (points == NULL || scalars == NULL || state.aux == NULL || state.pre_a == NULL || state.pre_a_lam == NULL || state.ps == NULL) {
+    if (points == NULL || scalars == NULL || state.aux == NULL || state.pre_a == NULL || state.ps == NULL) {
         secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint);
         return 0;
     }

From 80e91931f4ad7fba4c9eccb8193392c74738de1a Mon Sep 17 00:00:00 2001
From: Russell O'Connor <roconnor@blockstream.io>
Date: Sat, 27 Feb 2021 08:36:22 -0500
Subject: [PATCH 5/8] Eliminate na_1 and na_lam state fields from
 ecmult_strauss_wnaf.

---
 src/ecmult_impl.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h
index 0e806f41a3..c0aab33a56 100644
--- a/src/ecmult_impl.h
+++ b/src/ecmult_impl.h
@@ -220,7 +220,6 @@ static int secp256k1_ecmult_wnaf(int *wnaf, int len, const secp256k1_scalar *a,
 }
 
 struct secp256k1_strauss_point_state {
-    secp256k1_scalar na_1, na_lam;
     int wnaf_na_1[129];
     int wnaf_na_lam[129];
     int bits_na_1;
@@ -250,16 +249,17 @@ static void secp256k1_ecmult_strauss_wnaf(const struct secp256k1_strauss_state *
     size_t no = 0;
 
     for (np = 0; np < num; ++np) {
+        secp256k1_scalar na_1, na_lam;
         if (secp256k1_scalar_is_zero(&na[np]) || secp256k1_gej_is_infinity(&a[np])) {
             continue;
         }
         state->ps[no].input_pos = np;
         /* split na into na_1 and na_lam (where na = na_1 + na_lam*lambda, and na_1 and na_lam are ~128 bit) */
-        secp256k1_scalar_split_lambda(&state->ps[no].na_1, &state->ps[no].na_lam, &na[np]);
+        secp256k1_scalar_split_lambda(&na_1, &na_lam, &na[np]);
 
         /* build wnaf representation for na_1 and na_lam. */
-        state->ps[no].bits_na_1   = secp256k1_ecmult_wnaf(state->ps[no].wnaf_na_1,   129, &state->ps[no].na_1,   WINDOW_A);
-        state->ps[no].bits_na_lam = secp256k1_ecmult_wnaf(state->ps[no].wnaf_na_lam, 129, &state->ps[no].na_lam, WINDOW_A);
+        state->ps[no].bits_na_1   = secp256k1_ecmult_wnaf(state->ps[no].wnaf_na_1,   129, &na_1,   WINDOW_A);
+        state->ps[no].bits_na_lam = secp256k1_ecmult_wnaf(state->ps[no].wnaf_na_lam, 129, &na_lam, WINDOW_A);
         VERIFY_CHECK(state->ps[no].bits_na_1 <= 129);
         VERIFY_CHECK(state->ps[no].bits_na_lam <= 129);
         if (state->ps[no].bits_na_1 > bits) {

From 5aea2e854e3db0d60416b3118eb5949b315103ef Mon Sep 17 00:00:00 2001
From: Russell O'Connor <roconnor@blockstream.io>
Date: Sat, 27 Feb 2021 14:07:45 -0500
Subject: [PATCH 6/8] Eliminate input_pos state field from ecmult_strauss_wnaf.

---
 src/ecmult_impl.h | 46 +++++++++++++++++++++-------------------------
 1 file changed, 21 insertions(+), 25 deletions(-)

diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h
index c0aab33a56..854ac6db9d 100644
--- a/src/ecmult_impl.h
+++ b/src/ecmult_impl.h
@@ -224,7 +224,6 @@ struct secp256k1_strauss_point_state {
     int wnaf_na_lam[129];
     int bits_na_1;
     int bits_na_lam;
-    size_t input_pos;
 };
 
 struct secp256k1_strauss_state {
@@ -248,12 +247,13 @@ static void secp256k1_ecmult_strauss_wnaf(const struct secp256k1_strauss_state *
     size_t np;
     size_t no = 0;
 
+    secp256k1_fe_set_int(&Z, 1);
     for (np = 0; np < num; ++np) {
+        secp256k1_gej tmp;
         secp256k1_scalar na_1, na_lam;
         if (secp256k1_scalar_is_zero(&na[np]) || secp256k1_gej_is_infinity(&a[np])) {
             continue;
         }
-        state->ps[no].input_pos = np;
         /* split na into na_1 and na_lam (where na = na_1 + na_lam*lambda, and na_1 and na_lam are ~128 bit) */
         secp256k1_scalar_split_lambda(&na_1, &na_lam, &na[np]);
 
@@ -268,37 +268,33 @@ static void secp256k1_ecmult_strauss_wnaf(const struct secp256k1_strauss_state *
         if (state->ps[no].bits_na_lam > bits) {
             bits = state->ps[no].bits_na_lam;
         }
-        ++no;
-    }
 
-    /* Calculate odd multiples of a.
-     * All multiples are brought to the same Z 'denominator', which is stored
-     * in Z. Due to secp256k1' isomorphism we can do all operations pretending
-     * that the Z coordinate was 1, use affine addition formulae, and correct
-     * the Z coordinate of the result once at the end.
-     * The exception is the precomputed G table points, which are actually
-     * affine. Compared to the base used for other points, they have a Z ratio
-     * of 1/Z, so we can use secp256k1_gej_add_zinv_var, which uses the same
-     * isomorphism to efficiently add with a known Z inverse.
-     */
-    if (no > 0) {
-        /* Compute the odd multiples in Jacobian form. */
-        secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->pre_a, state->aux, &Z, &a[state->ps[0].input_pos]);
-        for (np = 1; np < no; ++np) {
-            secp256k1_gej tmp = a[state->ps[np].input_pos];
+        /* Calculate odd multiples of a.
+         * All multiples are brought to the same Z 'denominator', which is stored
+         * in Z. Due to secp256k1' isomorphism we can do all operations pretending
+         * that the Z coordinate was 1, use affine addition formulae, and correct
+         * the Z coordinate of the result once at the end.
+         * The exception is the precomputed G table points, which are actually
+         * affine. Compared to the base used for other points, they have a Z ratio
+         * of 1/Z, so we can use secp256k1_gej_add_zinv_var, which uses the same
+         * isomorphism to efficiently add with a known Z inverse.
+         */
+        tmp = a[np];
+        if (no) {
 #ifdef VERIFY
             secp256k1_fe_normalize_var(&Z);
 #endif
             secp256k1_gej_rescale(&tmp, &Z);
-            secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->pre_a + np * ECMULT_TABLE_SIZE(WINDOW_A), state->aux + np * ECMULT_TABLE_SIZE(WINDOW_A), &Z, &tmp);
-            secp256k1_fe_mul(state->aux + np * ECMULT_TABLE_SIZE(WINDOW_A), state->aux + np * ECMULT_TABLE_SIZE(WINDOW_A), &(a[state->ps[np].input_pos].z));
         }
-        /* Bring them to the same Z denominator. */
-        secp256k1_ge_globalz_fixup_table(ECMULT_TABLE_SIZE(WINDOW_A) * no, state->pre_a, state->aux);
-    } else {
-        secp256k1_fe_set_int(&Z, 1);
+        secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->pre_a + no * ECMULT_TABLE_SIZE(WINDOW_A), state->aux + no * ECMULT_TABLE_SIZE(WINDOW_A), &Z, &tmp);
+        if (no) secp256k1_fe_mul(state->aux + no * ECMULT_TABLE_SIZE(WINDOW_A), state->aux + no * ECMULT_TABLE_SIZE(WINDOW_A), &(a[np].z));
+
+        ++no;
     }
 
+    /* Bring them to the same Z denominator. */
+    secp256k1_ge_globalz_fixup_table(ECMULT_TABLE_SIZE(WINDOW_A) * no, state->pre_a, state->aux);
+
     for (np = 0; np < no; ++np) {
         for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) {
             secp256k1_fe_mul(&state->aux[np * ECMULT_TABLE_SIZE(WINDOW_A) + i], &state->pre_a[np * ECMULT_TABLE_SIZE(WINDOW_A) + i].x, &secp256k1_const_beta);

From 8242379f222636aa5a189ab5b392ab741d611725 Mon Sep 17 00:00:00 2001
From: Russell O'Connor <roconnor@blockstream.io>
Date: Sat, 27 Feb 2021 18:53:51 -0500
Subject: [PATCH 7/8] Eliminate the use of points and scalars from
 secp256k1_ecmult_strauss_batch. We have secp256k1_ecmult_strauss_wnaf invoke
 the callback itself.

---
 src/ecmult_impl.h | 79 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 60 insertions(+), 19 deletions(-)

diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h
index 854ac6db9d..4c49316b91 100644
--- a/src/ecmult_impl.h
+++ b/src/ecmult_impl.h
@@ -233,7 +233,9 @@ struct secp256k1_strauss_state {
     struct secp256k1_strauss_point_state* ps;
 };
 
-static void secp256k1_ecmult_strauss_wnaf(const struct secp256k1_strauss_state *state, secp256k1_gej *r, size_t num, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng) {
+typedef int (secp256k1_ecmult_strauss_multi_callback)(secp256k1_scalar *sc, secp256k1_gej *pt, size_t idx, void *data);
+
+static int secp256k1_ecmult_strauss_wnaf(const struct secp256k1_strauss_state *state, secp256k1_gej *r, size_t num, secp256k1_ecmult_strauss_multi_callback cb, void *cbdata, size_t cb_offset, const secp256k1_scalar *ng) {
     secp256k1_ge tmpa;
     secp256k1_fe Z;
     /* Split G factors. */
@@ -249,13 +251,17 @@ static void secp256k1_ecmult_strauss_wnaf(const struct secp256k1_strauss_state *
 
     secp256k1_fe_set_int(&Z, 1);
     for (np = 0; np < num; ++np) {
-        secp256k1_gej tmp;
-        secp256k1_scalar na_1, na_lam;
-        if (secp256k1_scalar_is_zero(&na[np]) || secp256k1_gej_is_infinity(&a[np])) {
+        secp256k1_gej a;
+        secp256k1_fe az;
+        secp256k1_scalar na, na_1, na_lam;
+
+        if (!cb(&na, &a, np + cb_offset, cbdata)) return 0;
+        if (secp256k1_scalar_is_zero(&na) || secp256k1_gej_is_infinity(&a)) {
             continue;
         }
+
         /* split na into na_1 and na_lam (where na = na_1 + na_lam*lambda, and na_1 and na_lam are ~128 bit) */
-        secp256k1_scalar_split_lambda(&na_1, &na_lam, &na[np]);
+        secp256k1_scalar_split_lambda(&na_1, &na_lam, &na);
 
         /* build wnaf representation for na_1 and na_lam. */
         state->ps[no].bits_na_1   = secp256k1_ecmult_wnaf(state->ps[no].wnaf_na_1,   129, &na_1,   WINDOW_A);
@@ -279,15 +285,15 @@ static void secp256k1_ecmult_strauss_wnaf(const struct secp256k1_strauss_state *
          * of 1/Z, so we can use secp256k1_gej_add_zinv_var, which uses the same
          * isomorphism to efficiently add with a known Z inverse.
          */
-        tmp = a[np];
+        az = a.z;
         if (no) {
 #ifdef VERIFY
             secp256k1_fe_normalize_var(&Z);
 #endif
-            secp256k1_gej_rescale(&tmp, &Z);
+            secp256k1_gej_rescale(&a, &Z);
         }
-        secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->pre_a + no * ECMULT_TABLE_SIZE(WINDOW_A), state->aux + no * ECMULT_TABLE_SIZE(WINDOW_A), &Z, &tmp);
-        if (no) secp256k1_fe_mul(state->aux + no * ECMULT_TABLE_SIZE(WINDOW_A), state->aux + no * ECMULT_TABLE_SIZE(WINDOW_A), &(a[np].z));
+        secp256k1_ecmult_odd_multiples_table(ECMULT_TABLE_SIZE(WINDOW_A), state->pre_a + no * ECMULT_TABLE_SIZE(WINDOW_A), state->aux + no * ECMULT_TABLE_SIZE(WINDOW_A), &Z, &a);
+        if (no) secp256k1_fe_mul(state->aux + no * ECMULT_TABLE_SIZE(WINDOW_A), state->aux + no * ECMULT_TABLE_SIZE(WINDOW_A), &az);
 
         ++no;
     }
@@ -344,6 +350,24 @@ static void secp256k1_ecmult_strauss_wnaf(const struct secp256k1_strauss_state *
     if (!r->infinity) {
         secp256k1_fe_mul(&r->z, &r->z, &Z);
     }
+
+    return 1;
+}
+
+struct secp256k1_ecmult_array_cb_data { /* Inputs served by secp256k1_ecmult_array_cb. */
+    const secp256k1_scalar *na; /* scalars; entry idx is copied out on every call */
+    const secp256k1_gej *a; /* points; the callback checks this pointer for NULL */
+};
+
+static int secp256k1_ecmult_array_cb(secp256k1_scalar *sc, secp256k1_gej *pt, size_t idx, void *data) { /* Strauss callback backed by plain arrays. */
+    struct secp256k1_ecmult_array_cb_data *array_data = data; /* data is read as a secp256k1_ecmult_array_cb_data */
+    *sc = array_data->na[idx]; /* the scalar is copied out unconditionally */
+    if (array_data->a) {
+        *pt = array_data->a[idx];
+        return 1;
+    } else {
+        return secp256k1_scalar_is_zero(sc); /* no point array: succeed only for a zero scalar; *pt is not written */
+    }
 }
 
 static void secp256k1_ecmult(secp256k1_gej *r, const secp256k1_gej *a, const secp256k1_scalar *na, const secp256k1_scalar *ng) {
@@ -351,11 +375,14 @@ static void secp256k1_ecmult(secp256k1_gej *r, const secp256k1_gej *a, const sec
     secp256k1_ge pre_a[ECMULT_TABLE_SIZE(WINDOW_A)];
     struct secp256k1_strauss_point_state ps[1];
     struct secp256k1_strauss_state state;
+    struct secp256k1_ecmult_array_cb_data data;
 
     state.aux = aux;
     state.pre_a = pre_a;
     state.ps = ps;
-    secp256k1_ecmult_strauss_wnaf(&state, r, 1, a, na, ng);
+    data.na = na;
+    data.a = a;
+    secp256k1_ecmult_strauss_wnaf(&state, r, 1, &secp256k1_ecmult_array_cb, &data, 0, ng);
 }
 
 static size_t secp256k1_strauss_scratch_size(size_t n_points) {
@@ -363,11 +390,28 @@ static size_t secp256k1_strauss_scratch_size(size_t n_points) {
     return n_points*point_size;
 }
 
+struct secp256k1_ecmult_adaptor_cb_data { /* State for secp256k1_ecmult_adaptor_cb. */
+    secp256k1_ecmult_multi_callback *cb; /* wrapped callback; it is handed a secp256k1_ge to fill */
+    void *data; /* passed through to cb as its last argument */
+};
+
+static int secp256k1_ecmult_adaptor_cb(secp256k1_scalar *sc, secp256k1_gej *pt, size_t idx, void *data) { /* Adapts a secp256k1_ecmult_multi_callback to the Strauss callback signature. */
+    secp256k1_ge tmp; /* the wrapped callback writes its point here as a secp256k1_ge */
+    struct secp256k1_ecmult_adaptor_cb_data *adaptor_data = data;
+    int result = adaptor_data->cb(sc, &tmp, idx, adaptor_data->data); /* sc and idx are forwarded unchanged */
+
+    if (result) { /* convert only on success; on failure *pt is not written */
+        secp256k1_gej_set_ge(pt, &tmp);
+    }
+
+    return result; /* the wrapped callback's return value is passed through */
+}
+
 static int secp256k1_ecmult_strauss_batch(const secp256k1_callback* error_callback, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n_points, size_t cb_offset) {
     secp256k1_gej* points;
     secp256k1_scalar* scalars;
+    struct secp256k1_ecmult_adaptor_cb_data adaptor_data;
     struct secp256k1_strauss_state state;
-    size_t i;
     const size_t scratch_checkpoint = secp256k1_scratch_checkpoint(error_callback, scratch);
 
     secp256k1_gej_set_infinity(r);
@@ -380,6 +424,8 @@ static int secp256k1_ecmult_strauss_batch(const secp256k1_callback* error_callba
      * constant and strauss_scratch_size accordingly. */
     points = (secp256k1_gej*)secp256k1_scratch_alloc(error_callback, scratch, n_points * sizeof(secp256k1_gej));
     scalars = (secp256k1_scalar*)secp256k1_scratch_alloc(error_callback, scratch, n_points * sizeof(secp256k1_scalar));
+    adaptor_data.cb = cb;
+    adaptor_data.data = cbdata;
     state.aux = (secp256k1_fe*)secp256k1_scratch_alloc(error_callback, scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_fe));
     state.pre_a = (secp256k1_ge*)secp256k1_scratch_alloc(error_callback, scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge));
     state.ps = (struct secp256k1_strauss_point_state*)secp256k1_scratch_alloc(error_callback, scratch, n_points * sizeof(struct secp256k1_strauss_point_state));
@@ -389,15 +435,10 @@ static int secp256k1_ecmult_strauss_batch(const secp256k1_callback* error_callba
         return 0;
     }
 
-    for (i = 0; i < n_points; i++) {
-        secp256k1_ge point;
-        if (!cb(&scalars[i], &point, i+cb_offset, cbdata)) {
-            secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint);
-            return 0;
-        }
-        secp256k1_gej_set_ge(&points[i], &point);
+    if (!secp256k1_ecmult_strauss_wnaf(&state, r, n_points, &secp256k1_ecmult_adaptor_cb, &adaptor_data, cb_offset, inp_g_sc)) {
+        secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint);
+        return 0;
     }
-    secp256k1_ecmult_strauss_wnaf(&state, r, n_points, points, scalars, inp_g_sc);
     secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint);
     return 1;
 }

From 29a1b7e60bd5335476e0437421ebb00efd47c7c2 Mon Sep 17 00:00:00 2001
From: Russell O'Connor <roconnor@blockstream.io>
Date: Sat, 27 Feb 2021 19:00:00 -0500
Subject: [PATCH 8/8] Remove the unused points and scalars allocation from
 secp256k1_ecmult_strauss_batch.

---
 src/ecmult_impl.h | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h
index 4c49316b91..dad182d966 100644
--- a/src/ecmult_impl.h
+++ b/src/ecmult_impl.h
@@ -47,7 +47,7 @@
 
 /* The number of objects allocated on the scratch space for ecmult_multi algorithms */
 #define PIPPENGER_SCRATCH_OBJECTS 6
-#define STRAUSS_SCRATCH_OBJECTS 5
+#define STRAUSS_SCRATCH_OBJECTS 3
 
 #define PIPPENGER_MAX_BUCKET_WINDOW 12
 
@@ -386,7 +386,7 @@ static void secp256k1_ecmult(secp256k1_gej *r, const secp256k1_gej *a, const sec
 }
 
 static size_t secp256k1_strauss_scratch_size(size_t n_points) {
-    static const size_t point_size = (sizeof(secp256k1_ge) + sizeof(secp256k1_fe)) * ECMULT_TABLE_SIZE(WINDOW_A) + sizeof(struct secp256k1_strauss_point_state) + sizeof(secp256k1_gej) + sizeof(secp256k1_scalar);
+    static const size_t point_size = (sizeof(secp256k1_ge) + sizeof(secp256k1_fe)) * ECMULT_TABLE_SIZE(WINDOW_A) + sizeof(struct secp256k1_strauss_point_state);
     return n_points*point_size;
 }
 
@@ -408,8 +408,6 @@ static int secp256k1_ecmult_adaptor_cb(secp256k1_scalar *sc, secp256k1_gej *pt,
 }
 
 static int secp256k1_ecmult_strauss_batch(const secp256k1_callback* error_callback, secp256k1_scratch *scratch, secp256k1_gej *r, const secp256k1_scalar *inp_g_sc, secp256k1_ecmult_multi_callback cb, void *cbdata, size_t n_points, size_t cb_offset) {
-    secp256k1_gej* points;
-    secp256k1_scalar* scalars;
     struct secp256k1_ecmult_adaptor_cb_data adaptor_data;
     struct secp256k1_strauss_state state;
     const size_t scratch_checkpoint = secp256k1_scratch_checkpoint(error_callback, scratch);
@@ -422,20 +420,14 @@ static int secp256k1_ecmult_strauss_batch(const secp256k1_callback* error_callba
     /* We allocate STRAUSS_SCRATCH_OBJECTS objects on the scratch space. If these
      * allocations change, make sure to update the STRAUSS_SCRATCH_OBJECTS
      * constant and strauss_scratch_size accordingly. */
-    points = (secp256k1_gej*)secp256k1_scratch_alloc(error_callback, scratch, n_points * sizeof(secp256k1_gej));
-    scalars = (secp256k1_scalar*)secp256k1_scratch_alloc(error_callback, scratch, n_points * sizeof(secp256k1_scalar));
     adaptor_data.cb = cb;
     adaptor_data.data = cbdata;
     state.aux = (secp256k1_fe*)secp256k1_scratch_alloc(error_callback, scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_fe));
     state.pre_a = (secp256k1_ge*)secp256k1_scratch_alloc(error_callback, scratch, n_points * ECMULT_TABLE_SIZE(WINDOW_A) * sizeof(secp256k1_ge));
     state.ps = (struct secp256k1_strauss_point_state*)secp256k1_scratch_alloc(error_callback, scratch, n_points * sizeof(struct secp256k1_strauss_point_state));
 
-    if (points == NULL || scalars == NULL || state.aux == NULL || state.pre_a == NULL || state.ps == NULL) {
-        secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint);
-        return 0;
-    }
-
-    if (!secp256k1_ecmult_strauss_wnaf(&state, r, n_points, &secp256k1_ecmult_adaptor_cb, &adaptor_data, cb_offset, inp_g_sc)) {
+    if (state.aux == NULL || state.pre_a == NULL || state.ps == NULL ||
+            !secp256k1_ecmult_strauss_wnaf(&state, r, n_points, &secp256k1_ecmult_adaptor_cb, &adaptor_data, cb_offset, inp_g_sc)) {
         secp256k1_scratch_apply_checkpoint(error_callback, scratch, scratch_checkpoint);
         return 0;
     }