
Commit 40f50d0

Merge #1184: Signed-digit based ecmult_const algorithm
355bbdf Add changelog entry for signed-digit ecmult_const algorithm (Pieter Wuille)
21f49d9 Remove unused secp256k1_scalar_shr_int (Pieter Wuille)
115fdc7 Remove unused secp256k1_wnaf_const (Pieter Wuille)
aa9f3a3 ecmult_const: add/improve tests (Jonas Nick)
4d16e90 Signed-digit based ecmult_const algorithm (Pieter Wuille)
ba523be make SECP256K1_SCALAR_CONST reduce modulo exhaustive group order (Pieter Wuille)
2140da9 Add secp256k1_scalar_half for halving scalars (+ tests/benchmarks). (Pieter Wuille)

Pull request description:

Using some insights learned from #1058, this replaces the fixed-wnaf ecmult_const algorithm with a signed-digit based one. Conceptually both algorithms are very similar, in that they boil down to summing precomputed odd multiples of the input points. Practically, however, the new algorithm is simpler because it uses only scalar operations, rather than relying on wnaf machinery with skew terms to guarantee odd multipliers.

The idea is that we can compute $q \cdot A$ as follows:

* Let $s = f(q)$, for some function $f()$.
* Compute $(s_1, s_2)$ such that $s = s_1 + \lambda s_2$, using `secp256k1_scalar_split_lambda`.
* Let $v_1 = s_1 + 2^{128}$ and $v_2 = s_2 + 2^{128}$ (such that the $v_i$ are positive and $n$ bits long).
* Compute the result as $$\sum_{i=0}^{n-1} (2v_1[i]-1) 2^i A + \sum_{i=0}^{n-1} (2v_2[i]-1) 2^i \lambda A,$$ where $x[i]$ stands for the *i*'th bit of $x$, i.e. a sum of positive and negative powers of two times $A$, selected by the bits of $v_1$ and $v_2$.

The comments in `ecmult_const_impl.h` show that if $f(q) = (q + (1+\lambda)(2^n - 2^{129} - 1))/2 \bmod n$, the result equals $q \cdot A$.

This last step can be performed in groups of multiple bits at once, by looking up entries in a precomputed table of odd multiples of $A$ and $\lambda A$, and then multiplying by a power of two before proceeding to the next group.

The result is slightly faster (I measure ~2% speedup), but significantly simpler as it only uses scalar arithmetic to determine the table lookup values. The speedup comes from the fact that no skew corrections are needed at the end, and from lower overhead in determining the table indices.

The precomputed table sizes are also made independent from the `ecmult` ones, after observing that the optimal table size is bigger here (which also gives a small speedup).

ACKs for top commit:
  jonasnick:
    ACK 355bbdf
  siv2r:
    ACK 355bbdf
  real-or-random:
    ACK 355bbdf

Tree-SHA512: 13db572cb7f9be00bf0931c65fcd8bc8b5545be86a8c8700bd6a79ad9e4d9e5e79e7f763f92ca6a91d9717a355f8162204b0ea821b6ae99d58cb400497ddc656
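To see why this choice of $f$ works (a short derivation, expanding on the description above): for any $n$-bit value $v$, the signed digits sum to

$$\sum_{i=0}^{n-1} (2v[i]-1)\,2^i = 2v - (2^n - 1).$$

Substituting $v_1 = s_1 + 2^{128}$ and $v_2 = s_2 + 2^{128}$, the computed point is

$$\big(2s_1 + 2^{129} - 2^n + 1\big)A + \big(2s_2 + 2^{129} - 2^n + 1\big)\lambda A = \big(2s + (1+\lambda)(2^{129} - 2^n + 1)\big)A,$$

using $s = s_1 + \lambda s_2$. This equals $q \cdot A$ precisely when $2s \equiv q + (1+\lambda)(2^n - 2^{129} - 1) \pmod n$, i.e. when $s = f(q)$ as defined above; performing that division by two exactly (mod $n$) is what the new `secp256k1_scalar_half` is for.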
2 parents 1f1bb78 + 355bbdf commit 40f50d0

9 files changed, +438 −336 lines changed

CHANGELOG.md

+3
@@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+#### Changed
+ - The point multiplication algorithm used for ECDH operations (module `ecdh`) was replaced with a slightly faster one.
+
 ## [0.4.0] - 2023-09-04
 
 #### Added

src/bench_internal.c

+13 −14

@@ -14,7 +14,6 @@
 #include "field_impl.h"
 #include "group_impl.h"
 #include "scalar_impl.h"
-#include "ecmult_const_impl.h"
 #include "ecmult_impl.h"
 #include "bench.h"
 
@@ -98,6 +97,18 @@ static void bench_scalar_negate(void* arg, int iters) {
     }
 }
 
+static void bench_scalar_half(void* arg, int iters) {
+    int i;
+    bench_inv *data = (bench_inv*)arg;
+    secp256k1_scalar s = data->scalar[0];
+
+    for (i = 0; i < iters; i++) {
+        secp256k1_scalar_half(&s, &s);
+    }
+
+    data->scalar[0] = s;
+}
+
 static void bench_scalar_mul(void* arg, int iters) {
     int i;
     bench_inv *data = (bench_inv*)arg;
@@ -309,18 +320,6 @@ static void bench_ecmult_wnaf(void* arg, int iters) {
     CHECK(bits <= 256*iters);
 }
 
-static void bench_wnaf_const(void* arg, int iters) {
-    int i, bits = 0, overflow = 0;
-    bench_inv *data = (bench_inv*)arg;
-
-    for (i = 0; i < iters; i++) {
-        bits += secp256k1_wnaf_const(data->wnaf, &data->scalar[0], WINDOW_A, 256);
-        overflow += secp256k1_scalar_add(&data->scalar[0], &data->scalar[0], &data->scalar[1]);
-    }
-    CHECK(overflow >= 0);
-    CHECK(bits <= 256*iters);
-}
-
 static void bench_sha256(void* arg, int iters) {
     int i;
     bench_inv *data = (bench_inv*)arg;
@@ -370,6 +369,7 @@ int main(int argc, char **argv) {
     int d = argc == 1; /* default */
     print_output_table_header_row();
 
+    if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "half")) run_benchmark("scalar_half", bench_scalar_half, bench_setup, NULL, &data, 10, iters*100);
     if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "add")) run_benchmark("scalar_add", bench_scalar_add, bench_setup, NULL, &data, 10, iters*100);
     if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "negate")) run_benchmark("scalar_negate", bench_scalar_negate, bench_setup, NULL, &data, 10, iters*100);
     if (d || have_flag(argc, argv, "scalar") || have_flag(argc, argv, "mul")) run_benchmark("scalar_mul", bench_scalar_mul, bench_setup, NULL, &data, 10, iters*10);
@@ -394,7 +394,6 @@ int main(int argc, char **argv) {
     if (d || have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_zinv_var", bench_group_add_zinv_var, bench_setup, NULL, &data, 10, iters*10);
     if (d || have_flag(argc, argv, "group") || have_flag(argc, argv, "to_affine")) run_benchmark("group_to_affine_var", bench_group_to_affine_var, bench_setup, NULL, &data, 10, iters);
 
-    if (d || have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("wnaf_const", bench_wnaf_const, bench_setup, NULL, &data, 10, iters);
     if (d || have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("ecmult_wnaf", bench_ecmult_wnaf, bench_setup, NULL, &data, 10, iters);
 
     if (d || have_flag(argc, argv, "hash") || have_flag(argc, argv, "sha256")) run_benchmark("hash_sha256", bench_sha256, bench_setup, NULL, &data, 10, iters);

src/ecmult_const_impl.h

+215 −162
Large diffs are not rendered by default.

src/scalar.h

+4 −5

@@ -25,7 +25,7 @@ static void secp256k1_scalar_clear(secp256k1_scalar *r);
 /** Access bits from a scalar. All requested bits must belong to the same 32-bit limb. */
 static unsigned int secp256k1_scalar_get_bits(const secp256k1_scalar *a, unsigned int offset, unsigned int count);
 
-/** Access bits from a scalar. Not constant time. */
+/** Access bits from a scalar. Not constant time in offset and count. */
 static unsigned int secp256k1_scalar_get_bits_var(const secp256k1_scalar *a, unsigned int offset, unsigned int count);
 
 /** Set a scalar from a big endian byte array. The scalar will be reduced modulo group order `n`.
@@ -54,10 +54,6 @@ static void secp256k1_scalar_cadd_bit(secp256k1_scalar *r, unsigned int bit, int
 /** Multiply two scalars (modulo the group order). */
 static void secp256k1_scalar_mul(secp256k1_scalar *r, const secp256k1_scalar *a, const secp256k1_scalar *b);
 
-/** Shift a scalar right by some amount strictly between 0 and 16, returning
- *  the low bits that were shifted off */
-static int secp256k1_scalar_shr_int(secp256k1_scalar *r, int n);
-
 /** Compute the inverse of a scalar (modulo the group order). */
 static void secp256k1_scalar_inverse(secp256k1_scalar *r, const secp256k1_scalar *a);
 
@@ -67,6 +63,9 @@ static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_sc
 /** Compute the complement of a scalar (modulo the group order). */
 static void secp256k1_scalar_negate(secp256k1_scalar *r, const secp256k1_scalar *a);
 
+/** Multiply a scalar with the multiplicative inverse of 2. */
+static void secp256k1_scalar_half(secp256k1_scalar *r, const secp256k1_scalar *a);
+
 /** Check whether a scalar equals zero. */
 static int secp256k1_scalar_is_zero(const secp256k1_scalar *a);
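To illustrate where the new `secp256k1_scalar_half` declaration fits into the algorithm, the multiplier transformation $s = f(q)$ from the PR description reduces to one addition and one halving. The following is a hypothetical sketch only: the constant name `S_OFFSET` and the helper function are illustrative and do not reflect the actual contents of `ecmult_const_impl.h` (whose diff is not rendered above).

```c
/* Hypothetical sketch: derive the signed-digit multiplier s = f(q).
 * S_OFFSET is assumed to hold (1+lambda)*(2^n - 2^129 - 1) mod n,
 * precomputed once; it is not an actual constant in the library. */
static void sketch_compute_s(secp256k1_scalar *s, const secp256k1_scalar *q,
                             const secp256k1_scalar *S_OFFSET) {
    secp256k1_scalar_add(s, q, S_OFFSET); /* s = q + (1+lambda)(2^n - 2^129 - 1) (mod n) */
    secp256k1_scalar_half(s, s);          /* s = f(q); exact halving modulo the group order */
}
```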

src/scalar_4x64_impl.h

+41 −16

@@ -199,6 +199,47 @@ static void secp256k1_scalar_negate(secp256k1_scalar *r, const secp256k1_scalar
     secp256k1_scalar_verify(r);
 }
 
+static void secp256k1_scalar_half(secp256k1_scalar *r, const secp256k1_scalar *a) {
+    /* Writing `/` for field division and `//` for integer division, we compute
+     *
+     *    a/2 = (a - (a&1))/2 + (a&1)/2
+     *        = (a >> 1) + (a&1 ?    1/2 : 0)
+     *        = (a >> 1) + (a&1 ? n//2+1 : 0),
+     *
+     * where n is the group order and in the last equality we have used 1/2 = n//2+1 (mod n).
+     * For n//2, we have the constants SECP256K1_N_H_0, ...
+     *
+     * This sum does not overflow. The most extreme case is a = -2, the largest odd scalar. Here:
+     * - the left summand is:  a >> 1 = (a - a&1)/2 = (n-2-1)//2           = (n-3)//2
+     * - the right summand is: a&1 ? n//2+1 : 0 = n//2+1 = (n-1)//2 + 2//2 = (n+1)//2
+     * Together they sum to (n-3)//2 + (n+1)//2 = (2n-2)//2 = n - 1, which is less than n.
+     */
+    uint64_t mask = -(uint64_t)(a->d[0] & 1U);
+    secp256k1_uint128 t;
+    secp256k1_scalar_verify(a);
+
+    secp256k1_u128_from_u64(&t, (a->d[0] >> 1) | (a->d[1] << 63));
+    secp256k1_u128_accum_u64(&t, (SECP256K1_N_H_0 + 1U) & mask);
+    r->d[0] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
+    secp256k1_u128_accum_u64(&t, (a->d[1] >> 1) | (a->d[2] << 63));
+    secp256k1_u128_accum_u64(&t, SECP256K1_N_H_1 & mask);
+    r->d[1] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
+    secp256k1_u128_accum_u64(&t, (a->d[2] >> 1) | (a->d[3] << 63));
+    secp256k1_u128_accum_u64(&t, SECP256K1_N_H_2 & mask);
+    r->d[2] = secp256k1_u128_to_u64(&t); secp256k1_u128_rshift(&t, 64);
+    r->d[3] = secp256k1_u128_to_u64(&t) + (a->d[3] >> 1) + (SECP256K1_N_H_3 & mask);
+#ifdef VERIFY
+    /* The line above only computed the bottom 64 bits of r->d[3]; redo the computation
+     * in full 128 bits to make sure the top 64 bits are indeed zero. */
+    secp256k1_u128_accum_u64(&t, a->d[3] >> 1);
+    secp256k1_u128_accum_u64(&t, SECP256K1_N_H_3 & mask);
+    secp256k1_u128_rshift(&t, 64);
+    VERIFY_CHECK(secp256k1_u128_to_u64(&t) == 0);
+
+    secp256k1_scalar_verify(r);
+#endif
+}
+
 SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar *a) {
     secp256k1_scalar_verify(a);
 
@@ -809,22 +850,6 @@ static void secp256k1_scalar_mul(secp256k1_scalar *r, const secp256k1_scalar *a,
     secp256k1_scalar_verify(r);
 }
 
-static int secp256k1_scalar_shr_int(secp256k1_scalar *r, int n) {
-    int ret;
-    secp256k1_scalar_verify(r);
-    VERIFY_CHECK(n > 0);
-    VERIFY_CHECK(n < 16);
-
-    ret = r->d[0] & ((1 << n) - 1);
-    r->d[0] = (r->d[0] >> n) + (r->d[1] << (64 - n));
-    r->d[1] = (r->d[1] >> n) + (r->d[2] << (64 - n));
-    r->d[2] = (r->d[2] >> n) + (r->d[3] << (64 - n));
-    r->d[3] = (r->d[3] >> n);
-
-    secp256k1_scalar_verify(r);
-    return ret;
-}
-
 static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *k) {
     secp256k1_scalar_verify(k);
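The branch-free pattern used above (conditionally add $n//2 + 1$ under an all-ones or all-zero mask derived from the low bit, then shift right) can be seen in isolation in the following standalone toy model over a small odd modulus. This is an illustration only, not library code; the helper name `half_mod` and the modulus are arbitrary.

```c
#include <stdint.h>
#include <assert.h>

/* Toy model of the halving trick: return a/2 mod n for an odd modulus n,
 * using only a mask-selected add of n/2+1 and a right shift, mirroring the
 * structure of secp256k1_scalar_half (without the limb arithmetic). */
static uint64_t half_mod(uint64_t a, uint64_t n) {
    uint64_t mask = -(uint64_t)(a & 1U);     /* all-ones iff a is odd */
    return (a >> 1) + ((n / 2 + 1U) & mask); /* (a>>1) + (a odd ? n//2+1 : 0) */
}

int main(void) {
    uint64_t n = 1000003; /* any odd modulus; stands in for the group order */
    uint64_t a;
    for (a = 0; a < n; a++) {
        assert(2 * half_mod(a, n) % n == a); /* doubling the half gives back a */
    }
    return 0;
}
```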

src/scalar_8x32_impl.h

+49 −20

@@ -245,6 +245,55 @@ static void secp256k1_scalar_negate(secp256k1_scalar *r, const secp256k1_scalar
     secp256k1_scalar_verify(r);
 }
 
+static void secp256k1_scalar_half(secp256k1_scalar *r, const secp256k1_scalar *a) {
+    /* Writing `/` for field division and `//` for integer division, we compute
+     *
+     *    a/2 = (a - (a&1))/2 + (a&1)/2
+     *        = (a >> 1) + (a&1 ?    1/2 : 0)
+     *        = (a >> 1) + (a&1 ? n//2+1 : 0),
+     *
+     * where n is the group order and in the last equality we have used 1/2 = n//2+1 (mod n).
+     * For n//2, we have the constants SECP256K1_N_H_0, ...
+     *
+     * This sum does not overflow. The most extreme case is a = -2, the largest odd scalar. Here:
+     * - the left summand is:  a >> 1 = (a - a&1)/2 = (n-2-1)//2           = (n-3)//2
+     * - the right summand is: a&1 ? n//2+1 : 0 = n//2+1 = (n-1)//2 + 2//2 = (n+1)//2
+     * Together they sum to (n-3)//2 + (n+1)//2 = (2n-2)//2 = n - 1, which is less than n.
+     */
+    uint32_t mask = -(uint32_t)(a->d[0] & 1U);
+    uint64_t t = (uint32_t)((a->d[0] >> 1) | (a->d[1] << 31));
+    secp256k1_scalar_verify(a);
+
+    t += (SECP256K1_N_H_0 + 1U) & mask;
+    r->d[0] = t; t >>= 32;
+    t += (uint32_t)((a->d[1] >> 1) | (a->d[2] << 31));
+    t += SECP256K1_N_H_1 & mask;
+    r->d[1] = t; t >>= 32;
+    t += (uint32_t)((a->d[2] >> 1) | (a->d[3] << 31));
+    t += SECP256K1_N_H_2 & mask;
+    r->d[2] = t; t >>= 32;
+    t += (uint32_t)((a->d[3] >> 1) | (a->d[4] << 31));
+    t += SECP256K1_N_H_3 & mask;
+    r->d[3] = t; t >>= 32;
+    t += (uint32_t)((a->d[4] >> 1) | (a->d[5] << 31));
+    t += SECP256K1_N_H_4 & mask;
+    r->d[4] = t; t >>= 32;
+    t += (uint32_t)((a->d[5] >> 1) | (a->d[6] << 31));
+    t += SECP256K1_N_H_5 & mask;
+    r->d[5] = t; t >>= 32;
+    t += (uint32_t)((a->d[6] >> 1) | (a->d[7] << 31));
+    t += SECP256K1_N_H_6 & mask;
+    r->d[6] = t; t >>= 32;
+    r->d[7] = (uint32_t)t + (uint32_t)(a->d[7] >> 1) + (SECP256K1_N_H_7 & mask);
+#ifdef VERIFY
+    /* The line above only computed the bottom 32 bits of r->d[7]. Redo the computation
+     * in full 64 bits to make sure the top 32 bits are indeed zero. */
+    VERIFY_CHECK((t + (a->d[7] >> 1) + (SECP256K1_N_H_7 & mask)) >> 32 == 0);
+
+    secp256k1_scalar_verify(r);
+#endif
+}
+
 SECP256K1_INLINE static int secp256k1_scalar_is_one(const secp256k1_scalar *a) {
     secp256k1_scalar_verify(a);
 
@@ -613,26 +662,6 @@ static void secp256k1_scalar_mul(secp256k1_scalar *r, const secp256k1_scalar *a,
     secp256k1_scalar_verify(r);
 }
 
-static int secp256k1_scalar_shr_int(secp256k1_scalar *r, int n) {
-    int ret;
-    secp256k1_scalar_verify(r);
-    VERIFY_CHECK(n > 0);
-    VERIFY_CHECK(n < 16);
-
-    ret = r->d[0] & ((1 << n) - 1);
-    r->d[0] = (r->d[0] >> n) + (r->d[1] << (32 - n));
-    r->d[1] = (r->d[1] >> n) + (r->d[2] << (32 - n));
-    r->d[2] = (r->d[2] >> n) + (r->d[3] << (32 - n));
-    r->d[3] = (r->d[3] >> n) + (r->d[4] << (32 - n));
-    r->d[4] = (r->d[4] >> n) + (r->d[5] << (32 - n));
-    r->d[5] = (r->d[5] >> n) + (r->d[6] << (32 - n));
-    r->d[6] = (r->d[6] >> n) + (r->d[7] << (32 - n));
-    r->d[7] = (r->d[7] >> n);
-
-    secp256k1_scalar_verify(r);
-    return ret;
-}
-
 static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *k) {
     secp256k1_scalar_verify(k);

src/scalar_low.h

+9 −2

@@ -1,5 +1,5 @@
 /***********************************************************************
- * Copyright (c) 2015 Andrew Poelstra *
+ * Copyright (c) 2015, 2022 Andrew Poelstra, Pieter Wuille *
  * Distributed under the MIT software license, see the accompanying *
  * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
  ***********************************************************************/
@@ -12,6 +12,13 @@
 /** A scalar modulo the group order of the secp256k1 curve. */
 typedef uint32_t secp256k1_scalar;
 
-#define SECP256K1_SCALAR_CONST(d7, d6, d5, d4, d3, d2, d1, d0) (d0)
+/* A compile-time constant equal to 2^32 (modulo order). */
+#define SCALAR_2P32 ((0xffffffffUL % EXHAUSTIVE_TEST_ORDER) + 1U)
+
+/* Compute a*2^32 + b (modulo order). */
+#define SCALAR_HORNER(a, b) (((uint64_t)(a) * SCALAR_2P32 + (b)) % EXHAUSTIVE_TEST_ORDER)
+
+/* Evaluates to the provided 256-bit constant reduced modulo order. */
+#define SECP256K1_SCALAR_CONST(d7, d6, d5, d4, d3, d2, d1, d0) SCALAR_HORNER(SCALAR_HORNER(SCALAR_HORNER(SCALAR_HORNER(SCALAR_HORNER(SCALAR_HORNER(SCALAR_HORNER((d7), (d6)), (d5)), (d4)), (d3)), (d2)), (d1)), (d0))
 
 #endif /* SECP256K1_SCALAR_REPR_H */
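To make the effect of the new macro concrete, here is a standalone check with an exhaustive-test order of 13 (one of the orders exercised by the exhaustive tests). The macros are copied from the diff above for the demonstration; the expected value is just $2^{32} \bmod 13 = 9$, whereas the old definition would simply have evaluated to `d0 = 0`.

```c
#include <stdint.h>
#include <stdio.h>

#define EXHAUSTIVE_TEST_ORDER 13

/* Copies of the macros introduced above, for a standalone demonstration. */
#define SCALAR_2P32 ((0xffffffffUL % EXHAUSTIVE_TEST_ORDER) + 1U)
#define SCALAR_HORNER(a, b) (((uint64_t)(a) * SCALAR_2P32 + (b)) % EXHAUSTIVE_TEST_ORDER)
#define SECP256K1_SCALAR_CONST(d7, d6, d5, d4, d3, d2, d1, d0) \
    SCALAR_HORNER(SCALAR_HORNER(SCALAR_HORNER(SCALAR_HORNER(SCALAR_HORNER(SCALAR_HORNER(SCALAR_HORNER((d7), (d6)), (d5)), (d4)), (d3)), (d2)), (d1)), (d0))

int main(void) {
    /* The 256-bit constant 2^32, i.e. limbs (0,...,0,1,0), reduced modulo 13. */
    printf("%llu\n", (unsigned long long)SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 1, 0));
    /* Prints 9, since 2^32 = 13*330382099 + 9. */
    return 0;
}
```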

src/scalar_low_impl.h

+8 −13

@@ -139,19 +139,6 @@ static void secp256k1_scalar_mul(secp256k1_scalar *r, const secp256k1_scalar *a,
     secp256k1_scalar_verify(r);
 }
 
-static int secp256k1_scalar_shr_int(secp256k1_scalar *r, int n) {
-    int ret;
-    secp256k1_scalar_verify(r);
-    VERIFY_CHECK(n > 0);
-    VERIFY_CHECK(n < 16);
-
-    ret = *r & ((1 << n) - 1);
-    *r >>= n;
-
-    secp256k1_scalar_verify(r);
-    return ret;
-}
-
 static void secp256k1_scalar_split_128(secp256k1_scalar *r1, secp256k1_scalar *r2, const secp256k1_scalar *a) {
     secp256k1_scalar_verify(a);
 
@@ -205,4 +192,12 @@ static void secp256k1_scalar_inverse_var(secp256k1_scalar *r, const secp256k1_sc
     secp256k1_scalar_verify(r);
 }
 
+static void secp256k1_scalar_half(secp256k1_scalar *r, const secp256k1_scalar *a) {
+    secp256k1_scalar_verify(a);
+
+    *r = (*a + ((-(uint32_t)(*a & 1)) & EXHAUSTIVE_TEST_ORDER)) >> 1;
+
+    secp256k1_scalar_verify(r);
+}
+
 #endif /* SECP256K1_SCALAR_REPR_IMPL_H */
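A quick worked example of the one-line halving above, with `EXHAUSTIVE_TEST_ORDER` equal to 13: for the odd scalar 5 the mask is all-ones, so the order is added before shifting, giving (5 + 13) >> 1 = 9, and indeed 2·9 = 18 ≡ 5 (mod 13); for the even scalar 6 the mask is zero and the result is simply 3.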
