Merge #1512: msan: notate more variable assignments from assembly code

real-or-random · real-or-random · commit 4b77fec67a80 · 2024-04-03T19:06:22.000+02:00
f7f0184 msan: notate more variable assignments from assembly code (Cory Fields) a613391 change inconsistent array param to pointer (Cory Fields) Pull request description: This was missed in 31ba404 because older versions of clang did not complain about it. But clang-17, at least, does. The array-as-a-param makes this annoying because `sizeof(l)` is not helpful. I'd be happy to change the size calculation if there are any better suggestions or strong preferences. ACKs for top commit: sipa: utACK f7f0184 real-or-random: ACK f7f0184 tests work fine with clang 17 and `./configure CFLAGS="-fsanitize=memory -fsanitize-memory-track-origins=2 -fno-omit-frame-pointer -g -O1 -fno-optimize-sibling-calls" CC=clang` Tree-SHA512: 8ab22209ef322a10f500b123c82ae5e7141ae1da0e7a890cbf90bd7d2eb11f397db4ccfe15a1666f2f49228585cccbf5bec741effebd1e2c6012cb7ea1689675
diff --git a/src/scalar_4x64_impl.h b/src/scalar_4x64_impl.h
@@ -681,7 +681,7 @@ static void secp256k1_scalar_reduce_512(secp256k1_scalar *r, const uint64_t *l)
     secp256k1_scalar_reduce(r, c + secp256k1_scalar_check_overflow(r));
 }
 
-static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar *a, const secp256k1_scalar *b) {
+static void secp256k1_scalar_mul_512(uint64_t *l8, const secp256k1_scalar *a, const secp256k1_scalar *b) {
 #ifdef USE_ASM_X86_64
     const uint64_t *pb = b->d;
     __asm__ __volatile__(
@@ -696,7 +696,7 @@ static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar *a, c
     /* (rax,rdx) = a0 * b0 */
     "movq %%r15, %%rax\n"
     "mulq %%r11\n"
-    /* Extract l0 */
+    /* Extract l8[0] */
     "movq %%rax, 0(%%rsi)\n"
     /* (r8,r9,r10) = (rdx) */
     "movq %%rdx, %%r8\n"
@@ -714,7 +714,7 @@ static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar *a, c
     "addq %%rax, %%r8\n"
     "adcq %%rdx, %%r9\n"
     "adcq $0, %%r10\n"
-    /* Extract l1 */
+    /* Extract l8[1] */
     "movq %%r8, 8(%%rsi)\n"
     "xorq %%r8, %%r8\n"
     /* (r9,r10,r8) += a0 * b2 */
@@ -735,7 +735,7 @@ static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar *a, c
     "addq %%rax, %%r9\n"
     "adcq %%rdx, %%r10\n"
     "adcq $0, %%r8\n"
-    /* Extract l2 */
+    /* Extract l8[2] */
     "movq %%r9, 16(%%rsi)\n"
     "xorq %%r9, %%r9\n"
     /* (r10,r8,r9) += a0 * b3 */
@@ -764,7 +764,7 @@ static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar *a, c
     "addq %%rax, %%r10\n"
     "adcq %%rdx, %%r8\n"
     "adcq $0, %%r9\n"
-    /* Extract l3 */
+    /* Extract l8[3] */
     "movq %%r10, 24(%%rsi)\n"
     "xorq %%r10, %%r10\n"
     /* (r8,r9,r10) += a1 * b3 */
@@ -785,7 +785,7 @@ static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar *a, c
     "addq %%rax, %%r8\n"
     "adcq %%rdx, %%r9\n"
     "adcq $0, %%r10\n"
-    /* Extract l4 */
+    /* Extract l8[4] */
     "movq %%r8, 32(%%rsi)\n"
     "xorq %%r8, %%r8\n"
     /* (r9,r10,r8) += a2 * b3 */
@@ -800,51 +800,54 @@ static void secp256k1_scalar_mul_512(uint64_t l[8], const secp256k1_scalar *a, c
     "addq %%rax, %%r9\n"
     "adcq %%rdx, %%r10\n"
     "adcq $0, %%r8\n"
-    /* Extract l5 */
+    /* Extract l8[5] */
     "movq %%r9, 40(%%rsi)\n"
     /* (r10,r8) += a3 * b3 */
     "movq %%r15, %%rax\n"
     "mulq %%r14\n"
     "addq %%rax, %%r10\n"
     "adcq %%rdx, %%r8\n"
-    /* Extract l6 */
+    /* Extract l8[6] */
     "movq %%r10, 48(%%rsi)\n"
-    /* Extract l7 */
+    /* Extract l8[7] */
     "movq %%r8, 56(%%rsi)\n"
     : "+d"(pb)
-    : "S"(l), "D"(a->d)
+    : "S"(l8), "D"(a->d)
     : "rax", "rbx", "rcx", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", "cc", "memory");
+
+    SECP256K1_CHECKMEM_MSAN_DEFINE(l8, sizeof(*l8) * 8);
+
 #else
     /* 160 bit accumulator. */
     uint64_t c0 = 0, c1 = 0;
     uint32_t c2 = 0;
 
-    /* l[0..7] = a[0..3] * b[0..3]. */
+    /* l8[0..7] = a[0..3] * b[0..3]. */
     muladd_fast(a->d[0], b->d[0]);
-    extract_fast(l[0]);
+    extract_fast(l8[0]);
     muladd(a->d[0], b->d[1]);
     muladd(a->d[1], b->d[0]);
-    extract(l[1]);
+    extract(l8[1]);
     muladd(a->d[0], b->d[2]);
     muladd(a->d[1], b->d[1]);
     muladd(a->d[2], b->d[0]);
-    extract(l[2]);
+    extract(l8[2]);
     muladd(a->d[0], b->d[3]);
     muladd(a->d[1], b->d[2]);
     muladd(a->d[2], b->d[1]);
     muladd(a->d[3], b->d[0]);
-    extract(l[3]);
+    extract(l8[3]);
     muladd(a->d[1], b->d[3]);
     muladd(a->d[2], b->d[2]);
     muladd(a->d[3], b->d[1]);
-    extract(l[4]);
+    extract(l8[4]);
     muladd(a->d[2], b->d[3]);
     muladd(a->d[3], b->d[2]);
-    extract(l[5]);
+    extract(l8[5]);
     muladd_fast(a->d[3], b->d[3]);
-    extract_fast(l[6]);
+    extract_fast(l8[6]);
     VERIFY_CHECK(c1 == 0);
-    l[7] = c0;
+    l8[7] = c0;
 #endif
 }