Skip to content

Commit d6b7199

Browse files
committed
Change example to use FMA
1 parent 28f811a commit d6b7199

File tree

3 files changed: +5 -3 lines changed

3 files changed: +5 -3 lines changed

examples/vector_add/main.cu

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ __global__ void my_kernel(int length, const __half* input, double constant, floa
1818

1919
if (i * N < length) {
2020
auto a = kf::read_aligned<N>(input + i * N);
21-
auto b = (a * a) * constant;
21+
auto b = kf::fma(a, a, kf::cast<__half>(constant));
2222
kf::write_aligned<N>(output + i * N, b);
2323
}
2424
}

include/kernel_float/meta.h

+1
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,7 @@ struct enable_if_impl<true, T> {
270270
template<bool C, typename T = void>
271271
using enable_if_t = typename detail::enable_if_impl<C, T>::type;
272272

273+
KERNEL_FLOAT_INLINE
273274
constexpr size_t round_up_to_power_of_two(size_t n) {
274275
size_t result = 1;
275276
while (result < n) {

single_include/kernel_float.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@
1616

1717
//================================================================================
1818
// this file has been auto-generated, do not modify its contents!
19-
// date: 2023-10-24 14:03:49.849025
20-
// git hash: a35b9f600525b2253f9b2e1fb3cb91d382ac2a7d
19+
// date: 2023-10-24 14:14:37.228322
20+
// git hash: 28f811af866d73bef37acd541bac6a95df9a94c3
2121
//================================================================================
2222

2323
#ifndef KERNEL_FLOAT_MACROS_H
@@ -354,6 +354,7 @@ struct enable_if_impl<true, T> {
354354
template<bool C, typename T = void>
355355
using enable_if_t = typename detail::enable_if_impl<C, T>::type;
356356

357+
KERNEL_FLOAT_INLINE
357358
constexpr size_t round_up_to_power_of_two(size_t n) {
358359
size_t result = 1;
359360
while (result < n) {

0 commit comments

Comments
 (0)