@@ -62,6 +62,40 @@ class CodeGen_OpenCL_Dev : public CodeGen_GPU_Dev {
62
62
: CodeGen_GPU_C(s, t) {
63
63
integer_suffix_style = IntegerSuffixStyle::OpenCL;
64
64
vector_declaration_style = VectorDeclarationStyle::OpenCLSyntax;
65
+
66
+ #define alias (x, y ) \
67
+ extern_function_name_map[x " _f16" ] = y; \
68
+ extern_function_name_map[x " _f32" ] = y; \
69
+ extern_function_name_map[x " _f64" ] = y
70
+ alias (" sqrt" , " sqrt" );
71
+ alias (" sin" , " sin" );
72
+ alias (" cos" , " cos" );
73
+ alias (" exp" , " exp" );
74
+ alias (" log" , " log" );
75
+ alias (" abs" , " fabs" ); // f-prefix! (although it's handled as an intrinsic).
76
+ alias (" floor" , " floor" );
77
+ alias (" ceil" , " ceil" );
78
+ alias (" trunc" , " trunc" );
79
+ alias (" pow" , " pow" );
80
+ alias (" asin" , " asin" );
81
+ alias (" acos" , " acos" );
82
+ alias (" tan" , " tan" );
83
+ alias (" atan" , " atan" );
84
+ alias (" atan2" , " atan2" );
85
+ alias (" sinh" , " sinh" );
86
+ alias (" asinh" , " asinh" );
87
+ alias (" cosh" , " cosh" );
88
+ alias (" acosh" , " acosh" );
89
+ alias (" tanh" , " tanh" );
90
+ alias (" atanh" , " atanh" );
91
+
92
+ alias (" is_nan" , " isnan" );
93
+ alias (" is_inf" , " isinf" );
94
+ alias (" is_finite" , " isfinite" );
95
+
96
+ alias (" fast_inverse" , " native_recip" );
97
+ alias (" fast_inverse_sqrt" , " native_rsqrt" );
98
+ #undef alias
65
99
}
66
100
void add_kernel (Stmt stmt,
67
101
const std::string &name,
@@ -300,16 +334,6 @@ void CodeGen_OpenCL_Dev::CodeGen_OpenCL_C::visit(const Call *op) {
300
334
ostringstream rhs;
301
335
rhs << " select(" << false_val << " , " << true_val << " , " << cond << " )" ;
302
336
print_assignment (op->type , rhs.str ());
303
- } else if (op->is_intrinsic (Call::abs )) {
304
- if (op->type .is_float ()) {
305
- ostringstream rhs;
306
- rhs << " abs_f" << op->type .bits () << " (" << print_expr (op->args [0 ]) << " )" ;
307
- print_assignment (op->type , rhs.str ());
308
- } else {
309
- ostringstream rhs;
310
- rhs << " abs(" << print_expr (op->args [0 ]) << " )" ;
311
- print_assignment (op->type , rhs.str ());
312
- }
313
337
} else if (op->is_intrinsic (Call::absd)) {
314
338
ostringstream rhs;
315
339
rhs << " abs_diff(" << print_expr (op->args [0 ]) << " , " << print_expr (op->args [1 ]) << " )" ;
@@ -466,13 +490,7 @@ void CodeGen_OpenCL_Dev::CodeGen_OpenCL_C::visit(const Call *op) {
466
490
467
491
// Produce the source text for a call to an extern function `op`.
// First asserts that the callee is not one that takes a user context
// (the callee's name is streamed into the assertion message).
// In this diff the local formatting (print each argument with print_expr,
// then emit `name(arg0, arg1, ...)` using op->name verbatim) is removed and
// the work is delegated to CodeGen_GPU_C::print_extern_call instead.
// NOTE(review): presumably the base-class implementation consults
// extern_function_name_map, so the aliases registered in the constructor
// take effect here -- confirm against CodeGen_GPU_C / CodeGen_C.
string CodeGen_OpenCL_Dev::CodeGen_OpenCL_C::print_extern_call (const Call *op) {
468
492
internal_assert (!function_takes_user_context (op->name )) << op->name ;
469
- vector<string> args (op->args .size ());
470
- for (size_t i = 0 ; i < op->args .size (); i++) {
471
- args[i] = print_expr (op->args [i]);
472
- }
473
- ostringstream rhs;
474
- rhs << op->name << " (" << with_commas (args) << " )" ;
475
- return rhs.str ();
493
+ return CodeGen_GPU_C::print_extern_call (op);
476
494
}
477
495
478
496
string CodeGen_OpenCL_Dev::CodeGen_OpenCL_C::print_array_access (const string &name,
@@ -1123,64 +1141,14 @@ void CodeGen_OpenCL_Dev::init_module() {
1123
1141
src_stream << " inline float float_from_bits(unsigned int x) {return as_float(x);}\n "
1124
1142
<< " inline float nan_f32() { return NAN; }\n "
1125
1143
<< " inline float neg_inf_f32() { return -INFINITY; }\n "
1126
- << " inline float inf_f32() { return INFINITY; }\n "
1127
- << " inline bool is_nan_f32(float x) {return isnan(x); }\n "
1128
- << " inline bool is_inf_f32(float x) {return isinf(x); }\n "
1129
- << " inline bool is_finite_f32(float x) {return isfinite(x); }\n "
1130
- << " #define sqrt_f32 sqrt \n "
1131
- << " #define sin_f32 sin \n "
1132
- << " #define cos_f32 cos \n "
1133
- << " #define exp_f32 exp \n "
1134
- << " #define log_f32 log \n "
1135
- << " #define abs_f32 fabs \n "
1136
- << " #define floor_f32 floor \n "
1137
- << " #define ceil_f32 ceil \n "
1138
- << " #define trunc_f32 trunc \n "
1139
- << " #define pow_f32 pow\n "
1140
- << " #define asin_f32 asin \n "
1141
- << " #define acos_f32 acos \n "
1142
- << " #define tan_f32 tan \n "
1143
- << " #define atan_f32 atan \n "
1144
- << " #define atan2_f32 atan2\n "
1145
- << " #define sinh_f32 sinh \n "
1146
- << " #define asinh_f32 asinh \n "
1147
- << " #define cosh_f32 cosh \n "
1148
- << " #define acosh_f32 acosh \n "
1149
- << " #define tanh_f32 tanh \n "
1150
- << " #define atanh_f32 atanh \n "
1151
- << " #define fast_inverse_f32 native_recip \n "
1152
- << " #define fast_inverse_sqrt_f32 native_rsqrt \n " ;
1144
+ << " inline float inf_f32() { return INFINITY; }\n " ;
1153
1145
1154
1146
// There does not appear to be a reliable way to safely ignore unused
1155
1147
// variables in OpenCL C. See https://github.com/halide/Halide/issues/4918.
1156
1148
src_stream << " #define halide_maybe_unused(x)\n " ;
1157
1149
1158
1150
if (target.has_feature (Target::CLDoubles)) {
1159
- src_stream << " #pragma OPENCL EXTENSION cl_khr_fp64 : enable\n "
1160
- << " inline bool is_nan_f64(double x) {return isnan(x); }\n "
1161
- << " inline bool is_inf_f64(double x) {return isinf(x); }\n "
1162
- << " inline bool is_finite_f64(double x) {return isfinite(x); }\n "
1163
- << " #define sqrt_f64 sqrt\n "
1164
- << " #define sin_f64 sin\n "
1165
- << " #define cos_f64 cos\n "
1166
- << " #define exp_f64 exp\n "
1167
- << " #define log_f64 log\n "
1168
- << " #define abs_f64 fabs\n "
1169
- << " #define floor_f64 floor\n "
1170
- << " #define ceil_f64 ceil\n "
1171
- << " #define trunc_f64 trunc\n "
1172
- << " #define pow_f64 pow\n "
1173
- << " #define asin_f64 asin\n "
1174
- << " #define acos_f64 acos\n "
1175
- << " #define tan_f64 tan\n "
1176
- << " #define atan_f64 atan\n "
1177
- << " #define atan2_f64 atan2\n "
1178
- << " #define sinh_f64 sinh\n "
1179
- << " #define asinh_f64 asinh\n "
1180
- << " #define cosh_f64 cosh\n "
1181
- << " #define acosh_f64 acosh\n "
1182
- << " #define tanh_f64 tanh\n "
1183
- << " #define atanh_f64 atanh\n " ;
1151
+ src_stream << " #pragma OPENCL EXTENSION cl_khr_fp64 : enable\n " ;
1184
1152
}
1185
1153
1186
1154
if (target.has_feature (Target::CLHalf)) {
@@ -1192,31 +1160,7 @@ void CodeGen_OpenCL_Dev::init_module() {
1192
1160
<< " inline half half_from_bits(unsigned short x) {return __builtin_astype(x, half);}\n "
1193
1161
<< " inline half nan_f16() { return half_from_bits(" << nan_f16 << " ); }\n "
1194
1162
<< " inline half neg_inf_f16() { return half_from_bits(" << neg_inf_f16 << " ); }\n "
1195
- << " inline half inf_f16() { return half_from_bits(" << inf_f16 << " ); }\n "
1196
- << " inline bool is_nan_f16(half x) {return isnan(x); }\n "
1197
- << " inline bool is_inf_f16(half x) {return isinf(x); }\n "
1198
- << " inline bool is_finite_f16(half x) {return isfinite(x); }\n "
1199
- << " #define sqrt_f16 sqrt\n "
1200
- << " #define sin_f16 sin\n "
1201
- << " #define cos_f16 cos\n "
1202
- << " #define exp_f16 exp\n "
1203
- << " #define log_f16 log\n "
1204
- << " #define abs_f16 fabs\n "
1205
- << " #define floor_f16 floor\n "
1206
- << " #define ceil_f16 ceil\n "
1207
- << " #define trunc_f16 trunc\n "
1208
- << " #define pow_f16 pow\n "
1209
- << " #define asin_f16 asin\n "
1210
- << " #define acos_f16 acos\n "
1211
- << " #define tan_f16 tan\n "
1212
- << " #define atan_f16 atan\n "
1213
- << " #define atan2_f16 atan2\n "
1214
- << " #define sinh_f16 sinh\n "
1215
- << " #define asinh_f16 asinh\n "
1216
- << " #define cosh_f16 cosh\n "
1217
- << " #define acosh_f16 acosh\n "
1218
- << " #define tanh_f16 tanh\n "
1219
- << " #define atanh_f16 atanh\n " ;
1163
+ << " inline half inf_f16() { return half_from_bits(" << inf_f16 << " ); }\n " ;
1220
1164
}
1221
1165
1222
1166
if (target.has_feature (Target::CLAtomics64)) {
0 commit comments