@@ -75,6 +75,11 @@ using namespace clang::driver::tools;
75
75
using namespace clang;
76
76
using namespace llvm::opt;
77
77
78
+ static bool isSYCLCudaCompatEnabled(const ArgList &Args) {
79
+ return Args.hasFlag(options::OPT_fsycl_cuda_compat,
80
+ options::OPT_fno_sycl_cuda_compat, false);
81
+ }
82
+
78
83
static void CheckPreprocessingOptions(const Driver &D, const ArgList &Args) {
79
84
if (Arg *A = Args.getLastArg(clang::driver::options::OPT_C, options::OPT_CC,
80
85
options::OPT_fminimize_whitespace,
@@ -1176,7 +1181,8 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA,
1176
1181
1177
1182
if (JA.isOffloading(Action::OFK_SYCL)) {
1178
1183
getToolChain().addSYCLIncludeArgs(Args, CmdArgs);
1179
- if (Inputs[0].getType() == types::TY_CUDA) {
1184
+ if (Inputs[0].getType() == types::TY_CUDA ||
1185
+ isSYCLCudaCompatEnabled(Args)) {
1180
1186
// Include __clang_cuda_runtime_wrapper.h in .cu SYCL compilation, or in any
// SYCL compilation when SYCL CUDA compatibility mode is enabled.
1181
1187
getToolChain().AddCudaIncludeArgs(Args, CmdArgs);
1182
1188
}
@@ -5463,6 +5469,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
5463
5469
bool IsFPGASYCLOffloadDevice =
5464
5470
IsSYCLDevice && Triple.getSubArch() == llvm::Triple::SPIRSubArch_fpga;
5465
5471
const bool IsSYCLNativeCPU = isSYCLNativeCPU(TC);
5472
+ const bool IsSYCLCUDACompat = isSYCLCudaCompatEnabled(Args);
5466
5473
5467
5474
// Perform the SYCL host compilation using an external compiler if the user
5468
5475
// requested.
@@ -5832,6 +5839,17 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
5832
5839
CmdArgs.push_back("-fno-sycl-esimd-build-host-code");
5833
5840
}
5834
5841
5842
+ if (IsSYCLCUDACompat) {
5843
+ Args.addOptInFlag(CmdArgs, options::OPT_fsycl_cuda_compat,
5844
+ options::OPT_fno_sycl_cuda_compat);
5845
+ // FIXME: clang's CUDA headers require this ...
5846
+ // remove when clang/lib/Headers/__clang_cuda_builtin_vars.h no longer
5847
+ // requires it.
5848
+ CmdArgs.push_back("-fdeclspec");
5849
+ // Note: assumes CUDA 9.0 or more (required by SYCL for CUDA)
5850
+ CmdArgs.push_back("-fcuda-allow-variadic-functions");
5851
+ }
5852
+
5835
5853
// Set options for both host and device
5836
5854
if (SYCLStdArg) {
5837
5855
SYCLStdArg->render(Args, CmdArgs);
@@ -5898,6 +5916,19 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
5898
5916
bool HasFPGA = false;
5899
5917
for (auto TI = SYCLTCRange.first, TE = SYCLTCRange.second; TI != TE; ++TI) {
5900
5918
llvm::Triple SYCLTriple = TI->second->getTriple();
5919
+ if (SYCLTriple.isNVPTX() && IsSYCLCUDACompat && !IsSYCLDevice) {
5920
+ CmdArgs.push_back("-aux-triple");
5921
+ CmdArgs.push_back(Args.MakeArgString(SYCLTriple.normalize()));
5922
+ // We need to figure out which CUDA version we're compiling for, as that
5923
+ // determines how we load and launch GPU kernels.
5924
+ auto *CTC = static_cast<const toolchains::CudaToolChain *>(TI->second);
5925
+ assert(CTC && "Expected valid CUDA Toolchain.");
5926
+ if (CTC->CudaInstallation.version() != CudaVersion::UNKNOWN)
5927
+ CmdArgs.push_back(Args.MakeArgString(
5928
+ Twine("-target-sdk-version=") +
5929
+ CudaVersionToString(CTC->CudaInstallation.version())));
5930
+ break;
5931
+ }
5901
5932
if (SYCLTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga) {
5902
5933
HasFPGA = true;
5903
5934
if (!IsSYCLDevice) {
0 commit comments