diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp index 29781399cbab4..be7851adecea6 100644 --- a/clang/lib/Driver/ToolChains/Hexagon.cpp +++ b/clang/lib/Driver/ToolChains/Hexagon.cpp @@ -294,9 +294,10 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA, bool IncStartFiles = !Args.hasArg(options::OPT_nostartfiles); bool IncDefLibs = !Args.hasArg(options::OPT_nodefaultlibs); bool UseG0 = false; - const char *Exec = Args.MakeArgString(HTC.GetLinkerPath()); - bool UseLLD = (llvm::sys::path::filename(Exec).equals_insensitive("ld.lld") || - llvm::sys::path::stem(Exec).equals_insensitive("ld.lld")); + bool UseLLD = false; + const char *Exec = Args.MakeArgString(HTC.GetLinkerPath(&UseLLD)); + UseLLD = UseLLD || llvm::sys::path::filename(Exec).ends_with("ld.lld") || + llvm::sys::path::stem(Exec).ends_with("ld.lld"); bool UseShared = IsShared && !IsStatic; StringRef CpuVer = toolchains::HexagonToolChain::GetTargetCPUVersion(Args); diff --git a/clang/lib/Interpreter/CMakeLists.txt b/clang/lib/Interpreter/CMakeLists.txt index 0a2d60757c216..85efa4b0f984f 100644 --- a/clang/lib/Interpreter/CMakeLists.txt +++ b/clang/lib/Interpreter/CMakeLists.txt @@ -15,6 +15,7 @@ set(LLVM_LINK_COMPONENTS if (EMSCRIPTEN AND "lld" IN_LIST LLVM_ENABLE_PROJECTS) set(WASM_SRC Wasm.cpp) set(WASM_LINK lldWasm) + set(COMMON_LINK lldCommon) endif() add_clang_library(clangInterpreter @@ -45,6 +46,7 @@ add_clang_library(clangInterpreter clangSema clangSerialization ${WASM_LINK} + ${COMMON_LINK} ) if ((MINGW OR CYGWIN) AND BUILD_SHARED_LIBS) diff --git a/clang/lib/Interpreter/IncrementalExecutor.h b/clang/lib/Interpreter/IncrementalExecutor.h index 7954cde36588b..dbd61f0b8b1eb 100644 --- a/clang/lib/Interpreter/IncrementalExecutor.h +++ b/clang/lib/Interpreter/IncrementalExecutor.h @@ -56,7 +56,7 @@ class IncrementalExecutor { virtual llvm::Error addModule(PartialTranslationUnit &PTU); virtual llvm::Error removeModule(PartialTranslationUnit &PTU); 
virtual llvm::Error runCtors() const; - llvm::Error cleanUp(); + virtual llvm::Error cleanUp(); llvm::Expected getSymbolAddress(llvm::StringRef Name, SymbolNameKind NameKind) const; diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index b4882ab5d2236..985d0b7c0ef31 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -192,8 +192,8 @@ IncrementalCompilerBuilder::CreateCpp() { #ifdef __EMSCRIPTEN__ Argv.push_back("-target"); Argv.push_back("wasm32-unknown-emscripten"); - Argv.push_back("-pie"); Argv.push_back("-shared"); + Argv.push_back("-fvisibility=default"); #endif Argv.insert(Argv.end(), UserArgs.begin(), UserArgs.end()); diff --git a/clang/lib/Interpreter/Wasm.cpp b/clang/lib/Interpreter/Wasm.cpp index 1001410aa0f27..aa10b160ccf84 100644 --- a/clang/lib/Interpreter/Wasm.cpp +++ b/clang/lib/Interpreter/Wasm.cpp @@ -23,6 +23,31 @@ #include namespace lld { +enum Flavor { + Invalid, + Gnu, // -flavor gnu + MinGW, // -flavor gnu MinGW + WinLink, // -flavor link + Darwin, // -flavor darwin + Wasm, // -flavor wasm +}; + +using Driver = bool (*)(llvm::ArrayRef, llvm::raw_ostream &, + llvm::raw_ostream &, bool, bool); + +struct DriverDef { + Flavor f; + Driver d; +}; + +struct Result { + int retCode; + bool canRunAgain; +}; + +Result lldMain(llvm::ArrayRef args, llvm::raw_ostream &stdoutOS, + llvm::raw_ostream &stderrOS, llvm::ArrayRef drivers); + namespace wasm { bool link(llvm::ArrayRef args, llvm::raw_ostream &stdoutOS, llvm::raw_ostream &stderrOS, bool exitEarly, bool disableOutput); @@ -51,13 +76,14 @@ llvm::Error WasmIncrementalExecutor::addModule(PartialTranslationUnit &PTU) { llvm::TargetMachine *TargetMachine = Target->createTargetMachine( PTU.TheModule->getTargetTriple(), "", "", TO, llvm::Reloc::Model::PIC_); PTU.TheModule->setDataLayout(TargetMachine->createDataLayout()); - std::string OutputFileName = PTU.TheModule->getName().str() + ".wasm"; + std::string ObjectFileName 
= PTU.TheModule->getName().str() + ".o"; + std::string BinaryFileName = PTU.TheModule->getName().str() + ".wasm"; std::error_code Error; - llvm::raw_fd_ostream OutputFile(llvm::StringRef(OutputFileName), Error); + llvm::raw_fd_ostream ObjectFileOutput(llvm::StringRef(ObjectFileName), Error); llvm::legacy::PassManager PM; - if (TargetMachine->addPassesToEmitFile(PM, OutputFile, nullptr, + if (TargetMachine->addPassesToEmitFile(PM, ObjectFileOutput, nullptr, llvm::CodeGenFileType::ObjectFile)) { return llvm::make_error( "Wasm backend cannot produce object.", llvm::inconvertibleErrorCode()); @@ -69,27 +95,30 @@ llvm::Error WasmIncrementalExecutor::addModule(PartialTranslationUnit &PTU) { llvm::inconvertibleErrorCode()); } - OutputFile.close(); + ObjectFileOutput.close(); std::vector LinkerArgs = {"wasm-ld", - "-pie", + "-shared", "--import-memory", - "--no-entry", - "--export-all", "--experimental-pic", - "--no-export-dynamic", "--stack-first", - OutputFileName.c_str(), + "--allow-undefined", + ObjectFileName.c_str(), "-o", - OutputFileName.c_str()}; - int Result = - lld::wasm::link(LinkerArgs, llvm::outs(), llvm::errs(), false, false); - if (!Result) + BinaryFileName.c_str()}; + + const lld::DriverDef WasmDriver = {lld::Flavor::Wasm, &lld::wasm::link}; + std::vector WasmDriverArgs; + WasmDriverArgs.push_back(WasmDriver); + lld::Result Result = + lld::lldMain(LinkerArgs, llvm::outs(), llvm::errs(), WasmDriverArgs); + + if (Result.retCode) return llvm::make_error( "Failed to link incremental module", llvm::inconvertibleErrorCode()); void *LoadedLibModule = - dlopen(OutputFileName.c_str(), RTLD_NOW | RTLD_GLOBAL); + dlopen(BinaryFileName.c_str(), RTLD_NOW | RTLD_GLOBAL); if (LoadedLibModule == nullptr) { llvm::errs() << dlerror() << '\n'; return llvm::make_error( @@ -109,6 +138,12 @@ llvm::Error WasmIncrementalExecutor::runCtors() const { return llvm::Error::success(); } +llvm::Error WasmIncrementalExecutor::cleanUp() { + // Can't call cleanUp through 
IncrementalExecutor as it + // tries to deinitialize JIT which hasn't been initialized + return llvm::Error::success(); +} + WasmIncrementalExecutor::~WasmIncrementalExecutor() = default; -} // namespace clang +} // namespace clang \ No newline at end of file diff --git a/clang/lib/Interpreter/Wasm.h b/clang/lib/Interpreter/Wasm.h index b1fd88024f14d..4632613326d39 100644 --- a/clang/lib/Interpreter/Wasm.h +++ b/clang/lib/Interpreter/Wasm.h @@ -28,6 +28,7 @@ class WasmIncrementalExecutor : public IncrementalExecutor { llvm::Error addModule(PartialTranslationUnit &PTU) override; llvm::Error removeModule(PartialTranslationUnit &PTU) override; llvm::Error runCtors() const override; + llvm::Error cleanUp() override; ~WasmIncrementalExecutor() override; }; diff --git a/clang/utils/perf-training/perf-helper.py b/clang/utils/perf-training/perf-helper.py index 3ed42a187fd80..d76c6ede3fe5a 100644 --- a/clang/utils/perf-training/perf-helper.py +++ b/clang/utils/perf-training/perf-helper.py @@ -36,7 +36,7 @@ def clean(args): + "\tRemoves all files with extension from ." 
) return 1 - for path in args[1:-1]: + for path in args[0:-1]: for filename in findFilesWithExtension(path, args[-1]): os.remove(filename) return 0 diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake index 6ccb934aef436..93d36736439b1 100644 --- a/cmake/Modules/LLVMVersion.cmake +++ b/cmake/Modules/LLVMVersion.cmake @@ -7,7 +7,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 1) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 4) + set(LLVM_VERSION_PATCH 6) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX) diff --git a/compiler-rt/test/asan/TestCases/Windows/delay_dbghelp.cpp b/compiler-rt/test/asan/TestCases/Windows/delay_dbghelp.cpp index 9277fe0b23516..38e99cf685945 100644 --- a/compiler-rt/test/asan/TestCases/Windows/delay_dbghelp.cpp +++ b/compiler-rt/test/asan/TestCases/Windows/delay_dbghelp.cpp @@ -9,7 +9,7 @@ // static build, there won't be any clang_rt DLLs. // RUN: not grep cl""ang_rt %t || \ // RUN: grep cl""ang_rt %t | xargs which | \ -// RUN: xargs llvm-readobj --coff-imports | not grep dbghelp.dll %t +// RUN: xargs llvm-readobj --coff-imports | not grep dbghelp.dll extern "C" int puts(const char *); diff --git a/libcxx/include/__config b/libcxx/include/__config index a929db5d0f2d1..e97669bca411e 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -27,7 +27,7 @@ // _LIBCPP_VERSION represents the version of libc++, which matches the version of LLVM. // Given a LLVM release LLVM XX.YY.ZZ (e.g. LLVM 17.0.1 == 17.00.01), _LIBCPP_VERSION is // defined to XXYYZZ. 
-# define _LIBCPP_VERSION 190104 +# define _LIBCPP_VERSION 190106 # define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y # define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y) diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp index 56cf96fd17704..8bcd28309f8b3 100644 --- a/lld/ELF/Arch/Hexagon.cpp +++ b/lld/ELF/Arch/Hexagon.cpp @@ -329,7 +329,7 @@ void Hexagon::relocate(uint8_t *loc, const Relocation &rel, case R_HEX_B22_PCREL: case R_HEX_GD_PLT_B22_PCREL: case R_HEX_PLT_B22_PCREL: - checkInt(loc, val, 22, rel); + checkInt(loc, val, 24, rel); or32le(loc, applyMask(0x1ff3ffe, val >> 2)); break; case R_HEX_B22_PCREL_X: diff --git a/lld/test/ELF/emulation-loongarch.s b/lld/test/ELF/emulation-loongarch.s index 28b879f758468..cfa8df4d8e2fe 100644 --- a/lld/test/ELF/emulation-loongarch.s +++ b/lld/test/ELF/emulation-loongarch.s @@ -37,7 +37,7 @@ # LA32-NEXT: StringTableSectionIndex: # LA32-NEXT: } -# RUN: llvm-mc -filetype=obj -triple=loongarch64 %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+d %s -o %t.o # RUN: ld.lld %t.o -o %t # RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=LA64 %s # RUN: ld.lld -m elf64loongarch %t.o -o %t diff --git a/lld/test/ELF/hexagon-jump-error.s b/lld/test/ELF/hexagon-jump-error.s index fec873827e573..53860b5daf2b1 100644 --- a/lld/test/ELF/hexagon-jump-error.s +++ b/lld/test/ELF/hexagon-jump-error.s @@ -25,7 +25,7 @@ if (p0) jump #1f .section b15, "ax" 1: -# CHECK: relocation R_HEX_B22_PCREL out of range: 8388612 is not in [-2097152, 2097151] +# CHECK: relocation R_HEX_B22_PCREL out of range: 8388612 is not in [-8388608, 8388607] jump #1f .space (1<<23) .section b22, "ax" diff --git a/lld/test/ELF/hexagon.s b/lld/test/ELF/hexagon.s index 8ef9b8eead8f1..b1576fb47d81a 100644 --- a/lld/test/ELF/hexagon.s +++ b/lld/test/ELF/hexagon.s @@ -1,7 +1,9 @@ # REQUIRES: hexagon # RUN: llvm-mc -filetype=obj -triple=hexagon-unknown-elf %s -o %t.o # RUN: llvm-mc -filetype=obj -triple=hexagon-unknown-elf 
%S/Inputs/hexagon.s -o %t1.o -# RUN: ld.lld %t.o %t1.o -o %t +# RUN: ld.lld %t.o %t1.o -o %t --Ttext=0x200b4 --section-start=b_1000000=0x1000000 \ +# RUN: --section-start=b_1000400=0x1000400 --section-start=b_1004000=0x1004000 \ +# RUN: --section-start=b_1010000=0x1010000 --section-start=b_1800000=0x1800000 # RUN: llvm-objdump --no-print-imm-hex -d %t | FileCheck %s # Note: 131584 == 0x20200 @@ -221,3 +223,40 @@ r0 = memw(r1+##_start) memw(r0+##_start) = r1 # CHECK: memw(r0+##131644) = r1 + + +## Tests for maximum branch ranges reachable without trampolines. + +.section b_1000000, "ax" +## The nop makes sure the first jump is within range. +nop +{ r0 = #0; jump #b_1000400 } // R_HEX_B9_PCREL +if (r0==#0) jump:t #b_1004000 // R_HEX_B13_PCREL +if (p0) jump #b_1010000 // R_HEX_B15_PCREL +jump #b_1800000 // R_HEX_B22_PCREL + +.section b_1000400, "ax" +nop + +.section b_1004000, "ax" +nop + +.section b_1010000, "ax" +nop + +.section b_1800000, "ax" +nop + +## Make sure we got the right relocations. 
+# RUN: llvm-readelf -r %t.o | FileCheck %s --check-prefix=REL +# REL: R_HEX_B9_PCREL 00000000 b_1000400 +# REL: R_HEX_B13_PCREL 00000000 b_1004000 +# REL: R_HEX_B15_PCREL 00000000 b_1010000 +# REL: R_HEX_B22_PCREL 00000000 b_1800000 + +# CHECK: 01000000 : +# CHECK-NEXT: 1000000: {{.*}} { nop } +# CHECK-NEXT: 1000004: {{.*}} { r0 = #0 ; jump 0x1000400 } +# CHECK-NEXT: 1000008: {{.*}} { if (r0==#0) jump:t 0x1004000 } +# CHECK-NEXT: 100000c: {{.*}} { if (p0) jump:nt 0x1010000 } +# CHECK-NEXT: 1000010: {{.*}} { jump 0x1800000 } diff --git a/lld/test/ELF/loongarch-interlink.test b/lld/test/ELF/loongarch-interlink.test index 44e5d03409a47..15c8318512660 100644 --- a/lld/test/ELF/loongarch-interlink.test +++ b/lld/test/ELF/loongarch-interlink.test @@ -3,9 +3,9 @@ # RUN: yaml2obj %t/blob.yaml -o %t/blob.o # RUN: yaml2obj %t/v0-lp64d.yaml -o %t/v0-lp64d.o -# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnu %t/start.s -o %t/v1-lp64d.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnu --mattr=+d %t/start.s -o %t/v1-lp64d.o # RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnusf %t/start.s -o %t/v1-lp64s.o -# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnu %t/bar.s -o %t/v1-b-lp64d.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnu --mattr=+d %t/bar.s -o %t/v1-b-lp64d.o ## Check that binary input results in e_flags=0 output. # RUN: ld.lld -m elf64loongarch -b binary %t/blob.bin -o %t/blob.out diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp index f02f55519a251..72d08b849d8e8 100644 --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -587,8 +587,7 @@ void ElemSection::writeBody() { initExpr.Inst.Value.Global = WasmSym::tableBase->getGlobalIndex(); } else { bool is64 = config->is64.value_or(false); - initExpr.Inst.Opcode = is64 ? 
WASM_OPCODE_I64_CONST : WASM_OPCODE_I32_CONST; - initExpr.Inst.Value.Int32 = config->tableBase; + initExpr = intConst(config->tableBase, is64); } writeInitExpr(os, initExpr); diff --git a/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/llvm/include/llvm/Analysis/MemorySSAUpdater.h index d4da3ef1146db..f598dedea75fd 100644 --- a/llvm/include/llvm/Analysis/MemorySSAUpdater.h +++ b/llvm/include/llvm/Analysis/MemorySSAUpdater.h @@ -192,6 +192,11 @@ class MemorySSAUpdater { const BasicBlock *BB, MemorySSA::InsertionPlace Point); + MemoryAccess *createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, + const BasicBlock *BB, + MemorySSA::InsertionPlace Point, + bool CreationMustSucceed); + /// Create a MemoryAccess in MemorySSA before an existing MemoryAccess. /// /// See createMemoryAccessInBB() for usage details. diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp index aa550f0b6a7bf..94061c949b7f8 100644 --- a/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -1404,8 +1404,17 @@ void MemorySSAUpdater::changeToUnreachable(const Instruction *I) { MemoryAccess *MemorySSAUpdater::createMemoryAccessInBB( Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point) { - MemoryUseOrDef *NewAccess = MSSA->createDefinedAccess(I, Definition); - MSSA->insertIntoListsForBlock(NewAccess, BB, Point); + return createMemoryAccessInBB(I, Definition, BB, Point, + /*CreationMustSucceed=*/true); +} + +MemoryAccess *MemorySSAUpdater::createMemoryAccessInBB( + Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, + MemorySSA::InsertionPlace Point, bool CreationMustSucceed) { + MemoryUseOrDef *NewAccess = MSSA->createDefinedAccess( + I, Definition, /*Template=*/nullptr, CreationMustSucceed); + if (NewAccess) + MSSA->insertIntoListsForBlock(NewAccess, BB, Point); return NewAccess; } diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp 
b/llvm/lib/Analysis/ScalarEvolution.cpp index 51cffac808768..412cfe73d3e55 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -6313,8 +6313,10 @@ APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S) { return getConstantMultiple(Z->getOperand()).zext(BitWidth); } case scSignExtend: { + // Only multiples that are a power of 2 will hold after sext. const SCEVSignExtendExpr *E = cast(S); - return getConstantMultiple(E->getOperand()).sext(BitWidth); + uint32_t TZ = getMinTrailingZeros(E->getOperand()); + return GetShiftedByZeros(TZ); } case scMulExpr: { const SCEVMulExpr *M = cast(S); diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index f24ab187ef400..21a02a6f09478 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -1474,7 +1474,7 @@ void MachineLICMBase::InitializeLoadsHoistableLoops() { if (!AllowedToHoistLoads[Loop]) continue; for (auto &MI : *MBB) { - if (!MI.mayStore() && !MI.isCall() && + if (!MI.isLoadFoldBarrier() && !MI.mayStore() && !MI.isCall() && !(MI.mayLoad() && MI.hasOrderedMemoryRef())) continue; for (MachineLoop *L = Loop; L != nullptr; L = L->getParentLoop()) diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp index f25dc92fa235a..8526469245676 100644 --- a/llvm/lib/MC/WasmObjectWriter.cpp +++ b/llvm/lib/MC/WasmObjectWriter.cpp @@ -1326,6 +1326,22 @@ static bool isInSymtab(const MCSymbolWasm &Sym) { return true; } +static bool isSectionReferenced(MCAssembler &Asm, MCSectionWasm &Section) { + StringRef SectionName = Section.getName(); + + for (const MCSymbol &S : Asm.symbols()) { + const auto &WS = static_cast(S); + if (WS.isData() && WS.isInSection()) { + auto &RefSection = static_cast(WS.getSection()); + if (RefSection.getName() == SectionName) { + return true; + } + } + } + + return false; +} + void WasmObjectWriter::prepareImports( SmallVectorImpl &Imports, MCAssembler &Asm) { // For now, always 
emit the memory import, since loads and stores are not @@ -1482,8 +1498,10 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm, LLVM_DEBUG(dbgs() << "Processing Section " << SectionName << " group " << Section.getGroup() << "\n";); - // .init_array sections are handled specially elsewhere. - if (SectionName.starts_with(".init_array")) + // .init_array sections are handled specially elsewhere, include them in + // data segments if and only if referenced by a symbol. + if (SectionName.starts_with(".init_array") && + !isSectionReferenced(Asm, Section)) continue; // Code is handled separately @@ -1853,49 +1871,54 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm, if (EmptyFrag.getKind() != MCFragment::FT_Data) report_fatal_error(".init_array section should be aligned"); - const MCFragment &AlignFrag = *EmptyFrag.getNext(); - if (AlignFrag.getKind() != MCFragment::FT_Align) - report_fatal_error(".init_array section should be aligned"); - if (cast(AlignFrag).getAlignment() != - Align(is64Bit() ? 8 : 4)) - report_fatal_error(".init_array section should be aligned for pointers"); - - const MCFragment &Frag = *AlignFrag.getNext(); - if (Frag.hasInstructions() || Frag.getKind() != MCFragment::FT_Data) - report_fatal_error("only data supported in .init_array section"); - - uint16_t Priority = UINT16_MAX; - unsigned PrefixLength = strlen(".init_array"); - if (WS.getName().size() > PrefixLength) { - if (WS.getName()[PrefixLength] != '.') + const MCFragment *nextFrag = EmptyFrag.getNext(); + while (nextFrag != nullptr) { + const MCFragment &AlignFrag = *nextFrag; + if (AlignFrag.getKind() != MCFragment::FT_Align) + report_fatal_error(".init_array section should be aligned"); + if (cast(AlignFrag).getAlignment() != + Align(is64Bit() ? 
8 : 4)) report_fatal_error( - ".init_array section priority should start with '.'"); - if (WS.getName().substr(PrefixLength + 1).getAsInteger(10, Priority)) - report_fatal_error("invalid .init_array section priority"); - } - const auto &DataFrag = cast(Frag); - const SmallVectorImpl &Contents = DataFrag.getContents(); - for (const uint8_t * - P = (const uint8_t *)Contents.data(), - *End = (const uint8_t *)Contents.data() + Contents.size(); - P != End; ++P) { - if (*P != 0) - report_fatal_error("non-symbolic data in .init_array section"); - } - for (const MCFixup &Fixup : DataFrag.getFixups()) { - assert(Fixup.getKind() == - MCFixup::getKindForSize(is64Bit() ? 8 : 4, false)); - const MCExpr *Expr = Fixup.getValue(); - auto *SymRef = dyn_cast(Expr); - if (!SymRef) - report_fatal_error("fixups in .init_array should be symbol references"); - const auto &TargetSym = cast(SymRef->getSymbol()); - if (TargetSym.getIndex() == InvalidIndex) - report_fatal_error("symbols in .init_array should exist in symtab"); - if (!TargetSym.isFunction()) - report_fatal_error("symbols in .init_array should be for functions"); - InitFuncs.push_back( - std::make_pair(Priority, TargetSym.getIndex())); + ".init_array section should be aligned for pointers"); + + const MCFragment &Frag = *AlignFrag.getNext(); + nextFrag = Frag.getNext(); + if (Frag.hasInstructions() || Frag.getKind() != MCFragment::FT_Data) + report_fatal_error("only data supported in .init_array section"); + + uint16_t Priority = UINT16_MAX; + unsigned PrefixLength = strlen(".init_array"); + if (WS.getName().size() > PrefixLength) { + if (WS.getName()[PrefixLength] != '.') + report_fatal_error( + ".init_array section priority should start with '.'"); + if (WS.getName().substr(PrefixLength + 1).getAsInteger(10, Priority)) + report_fatal_error("invalid .init_array section priority"); + } + const auto &DataFrag = cast(Frag); + const SmallVectorImpl &Contents = DataFrag.getContents(); + for (const uint8_t * + P = (const uint8_t 
*)Contents.data(), + *End = (const uint8_t *)Contents.data() + Contents.size(); + P != End; ++P) { + if (*P != 0) + report_fatal_error("non-symbolic data in .init_array section"); + } + for (const MCFixup &Fixup : DataFrag.getFixups()) { + assert(Fixup.getKind() == + MCFixup::getKindForSize(is64Bit() ? 8 : 4, false)); + const MCExpr *Expr = Fixup.getValue(); + auto *SymRef = dyn_cast(Expr); + if (!SymRef) + report_fatal_error( + "fixups in .init_array should be symbol references"); + const auto &TargetSym = cast(SymRef->getSymbol()); + if (TargetSym.getIndex() == InvalidIndex) + report_fatal_error("symbols in .init_array should exist in symtab"); + if (!TargetSym.isFunction()) + report_fatal_error("symbols in .init_array should be for functions"); + InitFuncs.push_back(std::make_pair(Priority, TargetSym.getIndex())); + } } } diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 87e057a468afd..c183ffd384c22 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1394,6 +1394,18 @@ bool requiresGetVGCall(MachineFunction &MF) { !MF.getSubtarget().hasSVE(); } +static bool requiresSaveVG(MachineFunction &MF) { + AArch64FunctionInfo *AFI = MF.getInfo(); + // For Darwin platforms we don't save VG for non-SVE functions, even if SME + // is enabled with streaming mode changes. + if (!AFI->hasStreamingModeChanges()) + return false; + auto &ST = MF.getSubtarget(); + if (ST.isTargetDarwin()) + return ST.hasSVE(); + return true; +} + bool isVGInstruction(MachineBasicBlock::iterator MBBI) { unsigned Opc = MBBI->getOpcode(); if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI || @@ -1430,8 +1442,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( // functions, we need to do this for both the streaming and non-streaming // vector length. Move past these instructions if necessary. 
MachineFunction &MF = *MBB.getParent(); - AArch64FunctionInfo *AFI = MF.getInfo(); - if (AFI->hasStreamingModeChanges()) + if (requiresSaveVG(MF)) while (isVGInstruction(MBBI)) ++MBBI; @@ -1936,12 +1947,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // pointer bump above. while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) && !IsSVECalleeSave(MBBI)) { - // Move past instructions generated to calculate VG - if (AFI->hasStreamingModeChanges()) - while (isVGInstruction(MBBI)) - ++MBBI; - - if (CombineSPBump) + if (CombineSPBump && + // Only fix-up frame-setup load/store instructions. + (!requiresSaveVG(MF) || !isVGInstruction(MBBI))) fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(), NeedsWinCFI, &HasWinCFI); ++MBBI; @@ -2848,7 +2856,8 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) { return Subtarget.isTargetMachO() && !(Subtarget.getTargetLowering()->supportSwiftError() && Attrs.hasAttrSomewhere(Attribute::SwiftError)) && - MF.getFunction().getCallingConv() != CallingConv::SwiftTail; + MF.getFunction().getCallingConv() != CallingConv::SwiftTail && + !requiresSaveVG(MF); } static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, @@ -3720,7 +3729,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // non-streaming VG value. const Function &F = MF.getFunction(); SMEAttrs Attrs(F); - if (AFI->hasStreamingModeChanges()) { + if (requiresSaveVG(MF)) { if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface()) CSStackSize += 16; else @@ -3873,7 +3882,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots( } // Insert VG into the list of CSRs, immediately before LR if saved. 
- if (AFI->hasStreamingModeChanges()) { + if (requiresSaveVG(MF)) { std::vector VGSaves; SMEAttrs Attrs(MF.getFunction()); @@ -4602,10 +4611,9 @@ MachineBasicBlock::iterator emitVGSaveRestore(MachineBasicBlock::iterator II, void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced( MachineFunction &MF, RegScavenger *RS = nullptr) const { - AArch64FunctionInfo *AFI = MF.getInfo(); for (auto &BB : MF) for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) { - if (AFI->hasStreamingModeChanges()) + if (requiresSaveVG(MF)) II = emitVGSaveRestore(II, this); if (StackTaggingMergeSetTag) II = tryMergeAdjacentSTG(II, this, RS); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 62078822c89b1..ef2789e96213b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8732,10 +8732,11 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, SDValue InGlue; if (RequiresSMChange) { - - Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL, - DAG.getVTList(MVT::Other, MVT::Glue), Chain); - InGlue = Chain.getValue(1); + if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) { + Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL, + DAG.getVTList(MVT::Other, MVT::Glue), Chain); + InGlue = Chain.getValue(1); + } SDValue NewChain = changeStreamingMode( DAG, DL, CalleeAttrs.hasStreamingInterface(), Chain, InGlue, @@ -8914,11 +8915,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, Result = changeStreamingMode( DAG, DL, !CalleeAttrs.hasStreamingInterface(), Result, InGlue, getSMCondition(CallerAttrs, CalleeAttrs), PStateSM); - InGlue = Result.getValue(1); - Result = - DAG.getNode(AArch64ISD::VG_RESTORE, DL, - DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue}); + if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) { + InGlue = Result.getValue(1); + Result = + DAG.getNode(AArch64ISD::VG_RESTORE, DL, + DAG.getVTList(MVT::Other, 
MVT::Glue), {Result, InGlue}); + } } if (CallerAttrs.requiresEnablingZAAfterCall(CalleeAttrs)) diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp index c136f5b3e515d..e680dda7374d0 100644 --- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp @@ -721,7 +721,7 @@ bool LoongArchExpandPseudo::expandFunctionCALL( IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; Register AddrReg = IsTailCall ? LoongArch::R19 : LoongArch::R1; - bool UseGOT = Func.isGlobal() && !Func.getGlobal()->isDSOLocal(); + bool UseGOT = Func.getTargetFlags() == LoongArchII::MO_CALL_PLT; unsigned MO = UseGOT ? LoongArchII::MO_GOT_PC_HI : LoongArchII::MO_PCREL_LO; unsigned LAOpcode = UseGOT ? LoongArch::LDX_D : LoongArch::ADD_D; expandLargeAddressLoad(MBB, MBBI, NextMBBI, LAOpcode, MO, Func, AddrReg, diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 0580683c3ce30..0233baecf6dd9 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -67,8 +67,7 @@ class VecCondgetValueType(0).getVectorElementType(); @@ -109,8 +108,7 @@ def vsplati32_imm_eq_31 : PatFrags<(ops), [(build_vector)], [{ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 31; }]>; -def vsplati64_imm_eq_63 : PatFrags<(ops), [(build_vector), - (bitconvert (v4i32 (build_vector)))], [{ +def vsplati64_imm_eq_63 : PatFrags<(ops), [(build_vector)], [{ APInt Imm; EVT EltTy = N->getValueType(0).getVectorElementType(); diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp index e40981f5b5cd5..0712cc01ea038 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +++ 
b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp @@ -55,7 +55,7 @@ static MCInstrInfo *createLoongArchMCInstrInfo() { static MCSubtargetInfo * createLoongArchMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { if (CPU.empty() || CPU == "generic") - CPU = TT.isArch64Bit() ? "la464" : "generic-la32"; + CPU = TT.isArch64Bit() ? "generic-la64" : "generic-la32"; return createLoongArchMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); } diff --git a/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp index f6f32fde3b777..a9ffd2bedf21e 100644 --- a/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -220,6 +220,10 @@ bool MipsDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const { return false; } +bool MipsDAGToDAGISel::selectVSplatImmEq1(SDValue N) const { + llvm_unreachable("Unimplemented function."); +} + /// Convert vector addition with vector subtraction if that allows to encode /// constant as an immediate and thus avoid extra 'ldi' instruction. /// add X, <-1, -1...> --> sub X, <1, 1...> diff --git a/llvm/lib/Target/Mips/MipsISelDAGToDAG.h b/llvm/lib/Target/Mips/MipsISelDAGToDAG.h index 6135f96807854..3485300a782c9 100644 --- a/llvm/lib/Target/Mips/MipsISelDAGToDAG.h +++ b/llvm/lib/Target/Mips/MipsISelDAGToDAG.h @@ -120,6 +120,9 @@ class MipsDAGToDAGISel : public SelectionDAGISel { /// starting at bit zero. virtual bool selectVSplatMaskR(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value is 1. + virtual bool selectVSplatImmEq1(SDValue N) const; + /// Convert vector addition with vector subtraction if that allows to encode /// constant as an immediate and thus avoid extra 'ldi' instruction. 
/// add X, <-1, -1...> --> sub X, <1, 1...> diff --git a/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/llvm/lib/Target/Mips/MipsMSAInstrInfo.td index c4abccb24c6f3..f4c32c9dcd421 100644 --- a/llvm/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/llvm/lib/Target/Mips/MipsMSAInstrInfo.td @@ -198,14 +198,8 @@ def vsplati32 : PatFrag<(ops node:$e0), (v4i32 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>; -def vsplati64_imm_eq_1 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{ - APInt Imm; - SDNode *BV = N->getOperand(0).getNode(); - EVT EltTy = N->getValueType(0).getVectorElementType(); - - return selectVSplat(BV, Imm, EltTy.getSizeInBits()) && - Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; -}]>; +// Any build_vector that is a constant splat with a value that equals 1 +def vsplat_imm_eq_1 : ComplexPattern; def vsplati64 : PatFrag<(ops node:$e0), (v2i64 (build_vector node:$e0, node:$e0))>; @@ -217,7 +211,7 @@ def vsplati64_splat_d : PatFrag<(ops node:$e0), node:$e0, node:$e0, node:$e0)), - vsplati64_imm_eq_1))))>; + (vsplat_imm_eq_1)))))>; def vsplatf32 : PatFrag<(ops node:$e0), (v4f32 (build_vector node:$e0, node:$e0, @@ -352,46 +346,35 @@ def vsplat_maskr_bits_uimm6 : SplatComplexPattern; -// Any build_vector that is a constant splat with a value that equals 1 -// FIXME: These should be a ComplexPattern but we can't use them because the -// ISel generator requires the uses to have a name, but providing a name -// causes other errors ("used in pattern but not operand list") -def vsplat_imm_eq_1 : PatLeaf<(build_vector), [{ - APInt Imm; - EVT EltTy = N->getValueType(0).getVectorElementType(); - - return selectVSplat(N, Imm, EltTy.getSizeInBits()) && - Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; -}]>; def vbclr_b : PatFrag<(ops node:$ws, node:$wt), - (and node:$ws, (vnot (shl vsplat_imm_eq_1, node:$wt)))>; + (and node:$ws, (vnot (shl (vsplat_imm_eq_1), node:$wt)))>; def vbclr_h : PatFrag<(ops node:$ws, node:$wt), - (and node:$ws, (vnot (shl 
vsplat_imm_eq_1, node:$wt)))>; + (and node:$ws, (vnot (shl (vsplat_imm_eq_1), node:$wt)))>; def vbclr_w : PatFrag<(ops node:$ws, node:$wt), - (and node:$ws, (vnot (shl vsplat_imm_eq_1, node:$wt)))>; + (and node:$ws, (vnot (shl (vsplat_imm_eq_1), node:$wt)))>; def vbclr_d : PatFrag<(ops node:$ws, node:$wt), - (and node:$ws, (vnot (shl (v2i64 vsplati64_imm_eq_1), + (and node:$ws, (vnot (shl (v2i64 (vsplat_imm_eq_1)), node:$wt)))>; def vbneg_b : PatFrag<(ops node:$ws, node:$wt), - (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; + (xor node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>; def vbneg_h : PatFrag<(ops node:$ws, node:$wt), - (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; + (xor node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>; def vbneg_w : PatFrag<(ops node:$ws, node:$wt), - (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; + (xor node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>; def vbneg_d : PatFrag<(ops node:$ws, node:$wt), - (xor node:$ws, (shl (v2i64 vsplati64_imm_eq_1), + (xor node:$ws, (shl (v2i64 (vsplat_imm_eq_1)), node:$wt))>; def vbset_b : PatFrag<(ops node:$ws, node:$wt), - (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; + (or node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>; def vbset_h : PatFrag<(ops node:$ws, node:$wt), - (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; + (or node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>; def vbset_w : PatFrag<(ops node:$ws, node:$wt), - (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; + (or node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>; def vbset_d : PatFrag<(ops node:$ws, node:$wt), - (or node:$ws, (shl (v2i64 vsplati64_imm_eq_1), + (or node:$ws, (shl (v2i64 (vsplat_imm_eq_1)), node:$wt))>; def muladd : PatFrag<(ops node:$wd, node:$ws, node:$wt), @@ -3842,7 +3825,7 @@ class MSAShiftPat : (VT (Insn VT:$ws, VT:$wt))>; class MSABitPat : - MSAPat<(VT (Node VT:$ws, (shl vsplat_imm_eq_1, (Frag VT:$wt)))), + MSAPat<(VT (Node VT:$ws, (shl (vsplat_imm_eq_1), (Frag VT:$wt)))), (VT (Insn VT:$ws, VT:$wt))>; multiclass 
MSAShiftPats { @@ -3861,7 +3844,7 @@ multiclass MSABitPats { def : MSABitPat(Insn#_B), vsplati8imm7>; def : MSABitPat(Insn#_H), vsplati16imm15>; def : MSABitPat(Insn#_W), vsplati32imm31>; - def : MSAPat<(Node v2i64:$ws, (shl (v2i64 vsplati64_imm_eq_1), + def : MSAPat<(Node v2i64:$ws, (shl (v2i64 (vsplat_imm_eq_1)), (vsplati64imm63 v2i64:$wt))), (v2i64 (!cast(Insn#_D) v2i64:$ws, v2i64:$wt))>; } @@ -3872,16 +3855,16 @@ defm : MSAShiftPats; defm : MSABitPats; defm : MSABitPats; -def : MSAPat<(and v16i8:$ws, (vnot (shl vsplat_imm_eq_1, +def : MSAPat<(and v16i8:$ws, (vnot (shl (vsplat_imm_eq_1), (vsplati8imm7 v16i8:$wt)))), (v16i8 (BCLR_B v16i8:$ws, v16i8:$wt))>; -def : MSAPat<(and v8i16:$ws, (vnot (shl vsplat_imm_eq_1, +def : MSAPat<(and v8i16:$ws, (vnot (shl (vsplat_imm_eq_1), (vsplati16imm15 v8i16:$wt)))), (v8i16 (BCLR_H v8i16:$ws, v8i16:$wt))>; -def : MSAPat<(and v4i32:$ws, (vnot (shl vsplat_imm_eq_1, +def : MSAPat<(and v4i32:$ws, (vnot (shl (vsplat_imm_eq_1), (vsplati32imm31 v4i32:$wt)))), (v4i32 (BCLR_W v4i32:$ws, v4i32:$wt))>; -def : MSAPat<(and v2i64:$ws, (vnot (shl (v2i64 vsplati64_imm_eq_1), +def : MSAPat<(and v2i64:$ws, (vnot (shl (v2i64 (vsplat_imm_eq_1)), (vsplati64imm63 v2i64:$wt)))), (v2i64 (BCLR_D v2i64:$ws, v2i64:$wt))>; diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 7ad300c6cccd4..66c034a889c60 100644 --- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -730,6 +730,18 @@ bool MipsSEDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N, return false; } +// Select const vector splat of 1. 
+bool MipsSEDAGToDAGISel::selectVSplatImmEq1(SDValue N) const { + APInt ImmValue; + EVT EltTy = N->getValueType(0).getVectorElementType(); + + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + + return selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && + ImmValue.getBitWidth() == EltTy.getSizeInBits() && ImmValue == 1; +} + bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { unsigned Opcode = Node->getOpcode(); SDLoc DL(Node); diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h index 7b843b0e0b255..22d8e924ac534 100644 --- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -124,6 +124,9 @@ class MipsSEDAGToDAGISel : public MipsDAGToDAGISel { /// starting at bit zero. bool selectVSplatMaskR(SDValue N, SDValue &Imm) const override; + /// Select constant vector splats whose value is 1. + bool selectVSplatImmEq1(SDValue N) const override; + bool trySelect(SDNode *Node) override; // Emits proper ABI for _mcount profiling calls. 
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 6975412ce5d35..b2153a7afe736 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -229,6 +229,10 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, // v*i8 are formally lowered as v4i8 EltVT = MVT::v4i8; NumElts = (NumElts + 3) / 4; + } else if (EltVT.getSimpleVT() == MVT::i8 && NumElts == 2) { + // v2i8 is promoted to v2i16 + NumElts = 1; + EltVT = MVT::v2i16; } for (unsigned j = 0; j != NumElts; ++j) { ValueVTs.push_back(EltVT); diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index d5a38ec17a2a8..1d23ec8ced204 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -811,8 +811,7 @@ static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL, APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0); Load1Ptr = Load1Ptr->stripAndAccumulateConstantOffsets( DL, Offset1, /* AllowNonInbounds */ true); - Load1Ptr = Builder.CreatePtrAdd(Load1Ptr, - Builder.getInt32(Offset1.getZExtValue())); + Load1Ptr = Builder.CreatePtrAdd(Load1Ptr, Builder.getInt(Offset1)); } // Generate wider load. NewLoad = Builder.CreateAlignedLoad(WiderType, Load1Ptr, LI1->getAlign(), diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 9d2990c98ce27..3223fccbcf49a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -506,8 +506,10 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true. 
if (II.hasOneUse() && match(Op1, m_Zero()) && - match(II.user_back(), m_Shift(m_Value(), m_Specific(&II)))) + match(II.user_back(), m_Shift(m_Value(), m_Specific(&II)))) { + II.dropUBImplyingAttrsAndMetadata(); return IC.replaceOperand(II, 1, IC.Builder.getTrue()); + } Constant *C; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 8a6ec3076ac62..b9d06b5936850 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1004,7 +1004,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I, uint64_t MaskedGEPIndex = HighBitsGEPIndex | MaskedLowBitsGEPIndex; if (MaskedGEPIndex != GEPIndex) { - auto *GEP = cast(II->getArgOperand(0)); + auto *GEP = cast(II->getArgOperand(0)); Builder.SetInsertPoint(I); Type *GEPIndexType = DL.getIndexType(GEP->getPointerOperand()->getType()); diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 37022104d0a9b..d1c80aa671243 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -1033,9 +1033,9 @@ void State::addInfoForInductions(BasicBlock &BB) { DTN, CmpInst::ICMP_SLT, PN, B, ConditionTy(CmpInst::ICMP_SLE, StartValue, B))); - // Try to add condition from header to the exit blocks. When exiting either - // with EQ or NE in the header, we know that the induction value must be u<= - // B, as other exits may only exit earlier. + // Try to add condition from header to the dedicated exit blocks. When exiting + // either with EQ or NE in the header, we know that the induction value must + // be u<= B, as other exits may only exit earlier. 
assert(!StepOffset.isNegative() && "induction must be increasing"); assert((Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) && "unsupported predicate"); @@ -1043,8 +1043,11 @@ void State::addInfoForInductions(BasicBlock &BB) { SmallVector ExitBBs; L->getExitBlocks(ExitBBs); for (BasicBlock *EB : ExitBBs) { - WorkList.emplace_back(FactOrCheck::getConditionFact( - DT.getNode(EB), CmpInst::ICMP_ULE, A, B, Precond)); + // Bail out on non-dedicated exits. + if (DT.dominates(&BB, EB)) { + WorkList.emplace_back(FactOrCheck::getConditionFact( + DT.getNode(EB), CmpInst::ICMP_ULE, A, B, Precond)); + } } } diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 91ef2b4b7c183..ca03eff7a4e25 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -1464,8 +1464,11 @@ static Instruction *cloneInstructionInExitBlock( if (MSSAU.getMemorySSA()->getMemoryAccess(&I)) { // Create a new MemoryAccess and let MemorySSA set its defining access. + // After running some passes, MemorySSA might be outdated, and the + // instruction `I` may have become a non-memory touching instruction. MemoryAccess *NewMemAcc = MSSAU.createMemoryAccessInBB( - New, nullptr, New->getParent(), MemorySSA::Beginning); + New, nullptr, New->getParent(), MemorySSA::Beginning, + /*CreationMustSucceed=*/false); if (NewMemAcc) { if (auto *MemDef = dyn_cast(NewMemAcc)) MSSAU.insertDef(MemDef, /*RenameUses=*/true); diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index c235d2fb2a5bd..f99f4487c5540 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -1249,8 +1249,9 @@ static BasicBlock *buildClonedLoopBlocks( assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!"); // Forget SCEVs based on exit phis in case SCEV looked through the phi. 
- if (SE && isa(I)) - SE->forgetValue(&I); + if (SE) + if (auto *PN = dyn_cast(&I)) + SE->forgetLcssaPhiWithNewPredecessor(&L, PN); BasicBlock::iterator InsertPt = MergeBB->getFirstInsertionPt(); diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 7192efe3f16b9..f68cbf62b9825 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -1028,7 +1028,13 @@ CanRedirectPredsOfEmptyBBToSucc(BasicBlock *BB, BasicBlock *Succ, if (!BB->hasNPredecessorsOrMore(2)) return false; - // Get single common predecessors of both BB and Succ + if (any_of(BBPreds, [](const BasicBlock *Pred) { + return isa(Pred->getTerminator()); + })) + return false; + + // Get the single common predecessor of both BB and Succ. Return false + // when there are more than one common predecessors. for (BasicBlock *SuccPred : SuccPreds) { if (BBPreds.count(SuccPred)) { if (CommonPred) @@ -1133,7 +1139,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, bool BBKillable = CanPropagatePredecessorsForPHIs(BB, Succ, BBPreds); - // Even if we can not fold bB into Succ, we may be able to redirect the + // Even if we can not fold BB into Succ, we may be able to redirect the // predecessors of BB to Succ. 
bool BBPhisMergeable = BBKillable || diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index ab2b96cdc42db..746ba51a981fe 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -15440,9 +15440,25 @@ bool BoUpSLP::collectValuesToDemote( MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL))); }); }; + auto AbsChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) { + assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!"); + return all_of(E.Scalars, [&](Value *V) { + auto *I = cast(V); + unsigned SignBits = OrigBitWidth - BitWidth; + APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth - 1); + unsigned Op0SignBits = + ComputeNumSignBits(I->getOperand(0), *DL, 0, AC, nullptr, DT); + return SignBits <= Op0SignBits && + ((SignBits != Op0SignBits && + !isKnownNonNegative(I->getOperand(0), SimplifyQuery(*DL))) || + MaskedValueIsZero(I->getOperand(0), Mask, SimplifyQuery(*DL))); + }); + }; if (ID != Intrinsic::abs) { Operands.push_back(getOperandEntry(&E, 1)); CallChecker = CompChecker; + } else { + CallChecker = AbsChecker; } InstructionCost BestCost = std::numeric_limits::max(); diff --git a/llvm/test/Analysis/ScalarEvolution/pr116483.ll b/llvm/test/Analysis/ScalarEvolution/pr116483.ll new file mode 100644 index 0000000000000..cc2334e9c64f9 --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/pr116483.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -disable-output "-passes=print" < %s 2>&1 | FileCheck %s + +define i16 @test() { +; CHECK-LABEL: 'test' +; CHECK-NEXT: Classifying expressions for: @test +; CHECK-NEXT: %xor = xor i32 0, 3 +; CHECK-NEXT: --> %xor U: [3,4) S: [3,4) +; CHECK-NEXT: %mul = mul i32 %xor, 329 +; CHECK-NEXT: --> (329 * %xor) U: [987,988) S: [987,988) +; CHECK-NEXT: %conv = trunc i32 %mul to i16 +; CHECK-NEXT: --> (329 
* (trunc i32 %xor to i16)) U: [987,988) S: [987,988) +; CHECK-NEXT: %sext = shl i16 %conv, 8 +; CHECK-NEXT: --> (18688 * (trunc i32 %xor to i16)) U: [-9472,-9471) S: [-9472,-9471) +; CHECK-NEXT: %conv1 = ashr i16 %sext, 8 +; CHECK-NEXT: --> (sext i8 (73 * (trunc i32 %xor to i8)) to i16) U: [-37,-36) S: [-37,-36) +; CHECK-NEXT: Determining loop execution counts for: @test +; +entry: + %xor = xor i32 0, 3 + %mul = mul i32 %xor, 329 + %conv = trunc i32 %mul to i16 + %sext = shl i16 %conv, 8 + %conv1 = ashr i16 %sext, 8 + ret i16 %conv1 +} diff --git a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll index e8dafd5e8fbab..17f8263560430 100644 --- a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll +++ b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll @@ -497,6 +497,35 @@ for.exit: ; preds = %for.body ret i64 %spec.select } +@a = external local_unnamed_addr global i32, align 4 + +; Make sure the load is not hoisted out of the loop across memory barriers. 
+define i32 @load_between_memory_barriers() { +; CHECK-LABEL: load_between_memory_barriers: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, :got:a +; CHECK-NEXT: ldr x8, [x8, :got_lo12:a] +; CHECK-NEXT: .LBB8_1: // %loop +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: //MEMBARRIER +; CHECK-NEXT: ldr w0, [x8] +; CHECK-NEXT: //MEMBARRIER +; CHECK-NEXT: cbz w0, .LBB8_1 +; CHECK-NEXT: // %bb.2: // %exit +; CHECK-NEXT: ret + br label %loop + +loop: + fence syncscope("singlethread") acq_rel + %l = load i32, ptr @a, align 4 + fence syncscope("singlethread") acq_rel + %c = icmp eq i32 %l, 0 + br i1 %c, label %loop, label %exit + +exit: + ret i32 %l +} + declare i32 @bcmp(ptr, ptr, i64) declare i32 @memcmp(ptr, ptr, i64) declare void @func() diff --git a/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll b/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll new file mode 100644 index 0000000000000..36a300fea25e5 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll @@ -0,0 +1,161 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -o - %s | FileCheck %s +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "arm64-apple-macosx14.0.0" + +; Check we don't crash on Darwin and that we don't try to save VG +; when only SME (and not SVE) is enabled. + +; Function Attrs: mustprogress norecurse nounwind ssp uwtable(sync) +define noundef i32 @main() local_unnamed_addr #0 { +; CHECK-LABEL: main: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: stp d15, d14, [sp, #-80]! 
; 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: stp d13, d12, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] ; 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] ; 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset b8, -24 +; CHECK-NEXT: .cfi_offset b9, -32 +; CHECK-NEXT: .cfi_offset b10, -40 +; CHECK-NEXT: .cfi_offset b11, -48 +; CHECK-NEXT: .cfi_offset b12, -56 +; CHECK-NEXT: .cfi_offset b13, -64 +; CHECK-NEXT: .cfi_offset b14, -72 +; CHECK-NEXT: .cfi_offset b15, -80 +; CHECK-NEXT: smstart sm +; CHECK-NEXT: bl __ZL9sme_crashv +; CHECK-NEXT: smstop sm +; CHECK-NEXT: mov w0, #0 ; =0x0 +; CHECK-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #48] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 ; 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 +; CHECK-NEXT: .cfi_restore b10 +; CHECK-NEXT: .cfi_restore b11 +; CHECK-NEXT: .cfi_restore b12 +; CHECK-NEXT: .cfi_restore b13 +; CHECK-NEXT: .cfi_restore b14 +; CHECK-NEXT: .cfi_restore b15 +; CHECK-NEXT: ret +entry: + tail call fastcc void @_ZL9sme_crashv() #4 + ret i32 0 +} + +; Function Attrs: mustprogress norecurse nounwind ssp uwtable(sync) +define internal fastcc void @_ZL9sme_crashv() unnamed_addr #1 { +; CHECK-LABEL: _ZL9sme_crashv: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
; 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: stp d13, d12, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] ; 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] ; 16-byte Folded Spill +; CHECK-NEXT: stp x28, x27, [sp, #64] ; 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill +; CHECK-NEXT: add x29, sp, #80 +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w27, -24 +; CHECK-NEXT: .cfi_offset w28, -32 +; CHECK-NEXT: .cfi_offset b8, -40 +; CHECK-NEXT: .cfi_offset b9, -48 +; CHECK-NEXT: .cfi_offset b10, -56 +; CHECK-NEXT: .cfi_offset b11, -64 +; CHECK-NEXT: .cfi_offset b12, -72 +; CHECK-NEXT: .cfi_offset b13, -80 +; CHECK-NEXT: .cfi_offset b14, -88 +; CHECK-NEXT: .cfi_offset b15, -96 +; CHECK-NEXT: .cfi_remember_state +; CHECK-NEXT: sub x9, sp, #160 +; CHECK-NEXT: and sp, x9, #0xffffffffffffff00 +; CHECK-NEXT: Lloh0: +; CHECK-NEXT: adrp x8, ___stack_chk_guard@GOTPAGE +; CHECK-NEXT: Lloh1: +; CHECK-NEXT: ldr x8, [x8, ___stack_chk_guard@GOTPAGEOFF] +; CHECK-NEXT: Lloh2: +; CHECK-NEXT: ldr x8, [x8] +; CHECK-NEXT: str x8, [sp, #152] +; CHECK-NEXT: mov z0.b, #0 ; =0x0 +; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: st1w { z0.s }, p0, [x8] +; CHECK-EMPTY: +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ldr x8, [sp, #152] +; CHECK-NEXT: Lloh3: +; CHECK-NEXT: adrp x9, ___stack_chk_guard@GOTPAGE +; CHECK-NEXT: Lloh4: +; CHECK-NEXT: ldr x9, [x9, ___stack_chk_guard@GOTPAGEOFF] +; CHECK-NEXT: Lloh5: +; CHECK-NEXT: ldr x9, [x9] +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: b.ne LBB1_2 +; CHECK-NEXT: ; %bb.1: ; %entry +; CHECK-NEXT: sub sp, x29, #80 +; CHECK-NEXT: .cfi_def_cfa wsp, 96 +; CHECK-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload +; CHECK-NEXT: ldp x28, x27, [sp, #64] ; 16-byte 
Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #48] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #96 ; 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 +; CHECK-NEXT: .cfi_restore w27 +; CHECK-NEXT: .cfi_restore w28 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 +; CHECK-NEXT: .cfi_restore b10 +; CHECK-NEXT: .cfi_restore b11 +; CHECK-NEXT: .cfi_restore b12 +; CHECK-NEXT: .cfi_restore b13 +; CHECK-NEXT: .cfi_restore b14 +; CHECK-NEXT: .cfi_restore b15 +; CHECK-NEXT: ret +; CHECK-NEXT: LBB1_2: ; %entry +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: smstop sm +; CHECK-NEXT: bl ___stack_chk_fail +; CHECK-NEXT: smstart sm +; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh3, Lloh4, Lloh5 +; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh0, Lloh1, Lloh2 +entry: + %uu = alloca [16 x float], align 256 + call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %uu) #5 + call void @llvm.memset.p0.i64(ptr noundef nonnull align 256 dereferenceable(64) %uu, i8 0, i64 64, i1 false) + call void asm sideeffect "ptrue p0.s\0Ast1w { z0.s }, p0, [$0]\0A", "r"(ptr nonnull %uu) #5 + call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %uu) #5 + ret void +} + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2 + +; Function Attrs: mustprogress nocallback nofree nounwind willreturn memory(argmem: write) +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2 + +attributes #0 = { mustprogress norecurse nounwind ssp uwtable(sync) "stack-protector-buffer-size"="8" 
"target-cpu"="apple-a16" "target-features"="+sme,+sme-f64f64,+sme2" } +attributes #1 = { mustprogress norecurse nounwind ssp uwtable(sync) "aarch64_pstate_sm_enabled" "stack-protector-buffer-size"="8" "target-cpu"="apple-a16" "target-features"="+sme,+sme-f64f64,+sme2" } +attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #3 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: write) } +attributes #4 = { "aarch64_pstate_sm_enabled" "no-builtin-calloc" "no-builtin-stpcpy" } +attributes #5 = { nounwind } diff --git a/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll b/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll new file mode 100644 index 0000000000000..c32e9cbc05393 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll @@ -0,0 +1,55 @@ +; RUN: llc -mtriple=aarch64-darwin -mattr=+sve -mattr=+sme -verify-machineinstrs < %s | FileCheck %s + +declare void @normal_callee(); + +define void @locally_streaming_fn() #0 { +; CHECK-LABEL: locally_streaming_fn: +; CHECK: ; %bb.0: +; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
; 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: stp d13, d12, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: lsr x9, x9, #3 +; CHECK-NEXT: stp d11, d10, [sp, #32] ; 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] ; 16-byte Folded Spill +; CHECK-NEXT: stp x30, x9, [sp, #64] ; 16-byte Folded Spill +; CHECK-NEXT: cntd x9 +; CHECK-NEXT: str x9, [sp, #80] ; 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset vg, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: .cfi_offset b8, -40 +; CHECK-NEXT: .cfi_offset b9, -48 +; CHECK-NEXT: .cfi_offset b10, -56 +; CHECK-NEXT: .cfi_offset b11, -64 +; CHECK-NEXT: .cfi_offset b12, -72 +; CHECK-NEXT: .cfi_offset b13, -80 +; CHECK-NEXT: .cfi_offset b14, -88 +; CHECK-NEXT: .cfi_offset b15, -96 +; CHECK-NEXT: smstart sm +; CHECK-NEXT: .cfi_offset vg, -24 +; CHECK-NEXT: smstop sm +; CHECK-NEXT: bl _normal_callee +; CHECK-NEXT: smstart sm +; CHECK-NEXT: .cfi_restore vg +; CHECK-NEXT: smstop sm +; CHECK-NEXT: ldp d9, d8, [sp, #48] ; 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] ; 8-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #96 ; 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 +; CHECK-NEXT: .cfi_restore b10 +; CHECK-NEXT: .cfi_restore b11 +; CHECK-NEXT: .cfi_restore b12 +; CHECK-NEXT: .cfi_restore b13 +; CHECK-NEXT: .cfi_restore b14 +; CHECK-NEXT: .cfi_restore b15 +; CHECK-NEXT: ret + call void @normal_callee() + ret void +} + +attributes #0 = { "aarch64_pstate_sm_body" uwtable(async) } diff --git a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll index fa8f92cb0a2c9..38666a05c20f8 100644 --- a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll +++ 
b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll @@ -1102,6 +1102,44 @@ define void @streaming_compatible_no_sve(i32 noundef %x) #4 { ret void } +; The algorithm that fixes up the offsets of the callee-save/restore +; instructions must jump over the instructions that instantiate the current +; 'VG' value. We must make sure that it doesn't consider any RDSVL in +; user-code as if it is part of the frame-setup when doing so. +define void @test_rdsvl_right_after_prologue(i64 %x0) nounwind { +; NO-SVE-CHECK-LABEL: test_rdsvl_right_after_prologue: +; NO-SVE-CHECK: // %bb.0: +; NO-SVE-CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill +; NO-SVE-CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; NO-SVE-CHECK-NEXT: mov x9, x0 +; NO-SVE-CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; NO-SVE-CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; NO-SVE-CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; NO-SVE-CHECK-NEXT: bl __arm_get_current_vg +; NO-SVE-CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill +; NO-SVE-CHECK-NEXT: mov x0, x9 +; NO-SVE-CHECK-NEXT: rdsvl x8, #1 +; NO-SVE-CHECK-NEXT: add x29, sp, #64 +; NO-SVE-CHECK-NEXT: lsr x8, x8, #3 +; NO-SVE-CHECK-NEXT: mov x1, x0 +; NO-SVE-CHECK-NEXT: smstart sm +; NO-SVE-CHECK-NEXT: mov x0, x8 +; NO-SVE-CHECK-NEXT: bl bar +; NO-SVE-CHECK-NEXT: smstop sm +; NO-SVE-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; NO-SVE-CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; NO-SVE-CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; NO-SVE-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; NO-SVE-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; NO-SVE-CHECK-NEXT: ret + %some_alloc = alloca i64, align 8 + %rdsvl = tail call i64 @llvm.aarch64.sme.cntsd() + call void @bar(i64 %rdsvl, i64 %x0) "aarch64_pstate_sm_enabled" + ret void +} + +declare void @bar(i64, i64) + ; Ensure we still emit async unwind information 
with -fno-asynchronous-unwind-tables ; if the function contains a streaming-mode change. diff --git a/llvm/test/CodeGen/LoongArch/code-models.ll b/llvm/test/CodeGen/LoongArch/code-models.ll index 4b2b72afaee17..4eb1e5e596fd3 100644 --- a/llvm/test/CodeGen/LoongArch/code-models.ll +++ b/llvm/test/CodeGen/LoongArch/code-models.ll @@ -82,11 +82,11 @@ define void @call_external_sym(ptr %dst) { ; LARGE-NEXT: .cfi_offset 1, -8 ; LARGE-NEXT: ori $a2, $zero, 1000 ; LARGE-NEXT: move $a1, $zero -; LARGE-NEXT: pcalau12i $ra, %pc_hi20(memset) -; LARGE-NEXT: addi.d $t8, $zero, %pc_lo12(memset) -; LARGE-NEXT: lu32i.d $t8, %pc64_lo20(memset) -; LARGE-NEXT: lu52i.d $t8, $t8, %pc64_hi12(memset) -; LARGE-NEXT: add.d $ra, $t8, $ra +; LARGE-NEXT: pcalau12i $ra, %got_pc_hi20(memset) +; LARGE-NEXT: addi.d $t8, $zero, %got_pc_lo12(memset) +; LARGE-NEXT: lu32i.d $t8, %got64_pc_lo20(memset) +; LARGE-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(memset) +; LARGE-NEXT: ldx.d $ra, $t8, $ra ; LARGE-NEXT: jirl $ra, $ra, 0 ; LARGE-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ; LARGE-NEXT: addi.d $sp, $sp, 16 diff --git a/llvm/test/CodeGen/LoongArch/e_flags.ll b/llvm/test/CodeGen/LoongArch/e_flags.ll index 2feb9d832bca9..9b2dc87eb353d 100644 --- a/llvm/test/CodeGen/LoongArch/e_flags.ll +++ b/llvm/test/CodeGen/LoongArch/e_flags.ll @@ -1,3 +1,6 @@ +; RUN: llc --mtriple=loongarch32 --filetype=obj %s -o %t-la32s +; RUN: llvm-readelf -h %t-la32s | FileCheck %s --check-prefixes=ILP32,ABI-S --match-full-lines + ; RUN: llc --mtriple=loongarch32 -mattr=+d --filetype=obj %s -o %t-la32 ; RUN: llvm-readelf -h %t-la32 | FileCheck %s --check-prefixes=ILP32,ABI-D --match-full-lines @@ -10,6 +13,9 @@ ; RUN: llc --mtriple=loongarch32 -mattr=+d --filetype=obj %s --target-abi=ilp32d -o %t-ilp32d ; RUN: llvm-readelf -h %t-ilp32d | FileCheck %s --check-prefixes=ILP32,ABI-D --match-full-lines +; RUN: llc --mtriple=loongarch64 -mattr=+d --filetype=obj %s -o %t-la64d +; RUN: llvm-readelf -h %t-la64d | FileCheck %s 
--check-prefixes=LP64,ABI-D --match-full-lines + ; RUN: llc --mtriple=loongarch64 -mattr=+d --filetype=obj %s -o %t-la64 ; RUN: llvm-readelf -h %t-la64 | FileCheck %s --check-prefixes=LP64,ABI-D --match-full-lines diff --git a/llvm/test/CodeGen/LoongArch/lsx/pr116008.ll b/llvm/test/CodeGen/LoongArch/lsx/pr116008.ll new file mode 100644 index 0000000000000..ba8ffc3493189 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/pr116008.ll @@ -0,0 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define <4 x i32> @xor_shl_splat_vec_one(i32 %x, <4 x i32> %y) nounwind { +; CHECK-LABEL: xor_shl_splat_vec_one: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplgr2vr.w $vr1, $a0 +; CHECK-NEXT: vsll.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vbitrevi.w $vr0, $vr0, 0 +; CHECK-NEXT: ret +entry: + %ins = insertelement <4 x i32> poison, i32 %x, i64 0 + %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer + %shl = shl <4 x i32> %splat, %y + %xor = xor <4 x i32> %shl, splat (i32 1) + ret <4 x i32> %xor +} diff --git a/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll b/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll index ed1a24e82b4e4..29348fe0d641e 100644 --- a/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll +++ b/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll @@ -282,11 +282,11 @@ define void @test_la_tls_ld(i32 signext %n) { ; LA64LARGE-NEXT: .LBB3_1: # %loop ; LA64LARGE-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64LARGE-NEXT: move $a0, $s0 -; LA64LARGE-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) -; LA64LARGE-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) -; LA64LARGE-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) -; LA64LARGE-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) -; LA64LARGE-NEXT: add.d $ra, $t8, $ra +; LA64LARGE-NEXT: pcalau12i $ra, %got_pc_hi20(__tls_get_addr) +; 
LA64LARGE-NEXT: addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr) +; LA64LARGE-NEXT: lu32i.d $t8, %got64_pc_lo20(__tls_get_addr) +; LA64LARGE-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr) +; LA64LARGE-NEXT: ldx.d $ra, $t8, $ra ; LA64LARGE-NEXT: jirl $ra, $ra, 0 ; LA64LARGE-NEXT: ld.w $zero, $a0, 0 ; LA64LARGE-NEXT: addi.w $s1, $s1, 1 @@ -448,11 +448,11 @@ define void @test_la_tls_gd(i32 signext %n) nounwind { ; LA64LARGE-NEXT: .LBB5_1: # %loop ; LA64LARGE-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64LARGE-NEXT: move $a0, $s0 -; LA64LARGE-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) -; LA64LARGE-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) -; LA64LARGE-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) -; LA64LARGE-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) -; LA64LARGE-NEXT: add.d $ra, $t8, $ra +; LA64LARGE-NEXT: pcalau12i $ra, %got_pc_hi20(__tls_get_addr) +; LA64LARGE-NEXT: addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr) +; LA64LARGE-NEXT: lu32i.d $t8, %got64_pc_lo20(__tls_get_addr) +; LA64LARGE-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr) +; LA64LARGE-NEXT: ldx.d $ra, $t8, $ra ; LA64LARGE-NEXT: jirl $ra, $ra, 0 ; LA64LARGE-NEXT: ld.w $zero, $a0, 0 ; LA64LARGE-NEXT: addi.w $s1, $s1, 1 diff --git a/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll index 6a15d3a9cda30..75f494f32e476 100644 --- a/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll +++ b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll @@ -105,11 +105,11 @@ define void @foo() nounwind { ; LARGE_NO_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(gd) ; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(gd) ; LARGE_NO_SCH-NEXT: add.d $a0, $t8, $a0 -; LARGE_NO_SCH-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) -; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) -; LARGE_NO_SCH-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) -; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) 
-; LARGE_NO_SCH-NEXT: add.d $ra, $t8, $ra +; LARGE_NO_SCH-NEXT: pcalau12i $ra, %got_pc_hi20(__tls_get_addr) +; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr) +; LARGE_NO_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(__tls_get_addr) +; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr) +; LARGE_NO_SCH-NEXT: ldx.d $ra, $t8, $ra ; LARGE_NO_SCH-NEXT: jirl $ra, $ra, 0 ; LARGE_NO_SCH-NEXT: ld.d $zero, $a0, 0 ; LARGE_NO_SCH-NEXT: pcalau12i $a0, %ld_pc_hi20(ld) @@ -117,11 +117,11 @@ define void @foo() nounwind { ; LARGE_NO_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(ld) ; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(ld) ; LARGE_NO_SCH-NEXT: add.d $a0, $t8, $a0 -; LARGE_NO_SCH-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) -; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) -; LARGE_NO_SCH-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) -; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) -; LARGE_NO_SCH-NEXT: add.d $ra, $t8, $ra +; LARGE_NO_SCH-NEXT: pcalau12i $ra, %got_pc_hi20(__tls_get_addr) +; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr) +; LARGE_NO_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(__tls_get_addr) +; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr) +; LARGE_NO_SCH-NEXT: ldx.d $ra, $t8, $ra ; LARGE_NO_SCH-NEXT: jirl $ra, $ra, 0 ; LARGE_NO_SCH-NEXT: pcalau12i $a1, %ie_pc_hi20(ie) ; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ie) @@ -162,11 +162,11 @@ define void @foo() nounwind { ; LARGE_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(gd) ; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(gd) ; LARGE_SCH-NEXT: add.d $a0, $t8, $a0 -; LARGE_SCH-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) -; LARGE_SCH-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) -; LARGE_SCH-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) -; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) -; LARGE_SCH-NEXT: add.d $ra, $t8, $ra +; LARGE_SCH-NEXT: pcalau12i $ra, %got_pc_hi20(__tls_get_addr) +; 
LARGE_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr) +; LARGE_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(__tls_get_addr) +; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr) +; LARGE_SCH-NEXT: ldx.d $ra, $t8, $ra ; LARGE_SCH-NEXT: jirl $ra, $ra, 0 ; LARGE_SCH-NEXT: ld.d $zero, $a0, 0 ; LARGE_SCH-NEXT: pcalau12i $a0, %ld_pc_hi20(ld) @@ -174,11 +174,11 @@ define void @foo() nounwind { ; LARGE_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(ld) ; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(ld) ; LARGE_SCH-NEXT: add.d $a0, $t8, $a0 -; LARGE_SCH-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) -; LARGE_SCH-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) -; LARGE_SCH-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) -; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) -; LARGE_SCH-NEXT: add.d $ra, $t8, $ra +; LARGE_SCH-NEXT: pcalau12i $ra, %got_pc_hi20(__tls_get_addr) +; LARGE_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr) +; LARGE_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(__tls_get_addr) +; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr) +; LARGE_SCH-NEXT: ldx.d $ra, $t8, $ra ; LARGE_SCH-NEXT: jirl $ra, $ra, 0 ; LARGE_SCH-NEXT: pcalau12i $a1, %ie_pc_hi20(ie) ; LARGE_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ie) diff --git a/llvm/test/CodeGen/LoongArch/tls-models.ll b/llvm/test/CodeGen/LoongArch/tls-models.ll index bb89794d1c843..04600ffeb37ee 100644 --- a/llvm/test/CodeGen/LoongArch/tls-models.ll +++ b/llvm/test/CodeGen/LoongArch/tls-models.ll @@ -55,11 +55,11 @@ define ptr @f1() nounwind { ; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(unspecified) ; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(unspecified) ; LA64LARGEPIC-NEXT: add.d $a0, $t8, $a0 -; LA64LARGEPIC-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) -; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) -; LA64LARGEPIC-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) -; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) -; 
LA64LARGEPIC-NEXT: add.d $ra, $t8, $ra +; LA64LARGEPIC-NEXT: pcalau12i $ra, %got_pc_hi20(__tls_get_addr) +; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr) +; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(__tls_get_addr) +; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr) +; LA64LARGEPIC-NEXT: ldx.d $ra, $t8, $ra ; LA64LARGEPIC-NEXT: jirl $ra, $ra, 0 ; LA64LARGEPIC-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ; LA64LARGEPIC-NEXT: addi.d $sp, $sp, 16 @@ -169,11 +169,11 @@ define ptr @f2() nounwind { ; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(ld) ; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(ld) ; LA64LARGEPIC-NEXT: add.d $a0, $t8, $a0 -; LA64LARGEPIC-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) -; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) -; LA64LARGEPIC-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) -; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) -; LA64LARGEPIC-NEXT: add.d $ra, $t8, $ra +; LA64LARGEPIC-NEXT: pcalau12i $ra, %got_pc_hi20(__tls_get_addr) +; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr) +; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(__tls_get_addr) +; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr) +; LA64LARGEPIC-NEXT: ldx.d $ra, $t8, $ra ; LA64LARGEPIC-NEXT: jirl $ra, $ra, 0 ; LA64LARGEPIC-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ; LA64LARGEPIC-NEXT: addi.d $sp, $sp, 16 diff --git a/llvm/test/CodeGen/Mips/lcb5.ll b/llvm/test/CodeGen/Mips/lcb5.ll index f320f6fc5660c..bb059f1ee8453 100644 --- a/llvm/test/CodeGen/Mips/lcb5.ll +++ b/llvm/test/CodeGen/Mips/lcb5.ll @@ -186,7 +186,7 @@ if.end: ; preds = %if.then, %entry } ; ci: .ent z3 -; ci: bteqz $BB6_3 +; ci: bteqz $BB6_2 ; ci: .end z3 ; Function Attrs: nounwind optsize @@ -210,7 +210,7 @@ if.end: ; preds = %if.then, %entry ; ci: .ent z4 ; ci: btnez $BB7_1 # 16 bit inst -; ci: jal $BB7_3 # branch +; ci: jal $BB7_2 # branch ; ci: nop ; ci: $BB7_1: ; ci: .p2align 2 
diff --git a/llvm/test/MC/WebAssembly/init-array-label.s b/llvm/test/MC/WebAssembly/init-array-label.s new file mode 100644 index 0000000000000..0b4a5ea2da0b5 --- /dev/null +++ b/llvm/test/MC/WebAssembly/init-array-label.s @@ -0,0 +1,91 @@ +# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj < %s | obj2yaml | FileCheck %s + +init1: + .functype init1 () -> () + end_function + +init2: + .functype init2 () -> () + end_function + + .section .init_array.42,"",@ + .p2align 2, 0x0 + .int32 init1 + + .section .init_array,"",@ + .globl p_init1 + .p2align 2, 0x0 +p_init1: + .int32 init1 + .size p_init1, 4 + + .section .init_array,"",@ + .globl p_init2 + .p2align 2, 0x0 +p_init2: + .int32 init1 + .int32 init2 + .size p_init2, 8 + +# CHECK: - Type: FUNCTION +# CHECK-NEXT: FunctionTypes: [ 0, 0 ] +# CHECK-NEXT: - Type: DATACOUNT +# CHECK-NEXT: Count: 1 +# CHECK-NEXT: - Type: CODE +# CHECK-NEXT: Functions: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: Locals: [] +# CHECK-NEXT: Body: 0B +# CHECK-NEXT: - Index: 1 +# CHECK-NEXT: Locals: [] +# CHECK-NEXT: Body: 0B +# CHECK-NEXT: - Type: DATA +# CHECK-NEXT: Segments: +# CHECK-NEXT: - SectionOffset: 6 +# CHECK-NEXT: InitFlags: 0 +# CHECK-NEXT: Offset: +# CHECK-NEXT: Opcode: I32_CONST +# CHECK-NEXT: Value: 0 +# CHECK-NEXT: Content: '000000000000000000000000' +# CHECK-NEXT: - Type: CUSTOM +# CHECK-NEXT: Name: linking +# CHECK-NEXT: Version: 2 +# CHECK-NEXT: SymbolTable: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Name: init1 +# CHECK-NEXT: Flags: [ BINDING_LOCAL ] +# CHECK-NEXT: Function: 0 +# CHECK-NEXT: - Index: 1 +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Name: init2 +# CHECK-NEXT: Flags: [ BINDING_LOCAL ] +# CHECK-NEXT: Function: 1 +# CHECK-NEXT: - Index: 2 +# CHECK-NEXT: Kind: DATA +# CHECK-NEXT: Name: p_init1 +# CHECK-NEXT: Flags: [ ] +# CHECK-NEXT: Segment: 0 +# CHECK-NEXT: Size: 4 +# CHECK-NEXT: - Index: 3 +# CHECK-NEXT: Kind: DATA +# CHECK-NEXT: Name: p_init2 +# CHECK-NEXT: Flags: [ ] +# 
CHECK-NEXT: Segment: 0 +# CHECK-NEXT: Offset: 4 +# CHECK-NEXT: Size: 8 +# CHECK-NEXT: SegmentInfo: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: Name: .init_array +# CHECK-NEXT: Alignment: 2 +# CHECK-NEXT: Flags: [ ] +# CHECK-NEXT: InitFunctions: +# CHECK-NEXT: - Priority: 42 +# CHECK-NEXT: Symbol: 0 +# CHECK-NEXT: - Priority: 65535 +# CHECK-NEXT: Symbol: 0 +# CHECK-NEXT: - Priority: 65535 +# CHECK-NEXT: Symbol: 0 +# CHECK-NEXT: - Priority: 65535 +# CHECK-NEXT: Symbol: 1 +# CHECK-NEXT: ... diff --git a/llvm/test/MC/WebAssembly/init-array.s b/llvm/test/MC/WebAssembly/init-array.s new file mode 100644 index 0000000000000..e79fb453ec12a --- /dev/null +++ b/llvm/test/MC/WebAssembly/init-array.s @@ -0,0 +1,49 @@ +# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj < %s | obj2yaml | FileCheck %s + +init1: + .functype init1 () -> () + end_function + +init2: + .functype init2 () -> () + end_function + + .section .init_array,"",@ + .p2align 2, 0 + .int32 init1 + + .section .init_array,"",@ + .p2align 2 + .int32 init2 + +# CHECK: - Type: FUNCTION +# CHECK-NEXT: FunctionTypes: [ 0, 0 ] +# CHECK-NEXT: - Type: CODE +# CHECK-NEXT: Functions: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: Locals: [] +# CHECK-NEXT: Body: 0B +# CHECK-NEXT: - Index: 1 +# CHECK-NEXT: Locals: [] +# CHECK-NEXT: Body: 0B +# CHECK-NEXT: - Type: CUSTOM +# CHECK-NEXT: Name: linking +# CHECK-NEXT: Version: 2 +# CHECK-NEXT: SymbolTable: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Name: init1 +# CHECK-NEXT: Flags: [ BINDING_LOCAL ] +# CHECK-NEXT: Function: 0 +# CHECK-NEXT: - Index: 1 +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Name: init2 +# CHECK-NEXT: Flags: [ BINDING_LOCAL ] +# CHECK-NEXT: Function: 1 +# CHECK-NEXT: InitFunctions: +# CHECK-NEXT: - Priority: 65535 +# CHECK-NEXT: Symbol: 0 +# CHECK-NEXT: - Priority: 65535 +# CHECK-NEXT: Symbol: 1 +# CHECK-NEXT: ... 
+# diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll index 1400ee7f703ca..10c4c9b0ca4c9 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll @@ -1121,19 +1121,19 @@ entry: define i32 @loadCombine_4consecutive_metadata(ptr %p, ptr %pstr) { ; LE-LABEL: @loadCombine_4consecutive_metadata( -; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1, !alias.scope !0 -; LE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0 +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1, !alias.scope [[META0:![0-9]+]] +; LE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias [[META0]] ; LE-NEXT: ret i32 [[L1]] ; ; BE-LABEL: @loadCombine_4consecutive_metadata( ; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 ; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 ; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope !0 -; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope !0 -; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope !0 -; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope !0 -; BE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope [[META0:![0-9]+]] +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope [[META0]] +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope [[META0]] +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope [[META0]] +; BE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias [[META0]] ; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 ; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 ; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 @@ -1869,7 +1869,7 @@ define i32 @loadCombine_4consecutive_badinsert2(ptr %p) { define i32 
@loadCombine_4consecutive_badinsert3(ptr %p) { ; LE-LABEL: @loadCombine_4consecutive_badinsert3( -; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 ; LE-NEXT: [[L1:%.*]] = load i32, ptr [[TMP1]], align 1 ; LE-NEXT: ret i32 [[L1]] ; @@ -2088,7 +2088,7 @@ define i32 @loadCombine_4consecutive_badinsert6(ptr %p) { define void @nested_gep(ptr %p, ptr %dest) { ; LE-LABEL: @nested_gep( -; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 68 +; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 68 ; LE-NEXT: [[LD2:%.*]] = load i64, ptr [[TMP1]], align 4 ; LE-NEXT: [[TRUNC:%.*]] = trunc i64 [[LD2]] to i32 ; LE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4 @@ -2128,7 +2128,7 @@ define void @nested_gep(ptr %p, ptr %dest) { define void @bitcast_gep(ptr %p, ptr %dest) { ; LE-LABEL: @bitcast_gep( -; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 68 +; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 68 ; LE-NEXT: [[LD2:%.*]] = load i64, ptr [[TMP1]], align 4 ; LE-NEXT: [[TRUNC:%.*]] = trunc i64 [[LD2]] to i32 ; LE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4 diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll index 0aa6f9ecdf884..1b53c8f71222b 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll @@ -1205,19 +1205,19 @@ entry: define i32 @loadCombine_4consecutive_metadata(ptr %p, ptr %pstr) { ; LE-LABEL: @loadCombine_4consecutive_metadata( -; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1, !alias.scope !0 -; LE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0 +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1, !alias.scope [[META0:![0-9]+]] +; LE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias [[META0]] ; LE-NEXT: ret i32 [[L1]] ; ; BE-LABEL: 
@loadCombine_4consecutive_metadata( ; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 ; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 ; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope !0 -; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope !0 -; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope !0 -; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope !0 -; BE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope [[META0:![0-9]+]] +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope [[META0]] +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope [[META0]] +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope [[META0]] +; BE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias [[META0]] ; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 ; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 ; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 @@ -2005,7 +2005,7 @@ define i32 @loadCombine_4consecutive_badinsert2(ptr %p) { define i32 @loadCombine_4consecutive_badinsert3(ptr %p) { ; LE-LABEL: @loadCombine_4consecutive_badinsert3( -; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 ; LE-NEXT: [[L1:%.*]] = load i32, ptr [[TMP1]], align 1 ; LE-NEXT: ret i32 [[L1]] ; @@ -2306,7 +2306,7 @@ define i64 @loadCombine_nonConstShift2(ptr %arg, i8 %b) { define void @nested_gep(ptr %p, ptr %dest) { ; LE-LABEL: @nested_gep( -; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 68 +; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 68 ; LE-NEXT: [[LD2:%.*]] = load i64, ptr [[TMP1]], align 4 ; LE-NEXT: [[TRUNC:%.*]] = trunc i64 [[LD2]] to i32 ; LE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4 @@ -2346,7 +2346,7 @@ define void @nested_gep(ptr 
%p, ptr %dest) { define void @bitcast_gep(ptr %p, ptr %dest) { ; LE-LABEL: @bitcast_gep( -; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 68 +; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 68 ; LE-NEXT: [[LD2:%.*]] = load i64, ptr [[TMP1]], align 4 ; LE-NEXT: [[TRUNC:%.*]] = trunc i64 [[LD2]] to i32 ; LE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4 @@ -2382,3 +2382,35 @@ define void @bitcast_gep(ptr %p, ptr %dest) { store i32 %trunc, ptr %dest, align 4 ret void } + +define i32 @loadcombine_consecutive_idx_64(ptr %data) { +; LE-LABEL: @loadcombine_consecutive_idx_64( +; LE-NEXT: entry: +; LE-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DATA:%.*]], i64 2149675576 +; LE-NEXT: [[VAL_2:%.*]] = load i16, ptr [[TMP0]], align 1 +; LE-NEXT: [[TMP1:%.*]] = zext i16 [[VAL_2]] to i32 +; LE-NEXT: ret i32 [[TMP1]] +; +; BE-LABEL: @loadcombine_consecutive_idx_64( +; BE-NEXT: entry: +; BE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[DATA:%.*]], i64 2149675577 +; BE-NEXT: [[VAL:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; BE-NEXT: [[CONV:%.*]] = zext i8 [[VAL]] to i32 +; BE-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[DATA]], i64 2149675576 +; BE-NEXT: [[VAL_2:%.*]] = load i8, ptr [[ARRAYIDX_2]], align 1 +; BE-NEXT: [[CONV_2:%.*]] = zext i8 [[VAL_2]] to i32 +; BE-NEXT: [[SHL:%.*]] = shl nuw nsw i32 [[CONV]], 8 +; BE-NEXT: [[OR:%.*]] = or disjoint i32 [[SHL]], [[CONV_2]] +; BE-NEXT: ret i32 [[OR]] +; +entry: + %arrayidx = getelementptr inbounds nuw i8, ptr %data, i64 2149675577 + %val = load i8, ptr %arrayidx, align 1 + %conv = zext i8 %val to i32 + %arrayidx.2 = getelementptr inbounds nuw i8, ptr %data, i64 2149675576 + %val.2 = load i8, ptr %arrayidx.2, align 1 + %conv.2 = zext i8 %val.2 to i32 + %shl = shl nuw nsw i32 %conv, 8 + %or = or disjoint i32 %shl, %conv.2 + ret i32 %or +} diff --git a/llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll 
b/llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll index 15e1d84372627..a04b06e1bf0a5 100644 --- a/llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll +++ b/llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll @@ -763,3 +763,47 @@ exit.2: %t.2 = icmp ult i32 %iv, %N ret i1 %t.2 } + +define i1 @test_non_dedicated_exit(i16 %n) { +; CHECK-LABEL: define i1 @test_non_dedicated_exit( +; CHECK-SAME: i16 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[COND:%.*]] = icmp slt i16 [[N]], 1 +; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP_PREHEADER:.*]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[SUB:%.*]] = add nsw i16 [[N]], -1 +; CHECK-NEXT: [[EXT:%.*]] = zext nneg i16 [[SUB]] to i32 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_INC:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVAR]], [[EXT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[INDVAR_INC]] = add nuw nsw i32 [[INDVAR]], 1 +; CHECK-NEXT: br label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i16 [[N]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + %cond = icmp slt i16 %n, 1 + br i1 %cond, label %exit, label %loop.preheader + +loop.preheader: + %sub = add nsw i16 %n, -1 + %ext = zext nneg i16 %sub to i32 + br label %loop + +loop: + %indvar = phi i32 [ %indvar.inc, %loop.latch ], [ 0, %loop.preheader ] + %exitcond = icmp eq i32 %indvar, %ext + br i1 %exitcond, label %exit, label %loop.latch + +loop.latch: + %indvar.inc = add nuw nsw i32 %indvar, 1 + br label %loop + +exit: + %cmp = icmp sgt i16 %n, 0 + ret i1 %cmp +} diff --git a/llvm/test/Transforms/IndVarSimplify/pr116483.ll b/llvm/test/Transforms/IndVarSimplify/pr116483.ll new file mode 100644 index 0000000000000..ae108a525223e --- /dev/null 
+++ b/llvm/test/Transforms/IndVarSimplify/pr116483.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=indvars < %s | FileCheck %s + +define i32 @test() { +; CHECK-LABEL: define i32 @test() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 0, 3 +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[XOR]], 329 +; CHECK-NEXT: [[CONV:%.*]] = trunc i32 [[MUL]] to i16 +; CHECK-NEXT: [[SEXT:%.*]] = shl i16 [[CONV]], 8 +; CHECK-NEXT: [[CONV1:%.*]] = ashr i16 [[SEXT]], 8 +; CHECK-NEXT: br label %[[LOOP_BODY:.*]] +; CHECK: [[LOOP_BODY]]: +; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[LOOP_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[CONV3:%.*]] = zext i16 [[CONV1]] to i32 +; CHECK-NEXT: ret i32 [[CONV3]] +; +entry: + %xor = xor i32 0, 3 + %mul = mul i32 %xor, 329 + %conv = trunc i32 %mul to i16 + %sext = shl i16 %conv, 8 + %conv1 = ashr i16 %sext, 8 + %conv3 = zext i16 %conv1 to i32 + br label %loop.body + +loop.body: + %indvar = phi i32 [ %indvar.inc, %loop.body ], [ 1, %entry ] + %indvar.inc = add nuw i32 %indvar, 1 + %exitcond = icmp eq i32 %indvar, %conv3 + br i1 %exitcond, label %exit, label %loop.body + +exit: + ret i32 %conv3 +} diff --git a/llvm/test/Transforms/InstCombine/ptrmask.ll b/llvm/test/Transforms/InstCombine/ptrmask.ll index 4631b81cd1ce1..cd998bac3f9f0 100644 --- a/llvm/test/Transforms/InstCombine/ptrmask.ll +++ b/llvm/test/Transforms/InstCombine/ptrmask.ll @@ -578,3 +578,16 @@ define ptr @ptrmask_is_useless_fail1(i64 %i, i64 %m) { %r = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 %m0) ret ptr %r } + +@GC_arrays = external global { i8, i8, i64 } + +define ptr @ptrmask_demandedbits_constantexpr() { +; CHECK-LABEL: define ptr @ptrmask_demandedbits_constantexpr() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALIGNED_RESULT:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i64(ptr nonnull @GC_arrays, i64 -8) +; CHECK-NEXT: ret ptr [[ALIGNED_RESULT]] +; +entry: + %aligned_result 
= call ptr @llvm.ptrmask.p0.i64(ptr getelementptr inbounds (i8, ptr @GC_arrays, i64 1), i64 -8) + ret ptr %aligned_result +} diff --git a/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll b/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll index 1c381d0839071..63caec9501325 100644 --- a/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll +++ b/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll @@ -15,6 +15,22 @@ entry: ret i32 %res } +; Make sure that noundef is dropped. + +define i32 @shl_cttz_false_noundef(i32 %x, i32 %y) { +; CHECK-LABEL: define i32 @shl_cttz_false_noundef( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[Y]], i1 true) +; CHECK-NEXT: [[RES:%.*]] = shl i32 [[X]], [[CTTZ]] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + %cttz = call noundef i32 @llvm.cttz.i32(i32 %y, i1 false) + %res = shl i32 %x, %cttz + ret i32 %res +} + define i32 @shl_ctlz_false(i32 %x, i32 %y) { ; CHECK-LABEL: define i32 @shl_ctlz_false( ; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { diff --git a/llvm/test/Transforms/LICM/PR116813-memoryssa-outdated.ll b/llvm/test/Transforms/LICM/PR116813-memoryssa-outdated.ll new file mode 100644 index 0000000000000..a040c3cc6947c --- /dev/null +++ b/llvm/test/Transforms/LICM/PR116813-memoryssa-outdated.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes='loop-mssa(simple-loop-unswitch,licm)' -verify-memoryssa -S < %s | FileCheck %s + +; Check that running LICM after SimpleLoopUnswitch does not result in a crash. 
+ +define i32 @foo(i1 %arg, ptr %arg1) { +; CHECK-LABEL: define i32 @foo( +; CHECK-SAME: i1 [[ARG:%.*]], ptr [[ARG1:%.*]]) { +; CHECK-NEXT: [[START:.*:]] +; CHECK-NEXT: [[ARG_FR:%.*]] = freeze i1 [[ARG]] +; CHECK-NEXT: br i1 [[ARG_FR]], label %[[START_SPLIT_US:.*]], label %[[START_SPLIT:.*]] +; CHECK: [[START_SPLIT_US]]: +; CHECK-NEXT: br label %[[LOOP_US:.*]] +; CHECK: [[LOOP_US]]: +; CHECK-NEXT: br label %[[BB0:.*]] +; CHECK: [[BB0]]: +; CHECK-NEXT: br label %[[BB1:.*]] +; CHECK: [[BB1]]: +; CHECK-NEXT: [[UNSWITCHED_SELECT_US:%.*]] = phi ptr [ [[ARG1]], %[[BB0]] ] +; CHECK-NEXT: [[I3_US:%.*]] = call i32 [[UNSWITCHED_SELECT_US]]() +; CHECK-NEXT: br i1 true, label %[[LOOP_US]], label %[[RET_SPLIT_US:.*]] +; CHECK: [[RET_SPLIT_US]]: +; CHECK-NEXT: [[I3_LCSSA_US:%.*]] = phi i32 [ [[I3_US]], %[[BB1]] ] +; CHECK-NEXT: br label %[[RET:.*]] +; CHECK: [[START_SPLIT]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: br label %[[BB2:.*]] +; CHECK: [[BB2]]: +; CHECK-NEXT: br i1 false, label %[[LOOP]], label %[[RET_SPLIT:.*]] +; CHECK: [[RET_SPLIT]]: +; CHECK-NEXT: [[I3_LE:%.*]] = call i32 @bar() +; CHECK-NEXT: br label %[[RET]] +; CHECK: [[RET]]: +; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[I3_LE]], %[[RET_SPLIT]] ], [ [[I3_LCSSA_US]], %[[RET_SPLIT_US]] ] +; CHECK-NEXT: ret i32 [[DOTUS_PHI]] +; +start: + br label %loop + +loop: ; preds = %loop, %bb + %i = select i1 %arg, ptr %arg1, ptr @bar + %i3 = call i32 %i() + br i1 %arg, label %loop, label %ret + +ret: ; preds = %loop + ret i32 %i3 +} + +declare i32 @bar() nounwind willreturn memory(none) diff --git a/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll new file mode 100644 index 0000000000000..51b635837d3b5 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; 
RUN: opt -S --passes=slp-vectorizer < %s | FileCheck %s + +define i32 @test(i32 %n) { +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[N]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], <i32 1, i32 2> +; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = mul nuw nsw <2 x i64> [[TMP3]], <i64 273837369, i64 273837369> +; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP7]], i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP8]] to <2 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i32 1 +; CHECK-NEXT: [[RES1:%.*]] = add i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: ret i32 [[RES1]] +; +entry: + %n1 = add i32 %n, 1 + %zn1 = zext nneg i32 %n1 to i64 + %m1 = mul nuw nsw i64 %zn1, 273837369 + %a1 = call i64 @llvm.abs.i64(i64 %m1, i1 true) + %t1 = trunc i64 %a1 to i32 + %n2 = add i32 %n, 2 + %zn2 = zext nneg i32 %n2 to i64 + %m2 = mul nuw nsw i64 %zn2, 273837369 + %a2 = call i64 @llvm.abs.i64(i64 %m2, i1 true) + %t2 = trunc i64 %a2 to i32 + %res1 = add i32 %t1, %t2 + ret i32 %res1 +} diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/pr117537.ll b/llvm/test/Transforms/SimpleLoopUnswitch/pr117537.ll new file mode 100644 index 0000000000000..fd61cfab164d3 --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/pr117537.ll @@ -0,0 +1,92 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes='print<scalar-evolution>,simple-loop-unswitch,print<scalar-evolution>' -verify-scev < %s 2>/dev/null | FileCheck %s + +; Make sure we don't assert due to insufficient SCEV invalidation.
+ +define void @test(ptr %p) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CHECK:%.*]] = icmp eq ptr [[P]], null +; CHECK-NEXT: br i1 [[CHECK]], label %[[ENTRY_SPLIT_US:.*]], label %[[ENTRY_SPLIT:.*]] +; CHECK: [[ENTRY_SPLIT_US]]: +; CHECK-NEXT: br label %[[BB0_US:.*]] +; CHECK: [[BB0_US]]: +; CHECK-NEXT: br label %[[LOOP0_US:.*]] +; CHECK: [[LOOP0_US]]: +; CHECK-NEXT: [[V_US:%.*]] = load atomic i32, ptr [[P]] unordered, align 8 +; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[V_US]], 3 +; CHECK-NEXT: br i1 true, label %[[PREHEADER_SPLIT_US:.*]], label %[[BB0_US]] +; CHECK: [[PREHEADER_SPLIT_US]]: +; CHECK-NEXT: [[ADD_LCSSA_US:%.*]] = phi i32 [ [[ADD_US]], %[[LOOP0_US]] ] +; CHECK-NEXT: br label %[[PREHEADER:.*]] +; CHECK: [[ENTRY_SPLIT]]: +; CHECK-NEXT: br label %[[BB0:.*]] +; CHECK: [[BB0]]: +; CHECK-NEXT: br label %[[LATCH:.*]] +; CHECK: [[LATCH]]: +; CHECK-NEXT: br i1 false, label %[[EXIT0:.*]], label %[[LOOP0:.*]] +; CHECK: [[EXIT0]]: +; CHECK-NEXT: ret void +; CHECK: [[LOOP0]]: +; CHECK-NEXT: [[V:%.*]] = load atomic i32, ptr [[P]] unordered, align 8 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[V]], 3 +; CHECK-NEXT: br i1 true, label %[[PREHEADER_SPLIT:.*]], label %[[BB0]] +; CHECK: [[PREHEADER_SPLIT]]: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[LOOP0]] ] +; CHECK-NEXT: br label %[[PREHEADER]] +; CHECK: [[PREHEADER]]: +; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[ADD_LCSSA]], %[[PREHEADER_SPLIT]] ], [ [[ADD_LCSSA_US]], %[[PREHEADER_SPLIT_US]] ] +; CHECK-NEXT: br label %[[LOOP1:.*]] +; CHECK: [[LOOP1]]: +; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ [[DOTUS_PHI]], %[[PREHEADER]] ], [ [[IV1_NEXT:%.*]], %[[BACKEDGE:.*]] ] +; CHECK-NEXT: [[IV1_NEXT]] = add i32 [[IV1]], -33 +; CHECK-NEXT: br label %[[LOOP2:.*]] +; CHECK: [[BACKEDGE]]: +; CHECK-NEXT: br i1 true, label %[[EXIT1:.*]], label %[[LOOP1]] +; CHECK: [[LOOP2]]: +; CHECK-NEXT: [[IV0:%.*]] = phi i32 [ [[IV1]], %[[LOOP1]] ], [ [[IV0_NEXT:%.*]], 
%[[LOOP2]] ] +; CHECK-NEXT: [[IV0_NEXT]] = add nsw i32 [[IV0]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[IV0_NEXT]], 0 +; CHECK-NEXT: br i1 [[CMP]], label %[[BACKEDGE]], label %[[LOOP2]] +; CHECK: [[EXIT1]]: +; CHECK-NEXT: ret void +; +entry: + %check = icmp eq ptr %p, null + br label %bb0 + +bb0: ; preds = %loop0, %entry + br i1 %check, label %loop0, label %latch + +latch: ; preds = %bb0 + br i1 %check, label %exit0, label %loop0 + +exit0: ; preds = %latch + ret void + +loop0: ; preds = %latch, %bb0 + %v = load atomic i32, ptr %p unordered, align 8 + %add = add i32 %v, 3 + br i1 true, label %preheader, label %bb0 + +preheader: ; preds = %loop0 + br label %loop1 + +loop1: ; preds = %backedge, %preheader + %iv1 = phi i32 [ %add, %preheader ], [ %iv1.next, %backedge ] + %iv1.next = add i32 %iv1, -33 + br label %loop2 + +backedge: ; preds = %loop2 + br i1 true, label %exit1, label %loop1 + +loop2: ; preds = %loop2, %loop1 + %iv0 = phi i32 [ %iv1, %loop1 ], [ %iv0.next, %loop2 ] + %iv0.next = add nsw i32 %iv0, 1 + %cmp = icmp sgt i32 %iv0.next, 0 + br i1 %cmp, label %backedge, label %loop2 + +exit1: ; preds = %backedge + ret void +} diff --git a/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll b/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll new file mode 100644 index 0000000000000..d3713be8358db --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll @@ -0,0 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --prefix-filecheck-ir-name pref --version 5 +; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s + +define i32 @foo.1(i32 %arg, ptr %arg1) { +; CHECK-LABEL: define i32 @foo.1( +; CHECK-SAME: i32 [[ARG:%.*]], ptr [[ARG1:%.*]]) { +; CHECK-NEXT: [[BB:.*]]: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x ptr], align 16 +; CHECK-NEXT: store ptr blockaddress(@foo.1, %[[BB8:.*]]), ptr [[ALLOCA]], align 16 +; CHECK-NEXT: 
[[GETELEMENTPTR:%.*]] = getelementptr inbounds [2 x ptr], ptr [[ALLOCA]], i64 0, i64 1 +; CHECK-NEXT: store ptr blockaddress(@foo.1, %[[BB16:.*]]), ptr [[GETELEMENTPTR]], align 8 +; CHECK-NEXT: br label %[[PREFBB2:.*]] +; CHECK: [[PREFBB2]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[PHI14:%.*]], %[[BB13:.*]] ] +; CHECK-NEXT: [[PHI3:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[PHI15:%.*]], %[[BB13]] ] +; CHECK-NEXT: switch i32 [[PHI]], label %[[BB13]] [ +; CHECK-NEXT: i32 0, label %[[PREFBB18:.*]] +; CHECK-NEXT: i32 1, label %[[BB8]] +; CHECK-NEXT: i32 2, label %[[PREFBB11:.*]] +; CHECK-NEXT: ] +; CHECK: [[BB8]]: +; CHECK-NEXT: [[PHI10:%.*]] = phi i32 [ [[ARG]], %[[PREFBB18]] ], [ [[PHI3]], %[[PREFBB2]] ] +; CHECK-NEXT: br label %[[BB13]] +; CHECK: [[PREFBB11]]: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @wombat(i32 noundef [[PHI3]]) +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[PHI3]], 1 +; CHECK-NEXT: br label %[[PREFBB18]] +; CHECK: [[BB13]]: +; CHECK-NEXT: [[PHI14]] = phi i32 [ [[PHI]], %[[PREFBB2]] ], [ 2, %[[BB8]] ] +; CHECK-NEXT: [[PHI15]] = phi i32 [ [[PHI3]], %[[PREFBB2]] ], [ [[PHI10]], %[[BB8]] ] +; CHECK-NEXT: br label %[[PREFBB2]] +; CHECK: [[BB16]]: +; CHECK-NEXT: [[CALL17:%.*]] = call i32 @wombat(i32 noundef [[ARG]]) +; CHECK-NEXT: ret i32 0 +; CHECK: [[PREFBB18]]: +; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[ARG1]], align 8 +; CHECK-NEXT: indirectbr ptr [[LOAD]], [label %[[BB8]], label %bb16] +; +bb: + %alloca = alloca [2 x ptr], align 16 + store ptr blockaddress(@foo.1, %bb8), ptr %alloca, align 16 + %getelementptr = getelementptr inbounds [2 x ptr], ptr %alloca, i64 0, i64 1 + store ptr blockaddress(@foo.1, %bb16), ptr %getelementptr, align 8 + br label %bb2 + +bb2: ; preds = %bb13, %bb + %phi = phi i32 [ 0, %bb ], [ %phi14, %bb13 ] + %phi3 = phi i32 [ 0, %bb ], [ %phi15, %bb13 ] + switch i32 %phi, label %bb13 [ + i32 0, label %bb5 + i32 1, label %bb8 + i32 2, label %bb11 + ] + +bb5: ; preds = %bb2 + br label %bb18 + +bb8: ; preds = %bb18, %bb2 + 
%phi10 = phi i32 [ %arg, %bb18 ], [ %phi3, %bb2 ] + br label %bb13 + +bb11: ; preds = %bb2 + %call = call i32 @wombat(i32 noundef %phi3) + %add = add nsw i32 %phi3, 1 + br label %bb18 + +bb13: ; preds = %bb8, %bb2 + %phi14 = phi i32 [ %phi, %bb2 ], [ 2, %bb8 ] + %phi15 = phi i32 [ %phi3, %bb2 ], [ %phi10, %bb8 ] + br label %bb2 + +bb16: ; preds = %bb18 + %call17 = call i32 @wombat(i32 noundef %arg) + ret i32 0 + +bb18: ; preds = %bb11, %bb5 + %load = load ptr, ptr %arg1, align 8 + indirectbr ptr %load, [label %bb8, label %bb16] +} + +declare i32 @wombat(i32) diff --git a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp index a8cecca0d4a54..ca71569008d5e 100644 --- a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp @@ -3042,6 +3042,14 @@ static bool SimplifyTree(TreePatternNodePtr &N) { !N->getExtType(0).empty() && N->getExtType(0) == N->getChild(0).getExtType(0) && N->getName().empty()) { + if (!N->getPredicateCalls().empty()) { + std::string Str; + raw_string_ostream OS(Str); + OS << *N + << "\n trivial bitconvert node should not have predicate calls\n"; + PrintFatalError(Str); + return false; + } N = N->getChildShared(0); SimplifyTree(N); return true; diff --git a/llvm/utils/gn/secondary/llvm/version.gni b/llvm/utils/gn/secondary/llvm/version.gni index 0c2804f70a147..c46d2abdb8ef2 100644 --- a/llvm/utils/gn/secondary/llvm/version.gni +++ b/llvm/utils/gn/secondary/llvm/version.gni @@ -1,4 +1,4 @@ llvm_version_major = 19 llvm_version_minor = 1 -llvm_version_patch = 4 +llvm_version_patch = 6 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch" diff --git a/llvm/utils/lit/lit/__init__.py b/llvm/utils/lit/lit/__init__.py index 81b74db977b08..ee0a3b2240e1e 100644 --- a/llvm/utils/lit/lit/__init__.py +++ b/llvm/utils/lit/lit/__init__.py @@ -2,7 +2,7 @@ __author__ = "Daniel Dunbar" __email__ = "daniel@minormatter.com" -__versioninfo__ = 
(19, 1, 4) +__versioninfo__ = (19, 1, 6) __version__ = ".".join(str(v) for v in __versioninfo__) + "dev" __all__ = [] diff --git a/llvm/utils/mlgo-utils/mlgo/__init__.py b/llvm/utils/mlgo-utils/mlgo/__init__.py index 77fe60a0b1590..cec9ca8b2f648 100644 --- a/llvm/utils/mlgo-utils/mlgo/__init__.py +++ b/llvm/utils/mlgo-utils/mlgo/__init__.py @@ -4,7 +4,7 @@ from datetime import timezone, datetime -__versioninfo__ = (19, 1, 4) +__versioninfo__ = (19, 1, 6) __version__ = ( ".".join(str(v) for v in __versioninfo__) + "dev" diff --git a/offload/CMakeLists.txt b/offload/CMakeLists.txt index 4cd97a6a5ff63..959d6260bc749 100644 --- a/offload/CMakeLists.txt +++ b/offload/CMakeLists.txt @@ -127,6 +127,7 @@ include(LibomptargetGetDependencies) # Set up testing infrastructure. include(OpenMPTesting) +include(CheckCXXCompilerFlag) check_cxx_compiler_flag(-Werror=global-constructors OFFLOAD_HAVE_WERROR_CTOR) # LLVM source tree is required at build time for libomptarget @@ -282,6 +283,26 @@ if(OPENMP_STANDALONE_BUILD) ${LLVM_LIBRARY_DIRS} REQUIRED ) + + find_path ( + LIBOMP_INCLUDE_DIR + NAMES + omp.h + HINTS + ${COMPILER_RESOURCE_DIR}/include + ${CMAKE_INSTALL_PREFIX}/include + ) + + get_filename_component(LIBOMP_LIBRARY_DIR ${LIBOMP_STANDALONE} DIRECTORY) + + set(OPENMP_TEST_FLAGS "" CACHE STRING + "Extra compiler flags to send to the test compiler.") + set(OPENMP_TEST_OPENMP_FLAGS ${OPENMP_TEST_COMPILER_OPENMP_FLAGS} CACHE STRING + "OpenMP compiler flag to use for testing OpenMP runtime libraries.") + set(LIBOMPTARGET_OPENMP_HEADER_FOLDER "${LIBOMP_INCLUDE_DIR}" CACHE STRING + "Path to folder containing omp.h") + set(LIBOMPTARGET_OPENMP_HOST_RTL_FOLDER "${LIBOMP_LIBRARY_DIR}" CACHE STRING + "Path to folder containing libomp.so, and libLLVMSupport.so with profiling enabled") endif() macro(pythonize_bool var) diff --git a/offload/cmake/OpenMPTesting.cmake b/offload/cmake/OpenMPTesting.cmake index 11eafeb764260..3e04a3423c4d6 100644 --- a/offload/cmake/OpenMPTesting.cmake +++ 
b/offload/cmake/OpenMPTesting.cmake @@ -124,7 +124,7 @@ if (${OPENMP_STANDALONE_BUILD}) # project is built which is too late for detecting the compiler... file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/DetectTestCompiler) execute_process( - COMMAND ${CMAKE_COMMAND} -G${CMAKE_GENERATOR} ${CMAKE_CURRENT_LIST_DIR}/DetectTestCompiler + COMMAND ${CMAKE_COMMAND} -G${CMAKE_GENERATOR} ${CMAKE_CURRENT_SOURCE_DIR}/../openmp/cmake/DetectTestCompiler -DCMAKE_C_COMPILER=${OPENMP_TEST_C_COMPILER} -DCMAKE_CXX_COMPILER=${OPENMP_TEST_CXX_COMPILER} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/DetectTestCompiler diff --git a/offload/plugins-nextgen/common/CMakeLists.txt b/offload/plugins-nextgen/common/CMakeLists.txt index 284f98875170c..aea20c6ec3143 100644 --- a/offload/plugins-nextgen/common/CMakeLists.txt +++ b/offload/plugins-nextgen/common/CMakeLists.txt @@ -11,13 +11,15 @@ add_dependencies(PluginCommon intrinsics_gen) # Only enable JIT for those targets that LLVM can support. set(supported_jit_targets AMDGPU NVPTX) -foreach(target IN LISTS supported_jit_targets) - if("${target}" IN_LIST LLVM_TARGETS_TO_BUILD) - target_compile_definitions(PluginCommon PRIVATE "LIBOMPTARGET_JIT_${target}") - llvm_map_components_to_libnames(llvm_libs ${target}) - target_link_libraries(PluginCommon PRIVATE ${llvm_libs}) - endif() -endforeach() +if (NOT LLVM_LINK_LLVM_DYLIB) + foreach(target IN LISTS supported_jit_targets) + if("${target}" IN_LIST LLVM_TARGETS_TO_BUILD) + target_compile_definitions(PluginCommon PRIVATE "LIBOMPTARGET_JIT_${target}") + llvm_map_components_to_libnames(llvm_libs ${target}) + target_link_libraries(PluginCommon PRIVATE ${llvm_libs}) + endif() + endforeach() +endif() # Include the RPC server from the `libc` project if availible. 
if(TARGET llvmlibc_rpc_server AND ${LIBOMPTARGET_GPU_LIBC_SUPPORT}) diff --git a/offload/test/CMakeLists.txt b/offload/test/CMakeLists.txt index 3ac5d7907e2cc..c90ed26389faf 100644 --- a/offload/test/CMakeLists.txt +++ b/offload/test/CMakeLists.txt @@ -22,6 +22,11 @@ if(CUDAToolkit_FOUND) get_filename_component(CUDA_LIBDIR "${CUDA_cudart_static_LIBRARY}" DIRECTORY) endif() +set(OMP_DEPEND) +if(TARGET omp) + set(OMP_DEPEND omp) +endif() + string(REGEX MATCHALL "([^\ ]+\ |[^\ ]+$)" SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS}") foreach(CURRENT_TARGET IN LISTS SYSTEM_TARGETS) string(STRIP "${CURRENT_TARGET}" CURRENT_TARGET) @@ -29,7 +34,7 @@ foreach(CURRENT_TARGET IN LISTS SYSTEM_TARGETS) add_offload_testsuite(check-libomptarget-${CURRENT_TARGET} "Running libomptarget tests" ${CMAKE_CURRENT_BINARY_DIR}/${CURRENT_TARGET} - DEPENDS omptarget omp ${LIBOMPTARGET_TESTED_PLUGINS} + DEPENDS omptarget ${OMP_DEPEND} ${LIBOMPTARGET_TESTED_PLUGINS} ARGS ${LIBOMPTARGET_LIT_ARG_LIST}) list(APPEND LIBOMPTARGET_LIT_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CURRENT_TARGET}) @@ -43,12 +48,12 @@ add_offload_testsuite(check-libomptarget "Running libomptarget tests" ${LIBOMPTARGET_LIT_TESTSUITES} EXCLUDE_FROM_CHECK_ALL - DEPENDS omptarget omp ${LIBOMPTARGET_TESTED_PLUGINS} + DEPENDS omptarget ${OMP_DEPEND} ${LIBOMPTARGET_TESTED_PLUGINS} ARGS ${LIBOMPTARGET_LIT_ARG_LIST}) add_offload_testsuite(check-offload "Running libomptarget tests" ${LIBOMPTARGET_LIT_TESTSUITES} EXCLUDE_FROM_CHECK_ALL - DEPENDS omptarget omp ${LIBOMPTARGET_TESTED_PLUGINS} + DEPENDS omptarget ${OMP_DEPEND} ${LIBOMPTARGET_TESTED_PLUGINS} ARGS ${LIBOMPTARGET_LIT_ARG_LIST})