From 51dee6b64fda7c49a68af78555135c8f51a308cf Mon Sep 17 00:00:00 2001 From: Tobias Hieta Date: Tue, 19 Nov 2024 13:43:39 +0100 Subject: [PATCH 01/38] Bump version to 19.1.5 --- cmake/Modules/LLVMVersion.cmake | 2 +- libcxx/include/__config | 2 +- llvm/utils/gn/secondary/llvm/version.gni | 2 +- llvm/utils/lit/lit/__init__.py | 2 +- llvm/utils/mlgo-utils/mlgo/__init__.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake index 6ccb934aef436..9b39550118c49 100644 --- a/cmake/Modules/LLVMVersion.cmake +++ b/cmake/Modules/LLVMVersion.cmake @@ -7,7 +7,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 1) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 4) + set(LLVM_VERSION_PATCH 5) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX) diff --git a/libcxx/include/__config b/libcxx/include/__config index a929db5d0f2d1..33e0043136fee 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -27,7 +27,7 @@ // _LIBCPP_VERSION represents the version of libc++, which matches the version of LLVM. // Given a LLVM release LLVM XX.YY.ZZ (e.g. LLVM 17.0.1 == 17.00.01), _LIBCPP_VERSION is // defined to XXYYZZ. -# define _LIBCPP_VERSION 190104 +# define _LIBCPP_VERSION 190105 # define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y # define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y) diff --git a/llvm/utils/gn/secondary/llvm/version.gni b/llvm/utils/gn/secondary/llvm/version.gni index 0c2804f70a147..c32a040dcba67 100644 --- a/llvm/utils/gn/secondary/llvm/version.gni +++ b/llvm/utils/gn/secondary/llvm/version.gni @@ -1,4 +1,4 @@ llvm_version_major = 19 llvm_version_minor = 1 -llvm_version_patch = 4 +llvm_version_patch = 5 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch" diff --git a/llvm/utils/lit/lit/__init__.py b/llvm/utils/lit/lit/__init__.py index 81b74db977b08..8557e5a061d1d 100644 --- a/llvm/utils/lit/lit/__init__.py +++ b/llvm/utils/lit/lit/__init__.py @@ -2,7 +2,7 @@ __author__ = "Daniel Dunbar" __email__ = "daniel@minormatter.com" -__versioninfo__ = (19, 1, 4) +__versioninfo__ = (19, 1, 5) __version__ = ".".join(str(v) for v in __versioninfo__) + "dev" __all__ = [] diff --git a/llvm/utils/mlgo-utils/mlgo/__init__.py b/llvm/utils/mlgo-utils/mlgo/__init__.py index 77fe60a0b1590..e2f8ca88d91ec 100644 --- a/llvm/utils/mlgo-utils/mlgo/__init__.py +++ b/llvm/utils/mlgo-utils/mlgo/__init__.py @@ -4,7 +4,7 @@ from datetime import timezone, datetime -__versioninfo__ = (19, 1, 4) +__versioninfo__ = (19, 1, 5) __version__ = ( ".".join(str(v) for v in __versioninfo__) + "dev" From 02930b87faeb490505b22f588757a18744248b6f Mon Sep 17 00:00:00 2001 From: Manasij Mukherjee Date: Fri, 4 Oct 2024 15:15:30 -0600 Subject: [PATCH 02/38] [NVPTX] Promote v2i8 to v2i16 (#111189) Promote v2i8 to v2i16, fixes a crash. Re-enable a test in NVPTX/vector-returns.ll Partial cherry-pick of fda2fea w/o the test which does not exist in release/19.x https://github.com/llvm/llvm-project/issues/104864 --- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 6975412ce5d35..b2153a7afe736 100644 --- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -229,6 +229,10 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL, // v*i8 are formally lowered as v4i8 EltVT = MVT::v4i8; NumElts = (NumElts + 3) / 4; + } else if (EltVT.getSimpleVT() == MVT::i8 && NumElts == 2) { + // v2i8 is promoted to v2i16 + NumElts = 1; + EltVT = MVT::v2i16; } for (unsigned j = 0; j != NumElts; ++j) { ValueVTs.push_back(EltVT); From e032d7ad80f8d185626b39751f672932f92dc033 Mon Sep 17 00:00:00 2001 From: Anutosh Bhat Date: Tue, 19 Nov 2024 13:37:40 +0530 Subject: [PATCH 03/38] [clang-repl] Improve flags responsible for generating shared wasm binaries (#116735) There are a couple changes in this PR that help getting clang-repl to run in the browser. Using a jupyterlite instance for the example pasted below 1) Updating flags responsible for generating shared wasm binaries that need to be dynamically loaded Most Importantly as can be seen in the changes `shared` and `allow-undefined` are crucial. ![image](https://github.com/user-attachments/assets/1183fd44-8951-496a-899a-e4af39a48447) 2) While exiting we encounter this. ![image](https://github.com/user-attachments/assets/9487a3f4-7200-471d-ba88-09e98ccbc47a) Now as can be seen here https://github.com/llvm/llvm-project/blob/cd418030de7ae75750bc4e48d1238baf03c675e5/clang/lib/Interpreter/Interpreter.cpp#L421-L430 We call cleanUP in the destructor. Now cleanUP through IncrementalExecutor tries to deinitialize the JIT which wasn't even intialized as runCtors in wasm.cpp is a no-op https://github.com/llvm/llvm-project/blob/cd418030de7ae75750bc4e48d1238baf03c675e5/clang/lib/Interpreter/IncrementalExecutor.cpp#L94-L101 https://github.com/llvm/llvm-project/blob/cd418030de7ae75750bc4e48d1238baf03c675e5/clang/lib/Interpreter/Wasm.cpp#L107-L109 (cherry picked from commit 752dbd6112affa418e33910ac08bf9921f9c270b) --- clang/lib/Interpreter/IncrementalExecutor.h | 2 +- clang/lib/Interpreter/Interpreter.cpp | 1 - clang/lib/Interpreter/Wasm.cpp | 10 ++++++++-- clang/lib/Interpreter/Wasm.h | 1 + 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/clang/lib/Interpreter/IncrementalExecutor.h b/clang/lib/Interpreter/IncrementalExecutor.h index 7954cde36588b..dbd61f0b8b1eb 100644 --- a/clang/lib/Interpreter/IncrementalExecutor.h +++ b/clang/lib/Interpreter/IncrementalExecutor.h @@ -56,7 +56,7 @@ class IncrementalExecutor { virtual llvm::Error addModule(PartialTranslationUnit &PTU); virtual llvm::Error removeModule(PartialTranslationUnit &PTU); virtual llvm::Error runCtors() const; - llvm::Error cleanUp(); + virtual llvm::Error cleanUp(); llvm::Expected getSymbolAddress(llvm::StringRef Name, SymbolNameKind NameKind) const; diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index b4882ab5d2236..c0b8bfebb4343 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -192,7 +192,6 @@ IncrementalCompilerBuilder::CreateCpp() { #ifdef __EMSCRIPTEN__ Argv.push_back("-target"); Argv.push_back("wasm32-unknown-emscripten"); - Argv.push_back("-pie"); Argv.push_back("-shared"); #endif Argv.insert(Argv.end(), UserArgs.begin(), UserArgs.end()); diff --git a/clang/lib/Interpreter/Wasm.cpp b/clang/lib/Interpreter/Wasm.cpp index 1001410aa0f27..79efbaa03982d 100644 --- a/clang/lib/Interpreter/Wasm.cpp +++ b/clang/lib/Interpreter/Wasm.cpp @@ -72,13 +72,13 @@ llvm::Error WasmIncrementalExecutor::addModule(PartialTranslationUnit &PTU) { OutputFile.close(); std::vector LinkerArgs = {"wasm-ld", - "-pie", + "-shared", "--import-memory", "--no-entry", "--export-all", "--experimental-pic", - "--no-export-dynamic", "--stack-first", + "--allow-undefined", OutputFileName.c_str(), "-o", OutputFileName.c_str()}; @@ -109,6 +109,12 @@ llvm::Error WasmIncrementalExecutor::runCtors() const { return llvm::Error::success(); } +llvm::Error WasmIncrementalExecutor::cleanUp() const { + // Can't call cleanUp through IncrementalExecutor as it + // tries to deinitialize JIT which hasn't been initialized + return llvm::Error::success(); +} + WasmIncrementalExecutor::~WasmIncrementalExecutor() = default; } // namespace clang diff --git a/clang/lib/Interpreter/Wasm.h b/clang/lib/Interpreter/Wasm.h index b1fd88024f14d..4632613326d39 100644 --- a/clang/lib/Interpreter/Wasm.h +++ b/clang/lib/Interpreter/Wasm.h @@ -28,6 +28,7 @@ class WasmIncrementalExecutor : public IncrementalExecutor { llvm::Error addModule(PartialTranslationUnit &PTU) override; llvm::Error removeModule(PartialTranslationUnit &PTU) override; llvm::Error runCtors() const override; + llvm::Error cleanUp() override; ~WasmIncrementalExecutor() override; }; From fb6b195cae03ba6e5b50870031d710ca6886c5bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Sun, 20 Oct 2024 13:51:50 +0300 Subject: [PATCH 04/38] [compiler-rt] [test] Remove an unintended grep parameter This parameter seems unintentional here; we're trying to grep the input on stdin, from the earlier stage in the pipeline. Since a recent update on Github Actions runners, the previous form (grepping a file, while piping in data on stdin) would fail running the test, with the test runner Python script throwing an exception when evaluating it: File "D:\a\llvm-mingw\llvm-mingw\llvm-project\llvm\utils\lit\lit\TestRunner.py", line 935, in _executeShCmd out = procs[i].stdout.read() ^^^^^^^^^^^^^^^^^^^^^^ File "C:\hostedtoolcache\windows\Python\3.12.7\x64\Lib\encodings\cp1252.py", line 23, in decode return codecs.charmap_decode(input,self.errors,decoding_table)[0] ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ TypeError: a bytes-like object is required, not 'NoneType' (cherry picked from commit c2717a89b8437d041d532c7b2c535ca4f4b35872) --- compiler-rt/test/asan/TestCases/Windows/delay_dbghelp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/test/asan/TestCases/Windows/delay_dbghelp.cpp b/compiler-rt/test/asan/TestCases/Windows/delay_dbghelp.cpp index 9277fe0b23516..38e99cf685945 100644 --- a/compiler-rt/test/asan/TestCases/Windows/delay_dbghelp.cpp +++ b/compiler-rt/test/asan/TestCases/Windows/delay_dbghelp.cpp @@ -9,7 +9,7 @@ // static build, there won't be any clang_rt DLLs. // RUN: not grep cl""ang_rt %t || \ // RUN: grep cl""ang_rt %t | xargs which | \ -// RUN: xargs llvm-readobj --coff-imports | not grep dbghelp.dll %t +// RUN: xargs llvm-readobj --coff-imports | not grep dbghelp.dll extern "C" int puts(const char *); From ea960400213ad6a619d40536ae8965fca70eac89 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 16 Nov 2024 20:55:33 -0800 Subject: [PATCH 05/38] [Mips] Change vsplat_imm_eq_1 to a ComplexPattern. (#116471) Resolves a FIXME and avoids needing to workaround #116075. Adding parentheses around the (vsplat_imm_eq_1) fixes the error cited in the FIXME by changing the ComplexPattern from a leaf node to an operator. (cherry picked from commit 2f4572f5e7e2d7f4626e825404c11f07d191fb05) --- llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp | 4 ++ llvm/lib/Target/Mips/MipsISelDAGToDAG.h | 3 ++ llvm/lib/Target/Mips/MipsMSAInstrInfo.td | 59 ++++++++------------- llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 12 +++++ llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h | 3 ++ 5 files changed, 43 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp index f6f32fde3b777..a9ffd2bedf21e 100644 --- a/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -220,6 +220,10 @@ bool MipsDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const { return false; } +bool MipsDAGToDAGISel::selectVSplatImmEq1(SDValue N) const { + llvm_unreachable("Unimplemented function."); +} + /// Convert vector addition with vector subtraction if that allows to encode /// constant as an immediate and thus avoid extra 'ldi' instruction. /// add X, <-1, -1...> --> sub X, <1, 1...> diff --git a/llvm/lib/Target/Mips/MipsISelDAGToDAG.h b/llvm/lib/Target/Mips/MipsISelDAGToDAG.h index 6135f96807854..3485300a782c9 100644 --- a/llvm/lib/Target/Mips/MipsISelDAGToDAG.h +++ b/llvm/lib/Target/Mips/MipsISelDAGToDAG.h @@ -120,6 +120,9 @@ class MipsDAGToDAGISel : public SelectionDAGISel { /// starting at bit zero. virtual bool selectVSplatMaskR(SDValue N, SDValue &Imm) const; + /// Select constant vector splats whose value is 1. + virtual bool selectVSplatImmEq1(SDValue N) const; + /// Convert vector addition with vector subtraction if that allows to encode /// constant as an immediate and thus avoid extra 'ldi' instruction. /// add X, <-1, -1...> --> sub X, <1, 1...> diff --git a/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/llvm/lib/Target/Mips/MipsMSAInstrInfo.td index c4abccb24c6f3..f4c32c9dcd421 100644 --- a/llvm/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/llvm/lib/Target/Mips/MipsMSAInstrInfo.td @@ -198,14 +198,8 @@ def vsplati32 : PatFrag<(ops node:$e0), (v4i32 (build_vector node:$e0, node:$e0, node:$e0, node:$e0))>; -def vsplati64_imm_eq_1 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{ - APInt Imm; - SDNode *BV = N->getOperand(0).getNode(); - EVT EltTy = N->getValueType(0).getVectorElementType(); - - return selectVSplat(BV, Imm, EltTy.getSizeInBits()) && - Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; -}]>; +// Any build_vector that is a constant splat with a value that equals 1 +def vsplat_imm_eq_1 : ComplexPattern; def vsplati64 : PatFrag<(ops node:$e0), (v2i64 (build_vector node:$e0, node:$e0))>; @@ -217,7 +211,7 @@ def vsplati64_splat_d : PatFrag<(ops node:$e0), node:$e0, node:$e0, node:$e0)), - vsplati64_imm_eq_1))))>; + (vsplat_imm_eq_1)))))>; def vsplatf32 : PatFrag<(ops node:$e0), (v4f32 (build_vector node:$e0, node:$e0, @@ -352,46 +346,35 @@ def vsplat_maskr_bits_uimm6 : SplatComplexPattern; -// Any build_vector that is a constant splat with a value that equals 1 -// FIXME: These should be a ComplexPattern but we can't use them because the -// ISel generator requires the uses to have a name, but providing a name -// causes other errors ("used in pattern but not operand list") -def vsplat_imm_eq_1 : PatLeaf<(build_vector), [{ - APInt Imm; - EVT EltTy = N->getValueType(0).getVectorElementType(); - - return selectVSplat(N, Imm, EltTy.getSizeInBits()) && - Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1; -}]>; def vbclr_b : PatFrag<(ops node:$ws, node:$wt), - (and node:$ws, (vnot (shl vsplat_imm_eq_1, node:$wt)))>; + (and node:$ws, (vnot (shl (vsplat_imm_eq_1), node:$wt)))>; def vbclr_h : PatFrag<(ops node:$ws, node:$wt), - (and node:$ws, (vnot (shl vsplat_imm_eq_1, node:$wt)))>; + (and node:$ws, (vnot (shl (vsplat_imm_eq_1), node:$wt)))>; def vbclr_w : PatFrag<(ops node:$ws, node:$wt), - (and node:$ws, (vnot (shl vsplat_imm_eq_1, node:$wt)))>; + (and node:$ws, (vnot (shl (vsplat_imm_eq_1), node:$wt)))>; def vbclr_d : PatFrag<(ops node:$ws, node:$wt), - (and node:$ws, (vnot (shl (v2i64 vsplati64_imm_eq_1), + (and node:$ws, (vnot (shl (v2i64 (vsplat_imm_eq_1)), node:$wt)))>; def vbneg_b : PatFrag<(ops node:$ws, node:$wt), - (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; + (xor node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>; def vbneg_h : PatFrag<(ops node:$ws, node:$wt), - (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; + (xor node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>; def vbneg_w : PatFrag<(ops node:$ws, node:$wt), - (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; + (xor node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>; def vbneg_d : PatFrag<(ops node:$ws, node:$wt), - (xor node:$ws, (shl (v2i64 vsplati64_imm_eq_1), + (xor node:$ws, (shl (v2i64 (vsplat_imm_eq_1)), node:$wt))>; def vbset_b : PatFrag<(ops node:$ws, node:$wt), - (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; + (or node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>; def vbset_h : PatFrag<(ops node:$ws, node:$wt), - (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; + (or node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>; def vbset_w : PatFrag<(ops node:$ws, node:$wt), - (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>; + (or node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>; def vbset_d : PatFrag<(ops node:$ws, node:$wt), - (or node:$ws, (shl (v2i64 vsplati64_imm_eq_1), + (or node:$ws, (shl (v2i64 (vsplat_imm_eq_1)), node:$wt))>; def muladd : PatFrag<(ops node:$wd, node:$ws, node:$wt), @@ -3842,7 +3825,7 @@ class MSAShiftPat : (VT (Insn VT:$ws, VT:$wt))>; class MSABitPat : - MSAPat<(VT (Node VT:$ws, (shl vsplat_imm_eq_1, (Frag VT:$wt)))), + MSAPat<(VT (Node VT:$ws, (shl (vsplat_imm_eq_1), (Frag VT:$wt)))), (VT (Insn VT:$ws, VT:$wt))>; multiclass MSAShiftPats { @@ -3861,7 +3844,7 @@ multiclass MSABitPats { def : MSABitPat(Insn#_B), vsplati8imm7>; def : MSABitPat(Insn#_H), vsplati16imm15>; def : MSABitPat(Insn#_W), vsplati32imm31>; - def : MSAPat<(Node v2i64:$ws, (shl (v2i64 vsplati64_imm_eq_1), + def : MSAPat<(Node v2i64:$ws, (shl (v2i64 (vsplat_imm_eq_1)), (vsplati64imm63 v2i64:$wt))), (v2i64 (!cast(Insn#_D) v2i64:$ws, v2i64:$wt))>; } @@ -3872,16 +3855,16 @@ defm : MSAShiftPats; defm : MSABitPats; defm : MSABitPats; -def : MSAPat<(and v16i8:$ws, (vnot (shl vsplat_imm_eq_1, +def : MSAPat<(and v16i8:$ws, (vnot (shl (vsplat_imm_eq_1), (vsplati8imm7 v16i8:$wt)))), (v16i8 (BCLR_B v16i8:$ws, v16i8:$wt))>; -def : MSAPat<(and v8i16:$ws, (vnot (shl vsplat_imm_eq_1, +def : MSAPat<(and v8i16:$ws, (vnot (shl (vsplat_imm_eq_1), (vsplati16imm15 v8i16:$wt)))), (v8i16 (BCLR_H v8i16:$ws, v8i16:$wt))>; -def : MSAPat<(and v4i32:$ws, (vnot (shl vsplat_imm_eq_1, +def : MSAPat<(and v4i32:$ws, (vnot (shl (vsplat_imm_eq_1), (vsplati32imm31 v4i32:$wt)))), (v4i32 (BCLR_W v4i32:$ws, v4i32:$wt))>; -def : MSAPat<(and v2i64:$ws, (vnot (shl (v2i64 vsplati64_imm_eq_1), +def : MSAPat<(and v2i64:$ws, (vnot (shl (v2i64 (vsplat_imm_eq_1)), (vsplati64imm63 v2i64:$wt)))), (v2i64 (BCLR_D v2i64:$ws, v2i64:$wt))>; diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 7ad300c6cccd4..66c034a889c60 100644 --- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -730,6 +730,18 @@ bool MipsSEDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N, return false; } +// Select const vector splat of 1. +bool MipsSEDAGToDAGISel::selectVSplatImmEq1(SDValue N) const { + APInt ImmValue; + EVT EltTy = N->getValueType(0).getVectorElementType(); + + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + + return selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) && + ImmValue.getBitWidth() == EltTy.getSizeInBits() && ImmValue == 1; +} + bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) { unsigned Opcode = Node->getOpcode(); SDLoc DL(Node); diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h index 7b843b0e0b255..22d8e924ac534 100644 --- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -124,6 +124,9 @@ class MipsSEDAGToDAGISel : public MipsDAGToDAGISel { /// starting at bit zero. bool selectVSplatMaskR(SDValue N, SDValue &Imm) const override; + /// Select constant vector splats whose value is 1. + bool selectVSplatImmEq1(SDValue N) const override; + bool trySelect(SDNode *Node) override; // Emits proper ABI for _mcount profiling calls. From 3d12f45e50b68ac908ef05571e5cc52f4b966d94 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 19 Nov 2024 21:24:40 +0800 Subject: [PATCH 06/38] [SDAG][ISel][TableGen][LoongArch] Report error for trivial bitcasts when there are predicate calls (#116075) On loongarch64 with lsx extension, we select `VBITREV_W` for `v4i32 (xor X, (shl splat(1), Y))`: https://github.com/llvm/llvm-project/blob/8e6630391699116641cf390a10476295b7d4b95c/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td#L1583-L1584 And `vsplat_imm_eq_1` is defined as: https://github.com/llvm/llvm-project/blob/8e6630391699116641cf390a10476295b7d4b95c/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td#L77-L87 For the `(bitconvert (v4i32 (build_vector)))` case, the pattern is expected to be: ``` PATTERN: (xor:{ *:[v4i32] } v4i32:{ *:[v4i32] }:$vj, (shl:{ *:[v4i32] } (bitconvert:{ *:[v4i32] } (build_vector:{ *:[v4i32] }))<>, v4i32:{ *:[v4i32] }:$vk)) RESULT: (VBITREV_W:{ *:[v4i32] } v4i32:{ *:[v4i32] }:$vj, v4i32:{ *:[v4i32] }:$vk) ``` However, `simplifyTree` drops the `bitconvert` node and its predicates: https://github.com/llvm/llvm-project/blob/8e6630391699116641cf390a10476295b7d4b95c/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp#L3036-L3062 Then llvm will match `vsplat_imm_eq_1` for any v4i32 splats and cause a miscompilation: ``` PATTERN: (xor:{ *:[v4i32] } v4i32:{ *:[v4i32] }:$vj, (shl:{ *:[v4i32] } (build_vector:{ *:[v4i32] }), v4i32:{ *:[v4i32] }:$vk)) RESULT: (VBITREV_W:{ *:[v4i32] } v4i32:{ *:[v4i32] }:$vj, v4i32:{ *:[v4i32] }:$vk) ``` This patch adds additional checks for predicates associated with the trivial bitconvert node. Unused patterns in the LoongArch target are also removed. Fixes https://github.com/llvm/llvm-project/issues/116008. (cherry picked from commit c727b48287cc96888f9e262f23d53cf635cf3b3d) --- .../Target/LoongArch/LoongArchLSXInstrInfo.td | 6 ++---- llvm/test/CodeGen/LoongArch/lsx/pr116008.ll | 17 +++++++++++++++++ .../TableGen/Common/CodeGenDAGPatterns.cpp | 8 ++++++++ 3 files changed, 27 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/LoongArch/lsx/pr116008.ll diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 0580683c3ce30..0233baecf6dd9 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -67,8 +67,7 @@ class VecCondgetValueType(0).getVectorElementType(); @@ -109,8 +108,7 @@ def vsplati32_imm_eq_31 : PatFrags<(ops), [(build_vector)], [{ return selectVSplat(N, Imm, EltTy.getSizeInBits()) && Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 31; }]>; -def vsplati64_imm_eq_63 : PatFrags<(ops), [(build_vector), - (bitconvert (v4i32 (build_vector)))], [{ +def vsplati64_imm_eq_63 : PatFrags<(ops), [(build_vector)], [{ APInt Imm; EVT EltTy = N->getValueType(0).getVectorElementType(); diff --git a/llvm/test/CodeGen/LoongArch/lsx/pr116008.ll b/llvm/test/CodeGen/LoongArch/lsx/pr116008.ll new file mode 100644 index 0000000000000..ba8ffc3493189 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/lsx/pr116008.ll @@ -0,0 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s + +define <4 x i32> @xor_shl_splat_vec_one(i32 %x, <4 x i32> %y) nounwind { +; CHECK-LABEL: xor_shl_splat_vec_one: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vreplgr2vr.w $vr1, $a0 +; CHECK-NEXT: vsll.w $vr0, $vr1, $vr0 +; CHECK-NEXT: vbitrevi.w $vr0, $vr0, 0 +; CHECK-NEXT: ret +entry: + %ins = insertelement <4 x i32> poison, i32 %x, i64 0 + %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer + %shl = shl <4 x i32> %splat, %y + %xor = xor <4 x i32> %shl, splat (i32 1) + ret <4 x i32> %xor +} diff --git a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp index a8cecca0d4a54..ca71569008d5e 100644 --- a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp @@ -3042,6 +3042,14 @@ static bool SimplifyTree(TreePatternNodePtr &N) { !N->getExtType(0).empty() && N->getExtType(0) == N->getChild(0).getExtType(0) && N->getName().empty()) { + if (!N->getPredicateCalls().empty()) { + std::string Str; + raw_string_ostream OS(Str); + OS << *N + << "\n trivial bitconvert node should not have predicate calls\n"; + PrintFatalError(Str); + return false; + } N = N->getChildShared(0); SimplifyTree(N); return true; From f9ae37c670d4bcf4713278ac94d2c8991a326f9e Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 19 Nov 2024 22:17:24 +0800 Subject: [PATCH 07/38] [InstCombine] Handle constant GEP expr in `SimplifyDemandedUseBits` (#116794) Closes https://github.com/llvm/llvm-project/issues/116775. (cherry picked from commit 03d8831fa8ef5b7e32172c718b550a454645faea) --- .../InstCombine/InstCombineSimplifyDemanded.cpp | 2 +- llvm/test/Transforms/InstCombine/ptrmask.ll | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 8a6ec3076ac62..b9d06b5936850 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1004,7 +1004,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I, uint64_t MaskedGEPIndex = HighBitsGEPIndex | MaskedLowBitsGEPIndex; if (MaskedGEPIndex != GEPIndex) { - auto *GEP = cast(II->getArgOperand(0)); + auto *GEP = cast(II->getArgOperand(0)); Builder.SetInsertPoint(I); Type *GEPIndexType = DL.getIndexType(GEP->getPointerOperand()->getType()); diff --git a/llvm/test/Transforms/InstCombine/ptrmask.ll b/llvm/test/Transforms/InstCombine/ptrmask.ll index 4631b81cd1ce1..cd998bac3f9f0 100644 --- a/llvm/test/Transforms/InstCombine/ptrmask.ll +++ b/llvm/test/Transforms/InstCombine/ptrmask.ll @@ -578,3 +578,16 @@ define ptr @ptrmask_is_useless_fail1(i64 %i, i64 %m) { %r = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 %m0) ret ptr %r } + +@GC_arrays = external global { i8, i8, i64 } + +define ptr @ptrmask_demandedbits_constantexpr() { +; CHECK-LABEL: define ptr @ptrmask_demandedbits_constantexpr() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALIGNED_RESULT:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i64(ptr nonnull @GC_arrays, i64 -8) +; CHECK-NEXT: ret ptr [[ALIGNED_RESULT]] +; +entry: + %aligned_result = call ptr @llvm.ptrmask.p0.i64(ptr getelementptr inbounds (i8, ptr @GC_arrays, i64 1), i64 -8) + ret ptr %aligned_result +} From e80925b5eb4c824fe97a055d49faa586de16c2b9 Mon Sep 17 00:00:00 2001 From: Alexey Karyakin Date: Tue, 19 Nov 2024 09:27:01 -0600 Subject: [PATCH 08/38] [lld][Hexagon] Fix R_HEX_B22_PCREL range checks (#115925) Range checks for R_HEX_B22_PCREL did not account for the fact that offset is measured in instructions, not bytes. Add a test for all range-checked relocations. --- lld/ELF/Arch/Hexagon.cpp | 2 +- lld/test/ELF/hexagon-jump-error.s | 2 +- lld/test/ELF/hexagon.s | 41 ++++++++++++++++++++++++++++++- 3 files changed, 42 insertions(+), 3 deletions(-) diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp index 56cf96fd17704..8bcd28309f8b3 100644 --- a/lld/ELF/Arch/Hexagon.cpp +++ b/lld/ELF/Arch/Hexagon.cpp @@ -329,7 +329,7 @@ void Hexagon::relocate(uint8_t *loc, const Relocation &rel, case R_HEX_B22_PCREL: case R_HEX_GD_PLT_B22_PCREL: case R_HEX_PLT_B22_PCREL: - checkInt(loc, val, 22, rel); + checkInt(loc, val, 24, rel); or32le(loc, applyMask(0x1ff3ffe, val >> 2)); break; case R_HEX_B22_PCREL_X: diff --git a/lld/test/ELF/hexagon-jump-error.s b/lld/test/ELF/hexagon-jump-error.s index fec873827e573..53860b5daf2b1 100644 --- a/lld/test/ELF/hexagon-jump-error.s +++ b/lld/test/ELF/hexagon-jump-error.s @@ -25,7 +25,7 @@ if (p0) jump #1f .section b15, "ax" 1: -# CHECK: relocation R_HEX_B22_PCREL out of range: 8388612 is not in [-2097152, 2097151] +# CHECK: relocation R_HEX_B22_PCREL out of range: 8388612 is not in [-8388608, 8388607] jump #1f .space (1<<23) .section b22, "ax" diff --git a/lld/test/ELF/hexagon.s b/lld/test/ELF/hexagon.s index 8ef9b8eead8f1..b1576fb47d81a 100644 --- a/lld/test/ELF/hexagon.s +++ b/lld/test/ELF/hexagon.s @@ -1,7 +1,9 @@ # REQUIRES: hexagon # RUN: llvm-mc -filetype=obj -triple=hexagon-unknown-elf %s -o %t.o # RUN: llvm-mc -filetype=obj -triple=hexagon-unknown-elf %S/Inputs/hexagon.s -o %t1.o -# RUN: ld.lld %t.o %t1.o -o %t +# RUN: ld.lld %t.o %t1.o -o %t --Ttext=0x200b4 --section-start=b_1000000=0x1000000 \ +# RUN: --section-start=b_1000400=0x1000400 --section-start=b_1004000=0x1004000 \ +# RUN: --section-start=b_1010000=0x1010000 --section-start=b_1800000=0x1800000 # RUN: llvm-objdump --no-print-imm-hex -d %t | FileCheck %s # Note: 131584 == 0x20200 @@ -221,3 +223,40 @@ r0 = memw(r1+##_start) memw(r0+##_start) = r1 # CHECK: memw(r0+##131644) = r1 + + +## Tests for maximum branch ranges reachable without trampolines. + +.section b_1000000, "ax" +## The nop makes sure the first jump is within range. +nop +{ r0 = #0; jump #b_1000400 } // R_HEX_B9_PCREL +if (r0==#0) jump:t #b_1004000 // R_HEX_B13_PCREL +if (p0) jump #b_1010000 // R_HEX_B15_PCREL +jump #b_1800000 // R_HEX_B22_PCREL + +.section b_1000400, "ax" +nop + +.section b_1004000, "ax" +nop + +.section b_1010000, "ax" +nop + +.section b_1800000, "ax" +nop + +## Make sure we got the right relocations. +# RUN: llvm-readelf -r %t.o | FileCheck %s --check-prefix=REL +# REL: R_HEX_B9_PCREL 00000000 b_1000400 +# REL: R_HEX_B13_PCREL 00000000 b_1004000 +# REL: R_HEX_B15_PCREL 00000000 b_1010000 +# REL: R_HEX_B22_PCREL 00000000 b_1800000 + +# CHECK: 01000000 : +# CHECK-NEXT: 1000000: {{.*}} { nop } +# CHECK-NEXT: 1000004: {{.*}} { r0 = #0 ; jump 0x1000400 } +# CHECK-NEXT: 1000008: {{.*}} { if (r0==#0) jump:t 0x1004000 } +# CHECK-NEXT: 100000c: {{.*}} { if (p0) jump:nt 0x1010000 } +# CHECK-NEXT: 1000010: {{.*}} { jump 0x1800000 } From 5bd0474d1c45d58e472f25bf8292570ac78b5c15 Mon Sep 17 00:00:00 2001 From: DianQK Date: Wed, 20 Nov 2024 19:52:51 +0800 Subject: [PATCH 09/38] [LICM] allow MemoryAccess creation failure (#116813) Fixes #116809. After running some passes (SimpleLoopUnswitch, LoopInstSimplify, etc.), MemorySSA might be outdated, and the instruction `I` may have become a non-memory touching instruction. LICM has already handled this, but it does not pass `CreationMustSucceed=false` to `createDefinedAccess`. (cherry picked from commit 18b02bbf441660683df7f3925946984203d49bab) --- llvm/include/llvm/Analysis/MemorySSAUpdater.h | 5 ++ llvm/lib/Analysis/MemorySSAUpdater.cpp | 13 ++++- llvm/lib/Transforms/Scalar/LICM.cpp | 5 +- .../LICM/PR116813-memoryssa-outdated.ll | 50 +++++++++++++++++++ 4 files changed, 70 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Transforms/LICM/PR116813-memoryssa-outdated.ll diff --git a/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/llvm/include/llvm/Analysis/MemorySSAUpdater.h index d4da3ef1146db..f598dedea75fd 100644 --- a/llvm/include/llvm/Analysis/MemorySSAUpdater.h +++ b/llvm/include/llvm/Analysis/MemorySSAUpdater.h @@ -192,6 +192,11 @@ class MemorySSAUpdater { const BasicBlock *BB, MemorySSA::InsertionPlace Point); + MemoryAccess *createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition, + const BasicBlock *BB, + MemorySSA::InsertionPlace Point, + bool CreationMustSucceed); + /// Create a MemoryAccess in MemorySSA before an existing MemoryAccess. /// /// See createMemoryAccessInBB() for usage details. diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp index aa550f0b6a7bf..94061c949b7f8 100644 --- a/llvm/lib/Analysis/MemorySSAUpdater.cpp +++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp @@ -1404,8 +1404,17 @@ void MemorySSAUpdater::changeToUnreachable(const Instruction *I) { MemoryAccess *MemorySSAUpdater::createMemoryAccessInBB( Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, MemorySSA::InsertionPlace Point) { - MemoryUseOrDef *NewAccess = MSSA->createDefinedAccess(I, Definition); - MSSA->insertIntoListsForBlock(NewAccess, BB, Point); + return createMemoryAccessInBB(I, Definition, BB, Point, + /*CreationMustSucceed=*/true); +} + +MemoryAccess *MemorySSAUpdater::createMemoryAccessInBB( + Instruction *I, MemoryAccess *Definition, const BasicBlock *BB, + MemorySSA::InsertionPlace Point, bool CreationMustSucceed) { + MemoryUseOrDef *NewAccess = MSSA->createDefinedAccess( + I, Definition, /*Template=*/nullptr, CreationMustSucceed); + if (NewAccess) + MSSA->insertIntoListsForBlock(NewAccess, BB, Point); return NewAccess; } diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp index 91ef2b4b7c183..ca03eff7a4e25 100644 --- a/llvm/lib/Transforms/Scalar/LICM.cpp +++ b/llvm/lib/Transforms/Scalar/LICM.cpp @@ -1464,8 +1464,11 @@ static Instruction *cloneInstructionInExitBlock( if (MSSAU.getMemorySSA()->getMemoryAccess(&I)) { // Create a new MemoryAccess and let MemorySSA set its defining access. + // After running some passes, MemorySSA might be outdated, and the + // instruction `I` may have become a non-memory touching instruction. MemoryAccess *NewMemAcc = MSSAU.createMemoryAccessInBB( - New, nullptr, New->getParent(), MemorySSA::Beginning); + New, nullptr, New->getParent(), MemorySSA::Beginning, + /*CreationMustSucceed=*/false); if (NewMemAcc) { if (auto *MemDef = dyn_cast(NewMemAcc)) MSSAU.insertDef(MemDef, /*RenameUses=*/true); diff --git a/llvm/test/Transforms/LICM/PR116813-memoryssa-outdated.ll b/llvm/test/Transforms/LICM/PR116813-memoryssa-outdated.ll new file mode 100644 index 0000000000000..a040c3cc6947c --- /dev/null +++ b/llvm/test/Transforms/LICM/PR116813-memoryssa-outdated.ll @@ -0,0 +1,50 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes='loop-mssa(simple-loop-unswitch,licm)' -verify-memoryssa -S < %s | FileCheck %s + +; Check that running LICM after SimpleLoopUnswitch does not result in a crash. + +define i32 @foo(i1 %arg, ptr %arg1) { +; CHECK-LABEL: define i32 @foo( +; CHECK-SAME: i1 [[ARG:%.*]], ptr [[ARG1:%.*]]) { +; CHECK-NEXT: [[START:.*:]] +; CHECK-NEXT: [[ARG_FR:%.*]] = freeze i1 [[ARG]] +; CHECK-NEXT: br i1 [[ARG_FR]], label %[[START_SPLIT_US:.*]], label %[[START_SPLIT:.*]] +; CHECK: [[START_SPLIT_US]]: +; CHECK-NEXT: br label %[[LOOP_US:.*]] +; CHECK: [[LOOP_US]]: +; CHECK-NEXT: br label %[[BB0:.*]] +; CHECK: [[BB0]]: +; CHECK-NEXT: br label %[[BB1:.*]] +; CHECK: [[BB1]]: +; CHECK-NEXT: [[UNSWITCHED_SELECT_US:%.*]] = phi ptr [ [[ARG1]], %[[BB0]] ] +; CHECK-NEXT: [[I3_US:%.*]] = call i32 [[UNSWITCHED_SELECT_US]]() +; CHECK-NEXT: br i1 true, label %[[LOOP_US]], label %[[RET_SPLIT_US:.*]] +; CHECK: [[RET_SPLIT_US]]: +; CHECK-NEXT: [[I3_LCSSA_US:%.*]] = phi i32 [ [[I3_US]], %[[BB1]] ] +; CHECK-NEXT: br label %[[RET:.*]] +; CHECK: [[START_SPLIT]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: br label %[[BB2:.*]] +; CHECK: [[BB2]]: +; CHECK-NEXT: br i1 false, label %[[LOOP]], label %[[RET_SPLIT:.*]] +; CHECK: [[RET_SPLIT]]: +; CHECK-NEXT: [[I3_LE:%.*]] = call i32 @bar() +; CHECK-NEXT: br label %[[RET]] +; CHECK: [[RET]]: +; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[I3_LE]], %[[RET_SPLIT]] ], [ [[I3_LCSSA_US]], %[[RET_SPLIT_US]] ] +; CHECK-NEXT: ret i32 [[DOTUS_PHI]] +; +start: + br label %loop + +loop: ; preds = %loop, %bb + %i = select i1 %arg, ptr %arg1, ptr @bar + %i3 = call i32 %i() + br i1 %arg, label %loop, label %ret + +ret: ; preds = %loop + ret i32 %i3 +} + +declare i32 @bar() nounwind willreturn memory(none) From 336f87753b510aed840daf87f8d3a4996e6c8f15 Mon Sep 17 00:00:00 2001 From: wanglei Date: Thu, 21 Nov 2024 09:31:12 +0800 Subject: [PATCH 10/38] [LoongArch] Fix GOT usage for `non-dso_local` function calls in large code model This commit fixes an issue in the large code model where non-dso_local function calls did not use the GOT as expected in PIC mode. Instead, direct PC-relative access was incorrectly applied, leading to linker errors when building shared libraries. For `ExternalSymbol`, it is not possible to determine whether it is dso_local during pseudo-instruction expansion. We use target flags to differentiate whether GOT should be used. Cherry-picked from #117099, used for fix linker errors when bulding shared libraries with large code model. --- .../LoongArch/LoongArchExpandPseudoInsts.cpp | 2 +- llvm/test/CodeGen/LoongArch/code-models.ll | 10 ++--- .../LoongArch/machinelicm-address-pseudos.ll | 20 +++++----- .../LoongArch/psabi-restricted-scheduling.ll | 40 +++++++++---------- llvm/test/CodeGen/LoongArch/tls-models.ll | 20 +++++----- 5 files changed, 46 insertions(+), 46 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp index c136f5b3e515d..e680dda7374d0 100644 --- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp @@ -721,7 +721,7 @@ bool LoongArchExpandPseudo::expandFunctionCALL( IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL; Register AddrReg = IsTailCall ? LoongArch::R19 : LoongArch::R1; - bool UseGOT = Func.isGlobal() && !Func.getGlobal()->isDSOLocal(); + bool UseGOT = Func.getTargetFlags() == LoongArchII::MO_CALL_PLT; unsigned MO = UseGOT ? LoongArchII::MO_GOT_PC_HI : LoongArchII::MO_PCREL_LO; unsigned LAOpcode = UseGOT ? LoongArch::LDX_D : LoongArch::ADD_D; expandLargeAddressLoad(MBB, MBBI, NextMBBI, LAOpcode, MO, Func, AddrReg, diff --git a/llvm/test/CodeGen/LoongArch/code-models.ll b/llvm/test/CodeGen/LoongArch/code-models.ll index 4b2b72afaee17..4eb1e5e596fd3 100644 --- a/llvm/test/CodeGen/LoongArch/code-models.ll +++ b/llvm/test/CodeGen/LoongArch/code-models.ll @@ -82,11 +82,11 @@ define void @call_external_sym(ptr %dst) { ; LARGE-NEXT: .cfi_offset 1, -8 ; LARGE-NEXT: ori $a2, $zero, 1000 ; LARGE-NEXT: move $a1, $zero -; LARGE-NEXT: pcalau12i $ra, %pc_hi20(memset) -; LARGE-NEXT: addi.d $t8, $zero, %pc_lo12(memset) -; LARGE-NEXT: lu32i.d $t8, %pc64_lo20(memset) -; LARGE-NEXT: lu52i.d $t8, $t8, %pc64_hi12(memset) -; LARGE-NEXT: add.d $ra, $t8, $ra +; LARGE-NEXT: pcalau12i $ra, %got_pc_hi20(memset) +; LARGE-NEXT: addi.d $t8, $zero, %got_pc_lo12(memset) +; LARGE-NEXT: lu32i.d $t8, %got64_pc_lo20(memset) +; LARGE-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(memset) +; LARGE-NEXT: ldx.d $ra, $t8, $ra ; LARGE-NEXT: jirl $ra, $ra, 0 ; LARGE-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ; LARGE-NEXT: addi.d $sp, $sp, 16 diff --git a/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll b/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll index ed1a24e82b4e4..29348fe0d641e 100644 --- a/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll +++ b/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll @@ -282,11 +282,11 @@ define void @test_la_tls_ld(i32 signext %n) { ; LA64LARGE-NEXT: .LBB3_1: # %loop ; LA64LARGE-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64LARGE-NEXT: move $a0, $s0 -; LA64LARGE-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) -; LA64LARGE-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) -; LA64LARGE-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) -; LA64LARGE-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) -; LA64LARGE-NEXT: add.d $ra, $t8, $ra +; LA64LARGE-NEXT: pcalau12i $ra, %got_pc_hi20(__tls_get_addr) +; LA64LARGE-NEXT: addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr) +; LA64LARGE-NEXT: lu32i.d $t8, %got64_pc_lo20(__tls_get_addr) +; LA64LARGE-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr) +; LA64LARGE-NEXT: ldx.d $ra, $t8, $ra ; LA64LARGE-NEXT: jirl $ra, $ra, 0 ; LA64LARGE-NEXT: ld.w $zero, $a0, 0 ; LA64LARGE-NEXT: addi.w $s1, $s1, 1 @@ -448,11 +448,11 @@ define void @test_la_tls_gd(i32 signext %n) nounwind { ; LA64LARGE-NEXT: .LBB5_1: # %loop ; LA64LARGE-NEXT: # =>This Inner Loop Header: Depth=1 ; LA64LARGE-NEXT: move $a0, $s0 -; LA64LARGE-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) -; LA64LARGE-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) -; LA64LARGE-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) -; LA64LARGE-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) -; LA64LARGE-NEXT: add.d $ra, $t8, $ra +; LA64LARGE-NEXT: pcalau12i $ra, %got_pc_hi20(__tls_get_addr) +; LA64LARGE-NEXT: addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr) +; LA64LARGE-NEXT: lu32i.d $t8, %got64_pc_lo20(__tls_get_addr) +; LA64LARGE-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr) +; LA64LARGE-NEXT: ldx.d $ra, $t8, $ra ; LA64LARGE-NEXT: jirl $ra, $ra, 0 ; LA64LARGE-NEXT: ld.w $zero, $a0, 0 ; LA64LARGE-NEXT: addi.w $s1, $s1, 1 diff --git a/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll index 6a15d3a9cda30..75f494f32e476 100644 --- a/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll +++ b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll @@ -105,11 +105,11 @@ define void @foo() nounwind { ; LARGE_NO_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(gd) ; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(gd) ; LARGE_NO_SCH-NEXT: add.d $a0, $t8, $a0 -; LARGE_NO_SCH-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) -; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) -; LARGE_NO_SCH-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) -; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) -; LARGE_NO_SCH-NEXT: add.d $ra, $t8, $ra +; LARGE_NO_SCH-NEXT: pcalau12i $ra, %got_pc_hi20(__tls_get_addr) +; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr) +; LARGE_NO_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(__tls_get_addr) +; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr) +; LARGE_NO_SCH-NEXT: ldx.d $ra, $t8, $ra ; LARGE_NO_SCH-NEXT: jirl $ra, $ra, 0 ; LARGE_NO_SCH-NEXT: ld.d $zero, $a0, 0 ; LARGE_NO_SCH-NEXT: pcalau12i $a0, %ld_pc_hi20(ld) @@ -117,11 +117,11 @@ define void @foo() nounwind { ; LARGE_NO_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(ld) ; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(ld) ; LARGE_NO_SCH-NEXT: add.d $a0, $t8, $a0 -; LARGE_NO_SCH-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) -; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) -; LARGE_NO_SCH-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) -; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) -; LARGE_NO_SCH-NEXT: add.d $ra, $t8, $ra +; LARGE_NO_SCH-NEXT: pcalau12i $ra, %got_pc_hi20(__tls_get_addr) +; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr) +; LARGE_NO_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(__tls_get_addr) +; LARGE_NO_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr) +; LARGE_NO_SCH-NEXT: ldx.d $ra, $t8, $ra ; LARGE_NO_SCH-NEXT: jirl $ra, $ra, 0 ; LARGE_NO_SCH-NEXT: pcalau12i $a1, %ie_pc_hi20(ie) ; LARGE_NO_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ie) @@ -162,11 +162,11 @@ define void @foo() nounwind { ; LARGE_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(gd) ; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(gd) ; LARGE_SCH-NEXT: add.d $a0, $t8, $a0 -; LARGE_SCH-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) -; LARGE_SCH-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) -; LARGE_SCH-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) -; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) -; LARGE_SCH-NEXT: add.d $ra, $t8, $ra +; LARGE_SCH-NEXT: pcalau12i $ra, %got_pc_hi20(__tls_get_addr) +; LARGE_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr) +; LARGE_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(__tls_get_addr) +; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr) +; LARGE_SCH-NEXT: ldx.d $ra, $t8, $ra ; LARGE_SCH-NEXT: jirl $ra, $ra, 0 ; LARGE_SCH-NEXT: ld.d $zero, $a0, 0 ; LARGE_SCH-NEXT: pcalau12i $a0, %ld_pc_hi20(ld) @@ -174,11 +174,11 @@ define void @foo() nounwind { ; LARGE_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(ld) ; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(ld) ; LARGE_SCH-NEXT: add.d $a0, $t8, $a0 -; LARGE_SCH-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) -; LARGE_SCH-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) -; LARGE_SCH-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) -; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) -; LARGE_SCH-NEXT: add.d $ra, $t8, $ra +; LARGE_SCH-NEXT: pcalau12i $ra, %got_pc_hi20(__tls_get_addr) +; LARGE_SCH-NEXT: addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr) +; LARGE_SCH-NEXT: lu32i.d $t8, %got64_pc_lo20(__tls_get_addr) +; LARGE_SCH-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr) +; LARGE_SCH-NEXT: ldx.d $ra, $t8, $ra ; LARGE_SCH-NEXT: jirl $ra, $ra, 0 ; LARGE_SCH-NEXT: pcalau12i $a1, %ie_pc_hi20(ie) ; LARGE_SCH-NEXT: addi.d $t8, $zero, %ie_pc_lo12(ie) diff --git a/llvm/test/CodeGen/LoongArch/tls-models.ll b/llvm/test/CodeGen/LoongArch/tls-models.ll index bb89794d1c843..04600ffeb37ee 100644 --- a/llvm/test/CodeGen/LoongArch/tls-models.ll +++ b/llvm/test/CodeGen/LoongArch/tls-models.ll @@ -55,11 +55,11 @@ define ptr @f1() nounwind { ; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(unspecified) ; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(unspecified) ; LA64LARGEPIC-NEXT: add.d $a0, $t8, $a0 -; LA64LARGEPIC-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) -; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) -; LA64LARGEPIC-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) -; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) -; LA64LARGEPIC-NEXT: add.d $ra, $t8, $ra +; LA64LARGEPIC-NEXT: pcalau12i $ra, %got_pc_hi20(__tls_get_addr) +; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr) +; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(__tls_get_addr) +; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr) +; LA64LARGEPIC-NEXT: ldx.d $ra, $t8, $ra ; LA64LARGEPIC-NEXT: jirl $ra, $ra, 0 ; LA64LARGEPIC-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ; LA64LARGEPIC-NEXT: addi.d $sp, $sp, 16 @@ -169,11 +169,11 @@ define ptr @f2() nounwind { ; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(ld) ; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(ld) ; LA64LARGEPIC-NEXT: add.d $a0, $t8, $a0 -; LA64LARGEPIC-NEXT: pcalau12i $ra, %pc_hi20(__tls_get_addr) -; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %pc_lo12(__tls_get_addr) -; LA64LARGEPIC-NEXT: lu32i.d $t8, %pc64_lo20(__tls_get_addr) -; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr) -; LA64LARGEPIC-NEXT: add.d $ra, $t8, $ra +; LA64LARGEPIC-NEXT: pcalau12i $ra, %got_pc_hi20(__tls_get_addr) +; LA64LARGEPIC-NEXT: addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr) +; LA64LARGEPIC-NEXT: lu32i.d $t8, %got64_pc_lo20(__tls_get_addr) +; LA64LARGEPIC-NEXT: lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr) +; LA64LARGEPIC-NEXT: ldx.d $ra, $t8, $ra ; LA64LARGEPIC-NEXT: jirl $ra, $ra, 0 ; LA64LARGEPIC-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload ; LA64LARGEPIC-NEXT: addi.d $sp, $sp, 16 From 11be11b8773bf63abe2c8da72db3ee7c25af524b Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Thu, 21 Nov 2024 17:23:04 +0800 Subject: [PATCH 11/38] [SCEV] Fix sext handling for `getConstantMultiple` (#117093) Counterexample: 219 is a multiple of 73. But `sext i8 219 to i16 = 65499` is not. Fixes https://github.com/llvm/llvm-project/issues/116483. (cherry picked from commit 458dfbd855806461b4508bf8845cafe0411dbfd4) --- llvm/lib/Analysis/ScalarEvolution.cpp | 4 ++- .../test/Analysis/ScalarEvolution/pr116483.ll | 26 ++++++++++++++ .../Transforms/IndVarSimplify/pr116483.ll | 36 +++++++++++++++++++ 3 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Analysis/ScalarEvolution/pr116483.ll create mode 100644 llvm/test/Transforms/IndVarSimplify/pr116483.ll diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 51cffac808768..412cfe73d3e55 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -6313,8 +6313,10 @@ APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S) { return getConstantMultiple(Z->getOperand()).zext(BitWidth); } case scSignExtend: { + // Only multiples that are a power of 2 will hold after sext. const SCEVSignExtendExpr *E = cast(S); - return getConstantMultiple(E->getOperand()).sext(BitWidth); + uint32_t TZ = getMinTrailingZeros(E->getOperand()); + return GetShiftedByZeros(TZ); } case scMulExpr: { const SCEVMulExpr *M = cast(S); diff --git a/llvm/test/Analysis/ScalarEvolution/pr116483.ll b/llvm/test/Analysis/ScalarEvolution/pr116483.ll new file mode 100644 index 0000000000000..cc2334e9c64f9 --- /dev/null +++ b/llvm/test/Analysis/ScalarEvolution/pr116483.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -disable-output "-passes=print" < %s 2>&1 | FileCheck %s + +define i16 @test() { +; CHECK-LABEL: 'test' +; CHECK-NEXT: Classifying expressions for: @test +; CHECK-NEXT: %xor = xor i32 0, 3 +; CHECK-NEXT: --> %xor U: [3,4) S: [3,4) +; CHECK-NEXT: %mul = mul i32 %xor, 329 +; CHECK-NEXT: --> (329 * %xor) U: [987,988) S: [987,988) +; CHECK-NEXT: %conv = trunc i32 %mul to i16 +; CHECK-NEXT: --> (329 * (trunc i32 %xor to i16)) U: [987,988) S: [987,988) +; CHECK-NEXT: %sext = shl i16 %conv, 8 +; CHECK-NEXT: --> (18688 * (trunc i32 %xor to i16)) U: [-9472,-9471) S: [-9472,-9471) +; CHECK-NEXT: %conv1 = ashr i16 %sext, 8 +; CHECK-NEXT: --> (sext i8 (73 * (trunc i32 %xor to i8)) to i16) U: [-37,-36) S: [-37,-36) +; CHECK-NEXT: Determining loop execution counts for: @test +; +entry: + %xor = xor i32 0, 3 + %mul = mul i32 %xor, 329 + %conv = trunc i32 %mul to i16 + %sext = shl i16 %conv, 8 + %conv1 = ashr i16 %sext, 8 + ret i16 %conv1 +} diff --git a/llvm/test/Transforms/IndVarSimplify/pr116483.ll b/llvm/test/Transforms/IndVarSimplify/pr116483.ll new file mode 100644 index 0000000000000..ae108a525223e --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/pr116483.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=indvars < %s | FileCheck %s + +define i32 @test() { +; CHECK-LABEL: define i32 @test() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 0, 3 +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[XOR]], 329 +; CHECK-NEXT: [[CONV:%.*]] = trunc i32 [[MUL]] to i16 +; CHECK-NEXT: [[SEXT:%.*]] = shl i16 [[CONV]], 8 +; CHECK-NEXT: [[CONV1:%.*]] = ashr i16 [[SEXT]], 8 +; CHECK-NEXT: br label %[[LOOP_BODY:.*]] +; CHECK: [[LOOP_BODY]]: +; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[LOOP_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[CONV3:%.*]] = zext i16 [[CONV1]] to i32 +; CHECK-NEXT: ret i32 [[CONV3]] +; +entry: + %xor = xor i32 0, 3 + %mul = mul i32 %xor, 329 + %conv = trunc i32 %mul to i16 + %sext = shl i16 %conv, 8 + %conv1 = ashr i16 %sext, 8 + %conv3 = zext i16 %conv1 to i32 + br label %loop.body + +loop.body: + %indvar = phi i32 [ %indvar.inc, %loop.body ], [ 1, %entry ] + %indvar.inc = add nuw i32 %indvar, 1 + %exitcond = icmp eq i32 %indvar, %conv3 + br i1 %exitcond, label %exit, label %loop.body + +exit: + ret i32 %conv3 +} From 7e2da7d262380d5ebaf25dbadd7ad2440f6ce21f Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Mon, 18 Nov 2024 23:41:04 +0800 Subject: [PATCH 12/38] [ConstraintElim] Bail out on non-dedicated exits when adding exiting conditions (#116627) This patch bails out non-dedicated exits to avoid adding exiting conditions to invalid context. Closes https://github.com/llvm/llvm-project/issues/116553. (cherry picked from commit 52361d0368b79841be12156bf03cf8c1851e5df7) --- .../Scalar/ConstraintElimination.cpp | 13 +++--- .../induction-condition-in-loop-exit.ll | 44 +++++++++++++++++++ 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp index 37022104d0a9b..d1c80aa671243 100644 --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -1033,9 +1033,9 @@ void State::addInfoForInductions(BasicBlock &BB) { DTN, CmpInst::ICMP_SLT, PN, B, ConditionTy(CmpInst::ICMP_SLE, StartValue, B))); - // Try to add condition from header to the exit blocks. When exiting either - // with EQ or NE in the header, we know that the induction value must be u<= - // B, as other exits may only exit earlier. + // Try to add condition from header to the dedicated exit blocks. When exiting + // either with EQ or NE in the header, we know that the induction value must + // be u<= B, as other exits may only exit earlier. assert(!StepOffset.isNegative() && "induction must be increasing"); assert((Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) && "unsupported predicate"); @@ -1043,8 +1043,11 @@ void State::addInfoForInductions(BasicBlock &BB) { SmallVector ExitBBs; L->getExitBlocks(ExitBBs); for (BasicBlock *EB : ExitBBs) { - WorkList.emplace_back(FactOrCheck::getConditionFact( - DT.getNode(EB), CmpInst::ICMP_ULE, A, B, Precond)); + // Bail out on non-dedicated exits. + if (DT.dominates(&BB, EB)) { + WorkList.emplace_back(FactOrCheck::getConditionFact( + DT.getNode(EB), CmpInst::ICMP_ULE, A, B, Precond)); + } } } diff --git a/llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll b/llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll index 15e1d84372627..a04b06e1bf0a5 100644 --- a/llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll +++ b/llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll @@ -763,3 +763,47 @@ exit.2: %t.2 = icmp ult i32 %iv, %N ret i1 %t.2 } + +define i1 @test_non_dedicated_exit(i16 %n) { +; CHECK-LABEL: define i1 @test_non_dedicated_exit( +; CHECK-SAME: i16 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[COND:%.*]] = icmp slt i16 [[N]], 1 +; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP_PREHEADER:.*]] +; CHECK: [[LOOP_PREHEADER]]: +; CHECK-NEXT: [[SUB:%.*]] = add nsw i16 [[N]], -1 +; CHECK-NEXT: [[EXT:%.*]] = zext nneg i16 [[SUB]] to i32 +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[INDVAR:%.*]] = phi i32 [ [[INDVAR_INC:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[LOOP_PREHEADER]] ] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INDVAR]], [[EXT]] +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP_LATCH]] +; CHECK: [[LOOP_LATCH]]: +; CHECK-NEXT: [[INDVAR_INC]] = add nuw nsw i32 [[INDVAR]], 1 +; CHECK-NEXT: br label %[[LOOP]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i16 [[N]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + %cond = icmp slt i16 %n, 1 + br i1 %cond, label %exit, label %loop.preheader + +loop.preheader: + %sub = add nsw i16 %n, -1 + %ext = zext nneg i16 %sub to i32 + br label %loop + +loop: + %indvar = phi i32 [ %indvar.inc, %loop.latch ], [ 0, %loop.preheader ] + %exitcond = icmp eq i32 %indvar, %ext + br i1 %exitcond, label %exit, label %loop.latch + +loop.latch: + %indvar.inc = add nuw nsw i32 %indvar, 1 + br label %loop + +exit: + %cmp = icmp sgt i16 %n, 0 + ret i1 %cmp +} From 32cbe24de3f2ecb1c77899ff27bfe70bb033ecde Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 20 Nov 2024 15:10:19 +0000 Subject: [PATCH 13/38] [MachineLICM] Add test case showing load hoisted across memory barrier. (cherry picked from commit a9b3ec154d7ab2d0896ac5c9f1e9a1266a37be80) --- .../AArch64/machine-licm-hoist-load.ll | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll index e8dafd5e8fbab..932a5af264a00 100644 --- a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll +++ b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll @@ -497,6 +497,35 @@ for.exit: ; preds = %for.body ret i64 %spec.select } +@a = external local_unnamed_addr global i32, align 4 + +; FIXME: Load hoisted out of the loop across memory barriers. +define i32 @load_between_memory_barriers() { +; CHECK-LABEL: load_between_memory_barriers: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, :got:a +; CHECK-NEXT: ldr x8, [x8, :got_lo12:a] +; CHECK-NEXT: ldr w0, [x8] +; CHECK-NEXT: .LBB8_1: // %loop +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: //MEMBARRIER +; CHECK-NEXT: //MEMBARRIER +; CHECK-NEXT: cbz w0, .LBB8_1 +; CHECK-NEXT: // %bb.2: // %exit +; CHECK-NEXT: ret + br label %loop + +loop: + fence syncscope("singlethread") acq_rel + %l = load i32, ptr @a, align 4 + fence syncscope("singlethread") acq_rel + %c = icmp eq i32 %l, 0 + br i1 %c, label %loop, label %exit + +exit: + ret i32 %l +} + declare i32 @bcmp(ptr, ptr, i64) declare i32 @memcmp(ptr, ptr, i64) declare void @func() From 086d8e6bb5daf8de43880ba90258c49e0fabf2c9 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 21 Nov 2024 10:25:04 +0000 Subject: [PATCH 14/38] [MachineLICM] Don't allow hoisting invariant loads across mem barrier. (#116987) The improvements in 63917e1 / #70796 do not check for memory barriers/unmodelled sideeffects, which means we may incorrectly hoist loads across memory barriers. Fix this by checking any machine instruction in the loop is a load-fold barrier. PR: https://github.com/llvm/llvm-project/pull/116987 (cherry picked from commit ef102b4a6333a304e36dc623d5381257a7ef1ed6) --- llvm/lib/CodeGen/MachineLICM.cpp | 2 +- llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll | 4 ++-- llvm/test/CodeGen/Mips/lcb5.ll | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp index f24ab187ef400..21a02a6f09478 100644 --- a/llvm/lib/CodeGen/MachineLICM.cpp +++ b/llvm/lib/CodeGen/MachineLICM.cpp @@ -1474,7 +1474,7 @@ void MachineLICMBase::InitializeLoadsHoistableLoops() { if (!AllowedToHoistLoads[Loop]) continue; for (auto &MI : *MBB) { - if (!MI.mayStore() && !MI.isCall() && + if (!MI.isLoadFoldBarrier() && !MI.mayStore() && !MI.isCall() && !(MI.mayLoad() && MI.hasOrderedMemoryRef())) continue; for (MachineLoop *L = Loop; L != nullptr; L = L->getParentLoop()) diff --git a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll index 932a5af264a00..17f8263560430 100644 --- a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll +++ b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll @@ -499,16 +499,16 @@ for.exit: ; preds = %for.body @a = external local_unnamed_addr global i32, align 4 -; FIXME: Load hoisted out of the loop across memory barriers. +; Make sure the load is not hoisted out of the loop across memory barriers. define i32 @load_between_memory_barriers() { ; CHECK-LABEL: load_between_memory_barriers: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, :got:a ; CHECK-NEXT: ldr x8, [x8, :got_lo12:a] -; CHECK-NEXT: ldr w0, [x8] ; CHECK-NEXT: .LBB8_1: // %loop ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: //MEMBARRIER +; CHECK-NEXT: ldr w0, [x8] ; CHECK-NEXT: //MEMBARRIER ; CHECK-NEXT: cbz w0, .LBB8_1 ; CHECK-NEXT: // %bb.2: // %exit diff --git a/llvm/test/CodeGen/Mips/lcb5.ll b/llvm/test/CodeGen/Mips/lcb5.ll index f320f6fc5660c..bb059f1ee8453 100644 --- a/llvm/test/CodeGen/Mips/lcb5.ll +++ b/llvm/test/CodeGen/Mips/lcb5.ll @@ -186,7 +186,7 @@ if.end: ; preds = %if.then, %entry } ; ci: .ent z3 -; ci: bteqz $BB6_3 +; ci: bteqz $BB6_2 ; ci: .end z3 ; Function Attrs: nounwind optsize @@ -210,7 +210,7 @@ if.end: ; preds = %if.then, %entry ; ci: .ent z4 ; ci: btnez $BB7_1 # 16 bit inst -; ci: jal $BB7_3 # branch +; ci: jal $BB7_2 # branch ; ci: nop ; ci: $BB7_1: ; ci: .p2align 2 From 0e7e5d9bdf3c130069a1d622dc9a2b71357fc1ee Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Tue, 19 Nov 2024 20:06:34 +0800 Subject: [PATCH 15/38] [InstCombine] Drop noundef attributes in `foldCttzCtlz` (#116718) Closes https://github.com/llvm/llvm-project/issues/112068. (cherry picked from commit a59976bea8ad76f18119a11391dc8ba3e6ba07d5) --- .../Transforms/InstCombine/InstCombineCalls.cpp | 4 +++- .../Transforms/InstCombine/shift-cttz-ctlz.ll | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 9d2990c98ce27..3223fccbcf49a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -506,8 +506,10 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true. if (II.hasOneUse() && match(Op1, m_Zero()) && - match(II.user_back(), m_Shift(m_Value(), m_Specific(&II)))) + match(II.user_back(), m_Shift(m_Value(), m_Specific(&II)))) { + II.dropUBImplyingAttrsAndMetadata(); return IC.replaceOperand(II, 1, IC.Builder.getTrue()); + } Constant *C; diff --git a/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll b/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll index 1c381d0839071..63caec9501325 100644 --- a/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll +++ b/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll @@ -15,6 +15,22 @@ entry: ret i32 %res } +; Make sure that noundef is dropped. + +define i32 @shl_cttz_false_noundef(i32 %x, i32 %y) { +; CHECK-LABEL: define i32 @shl_cttz_false_noundef( +; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CTTZ:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[Y]], i1 true) +; CHECK-NEXT: [[RES:%.*]] = shl i32 [[X]], [[CTTZ]] +; CHECK-NEXT: ret i32 [[RES]] +; +entry: + %cttz = call noundef i32 @llvm.cttz.i32(i32 %y, i1 false) + %res = shl i32 %x, %cttz + ret i32 %res +} + define i32 @shl_ctlz_false(i32 %x, i32 %y) { ; CHECK-LABEL: define i32 @shl_ctlz_false( ; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) { From edded5af5494adfc53187719fa3f3b0be7a4a20e Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 18 Oct 2024 13:44:57 -0700 Subject: [PATCH 16/38] [SLP][NFC]Add a test with the incorrect casting of the abs argument, NFC (cherry picked from commit 825f9cb1b31aa91d23eba803003897490de74a20) --- .../abs-overflow-incorrect-minbws.ll | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll diff --git a/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll new file mode 100644 index 0000000000000..a936b076138d0 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S --passes=slp-vectorizer < %s | FileCheck %s + +define i32 @test(i32 %n) { +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: i32 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[N]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i32> [[TMP2]], +; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP3]], i1 false) +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i32 1 +; CHECK-NEXT: [[RES1:%.*]] = add i32 [[TMP5]], [[TMP6]] +; CHECK-NEXT: ret i32 [[RES1]] +; +entry: + %n1 = add i32 %n, 1 + %zn1 = zext nneg i32 %n1 to i64 + %m1 = mul nuw nsw i64 %zn1, 273837369 + %a1 = call i64 @llvm.abs.i64(i64 %m1, i1 true) + %t1 = trunc i64 %a1 to i32 + %n2 = add i32 %n, 2 + %zn2 = zext nneg i32 %n2 to i64 + %m2 = mul nuw nsw i64 %zn2, 273837369 + %a2 = call i64 @llvm.abs.i64(i64 %m2, i1 true) + %t2 = trunc i64 %a2 to i32 + %res1 = add i32 %t1, %t2 + ret i32 %res1 +} From 9f72c9837c553063ab0cbacc1a472a73c0ec2a4b Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 18 Oct 2024 13:54:30 -0700 Subject: [PATCH 17/38] [SLP]Check that operand of abs does not overflow before making it part of minbitwidth transformation Need to check that the operand of the abs intrinsic can be safely truncated before making it part of the minbitwidth transformation. Fixes #112577 (cherry picked from commit 709abacdc350d63c61888607edb28ce272daa0a0) --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 16 ++++++++++++++++ .../abs-overflow-incorrect-minbws.ll | 6 ++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index ab2b96cdc42db..746ba51a981fe 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -15440,9 +15440,25 @@ bool BoUpSLP::collectValuesToDemote( MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL))); }); }; + auto AbsChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) { + assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!"); + return all_of(E.Scalars, [&](Value *V) { + auto *I = cast(V); + unsigned SignBits = OrigBitWidth - BitWidth; + APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth - 1); + unsigned Op0SignBits = + ComputeNumSignBits(I->getOperand(0), *DL, 0, AC, nullptr, DT); + return SignBits <= Op0SignBits && + ((SignBits != Op0SignBits && + !isKnownNonNegative(I->getOperand(0), SimplifyQuery(*DL))) || + MaskedValueIsZero(I->getOperand(0), Mask, SimplifyQuery(*DL))); + }); + }; if (ID != Intrinsic::abs) { Operands.push_back(getOperandEntry(&E, 1)); CallChecker = CompChecker; + } else { + CallChecker = AbsChecker; } InstructionCost BestCost = std::numeric_limits::max(); diff --git a/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll index a936b076138d0..51b635837d3b5 100644 --- a/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll +++ b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll @@ -8,8 +8,10 @@ define i32 @test(i32 %n) { ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[N]], i32 0 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP2:%.*]] = add <2 x i32> [[TMP1]], -; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i32> [[TMP2]], -; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP3]], i1 false) +; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = mul nuw nsw <2 x i64> [[TMP3]], +; CHECK-NEXT: [[TMP8:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP7]], i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = trunc <2 x i64> [[TMP8]] to <2 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i32 1 ; CHECK-NEXT: [[RES1:%.*]] = add i32 [[TMP5]], [[TMP6]] From dc665fa5f5b8b572479ceac6bf32e0174de65f1e Mon Sep 17 00:00:00 2001 From: Zhaoxin Yang Date: Mon, 11 Nov 2024 16:46:22 +0800 Subject: [PATCH 18/38] [MC][LoongArch] Change default cpu in `MCSubtargetInfo`. (#114922) The default value of this CPU affects the `FeatureBits` obtained by `LoongArchTargetELFStreamer` when creating an ELF file, and it will further affect the `Flags` field in the generated file. So, the default CPU value should be consistent with the `initializeSubtargetDependencies` in `LoongArchSubtarget.cpp`. Otherwise, the `Flags` field may be unexpected. (cherry picked from commit 75c2888209473884cb3fa5720899d8199dafb8cb) --- lld/test/ELF/emulation-loongarch.s | 2 +- lld/test/ELF/loongarch-interlink.test | 4 ++-- .../Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp | 2 +- llvm/test/CodeGen/LoongArch/e_flags.ll | 6 ++++++ 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/lld/test/ELF/emulation-loongarch.s b/lld/test/ELF/emulation-loongarch.s index 28b879f758468..cfa8df4d8e2fe 100644 --- a/lld/test/ELF/emulation-loongarch.s +++ b/lld/test/ELF/emulation-loongarch.s @@ -37,7 +37,7 @@ # LA32-NEXT: StringTableSectionIndex: # LA32-NEXT: } -# RUN: llvm-mc -filetype=obj -triple=loongarch64 %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+d %s -o %t.o # RUN: ld.lld %t.o -o %t # RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=LA64 %s # RUN: ld.lld -m elf64loongarch %t.o -o %t diff --git a/lld/test/ELF/loongarch-interlink.test b/lld/test/ELF/loongarch-interlink.test index 44e5d03409a47..15c8318512660 100644 --- a/lld/test/ELF/loongarch-interlink.test +++ b/lld/test/ELF/loongarch-interlink.test @@ -3,9 +3,9 @@ # RUN: yaml2obj %t/blob.yaml -o %t/blob.o # RUN: yaml2obj %t/v0-lp64d.yaml -o %t/v0-lp64d.o -# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnu %t/start.s -o %t/v1-lp64d.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnu --mattr=+d %t/start.s -o %t/v1-lp64d.o # RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnusf %t/start.s -o %t/v1-lp64s.o -# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnu %t/bar.s -o %t/v1-b-lp64d.o +# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnu --mattr=+d %t/bar.s -o %t/v1-b-lp64d.o ## Check that binary input results in e_flags=0 output. # RUN: ld.lld -m elf64loongarch -b binary %t/blob.bin -o %t/blob.out diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp index e40981f5b5cd5..0712cc01ea038 100644 --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp @@ -55,7 +55,7 @@ static MCInstrInfo *createLoongArchMCInstrInfo() { static MCSubtargetInfo * createLoongArchMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) { if (CPU.empty() || CPU == "generic") - CPU = TT.isArch64Bit() ? "la464" : "generic-la32"; + CPU = TT.isArch64Bit() ? "generic-la64" : "generic-la32"; return createLoongArchMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS); } diff --git a/llvm/test/CodeGen/LoongArch/e_flags.ll b/llvm/test/CodeGen/LoongArch/e_flags.ll index 2feb9d832bca9..9b2dc87eb353d 100644 --- a/llvm/test/CodeGen/LoongArch/e_flags.ll +++ b/llvm/test/CodeGen/LoongArch/e_flags.ll @@ -1,3 +1,6 @@ +; RUN: llc --mtriple=loongarch32 --filetype=obj %s -o %t-la32s +; RUN: llvm-readelf -h %t-la32s | FileCheck %s --check-prefixes=ILP32,ABI-S --match-full-lines + ; RUN: llc --mtriple=loongarch32 -mattr=+d --filetype=obj %s -o %t-la32 ; RUN: llvm-readelf -h %t-la32 | FileCheck %s --check-prefixes=ILP32,ABI-D --match-full-lines @@ -10,6 +13,9 @@ ; RUN: llc --mtriple=loongarch32 -mattr=+d --filetype=obj %s --target-abi=ilp32d -o %t-ilp32d ; RUN: llvm-readelf -h %t-ilp32d | FileCheck %s --check-prefixes=ILP32,ABI-D --match-full-lines +; RUN: llc --mtriple=loongarch64 -mattr=+d --filetype=obj %s -o %t-la64d +; RUN: llvm-readelf -h %t-la64d | FileCheck %s --check-prefixes=LP64,ABI-D --match-full-lines + ; RUN: llc --mtriple=loongarch64 -mattr=+d --filetype=obj %s -o %t-la64 ; RUN: llvm-readelf -h %t-la64 | FileCheck %s --check-prefixes=LP64,ABI-D --match-full-lines From f64f76feab83859b37b7fa5de3d4bba9a446e72b Mon Sep 17 00:00:00 2001 From: AdityaK Date: Tue, 10 Sep 2024 22:39:02 -0700 Subject: [PATCH 19/38] Bail out jump threading on indirect branches (#103688) The bug was introduced by https://github.com/llvm/llvm-project/pull/68473 Fixes: #102351 (cherry picked from commit 3c9022c965b85951f30af140da591f819acef8a0) --- llvm/lib/Transforms/Utils/Local.cpp | 11 +- .../switch-branch-fold-indirectbr-102351.ll | 104 ++++++++++++++++++ 2 files changed, 113 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 7192efe3f16b9..4eb8dc1d2d615 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -1028,7 +1028,14 @@ CanRedirectPredsOfEmptyBBToSucc(BasicBlock *BB, BasicBlock *Succ, if (!BB->hasNPredecessorsOrMore(2)) return false; - // Get single common predecessors of both BB and Succ + if (any_of(BBPreds, [](const BasicBlock *Pred) { + return isa(Pred->begin()) && + isa(Pred->getTerminator()); + })) + return false; + + // Get the single common predecessor of both BB and Succ. Return false + // when there are more than one common predecessors. for (BasicBlock *SuccPred : SuccPreds) { if (BBPreds.count(SuccPred)) { if (CommonPred) @@ -1133,7 +1140,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, bool BBKillable = CanPropagatePredecessorsForPHIs(BB, Succ, BBPreds); - // Even if we can not fold bB into Succ, we may be able to redirect the + // Even if we can not fold BB into Succ, we may be able to redirect the // predecessors of BB to Succ. bool BBPhisMergeable = BBKillable || diff --git a/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll b/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll new file mode 100644 index 0000000000000..03aee68fa4248 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll @@ -0,0 +1,104 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define dso_local noundef i32 @main() { +; CHECK-LABEL: define dso_local noundef i32 @main() { +; CHECK-NEXT: [[BB:.*]]: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x ptr], align 16 +; CHECK-NEXT: store ptr blockaddress(@main, %[[BB4:.*]]), ptr [[ALLOCA]], align 16, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr inbounds [2 x ptr], ptr [[ALLOCA]], i64 0, i64 1 +; CHECK-NEXT: store ptr blockaddress(@main, %[[BB10:.*]]), ptr [[GETELEMENTPTR]], align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: br label %[[BB1:.*]] +; CHECK: [[BB1]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[PHI8:%.*]], %[[BB7:.*]] ] +; CHECK-NEXT: [[PHI2:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[PHI9:%.*]], %[[BB7]] ] +; CHECK-NEXT: switch i32 [[PHI]], label %[[BB7]] [ +; CHECK-NEXT: i32 0, label %[[BB12:.*]] +; CHECK-NEXT: i32 1, label %[[BB4]] +; CHECK-NEXT: i32 2, label %[[BB6:.*]] +; CHECK-NEXT: ] +; CHECK: [[BB4]]: +; CHECK-NEXT: [[PHI5:%.*]] = phi i32 [ [[PHI13:%.*]], %[[BB12]] ], [ [[PHI2]], %[[BB1]] ] +; CHECK-NEXT: br label %[[BB7]] +; CHECK: [[BB6]]: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @foo(i32 noundef [[PHI2]]) +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[PHI2]], 1 +; CHECK-NEXT: br label %[[BB12]] +; CHECK: [[BB7]]: +; CHECK-NEXT: [[PHI8]] = phi i32 [ [[PHI]], %[[BB1]] ], [ 2, %[[BB4]] ] +; CHECK-NEXT: [[PHI9]] = phi i32 [ [[PHI2]], %[[BB1]] ], [ [[PHI5]], %[[BB4]] ] +; CHECK-NEXT: br label %[[BB1]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: [[BB10]]: +; CHECK-NEXT: [[CALL11:%.*]] = call i32 @foo(i32 noundef [[PHI13]]) +; CHECK-NEXT: ret i32 0 +; CHECK: [[BB12]]: +; CHECK-NEXT: [[PHI13]] = phi i32 [ [[ADD]], %[[BB6]] ], [ [[PHI2]], %[[BB1]] ] +; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[PHI13]] to i64 +; CHECK-NEXT: [[GETELEMENTPTR14:%.*]] = getelementptr inbounds [2 x ptr], ptr [[ALLOCA]], i64 0, i64 [[SEXT]] +; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[GETELEMENTPTR14]], align 8, !tbaa [[TBAA0]] +; CHECK-NEXT: indirectbr ptr [[LOAD]], [label %[[BB4]], label %bb10] +; +bb: + %alloca = alloca [2 x ptr], align 16 + store ptr blockaddress(@main, %bb4), ptr %alloca, align 16, !tbaa !0 + %getelementptr = getelementptr inbounds [2 x ptr], ptr %alloca, i64 0, i64 1 + store ptr blockaddress(@main, %bb10), ptr %getelementptr, align 8, !tbaa !0 + br label %bb1 + +bb1: ; preds = %bb7, %bb + %phi = phi i32 [ 0, %bb ], [ %phi8, %bb7 ] + %phi2 = phi i32 [ 0, %bb ], [ %phi9, %bb7 ] + switch i32 %phi, label %bb7 [ + i32 0, label %bb3 + i32 1, label %bb4 + i32 2, label %bb6 + ] + +bb3: ; preds = %bb1 + br label %bb12 + +bb4: ; preds = %bb12, %bb1 + %phi5 = phi i32 [ %phi13, %bb12 ], [ %phi2, %bb1 ] + br label %bb7 + +bb6: ; preds = %bb1 + %call = call i32 @foo(i32 noundef %phi2) + %add = add nsw i32 %phi2, 1 + br label %bb12 + +bb7: ; preds = %bb4, %bb1 + %phi8 = phi i32 [ %phi, %bb1 ], [ 2, %bb4 ] + %phi9 = phi i32 [ %phi2, %bb1 ], [ %phi5, %bb4 ] + br label %bb1, !llvm.loop !4 + +bb10: ; preds = %bb12 + %call11 = call i32 @foo(i32 noundef %phi13) + ret i32 0 + +bb12: ; preds = %bb6, %bb3 + %phi13 = phi i32 [ %add, %bb6 ], [ %phi2, %bb3 ] + %sext = sext i32 %phi13 to i64 + %getelementptr14 = getelementptr inbounds [2 x ptr], ptr %alloca, i64 0, i64 %sext + %load = load ptr, ptr %getelementptr14, align 8, !tbaa !0 + indirectbr ptr %load, [label %bb4, label %bb10] +} + +declare i32 @foo(i32) + +!0 = !{!1, !1, i64 0} +!1 = !{!"any pointer", !2, i64 0} +!2 = !{!"omnipotent char", !3, i64 0} +!3 = !{!"Simple C++ TBAA"} +!4 = !{!5, !5, i64 0} +!5 = !{!"int", !2, i64 0} +;. +; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} +; CHECK: [[META1]] = !{!"any pointer", [[META2:![0-9]+]], i64 0} +; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} +; CHECK: [[META3]] = !{!"Simple C++ TBAA"} +; CHECK: [[LOOP4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} +; CHECK: [[META5]] = !{!"int", [[META2]], i64 0} +;. From 321f0dd2008160b674c010425133bebc586392e7 Mon Sep 17 00:00:00 2001 From: AdityaK Date: Tue, 26 Nov 2024 14:57:28 -0800 Subject: [PATCH 20/38] Bail out jump threading on indirect branches only (#117778) Remove check for PHI in pred as pointed out in #103688 Reduced the testcase to remove redundant phi in pred Fixes: #102351 (cherry picked from commit 39601a6e5484de183bf525b7d0624e7890ccd8ab) --- llvm/lib/Transforms/Utils/Local.cpp | 3 +- .../switch-branch-fold-indirectbr-102351.ll | 141 ++++++++---------- 2 files changed, 60 insertions(+), 84 deletions(-) diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp index 4eb8dc1d2d615..f68cbf62b9825 100644 --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -1029,8 +1029,7 @@ CanRedirectPredsOfEmptyBBToSucc(BasicBlock *BB, BasicBlock *Succ, return false; if (any_of(BBPreds, [](const BasicBlock *Pred) { - return isa(Pred->begin()) && - isa(Pred->getTerminator()); + return isa(Pred->getTerminator()); })) return false; diff --git a/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll b/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll index 03aee68fa4248..d3713be8358db 100644 --- a/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll +++ b/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll @@ -1,104 +1,81 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --prefix-filecheck-ir-name pref --version 5 ; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -define dso_local noundef i32 @main() { -; CHECK-LABEL: define dso_local noundef i32 @main() { +define i32 @foo.1(i32 %arg, ptr %arg1) { +; CHECK-LABEL: define i32 @foo.1( +; CHECK-SAME: i32 [[ARG:%.*]], ptr [[ARG1:%.*]]) { ; CHECK-NEXT: [[BB:.*]]: ; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x ptr], align 16 -; CHECK-NEXT: store ptr blockaddress(@main, %[[BB4:.*]]), ptr [[ALLOCA]], align 16, !tbaa [[TBAA0:![0-9]+]] +; CHECK-NEXT: store ptr blockaddress(@foo.1, %[[BB8:.*]]), ptr [[ALLOCA]], align 16 ; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr inbounds [2 x ptr], ptr [[ALLOCA]], i64 0, i64 1 -; CHECK-NEXT: store ptr blockaddress(@main, %[[BB10:.*]]), ptr [[GETELEMENTPTR]], align 8, !tbaa [[TBAA0]] -; CHECK-NEXT: br label %[[BB1:.*]] -; CHECK: [[BB1]]: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[PHI8:%.*]], %[[BB7:.*]] ] -; CHECK-NEXT: [[PHI2:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[PHI9:%.*]], %[[BB7]] ] -; CHECK-NEXT: switch i32 [[PHI]], label %[[BB7]] [ -; CHECK-NEXT: i32 0, label %[[BB12:.*]] -; CHECK-NEXT: i32 1, label %[[BB4]] -; CHECK-NEXT: i32 2, label %[[BB6:.*]] +; CHECK-NEXT: store ptr blockaddress(@foo.1, %[[BB16:.*]]), ptr [[GETELEMENTPTR]], align 8 +; CHECK-NEXT: br label %[[PREFBB2:.*]] +; CHECK: [[PREFBB2]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[PHI14:%.*]], %[[BB13:.*]] ] +; CHECK-NEXT: [[PHI3:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[PHI15:%.*]], %[[BB13]] ] +; CHECK-NEXT: switch i32 [[PHI]], label %[[BB13]] [ +; CHECK-NEXT: i32 0, label %[[PREFBB18:.*]] +; CHECK-NEXT: i32 1, label %[[BB8]] +; CHECK-NEXT: i32 2, label %[[PREFBB11:.*]] ; CHECK-NEXT: ] -; CHECK: [[BB4]]: -; CHECK-NEXT: [[PHI5:%.*]] = phi i32 [ [[PHI13:%.*]], %[[BB12]] ], [ [[PHI2]], %[[BB1]] ] -; CHECK-NEXT: br label %[[BB7]] -; CHECK: [[BB6]]: -; CHECK-NEXT: [[CALL:%.*]] = call i32 @foo(i32 noundef [[PHI2]]) -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[PHI2]], 1 -; CHECK-NEXT: br label %[[BB12]] -; CHECK: [[BB7]]: -; CHECK-NEXT: [[PHI8]] = phi i32 [ [[PHI]], %[[BB1]] ], [ 2, %[[BB4]] ] -; CHECK-NEXT: [[PHI9]] = phi i32 [ [[PHI2]], %[[BB1]] ], [ [[PHI5]], %[[BB4]] ] -; CHECK-NEXT: br label %[[BB1]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: [[BB10]]: -; CHECK-NEXT: [[CALL11:%.*]] = call i32 @foo(i32 noundef [[PHI13]]) +; CHECK: [[BB8]]: +; CHECK-NEXT: [[PHI10:%.*]] = phi i32 [ [[ARG]], %[[PREFBB18]] ], [ [[PHI3]], %[[PREFBB2]] ] +; CHECK-NEXT: br label %[[BB13]] +; CHECK: [[PREFBB11]]: +; CHECK-NEXT: [[CALL:%.*]] = call i32 @wombat(i32 noundef [[PHI3]]) +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[PHI3]], 1 +; CHECK-NEXT: br label %[[PREFBB18]] +; CHECK: [[BB13]]: +; CHECK-NEXT: [[PHI14]] = phi i32 [ [[PHI]], %[[PREFBB2]] ], [ 2, %[[BB8]] ] +; CHECK-NEXT: [[PHI15]] = phi i32 [ [[PHI3]], %[[PREFBB2]] ], [ [[PHI10]], %[[BB8]] ] +; CHECK-NEXT: br label %[[PREFBB2]] +; CHECK: [[BB16]]: +; CHECK-NEXT: [[CALL17:%.*]] = call i32 @wombat(i32 noundef [[ARG]]) ; CHECK-NEXT: ret i32 0 -; CHECK: [[BB12]]: -; CHECK-NEXT: [[PHI13]] = phi i32 [ [[ADD]], %[[BB6]] ], [ [[PHI2]], %[[BB1]] ] -; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[PHI13]] to i64 -; CHECK-NEXT: [[GETELEMENTPTR14:%.*]] = getelementptr inbounds [2 x ptr], ptr [[ALLOCA]], i64 0, i64 [[SEXT]] -; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[GETELEMENTPTR14]], align 8, !tbaa [[TBAA0]] -; CHECK-NEXT: indirectbr ptr [[LOAD]], [label %[[BB4]], label %bb10] +; CHECK: [[PREFBB18]]: +; CHECK-NEXT: [[LOAD:%.*]] = load ptr, ptr [[ARG1]], align 8 +; CHECK-NEXT: indirectbr ptr [[LOAD]], [label %[[BB8]], label %bb16] ; bb: %alloca = alloca [2 x ptr], align 16 - store ptr blockaddress(@main, %bb4), ptr %alloca, align 16, !tbaa !0 + store ptr blockaddress(@foo.1, %bb8), ptr %alloca, align 16 %getelementptr = getelementptr inbounds [2 x ptr], ptr %alloca, i64 0, i64 1 - store ptr blockaddress(@main, %bb10), ptr %getelementptr, align 8, !tbaa !0 - br label %bb1 + store ptr blockaddress(@foo.1, %bb16), ptr %getelementptr, align 8 + br label %bb2 -bb1: ; preds = %bb7, %bb - %phi = phi i32 [ 0, %bb ], [ %phi8, %bb7 ] - %phi2 = phi i32 [ 0, %bb ], [ %phi9, %bb7 ] - switch i32 %phi, label %bb7 [ - i32 0, label %bb3 - i32 1, label %bb4 - i32 2, label %bb6 +bb2: ; preds = %bb13, %bb + %phi = phi i32 [ 0, %bb ], [ %phi14, %bb13 ] + %phi3 = phi i32 [ 0, %bb ], [ %phi15, %bb13 ] + switch i32 %phi, label %bb13 [ + i32 0, label %bb5 + i32 1, label %bb8 + i32 2, label %bb11 ] -bb3: ; preds = %bb1 - br label %bb12 +bb5: ; preds = %bb2 + br label %bb18 -bb4: ; preds = %bb12, %bb1 - %phi5 = phi i32 [ %phi13, %bb12 ], [ %phi2, %bb1 ] - br label %bb7 +bb8: ; preds = %bb18, %bb2 + %phi10 = phi i32 [ %arg, %bb18 ], [ %phi3, %bb2 ] + br label %bb13 -bb6: ; preds = %bb1 - %call = call i32 @foo(i32 noundef %phi2) - %add = add nsw i32 %phi2, 1 - br label %bb12 +bb11: ; preds = %bb2 + %call = call i32 @wombat(i32 noundef %phi3) + %add = add nsw i32 %phi3, 1 + br label %bb18 -bb7: ; preds = %bb4, %bb1 - %phi8 = phi i32 [ %phi, %bb1 ], [ 2, %bb4 ] - %phi9 = phi i32 [ %phi2, %bb1 ], [ %phi5, %bb4 ] - br label %bb1, !llvm.loop !4 +bb13: ; preds = %bb8, %bb2 + %phi14 = phi i32 [ %phi, %bb2 ], [ 2, %bb8 ] + %phi15 = phi i32 [ %phi3, %bb2 ], [ %phi10, %bb8 ] + br label %bb2 -bb10: ; preds = %bb12 - %call11 = call i32 @foo(i32 noundef %phi13) +bb16: ; preds = %bb18 + %call17 = call i32 @wombat(i32 noundef %arg) ret i32 0 -bb12: ; preds = %bb6, %bb3 - %phi13 = phi i32 [ %add, %bb6 ], [ %phi2, %bb3 ] - %sext = sext i32 %phi13 to i64 - %getelementptr14 = getelementptr inbounds [2 x ptr], ptr %alloca, i64 0, i64 %sext - %load = load ptr, ptr %getelementptr14, align 8, !tbaa !0 - indirectbr ptr %load, [label %bb4, label %bb10] +bb18: ; preds = %bb11, %bb5 + %load = load ptr, ptr %arg1, align 8 + indirectbr ptr %load, [label %bb8, label %bb16] } -declare i32 @foo(i32) - -!0 = !{!1, !1, i64 0} -!1 = !{!"any pointer", !2, i64 0} -!2 = !{!"omnipotent char", !3, i64 0} -!3 = !{!"Simple C++ TBAA"} -!4 = !{!5, !5, i64 0} -!5 = !{!"int", !2, i64 0} -;. -; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0} -; CHECK: [[META1]] = !{!"any pointer", [[META2:![0-9]+]], i64 0} -; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0} -; CHECK: [[META3]] = !{!"Simple C++ TBAA"} -; CHECK: [[LOOP4]] = !{[[META5:![0-9]+]], [[META5]], i64 0} -; CHECK: [[META5]] = !{!"int", [[META2]], i64 0} -;. +declare i32 @wombat(i32) From e6bcdea700dc7a5b1cf3a3f5a4eaa40d1234e220 Mon Sep 17 00:00:00 2001 From: Anutosh Bhat Date: Fri, 29 Nov 2024 15:31:02 +0530 Subject: [PATCH 21/38] [clang-repl] Fix generation of wasm binaries while running clang-repl in browser (#117978) Co-authored-by: Vassil Vassilev (cherry picked from commit a174aa1e416c4e27945f5a8c646b119126dc8441) --- clang/lib/Interpreter/CMakeLists.txt | 2 + clang/lib/Interpreter/Interpreter.cpp | 1 + clang/lib/Interpreter/Wasm.cpp | 57 ++++++++++++++++++++------- 3 files changed, 46 insertions(+), 14 deletions(-) diff --git a/clang/lib/Interpreter/CMakeLists.txt b/clang/lib/Interpreter/CMakeLists.txt index 0a2d60757c216..85efa4b0f984f 100644 --- a/clang/lib/Interpreter/CMakeLists.txt +++ b/clang/lib/Interpreter/CMakeLists.txt @@ -15,6 +15,7 @@ set(LLVM_LINK_COMPONENTS if (EMSCRIPTEN AND "lld" IN_LIST LLVM_ENABLE_PROJECTS) set(WASM_SRC Wasm.cpp) set(WASM_LINK lldWasm) + set(COMMON_LINK lldCommon) endif() add_clang_library(clangInterpreter @@ -45,6 +46,7 @@ add_clang_library(clangInterpreter clangSema clangSerialization ${WASM_LINK} + ${COMMON_LINK} ) if ((MINGW OR CYGWIN) AND BUILD_SHARED_LIBS) diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index c0b8bfebb4343..985d0b7c0ef31 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -193,6 +193,7 @@ IncrementalCompilerBuilder::CreateCpp() { Argv.push_back("-target"); Argv.push_back("wasm32-unknown-emscripten"); Argv.push_back("-shared"); + Argv.push_back("-fvisibility=default"); #endif Argv.insert(Argv.end(), UserArgs.begin(), UserArgs.end()); diff --git a/clang/lib/Interpreter/Wasm.cpp b/clang/lib/Interpreter/Wasm.cpp index 79efbaa03982d..aa10b160ccf84 100644 --- a/clang/lib/Interpreter/Wasm.cpp +++ b/clang/lib/Interpreter/Wasm.cpp @@ -23,6 +23,31 @@ #include namespace lld { +enum Flavor { + Invalid, + Gnu, // -flavor gnu + MinGW, // -flavor gnu MinGW + WinLink, // -flavor link + Darwin, // -flavor darwin + Wasm, // -flavor wasm +}; + +using Driver = bool (*)(llvm::ArrayRef, llvm::raw_ostream &, + llvm::raw_ostream &, bool, bool); + +struct DriverDef { + Flavor f; + Driver d; +}; + +struct Result { + int retCode; + bool canRunAgain; +}; + +Result lldMain(llvm::ArrayRef args, llvm::raw_ostream &stdoutOS, + llvm::raw_ostream &stderrOS, llvm::ArrayRef drivers); + namespace wasm { bool link(llvm::ArrayRef args, llvm::raw_ostream &stdoutOS, llvm::raw_ostream &stderrOS, bool exitEarly, bool disableOutput); @@ -51,13 +76,14 @@ llvm::Error WasmIncrementalExecutor::addModule(PartialTranslationUnit &PTU) { llvm::TargetMachine *TargetMachine = Target->createTargetMachine( PTU.TheModule->getTargetTriple(), "", "", TO, llvm::Reloc::Model::PIC_); PTU.TheModule->setDataLayout(TargetMachine->createDataLayout()); - std::string OutputFileName = PTU.TheModule->getName().str() + ".wasm"; + std::string ObjectFileName = PTU.TheModule->getName().str() + ".o"; + std::string BinaryFileName = PTU.TheModule->getName().str() + ".wasm"; std::error_code Error; - llvm::raw_fd_ostream OutputFile(llvm::StringRef(OutputFileName), Error); + llvm::raw_fd_ostream ObjectFileOutput(llvm::StringRef(ObjectFileName), Error); llvm::legacy::PassManager PM; - if (TargetMachine->addPassesToEmitFile(PM, OutputFile, nullptr, + if (TargetMachine->addPassesToEmitFile(PM, ObjectFileOutput, nullptr, llvm::CodeGenFileType::ObjectFile)) { return llvm::make_error( "Wasm backend cannot produce object.", llvm::inconvertibleErrorCode()); @@ -69,27 +95,30 @@ llvm::Error WasmIncrementalExecutor::addModule(PartialTranslationUnit &PTU) { llvm::inconvertibleErrorCode()); } - OutputFile.close(); + ObjectFileOutput.close(); std::vector LinkerArgs = {"wasm-ld", "-shared", "--import-memory", - "--no-entry", - "--export-all", "--experimental-pic", "--stack-first", "--allow-undefined", - OutputFileName.c_str(), + ObjectFileName.c_str(), "-o", - OutputFileName.c_str()}; - int Result = - lld::wasm::link(LinkerArgs, llvm::outs(), llvm::errs(), false, false); - if (!Result) + BinaryFileName.c_str()}; + + const lld::DriverDef WasmDriver = {lld::Flavor::Wasm, &lld::wasm::link}; + std::vector WasmDriverArgs; + WasmDriverArgs.push_back(WasmDriver); + lld::Result Result = + lld::lldMain(LinkerArgs, llvm::outs(), llvm::errs(), WasmDriverArgs); + + if (Result.retCode) return llvm::make_error( "Failed to link incremental module", llvm::inconvertibleErrorCode()); void *LoadedLibModule = - dlopen(OutputFileName.c_str(), RTLD_NOW | RTLD_GLOBAL); + dlopen(BinaryFileName.c_str(), RTLD_NOW | RTLD_GLOBAL); if (LoadedLibModule == nullptr) { llvm::errs() << dlerror() << '\n'; return llvm::make_error( @@ -109,7 +138,7 @@ llvm::Error WasmIncrementalExecutor::runCtors() const { return llvm::Error::success(); } -llvm::Error WasmIncrementalExecutor::cleanUp() const { +llvm::Error WasmIncrementalExecutor::cleanUp() { // Can't call cleanUp through IncrementalExecutor as it // tries to deinitialize JIT which hasn't been initialized return llvm::Error::success(); @@ -117,4 +146,4 @@ llvm::Error WasmIncrementalExecutor::cleanUp() const { WasmIncrementalExecutor::~WasmIncrementalExecutor() = default; -} // namespace clang +} // namespace clang \ No newline at end of file From cf55b9cb8af3ce3fa776e46e6ad71155a7d38c75 Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Tue, 13 Aug 2024 00:39:14 -0700 Subject: [PATCH 22/38] [AArch64][Darwin][SME] Don't try to save VG to the stack for unwinding. On Darwin we don't have any hardware that has SVE support, only SME. Therefore we don't need to save VG for unwinders and can safely omit it. This also fixes crashes introduced since this feature landed since Darwin's compact unwind code can't handle the presence of VG anyway. rdar://131072344 --- .../Target/AArch64/AArch64FrameLowering.cpp | 24 ++- .../Target/AArch64/AArch64ISelLowering.cpp | 19 ++- .../CodeGen/AArch64/sme-darwin-no-sve-vg.ll | 161 ++++++++++++++++++ 3 files changed, 189 insertions(+), 15 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 87e057a468afd..83d9dd1725973 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1394,6 +1394,18 @@ bool requiresGetVGCall(MachineFunction &MF) { !MF.getSubtarget().hasSVE(); } +static bool requiresSaveVG(MachineFunction &MF) { + AArch64FunctionInfo *AFI = MF.getInfo(); + // For Darwin platforms we don't save VG for non-SVE functions, even if SME + // is enabled with streaming mode changes. + if (!AFI->hasStreamingModeChanges()) + return false; + auto &ST = MF.getSubtarget(); + if (ST.isTargetDarwin()) + return ST.hasSVE(); + return true; +} + bool isVGInstruction(MachineBasicBlock::iterator MBBI) { unsigned Opc = MBBI->getOpcode(); if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI || @@ -1430,8 +1442,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec( // functions, we need to do this for both the streaming and non-streaming // vector length. Move past these instructions if necessary. MachineFunction &MF = *MBB.getParent(); - AArch64FunctionInfo *AFI = MF.getInfo(); - if (AFI->hasStreamingModeChanges()) + if (requiresSaveVG(MF)) while (isVGInstruction(MBBI)) ++MBBI; @@ -1937,7 +1948,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) && !IsSVECalleeSave(MBBI)) { // Move past instructions generated to calculate VG - if (AFI->hasStreamingModeChanges()) + if (requiresSaveVG(MF)) while (isVGInstruction(MBBI)) ++MBBI; @@ -3720,7 +3731,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF, // non-streaming VG value. const Function &F = MF.getFunction(); SMEAttrs Attrs(F); - if (AFI->hasStreamingModeChanges()) { + if (requiresSaveVG(MF)) { if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface()) CSStackSize += 16; else @@ -3873,7 +3884,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots( } // Insert VG into the list of CSRs, immediately before LR if saved. - if (AFI->hasStreamingModeChanges()) { + if (requiresSaveVG(MF)) { std::vector VGSaves; SMEAttrs Attrs(MF.getFunction()); @@ -4602,10 +4613,9 @@ MachineBasicBlock::iterator emitVGSaveRestore(MachineBasicBlock::iterator II, void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced( MachineFunction &MF, RegScavenger *RS = nullptr) const { - AArch64FunctionInfo *AFI = MF.getInfo(); for (auto &BB : MF) for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) { - if (AFI->hasStreamingModeChanges()) + if (requiresSaveVG(MF)) II = emitVGSaveRestore(II, this); if (StackTaggingMergeSetTag) II = tryMergeAdjacentSTG(II, this, RS); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 62078822c89b1..ef2789e96213b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8732,10 +8732,11 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, SDValue InGlue; if (RequiresSMChange) { - - Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL, - DAG.getVTList(MVT::Other, MVT::Glue), Chain); - InGlue = Chain.getValue(1); + if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) { + Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL, + DAG.getVTList(MVT::Other, MVT::Glue), Chain); + InGlue = Chain.getValue(1); + } SDValue NewChain = changeStreamingMode( DAG, DL, CalleeAttrs.hasStreamingInterface(), Chain, InGlue, @@ -8914,11 +8915,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, Result = changeStreamingMode( DAG, DL, !CalleeAttrs.hasStreamingInterface(), Result, InGlue, getSMCondition(CallerAttrs, CalleeAttrs), PStateSM); - InGlue = Result.getValue(1); - Result = - DAG.getNode(AArch64ISD::VG_RESTORE, DL, - DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue}); + if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) { + InGlue = Result.getValue(1); + Result = + DAG.getNode(AArch64ISD::VG_RESTORE, DL, + DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue}); + } } if (CallerAttrs.requiresEnablingZAAfterCall(CalleeAttrs)) diff --git a/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll b/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll new file mode 100644 index 0000000000000..36a300fea25e5 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll @@ -0,0 +1,161 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -o - %s | FileCheck %s +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "arm64-apple-macosx14.0.0" + +; Check we don't crash on Darwin and that we don't try to save VG +; when only SME (and not SVE) is enabled. + +; Function Attrs: mustprogress norecurse nounwind ssp uwtable(sync) +define noundef i32 @main() local_unnamed_addr #0 { +; CHECK-LABEL: main: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: stp d15, d14, [sp, #-80]! ; 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: stp d13, d12, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] ; 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] ; 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset b8, -24 +; CHECK-NEXT: .cfi_offset b9, -32 +; CHECK-NEXT: .cfi_offset b10, -40 +; CHECK-NEXT: .cfi_offset b11, -48 +; CHECK-NEXT: .cfi_offset b12, -56 +; CHECK-NEXT: .cfi_offset b13, -64 +; CHECK-NEXT: .cfi_offset b14, -72 +; CHECK-NEXT: .cfi_offset b15, -80 +; CHECK-NEXT: smstart sm +; CHECK-NEXT: bl __ZL9sme_crashv +; CHECK-NEXT: smstop sm +; CHECK-NEXT: mov w0, #0 ; =0x0 +; CHECK-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #48] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 ; 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 +; CHECK-NEXT: .cfi_restore b10 +; CHECK-NEXT: .cfi_restore b11 +; CHECK-NEXT: .cfi_restore b12 +; CHECK-NEXT: .cfi_restore b13 +; CHECK-NEXT: .cfi_restore b14 +; CHECK-NEXT: .cfi_restore b15 +; CHECK-NEXT: ret +entry: + tail call fastcc void @_ZL9sme_crashv() #4 + ret i32 0 +} + +; Function Attrs: mustprogress norecurse nounwind ssp uwtable(sync) +define internal fastcc void @_ZL9sme_crashv() unnamed_addr #1 { +; CHECK-LABEL: _ZL9sme_crashv: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: stp d15, d14, [sp, #-96]! ; 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: stp d13, d12, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] ; 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] ; 16-byte Folded Spill +; CHECK-NEXT: stp x28, x27, [sp, #64] ; 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill +; CHECK-NEXT: add x29, sp, #80 +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset w27, -24 +; CHECK-NEXT: .cfi_offset w28, -32 +; CHECK-NEXT: .cfi_offset b8, -40 +; CHECK-NEXT: .cfi_offset b9, -48 +; CHECK-NEXT: .cfi_offset b10, -56 +; CHECK-NEXT: .cfi_offset b11, -64 +; CHECK-NEXT: .cfi_offset b12, -72 +; CHECK-NEXT: .cfi_offset b13, -80 +; CHECK-NEXT: .cfi_offset b14, -88 +; CHECK-NEXT: .cfi_offset b15, -96 +; CHECK-NEXT: .cfi_remember_state +; CHECK-NEXT: sub x9, sp, #160 +; CHECK-NEXT: and sp, x9, #0xffffffffffffff00 +; CHECK-NEXT: Lloh0: +; CHECK-NEXT: adrp x8, ___stack_chk_guard@GOTPAGE +; CHECK-NEXT: Lloh1: +; CHECK-NEXT: ldr x8, [x8, ___stack_chk_guard@GOTPAGEOFF] +; CHECK-NEXT: Lloh2: +; CHECK-NEXT: ldr x8, [x8] +; CHECK-NEXT: str x8, [sp, #152] +; CHECK-NEXT: mov z0.b, #0 ; =0x0 +; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: st1w { z0.s }, p0, [x8] +; CHECK-EMPTY: +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ldr x8, [sp, #152] +; CHECK-NEXT: Lloh3: +; CHECK-NEXT: adrp x9, ___stack_chk_guard@GOTPAGE +; CHECK-NEXT: Lloh4: +; CHECK-NEXT: ldr x9, [x9, ___stack_chk_guard@GOTPAGEOFF] +; CHECK-NEXT: Lloh5: +; CHECK-NEXT: ldr x9, [x9] +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: b.ne LBB1_2 +; CHECK-NEXT: ; %bb.1: ; %entry +; CHECK-NEXT: sub sp, x29, #80 +; CHECK-NEXT: .cfi_def_cfa wsp, 96 +; CHECK-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload +; CHECK-NEXT: ldp x28, x27, [sp, #64] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #48] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #96 ; 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore w29 +; CHECK-NEXT: .cfi_restore w27 +; CHECK-NEXT: .cfi_restore w28 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 +; CHECK-NEXT: .cfi_restore b10 +; CHECK-NEXT: .cfi_restore b11 +; CHECK-NEXT: .cfi_restore b12 +; CHECK-NEXT: .cfi_restore b13 +; CHECK-NEXT: .cfi_restore b14 +; CHECK-NEXT: .cfi_restore b15 +; CHECK-NEXT: ret +; CHECK-NEXT: LBB1_2: ; %entry +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: smstop sm +; CHECK-NEXT: bl ___stack_chk_fail +; CHECK-NEXT: smstart sm +; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh3, Lloh4, Lloh5 +; CHECK-NEXT: .loh AdrpLdrGotLdr Lloh0, Lloh1, Lloh2 +entry: + %uu = alloca [16 x float], align 256 + call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %uu) #5 + call void @llvm.memset.p0.i64(ptr noundef nonnull align 256 dereferenceable(64) %uu, i8 0, i64 64, i1 false) + call void asm sideeffect "ptrue p0.s\0Ast1w { z0.s }, p0, [$0]\0A", "r"(ptr nonnull %uu) #5 + call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %uu) #5 + ret void +} + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2 + +; Function Attrs: mustprogress nocallback nofree nounwind willreturn memory(argmem: write) +declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2 + +attributes #0 = { mustprogress norecurse nounwind ssp uwtable(sync) "stack-protector-buffer-size"="8" "target-cpu"="apple-a16" "target-features"="+sme,+sme-f64f64,+sme2" } +attributes #1 = { mustprogress norecurse nounwind ssp uwtable(sync) "aarch64_pstate_sm_enabled" "stack-protector-buffer-size"="8" "target-cpu"="apple-a16" "target-features"="+sme,+sme-f64f64,+sme2" } +attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #3 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: write) } +attributes #4 = { "aarch64_pstate_sm_enabled" "no-builtin-calloc" "no-builtin-stpcpy" } +attributes #5 = { nounwind } From 4716c4752fd56692bcca775606a2650418eb3be8 Mon Sep 17 00:00:00 2001 From: Kerry McLaughlin Date: Mon, 19 Aug 2024 10:17:10 +0100 Subject: [PATCH 23/38] [AArch64][SME] Return false from produceCompactUnwindFrame if VG save required. (#104588) The compact unwind format requires all registers are stored in pairs, so return false from produceCompactUnwindFrame if we require saving VG. --- .../Target/AArch64/AArch64FrameLowering.cpp | 3 +- .../test/CodeGen/AArch64/sme-darwin-sve-vg.ll | 55 +++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 83d9dd1725973..c09c1792e9898 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -2859,7 +2859,8 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) { return Subtarget.isTargetMachO() && !(Subtarget.getTargetLowering()->supportSwiftError() && Attrs.hasAttrSomewhere(Attribute::SwiftError)) && - MF.getFunction().getCallingConv() != CallingConv::SwiftTail; + MF.getFunction().getCallingConv() != CallingConv::SwiftTail && + !requiresSaveVG(MF); } static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, diff --git a/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll b/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll new file mode 100644 index 0000000000000..c32e9cbc05393 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll @@ -0,0 +1,55 @@ +; RUN: llc -mtriple=aarch64-darwin -mattr=+sve -mattr=+sme -verify-machineinstrs < %s | FileCheck %s + +declare void @normal_callee(); + +define void @locally_streaming_fn() #0 { +; CHECK-LABEL: locally_streaming_fn: +; CHECK: ; %bb.0: +; CHECK-NEXT: stp d15, d14, [sp, #-96]! ; 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: stp d13, d12, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: lsr x9, x9, #3 +; CHECK-NEXT: stp d11, d10, [sp, #32] ; 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] ; 16-byte Folded Spill +; CHECK-NEXT: stp x30, x9, [sp, #64] ; 16-byte Folded Spill +; CHECK-NEXT: cntd x9 +; CHECK-NEXT: str x9, [sp, #80] ; 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset vg, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: .cfi_offset b8, -40 +; CHECK-NEXT: .cfi_offset b9, -48 +; CHECK-NEXT: .cfi_offset b10, -56 +; CHECK-NEXT: .cfi_offset b11, -64 +; CHECK-NEXT: .cfi_offset b12, -72 +; CHECK-NEXT: .cfi_offset b13, -80 +; CHECK-NEXT: .cfi_offset b14, -88 +; CHECK-NEXT: .cfi_offset b15, -96 +; CHECK-NEXT: smstart sm +; CHECK-NEXT: .cfi_offset vg, -24 +; CHECK-NEXT: smstop sm +; CHECK-NEXT: bl _normal_callee +; CHECK-NEXT: smstart sm +; CHECK-NEXT: .cfi_restore vg +; CHECK-NEXT: smstop sm +; CHECK-NEXT: ldp d9, d8, [sp, #48] ; 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] ; 8-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #96 ; 16-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .cfi_restore b8 +; CHECK-NEXT: .cfi_restore b9 +; CHECK-NEXT: .cfi_restore b10 +; CHECK-NEXT: .cfi_restore b11 +; CHECK-NEXT: .cfi_restore b12 +; CHECK-NEXT: .cfi_restore b13 +; CHECK-NEXT: .cfi_restore b14 +; CHECK-NEXT: .cfi_restore b15 +; CHECK-NEXT: ret + call void @normal_callee() + ret void +} + +attributes #0 = { "aarch64_pstate_sm_body" uwtable(async) } From 876d0501d312b7303423fc4d3c388174a1465ca5 Mon Sep 17 00:00:00 2001 From: Sander de Smalen Date: Tue, 15 Oct 2024 11:56:40 +0100 Subject: [PATCH 24/38] [AArch64][SME] Fix iterator to fixupCalleeSaveRestoreStackOffset (#110855) The iterator passed to `fixupCalleeSaveRestoreStackOffset` may be incorrect when it tries to skip over the instructions that get the current value of 'vg', when there is a 'rdsvl' instruction straight after the prologue. That's because it doesn't check that the instruction is still a 'frame-setup' instruction. --- .../Target/AArch64/AArch64FrameLowering.cpp | 9 ++--- llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll | 38 +++++++++++++++++++ 2 files changed, 41 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index c09c1792e9898..c183ffd384c22 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1947,12 +1947,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // pointer bump above. while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) && !IsSVECalleeSave(MBBI)) { - // Move past instructions generated to calculate VG - if (requiresSaveVG(MF)) - while (isVGInstruction(MBBI)) - ++MBBI; - - if (CombineSPBump) + if (CombineSPBump && + // Only fix-up frame-setup load/store instructions. + (!requiresSaveVG(MF) || !isVGInstruction(MBBI))) fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(), NeedsWinCFI, &HasWinCFI); ++MBBI; diff --git a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll index fa8f92cb0a2c9..38666a05c20f8 100644 --- a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll +++ b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll @@ -1102,6 +1102,44 @@ define void @streaming_compatible_no_sve(i32 noundef %x) #4 { ret void } +; The algorithm that fixes up the offsets of the callee-save/restore +; instructions must jump over the instructions that instantiate the current +; 'VG' value. We must make sure that it doesn't consider any RDSVL in +; user-code as if it is part of the frame-setup when doing so. +define void @test_rdsvl_right_after_prologue(i64 %x0) nounwind { +; NO-SVE-CHECK-LABEL: test_rdsvl_right_after_prologue: +; NO-SVE-CHECK: // %bb.0: +; NO-SVE-CHECK-NEXT: stp d15, d14, [sp, #-96]! // 16-byte Folded Spill +; NO-SVE-CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; NO-SVE-CHECK-NEXT: mov x9, x0 +; NO-SVE-CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; NO-SVE-CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; NO-SVE-CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; NO-SVE-CHECK-NEXT: bl __arm_get_current_vg +; NO-SVE-CHECK-NEXT: str x0, [sp, #80] // 8-byte Folded Spill +; NO-SVE-CHECK-NEXT: mov x0, x9 +; NO-SVE-CHECK-NEXT: rdsvl x8, #1 +; NO-SVE-CHECK-NEXT: add x29, sp, #64 +; NO-SVE-CHECK-NEXT: lsr x8, x8, #3 +; NO-SVE-CHECK-NEXT: mov x1, x0 +; NO-SVE-CHECK-NEXT: smstart sm +; NO-SVE-CHECK-NEXT: mov x0, x8 +; NO-SVE-CHECK-NEXT: bl bar +; NO-SVE-CHECK-NEXT: smstop sm +; NO-SVE-CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; NO-SVE-CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; NO-SVE-CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; NO-SVE-CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; NO-SVE-CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; NO-SVE-CHECK-NEXT: ret + %some_alloc = alloca i64, align 8 + %rdsvl = tail call i64 @llvm.aarch64.sme.cntsd() + call void @bar(i64 %rdsvl, i64 %x0) "aarch64_pstate_sm_enabled" + ret void +} + +declare void @bar(i64, i64) + ; Ensure we still emit async unwind information with -fno-asynchronous-unwind-tables ; if the function contains a streaming-mode change. From ab4b5a2db582958af1ee308a790cfdb42bd24720 Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Fri, 29 Nov 2024 16:42:44 -0600 Subject: [PATCH 25/38] [clang] recognize hexagon-*-ld.lld variants (#117338) If we create a cross toolchain with a ${triple}-ld.lld symlink, clang finds that symlink and when it uses it, it's not recognized as "lld". Let's resolve that symlink and consider it when determining lld-ness. For example, clang provides hexagon-link specific link arguments such as `-mcpu=hexagonv65` and `-march=hexagon` when hexagon-unknown-linux-musl-ld.lld is found. lld rejects this with the following error: hexagon-unknown-linux-musl-ld.lld: error: unknown emulation: cpu=hexagonv65 (cherry picked from commit 2dc0de753b6df83e35f3d98e0e6a26c95e3399c0) --- clang/lib/Driver/ToolChains/Hexagon.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp index 29781399cbab4..be7851adecea6 100644 --- a/clang/lib/Driver/ToolChains/Hexagon.cpp +++ b/clang/lib/Driver/ToolChains/Hexagon.cpp @@ -294,9 +294,10 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA, bool IncStartFiles = !Args.hasArg(options::OPT_nostartfiles); bool IncDefLibs = !Args.hasArg(options::OPT_nodefaultlibs); bool UseG0 = false; - const char *Exec = Args.MakeArgString(HTC.GetLinkerPath()); - bool UseLLD = (llvm::sys::path::filename(Exec).equals_insensitive("ld.lld") || - llvm::sys::path::stem(Exec).equals_insensitive("ld.lld")); + bool UseLLD = false; + const char *Exec = Args.MakeArgString(HTC.GetLinkerPath(&UseLLD)); + UseLLD = UseLLD || llvm::sys::path::filename(Exec).ends_with("ld.lld") || + llvm::sys::path::stem(Exec).ends_with("ld.lld"); bool UseShared = IsShared && !IsStatic; StringRef CpuVer = toolchains::HexagonToolChain::GetTargetCPUVersion(Args); From c9e72b3945f1672f0507539ce903c9696b3479ba Mon Sep 17 00:00:00 2001 From: Tobias Hieta Date: Fri, 13 Dec 2024 14:49:03 +0100 Subject: [PATCH 26/38] Bump version to 19.1.6 --- cmake/Modules/LLVMVersion.cmake | 2 +- libcxx/include/__config | 2 +- llvm/utils/gn/secondary/llvm/version.gni | 2 +- llvm/utils/lit/lit/__init__.py | 2 +- llvm/utils/mlgo-utils/mlgo/__init__.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake index 9b39550118c49..93d36736439b1 100644 --- a/cmake/Modules/LLVMVersion.cmake +++ b/cmake/Modules/LLVMVersion.cmake @@ -7,7 +7,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR) set(LLVM_VERSION_MINOR 1) endif() if(NOT DEFINED LLVM_VERSION_PATCH) - set(LLVM_VERSION_PATCH 5) + set(LLVM_VERSION_PATCH 6) endif() if(NOT DEFINED LLVM_VERSION_SUFFIX) set(LLVM_VERSION_SUFFIX) diff --git a/libcxx/include/__config b/libcxx/include/__config index 33e0043136fee..e97669bca411e 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -27,7 +27,7 @@ // _LIBCPP_VERSION represents the version of libc++, which matches the version of LLVM. // Given a LLVM release LLVM XX.YY.ZZ (e.g. LLVM 17.0.1 == 17.00.01), _LIBCPP_VERSION is // defined to XXYYZZ. -# define _LIBCPP_VERSION 190105 +# define _LIBCPP_VERSION 190106 # define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y # define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y) diff --git a/llvm/utils/gn/secondary/llvm/version.gni b/llvm/utils/gn/secondary/llvm/version.gni index c32a040dcba67..c46d2abdb8ef2 100644 --- a/llvm/utils/gn/secondary/llvm/version.gni +++ b/llvm/utils/gn/secondary/llvm/version.gni @@ -1,4 +1,4 @@ llvm_version_major = 19 llvm_version_minor = 1 -llvm_version_patch = 5 +llvm_version_patch = 6 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch" diff --git a/llvm/utils/lit/lit/__init__.py b/llvm/utils/lit/lit/__init__.py index 8557e5a061d1d..ee0a3b2240e1e 100644 --- a/llvm/utils/lit/lit/__init__.py +++ b/llvm/utils/lit/lit/__init__.py @@ -2,7 +2,7 @@ __author__ = "Daniel Dunbar" __email__ = "daniel@minormatter.com" -__versioninfo__ = (19, 1, 5) +__versioninfo__ = (19, 1, 6) __version__ = ".".join(str(v) for v in __versioninfo__) + "dev" __all__ = [] diff --git a/llvm/utils/mlgo-utils/mlgo/__init__.py b/llvm/utils/mlgo-utils/mlgo/__init__.py index e2f8ca88d91ec..cec9ca8b2f648 100644 --- a/llvm/utils/mlgo-utils/mlgo/__init__.py +++ b/llvm/utils/mlgo-utils/mlgo/__init__.py @@ -4,7 +4,7 @@ from datetime import timezone, datetime -__versioninfo__ = (19, 1, 5) +__versioninfo__ = (19, 1, 6) __version__ = ( ".".join(str(v) for v in __versioninfo__) + "dev" From 3d21a9a8e4ecd4a88acfa80700cebfdcd35dde6c Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 6 Dec 2024 18:34:59 -0800 Subject: [PATCH 27/38] [Clang][perf-training] Fix clean command in perf-helper.py (#118978) The first path argument was always being ignored, and since most calls to this command only passed one path, it wasn't actually doing anything in most cases. This bug was introduced by dd0356d741aefa25ece973d6cc4b55dcb73b84b4. (cherry picked from commit 18af3fc1bf8855e1e166e64a9210ed07d610aa54) --- clang/utils/perf-training/perf-helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/utils/perf-training/perf-helper.py b/clang/utils/perf-training/perf-helper.py index 3ed42a187fd80..d76c6ede3fe5a 100644 --- a/clang/utils/perf-training/perf-helper.py +++ b/clang/utils/perf-training/perf-helper.py @@ -36,7 +36,7 @@ def clean(args): + "\tRemoves all files with extension from ." ) return 1 - for path in args[1:-1]: + for path in args[0:-1]: for filename in findFilesWithExtension(path, args[-1]): os.remove(filename) return 0 From d2953ab0a79f9f2d2877703dbc235b92541faec9 Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Mon, 9 Sep 2024 09:28:08 -0700 Subject: [PATCH 28/38] [lld][WebAssembly] Fix use of uninitialized stack data with --wasm64 (#107780) In the case of `--wasm64` we were setting the type of the init expression to be 64-bit but were only setting the low 32-bits of the value (by assigning to Int32). Fixes: https://github.com/emscripten-core/emscripten/issues/22538 (cherry picked from commit 5c8fd1eece8fff69871cef57a2363dc0f734a7d1) --- lld/wasm/SyntheticSections.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp index f02f55519a251..72d08b849d8e8 100644 --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -587,8 +587,7 @@ void ElemSection::writeBody() { initExpr.Inst.Value.Global = WasmSym::tableBase->getGlobalIndex(); } else { bool is64 = config->is64.value_or(false); - initExpr.Inst.Opcode = is64 ? WASM_OPCODE_I64_CONST : WASM_OPCODE_I32_CONST; - initExpr.Inst.Value.Int32 = config->tableBase; + initExpr = intConst(config->tableBase, is64); } writeInitExpr(os, initExpr); From be6e2e72cb51f95c852e6d6567c8b2d066dfd19e Mon Sep 17 00:00:00 2001 From: estewart08 Date: Mon, 19 Aug 2024 17:59:21 -0500 Subject: [PATCH 29/38] [offload] - Fix issue with standalone debug offload build (#104647) Error: CommandLine Error: Option 'attributor-manifest-internal' registered more than once During the standalone debug build of offload the above error is seen at app runtime when using a prebuilt llvm with LLVM_LINK_LLVM_DYLIB=ON. This is caused by linking both libLLVM.so and various archives that are found via llvm_map_components_to_libnames for jit support. --- offload/plugins-nextgen/common/CMakeLists.txt | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/offload/plugins-nextgen/common/CMakeLists.txt b/offload/plugins-nextgen/common/CMakeLists.txt index 284f98875170c..aea20c6ec3143 100644 --- a/offload/plugins-nextgen/common/CMakeLists.txt +++ b/offload/plugins-nextgen/common/CMakeLists.txt @@ -11,13 +11,15 @@ add_dependencies(PluginCommon intrinsics_gen) # Only enable JIT for those targets that LLVM can support. set(supported_jit_targets AMDGPU NVPTX) -foreach(target IN LISTS supported_jit_targets) - if("${target}" IN_LIST LLVM_TARGETS_TO_BUILD) - target_compile_definitions(PluginCommon PRIVATE "LIBOMPTARGET_JIT_${target}") - llvm_map_components_to_libnames(llvm_libs ${target}) - target_link_libraries(PluginCommon PRIVATE ${llvm_libs}) - endif() -endforeach() +if (NOT LLVM_LINK_LLVM_DYLIB) + foreach(target IN LISTS supported_jit_targets) + if("${target}" IN_LIST LLVM_TARGETS_TO_BUILD) + target_compile_definitions(PluginCommon PRIVATE "LIBOMPTARGET_JIT_${target}") + llvm_map_components_to_libnames(llvm_libs ${target}) + target_link_libraries(PluginCommon PRIVATE ${llvm_libs}) + endif() + endforeach() +endif() # Include the RPC server from the `libc` project if availible. if(TARGET llvmlibc_rpc_server AND ${LIBOMPTARGET_GPU_LIBC_SUPPORT}) From e3b2d0149ffadef53fe8165f55e648466c025416 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Sat, 30 Nov 2024 12:32:57 +0100 Subject: [PATCH 30/38] [offload] Include CheckCXXCompilerFlag in CMake Include CheckCXXCompilerFlag before it is used in the top-level CMake file. This fixes standalone builds, but it is also cleaner than assuming that some previous CMake file will include it for us. --- offload/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/offload/CMakeLists.txt b/offload/CMakeLists.txt index 4cd97a6a5ff63..3ba892c76c8b8 100644 --- a/offload/CMakeLists.txt +++ b/offload/CMakeLists.txt @@ -127,6 +127,7 @@ include(LibomptargetGetDependencies) # Set up testing infrastructure. include(OpenMPTesting) +include(CheckCXXCompilerFlag) check_cxx_compiler_flag(-Werror=global-constructors OFFLOAD_HAVE_WERROR_CTOR) # LLVM source tree is required at build time for libomptarget From 5ce79b5045ae2e362fbd7b28351b10634d05b0f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Sat, 30 Nov 2024 15:32:39 +0100 Subject: [PATCH 31/38] [offload] [test] Add "omp" test dependency only when present Add the `omp` target dependency to tests only when the respective target is present, i.e. when not building standalone against system libomp. --- offload/test/CMakeLists.txt | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/offload/test/CMakeLists.txt b/offload/test/CMakeLists.txt index 3ac5d7907e2cc..c90ed26389faf 100644 --- a/offload/test/CMakeLists.txt +++ b/offload/test/CMakeLists.txt @@ -22,6 +22,11 @@ if(CUDAToolkit_FOUND) get_filename_component(CUDA_LIBDIR "${CUDA_cudart_static_LIBRARY}" DIRECTORY) endif() +set(OMP_DEPEND) +if(TARGET omp) + set(OMP_DEPEND omp) +endif() + string(REGEX MATCHALL "([^\ ]+\ |[^\ ]+$)" SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS}") foreach(CURRENT_TARGET IN LISTS SYSTEM_TARGETS) string(STRIP "${CURRENT_TARGET}" CURRENT_TARGET) @@ -29,7 +34,7 @@ foreach(CURRENT_TARGET IN LISTS SYSTEM_TARGETS) add_offload_testsuite(check-libomptarget-${CURRENT_TARGET} "Running libomptarget tests" ${CMAKE_CURRENT_BINARY_DIR}/${CURRENT_TARGET} - DEPENDS omptarget omp ${LIBOMPTARGET_TESTED_PLUGINS} + DEPENDS omptarget ${OMP_DEPEND} ${LIBOMPTARGET_TESTED_PLUGINS} ARGS ${LIBOMPTARGET_LIT_ARG_LIST}) list(APPEND LIBOMPTARGET_LIT_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CURRENT_TARGET}) @@ -43,12 +48,12 @@ add_offload_testsuite(check-libomptarget "Running libomptarget tests" ${LIBOMPTARGET_LIT_TESTSUITES} EXCLUDE_FROM_CHECK_ALL - DEPENDS omptarget omp ${LIBOMPTARGET_TESTED_PLUGINS} + DEPENDS omptarget ${OMP_DEPEND} ${LIBOMPTARGET_TESTED_PLUGINS} ARGS ${LIBOMPTARGET_LIT_ARG_LIST}) add_offload_testsuite(check-offload "Running libomptarget tests" ${LIBOMPTARGET_LIT_TESTSUITES} EXCLUDE_FROM_CHECK_ALL - DEPENDS omptarget omp ${LIBOMPTARGET_TESTED_PLUGINS} + DEPENDS omptarget ${OMP_DEPEND} ${LIBOMPTARGET_TESTED_PLUGINS} ARGS ${LIBOMPTARGET_LIT_ARG_LIST}) From 909104bb5a0540a3bffaa4a816efc1c54be81969 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Sat, 30 Nov 2024 15:39:53 +0100 Subject: [PATCH 32/38] [offload] Define OPENMP_TEST*_FLAGS in standalone builds Copy the `OPENMP_TEST_FLAGS` and `OPENMP_TEST_OPENMP_FLAGS` from openmp for standalone builds, as required by the lit configs. --- offload/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/offload/CMakeLists.txt b/offload/CMakeLists.txt index 3ba892c76c8b8..752434c6a28e1 100644 --- a/offload/CMakeLists.txt +++ b/offload/CMakeLists.txt @@ -283,6 +283,11 @@ if(OPENMP_STANDALONE_BUILD) ${LLVM_LIBRARY_DIRS} REQUIRED ) + + set(OPENMP_TEST_FLAGS "" CACHE STRING + "Extra compiler flags to send to the test compiler.") + set(OPENMP_TEST_OPENMP_FLAGS ${OPENMP_TEST_COMPILER_OPENMP_FLAGS} CACHE STRING + "OpenMP compiler flag to use for testing OpenMP runtime libraries.") endif() macro(pythonize_bool var) From b70bf59262cbae12728bd866506debff5c89eb4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Sat, 30 Nov 2024 15:44:37 +0100 Subject: [PATCH 33/38] [offload] Define LIBOMPTARGET_OPENMP_*_FOLDER in standalone builds Define `LIBOMPTARGET_OPENMP_HEADER_FOLDER` and `LIBOMPTARGET_OPENMP_HOST_RTL_FOLDER` needed for testing in standalone builds. --- offload/CMakeLists.txt | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/offload/CMakeLists.txt b/offload/CMakeLists.txt index 752434c6a28e1..959d6260bc749 100644 --- a/offload/CMakeLists.txt +++ b/offload/CMakeLists.txt @@ -284,10 +284,25 @@ if(OPENMP_STANDALONE_BUILD) REQUIRED ) + find_path ( + LIBOMP_INCLUDE_DIR + NAMES + omp.h + HINTS + ${COMPILER_RESOURCE_DIR}/include + ${CMAKE_INSTALL_PREFIX}/include + ) + + get_filename_component(LIBOMP_LIBRARY_DIR ${LIBOMP_STANDALONE} DIRECTORY) + set(OPENMP_TEST_FLAGS "" CACHE STRING "Extra compiler flags to send to the test compiler.") set(OPENMP_TEST_OPENMP_FLAGS ${OPENMP_TEST_COMPILER_OPENMP_FLAGS} CACHE STRING "OpenMP compiler flag to use for testing OpenMP runtime libraries.") + set(LIBOMPTARGET_OPENMP_HEADER_FOLDER "${LIBOMP_INCLUDE_DIR}" CACHE STRING + "Path to folder containing omp.h") + set(LIBOMPTARGET_OPENMP_HOST_RTL_FOLDER "${LIBOMP_LIBRARY_DIR}" CACHE STRING + "Path to folder containing libomp.so, and libLLVMSupport.so with profiling enabled") endif() macro(pythonize_bool var) From 2384a062fc1cc9d115c537f0cfe36ae6028e7bfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= Date: Sun, 1 Dec 2024 16:26:46 +0100 Subject: [PATCH 34/38] [cmake] Use DetectTestCompiler from openmp directory Fix the DetectTestCompiler project use to reference the openmp source tree, since the respective files were not copied to offload, and there is no point in duplicating them. Fixes #90333 --- offload/cmake/OpenMPTesting.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offload/cmake/OpenMPTesting.cmake b/offload/cmake/OpenMPTesting.cmake index 11eafeb764260..3e04a3423c4d6 100644 --- a/offload/cmake/OpenMPTesting.cmake +++ b/offload/cmake/OpenMPTesting.cmake @@ -124,7 +124,7 @@ if (${OPENMP_STANDALONE_BUILD}) # project is built which is too late for detecting the compiler... file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/DetectTestCompiler) execute_process( - COMMAND ${CMAKE_COMMAND} -G${CMAKE_GENERATOR} ${CMAKE_CURRENT_LIST_DIR}/DetectTestCompiler + COMMAND ${CMAKE_COMMAND} -G${CMAKE_GENERATOR} ${CMAKE_CURRENT_SOURCE_DIR}/../openmp/cmake/DetectTestCompiler -DCMAKE_C_COMPILER=${OPENMP_TEST_C_COMPILER} -DCMAKE_CXX_COMPILER=${OPENMP_TEST_CXX_COMPILER} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/DetectTestCompiler From 8069ce6ab1766a166b956192ad72c143be3b860e Mon Sep 17 00:00:00 2001 From: George Stagg Date: Wed, 4 Dec 2024 21:12:15 +0000 Subject: [PATCH 35/38] [WebAssembly] Support multiple `.init_array` fragments when writing Wasm objects (#111008) (cherry picked from commit ac5dd455caaf286625f61b604291f2eaed9702f0) --- llvm/lib/MC/WasmObjectWriter.cpp | 89 ++++++++++++++------------- llvm/test/MC/WebAssembly/init-array.s | 49 +++++++++++++++ 2 files changed, 96 insertions(+), 42 deletions(-) create mode 100644 llvm/test/MC/WebAssembly/init-array.s diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp index f25dc92fa235a..a66c5713ff8a6 100644 --- a/llvm/lib/MC/WasmObjectWriter.cpp +++ b/llvm/lib/MC/WasmObjectWriter.cpp @@ -1853,49 +1853,54 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm, if (EmptyFrag.getKind() != MCFragment::FT_Data) report_fatal_error(".init_array section should be aligned"); - const MCFragment &AlignFrag = *EmptyFrag.getNext(); - if (AlignFrag.getKind() != MCFragment::FT_Align) - report_fatal_error(".init_array section should be aligned"); - if (cast(AlignFrag).getAlignment() != - Align(is64Bit() ? 8 : 4)) - report_fatal_error(".init_array section should be aligned for pointers"); - - const MCFragment &Frag = *AlignFrag.getNext(); - if (Frag.hasInstructions() || Frag.getKind() != MCFragment::FT_Data) - report_fatal_error("only data supported in .init_array section"); - - uint16_t Priority = UINT16_MAX; - unsigned PrefixLength = strlen(".init_array"); - if (WS.getName().size() > PrefixLength) { - if (WS.getName()[PrefixLength] != '.') + const MCFragment *nextFrag = EmptyFrag.getNext(); + while (nextFrag != nullptr) { + const MCFragment &AlignFrag = *nextFrag; + if (AlignFrag.getKind() != MCFragment::FT_Align) + report_fatal_error(".init_array section should be aligned"); + if (cast(AlignFrag).getAlignment() != + Align(is64Bit() ? 8 : 4)) report_fatal_error( - ".init_array section priority should start with '.'"); - if (WS.getName().substr(PrefixLength + 1).getAsInteger(10, Priority)) - report_fatal_error("invalid .init_array section priority"); - } - const auto &DataFrag = cast(Frag); - const SmallVectorImpl &Contents = DataFrag.getContents(); - for (const uint8_t * - P = (const uint8_t *)Contents.data(), - *End = (const uint8_t *)Contents.data() + Contents.size(); - P != End; ++P) { - if (*P != 0) - report_fatal_error("non-symbolic data in .init_array section"); - } - for (const MCFixup &Fixup : DataFrag.getFixups()) { - assert(Fixup.getKind() == - MCFixup::getKindForSize(is64Bit() ? 8 : 4, false)); - const MCExpr *Expr = Fixup.getValue(); - auto *SymRef = dyn_cast(Expr); - if (!SymRef) - report_fatal_error("fixups in .init_array should be symbol references"); - const auto &TargetSym = cast(SymRef->getSymbol()); - if (TargetSym.getIndex() == InvalidIndex) - report_fatal_error("symbols in .init_array should exist in symtab"); - if (!TargetSym.isFunction()) - report_fatal_error("symbols in .init_array should be for functions"); - InitFuncs.push_back( - std::make_pair(Priority, TargetSym.getIndex())); + ".init_array section should be aligned for pointers"); + + const MCFragment &Frag = *AlignFrag.getNext(); + nextFrag = Frag.getNext(); + if (Frag.hasInstructions() || Frag.getKind() != MCFragment::FT_Data) + report_fatal_error("only data supported in .init_array section"); + + uint16_t Priority = UINT16_MAX; + unsigned PrefixLength = strlen(".init_array"); + if (WS.getName().size() > PrefixLength) { + if (WS.getName()[PrefixLength] != '.') + report_fatal_error( + ".init_array section priority should start with '.'"); + if (WS.getName().substr(PrefixLength + 1).getAsInteger(10, Priority)) + report_fatal_error("invalid .init_array section priority"); + } + const auto &DataFrag = cast(Frag); + const SmallVectorImpl &Contents = DataFrag.getContents(); + for (const uint8_t * + P = (const uint8_t *)Contents.data(), + *End = (const uint8_t *)Contents.data() + Contents.size(); + P != End; ++P) { + if (*P != 0) + report_fatal_error("non-symbolic data in .init_array section"); + } + for (const MCFixup &Fixup : DataFrag.getFixups()) { + assert(Fixup.getKind() == + MCFixup::getKindForSize(is64Bit() ? 8 : 4, false)); + const MCExpr *Expr = Fixup.getValue(); + auto *SymRef = dyn_cast(Expr); + if (!SymRef) + report_fatal_error( + "fixups in .init_array should be symbol references"); + const auto &TargetSym = cast(SymRef->getSymbol()); + if (TargetSym.getIndex() == InvalidIndex) + report_fatal_error("symbols in .init_array should exist in symtab"); + if (!TargetSym.isFunction()) + report_fatal_error("symbols in .init_array should be for functions"); + InitFuncs.push_back(std::make_pair(Priority, TargetSym.getIndex())); + } } } diff --git a/llvm/test/MC/WebAssembly/init-array.s b/llvm/test/MC/WebAssembly/init-array.s new file mode 100644 index 0000000000000..e79fb453ec12a --- /dev/null +++ b/llvm/test/MC/WebAssembly/init-array.s @@ -0,0 +1,49 @@ +# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj < %s | obj2yaml | FileCheck %s + +init1: + .functype init1 () -> () + end_function + +init2: + .functype init2 () -> () + end_function + + .section .init_array,"",@ + .p2align 2, 0 + .int32 init1 + + .section .init_array,"",@ + .p2align 2 + .int32 init2 + +# CHECK: - Type: FUNCTION +# CHECK-NEXT: FunctionTypes: [ 0, 0 ] +# CHECK-NEXT: - Type: CODE +# CHECK-NEXT: Functions: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: Locals: [] +# CHECK-NEXT: Body: 0B +# CHECK-NEXT: - Index: 1 +# CHECK-NEXT: Locals: [] +# CHECK-NEXT: Body: 0B +# CHECK-NEXT: - Type: CUSTOM +# CHECK-NEXT: Name: linking +# CHECK-NEXT: Version: 2 +# CHECK-NEXT: SymbolTable: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Name: init1 +# CHECK-NEXT: Flags: [ BINDING_LOCAL ] +# CHECK-NEXT: Function: 0 +# CHECK-NEXT: - Index: 1 +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Name: init2 +# CHECK-NEXT: Flags: [ BINDING_LOCAL ] +# CHECK-NEXT: Function: 1 +# CHECK-NEXT: InitFunctions: +# CHECK-NEXT: - Priority: 65535 +# CHECK-NEXT: Symbol: 0 +# CHECK-NEXT: - Priority: 65535 +# CHECK-NEXT: Symbol: 1 +# CHECK-NEXT: ... +# From 8e9465bd150bcbe2d200efc7c0e31abf8ddebe58 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Tue, 10 Dec 2024 16:28:18 +0000 Subject: [PATCH 36/38] [WebAssembly] Handle symbols in `.init_array` sections (#119127) Follow on from #111008. (cherry picked from commit ed91843d435d0cd2c39ebb1a50f2907c621f07ed) --- llvm/lib/MC/WasmObjectWriter.cpp | 22 ++++- llvm/test/MC/WebAssembly/init-array-label.s | 91 +++++++++++++++++++++ 2 files changed, 111 insertions(+), 2 deletions(-) create mode 100644 llvm/test/MC/WebAssembly/init-array-label.s diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp index a66c5713ff8a6..8526469245676 100644 --- a/llvm/lib/MC/WasmObjectWriter.cpp +++ b/llvm/lib/MC/WasmObjectWriter.cpp @@ -1326,6 +1326,22 @@ static bool isInSymtab(const MCSymbolWasm &Sym) { return true; } +static bool isSectionReferenced(MCAssembler &Asm, MCSectionWasm &Section) { + StringRef SectionName = Section.getName(); + + for (const MCSymbol &S : Asm.symbols()) { + const auto &WS = static_cast(S); + if (WS.isData() && WS.isInSection()) { + auto &RefSection = static_cast(WS.getSection()); + if (RefSection.getName() == SectionName) { + return true; + } + } + } + + return false; +} + void WasmObjectWriter::prepareImports( SmallVectorImpl &Imports, MCAssembler &Asm) { // For now, always emit the memory import, since loads and stores are not @@ -1482,8 +1498,10 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm, LLVM_DEBUG(dbgs() << "Processing Section " << SectionName << " group " << Section.getGroup() << "\n";); - // .init_array sections are handled specially elsewhere. - if (SectionName.starts_with(".init_array")) + // .init_array sections are handled specially elsewhere, include them in + // data segments if and only if referenced by a symbol. + if (SectionName.starts_with(".init_array") && + !isSectionReferenced(Asm, Section)) continue; // Code is handled separately diff --git a/llvm/test/MC/WebAssembly/init-array-label.s b/llvm/test/MC/WebAssembly/init-array-label.s new file mode 100644 index 0000000000000..0b4a5ea2da0b5 --- /dev/null +++ b/llvm/test/MC/WebAssembly/init-array-label.s @@ -0,0 +1,91 @@ +# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj < %s | obj2yaml | FileCheck %s + +init1: + .functype init1 () -> () + end_function + +init2: + .functype init2 () -> () + end_function + + .section .init_array.42,"",@ + .p2align 2, 0x0 + .int32 init1 + + .section .init_array,"",@ + .globl p_init1 + .p2align 2, 0x0 +p_init1: + .int32 init1 + .size p_init1, 4 + + .section .init_array,"",@ + .globl p_init2 + .p2align 2, 0x0 +p_init2: + .int32 init1 + .int32 init2 + .size p_init2, 8 + +# CHECK: - Type: FUNCTION +# CHECK-NEXT: FunctionTypes: [ 0, 0 ] +# CHECK-NEXT: - Type: DATACOUNT +# CHECK-NEXT: Count: 1 +# CHECK-NEXT: - Type: CODE +# CHECK-NEXT: Functions: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: Locals: [] +# CHECK-NEXT: Body: 0B +# CHECK-NEXT: - Index: 1 +# CHECK-NEXT: Locals: [] +# CHECK-NEXT: Body: 0B +# CHECK-NEXT: - Type: DATA +# CHECK-NEXT: Segments: +# CHECK-NEXT: - SectionOffset: 6 +# CHECK-NEXT: InitFlags: 0 +# CHECK-NEXT: Offset: +# CHECK-NEXT: Opcode: I32_CONST +# CHECK-NEXT: Value: 0 +# CHECK-NEXT: Content: '000000000000000000000000' +# CHECK-NEXT: - Type: CUSTOM +# CHECK-NEXT: Name: linking +# CHECK-NEXT: Version: 2 +# CHECK-NEXT: SymbolTable: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Name: init1 +# CHECK-NEXT: Flags: [ BINDING_LOCAL ] +# CHECK-NEXT: Function: 0 +# CHECK-NEXT: - Index: 1 +# CHECK-NEXT: Kind: FUNCTION +# CHECK-NEXT: Name: init2 +# CHECK-NEXT: Flags: [ BINDING_LOCAL ] +# CHECK-NEXT: Function: 1 +# CHECK-NEXT: - Index: 2 +# CHECK-NEXT: Kind: DATA +# CHECK-NEXT: Name: p_init1 +# CHECK-NEXT: Flags: [ ] +# CHECK-NEXT: Segment: 0 +# CHECK-NEXT: Size: 4 +# CHECK-NEXT: - Index: 3 +# CHECK-NEXT: Kind: DATA +# CHECK-NEXT: Name: p_init2 +# CHECK-NEXT: Flags: [ ] +# CHECK-NEXT: Segment: 0 +# CHECK-NEXT: Offset: 4 +# CHECK-NEXT: Size: 8 +# CHECK-NEXT: SegmentInfo: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: Name: .init_array +# CHECK-NEXT: Alignment: 2 +# CHECK-NEXT: Flags: [ ] +# CHECK-NEXT: InitFunctions: +# CHECK-NEXT: - Priority: 42 +# CHECK-NEXT: Symbol: 0 +# CHECK-NEXT: - Priority: 65535 +# CHECK-NEXT: Symbol: 0 +# CHECK-NEXT: - Priority: 65535 +# CHECK-NEXT: Symbol: 0 +# CHECK-NEXT: - Priority: 65535 +# CHECK-NEXT: Symbol: 1 +# CHECK-NEXT: ... From 657e03f8625c9e7326f241aea584ff5c43b30ba3 Mon Sep 17 00:00:00 2001 From: Antonio Frighetto Date: Wed, 4 Dec 2024 10:15:11 +0100 Subject: [PATCH 37/38] [AggressiveInstCombine] Use APInt and avoid truncation when folding loads A miscompilation issue has been addressed with improved handling. Fixes: https://github.com/llvm/llvm-project/issues/118467. (cherry picked from commit f68b0e36997322eeda8fd199ea80deb1b49c5410) --- .../AggressiveInstCombine.cpp | 3 +- .../AggressiveInstCombine/AArch64/or-load.ll | 20 +++---- .../AggressiveInstCombine/X86/or-load.ll | 52 +++++++++++++++---- 3 files changed, 53 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index d5a38ec17a2a8..1d23ec8ced204 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -811,8 +811,7 @@ static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL, APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0); Load1Ptr = Load1Ptr->stripAndAccumulateConstantOffsets( DL, Offset1, /* AllowNonInbounds */ true); - Load1Ptr = Builder.CreatePtrAdd(Load1Ptr, - Builder.getInt32(Offset1.getZExtValue())); + Load1Ptr = Builder.CreatePtrAdd(Load1Ptr, Builder.getInt(Offset1)); } // Generate wider load. NewLoad = Builder.CreateAlignedLoad(WiderType, Load1Ptr, LI1->getAlign(), diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll index 1400ee7f703ca..10c4c9b0ca4c9 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll @@ -1121,19 +1121,19 @@ entry: define i32 @loadCombine_4consecutive_metadata(ptr %p, ptr %pstr) { ; LE-LABEL: @loadCombine_4consecutive_metadata( -; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1, !alias.scope !0 -; LE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0 +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1, !alias.scope [[META0:![0-9]+]] +; LE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias [[META0]] ; LE-NEXT: ret i32 [[L1]] ; ; BE-LABEL: @loadCombine_4consecutive_metadata( ; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 ; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 ; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope !0 -; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope !0 -; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope !0 -; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope !0 -; BE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope [[META0:![0-9]+]] +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope [[META0]] +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope [[META0]] +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope [[META0]] +; BE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias [[META0]] ; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 ; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 ; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 @@ -1869,7 +1869,7 @@ define i32 @loadCombine_4consecutive_badinsert2(ptr %p) { define i32 @loadCombine_4consecutive_badinsert3(ptr %p) { ; LE-LABEL: @loadCombine_4consecutive_badinsert3( -; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 ; LE-NEXT: [[L1:%.*]] = load i32, ptr [[TMP1]], align 1 ; LE-NEXT: ret i32 [[L1]] ; @@ -2088,7 +2088,7 @@ define i32 @loadCombine_4consecutive_badinsert6(ptr %p) { define void @nested_gep(ptr %p, ptr %dest) { ; LE-LABEL: @nested_gep( -; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 68 +; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 68 ; LE-NEXT: [[LD2:%.*]] = load i64, ptr [[TMP1]], align 4 ; LE-NEXT: [[TRUNC:%.*]] = trunc i64 [[LD2]] to i32 ; LE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4 @@ -2128,7 +2128,7 @@ define void @nested_gep(ptr %p, ptr %dest) { define void @bitcast_gep(ptr %p, ptr %dest) { ; LE-LABEL: @bitcast_gep( -; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 68 +; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 68 ; LE-NEXT: [[LD2:%.*]] = load i64, ptr [[TMP1]], align 4 ; LE-NEXT: [[TRUNC:%.*]] = trunc i64 [[LD2]] to i32 ; LE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4 diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll index 0aa6f9ecdf884..1b53c8f71222b 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll @@ -1205,19 +1205,19 @@ entry: define i32 @loadCombine_4consecutive_metadata(ptr %p, ptr %pstr) { ; LE-LABEL: @loadCombine_4consecutive_metadata( -; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1, !alias.scope !0 -; LE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0 +; LE-NEXT: [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1, !alias.scope [[META0:![0-9]+]] +; LE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias [[META0]] ; LE-NEXT: ret i32 [[L1]] ; ; BE-LABEL: @loadCombine_4consecutive_metadata( ; BE-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 ; BE-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2 ; BE-NEXT: [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3 -; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope !0 -; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope !0 -; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope !0 -; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope !0 -; BE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0 +; BE-NEXT: [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope [[META0:![0-9]+]] +; BE-NEXT: [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope [[META0]] +; BE-NEXT: [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope [[META0]] +; BE-NEXT: [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope [[META0]] +; BE-NEXT: store i32 25, ptr [[PSTR:%.*]], align 4, !noalias [[META0]] ; BE-NEXT: [[E1:%.*]] = zext i8 [[L1]] to i32 ; BE-NEXT: [[E2:%.*]] = zext i8 [[L2]] to i32 ; BE-NEXT: [[E3:%.*]] = zext i8 [[L3]] to i32 @@ -2005,7 +2005,7 @@ define i32 @loadCombine_4consecutive_badinsert2(ptr %p) { define i32 @loadCombine_4consecutive_badinsert3(ptr %p) { ; LE-LABEL: @loadCombine_4consecutive_badinsert3( -; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1 +; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1 ; LE-NEXT: [[L1:%.*]] = load i32, ptr [[TMP1]], align 1 ; LE-NEXT: ret i32 [[L1]] ; @@ -2306,7 +2306,7 @@ define i64 @loadCombine_nonConstShift2(ptr %arg, i8 %b) { define void @nested_gep(ptr %p, ptr %dest) { ; LE-LABEL: @nested_gep( -; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 68 +; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 68 ; LE-NEXT: [[LD2:%.*]] = load i64, ptr [[TMP1]], align 4 ; LE-NEXT: [[TRUNC:%.*]] = trunc i64 [[LD2]] to i32 ; LE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4 @@ -2346,7 +2346,7 @@ define void @nested_gep(ptr %p, ptr %dest) { define void @bitcast_gep(ptr %p, ptr %dest) { ; LE-LABEL: @bitcast_gep( -; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 68 +; LE-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 68 ; LE-NEXT: [[LD2:%.*]] = load i64, ptr [[TMP1]], align 4 ; LE-NEXT: [[TRUNC:%.*]] = trunc i64 [[LD2]] to i32 ; LE-NEXT: store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4 @@ -2382,3 +2382,35 @@ define void @bitcast_gep(ptr %p, ptr %dest) { store i32 %trunc, ptr %dest, align 4 ret void } + +define i32 @loadcombine_consecutive_idx_64(ptr %data) { +; LE-LABEL: @loadcombine_consecutive_idx_64( +; LE-NEXT: entry: +; LE-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DATA:%.*]], i64 2149675576 +; LE-NEXT: [[VAL_2:%.*]] = load i16, ptr [[TMP0]], align 1 +; LE-NEXT: [[TMP1:%.*]] = zext i16 [[VAL_2]] to i32 +; LE-NEXT: ret i32 [[TMP1]] +; +; BE-LABEL: @loadcombine_consecutive_idx_64( +; BE-NEXT: entry: +; BE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[DATA:%.*]], i64 2149675577 +; BE-NEXT: [[VAL:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; BE-NEXT: [[CONV:%.*]] = zext i8 [[VAL]] to i32 +; BE-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[DATA]], i64 2149675576 +; BE-NEXT: [[VAL_2:%.*]] = load i8, ptr [[ARRAYIDX_2]], align 1 +; BE-NEXT: [[CONV_2:%.*]] = zext i8 [[VAL_2]] to i32 +; BE-NEXT: [[SHL:%.*]] = shl nuw nsw i32 [[CONV]], 8 +; BE-NEXT: [[OR:%.*]] = or disjoint i32 [[SHL]], [[CONV_2]] +; BE-NEXT: ret i32 [[OR]] +; +entry: + %arrayidx = getelementptr inbounds nuw i8, ptr %data, i64 2149675577 + %val = load i8, ptr %arrayidx, align 1 + %conv = zext i8 %val to i32 + %arrayidx.2 = getelementptr inbounds nuw i8, ptr %data, i64 2149675576 + %val.2 = load i8, ptr %arrayidx.2, align 1 + %conv.2 = zext i8 %val.2 to i32 + %shl = shl nuw nsw i32 %conv, 8 + %or = or disjoint i32 %shl, %conv.2 + ret i32 %or +} From e21dc4bd5474d04b8e62d7331362edcc5648d7e5 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 27 Nov 2024 11:47:22 +0100 Subject: [PATCH 38/38] [SimpleLoopUnswitch] Fix LCSSA phi node invalidation Fixes https://github.com/llvm/llvm-project/issues/117537. (cherry picked from commit fc5c89900f2a4b50e0f3a88ef7c89115d93684f4) --- .../Transforms/Scalar/SimpleLoopUnswitch.cpp | 5 +- .../Transforms/SimpleLoopUnswitch/pr117537.ll | 92 +++++++++++++++++++ 2 files changed, 95 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/SimpleLoopUnswitch/pr117537.ll diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index c235d2fb2a5bd..f99f4487c5540 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -1249,8 +1249,9 @@ static BasicBlock *buildClonedLoopBlocks( assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!"); // Forget SCEVs based on exit phis in case SCEV looked through the phi. - if (SE && isa(I)) - SE->forgetValue(&I); + if (SE) + if (auto *PN = dyn_cast(&I)) + SE->forgetLcssaPhiWithNewPredecessor(&L, PN); BasicBlock::iterator InsertPt = MergeBB->getFirstInsertionPt(); diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/pr117537.ll b/llvm/test/Transforms/SimpleLoopUnswitch/pr117537.ll new file mode 100644 index 0000000000000..fd61cfab164d3 --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/pr117537.ll @@ -0,0 +1,92 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes='print,simple-loop-unswitch,print' -verify-scev < %s 2>/dev/null | FileCheck %s + +; Make sure we don't assert due to insufficient SCEV invalidation. + +define void @test(ptr %p) { +; CHECK-LABEL: define void @test( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CHECK:%.*]] = icmp eq ptr [[P]], null +; CHECK-NEXT: br i1 [[CHECK]], label %[[ENTRY_SPLIT_US:.*]], label %[[ENTRY_SPLIT:.*]] +; CHECK: [[ENTRY_SPLIT_US]]: +; CHECK-NEXT: br label %[[BB0_US:.*]] +; CHECK: [[BB0_US]]: +; CHECK-NEXT: br label %[[LOOP0_US:.*]] +; CHECK: [[LOOP0_US]]: +; CHECK-NEXT: [[V_US:%.*]] = load atomic i32, ptr [[P]] unordered, align 8 +; CHECK-NEXT: [[ADD_US:%.*]] = add i32 [[V_US]], 3 +; CHECK-NEXT: br i1 true, label %[[PREHEADER_SPLIT_US:.*]], label %[[BB0_US]] +; CHECK: [[PREHEADER_SPLIT_US]]: +; CHECK-NEXT: [[ADD_LCSSA_US:%.*]] = phi i32 [ [[ADD_US]], %[[LOOP0_US]] ] +; CHECK-NEXT: br label %[[PREHEADER:.*]] +; CHECK: [[ENTRY_SPLIT]]: +; CHECK-NEXT: br label %[[BB0:.*]] +; CHECK: [[BB0]]: +; CHECK-NEXT: br label %[[LATCH:.*]] +; CHECK: [[LATCH]]: +; CHECK-NEXT: br i1 false, label %[[EXIT0:.*]], label %[[LOOP0:.*]] +; CHECK: [[EXIT0]]: +; CHECK-NEXT: ret void +; CHECK: [[LOOP0]]: +; CHECK-NEXT: [[V:%.*]] = load atomic i32, ptr [[P]] unordered, align 8 +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[V]], 3 +; CHECK-NEXT: br i1 true, label %[[PREHEADER_SPLIT:.*]], label %[[BB0]] +; CHECK: [[PREHEADER_SPLIT]]: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[LOOP0]] ] +; CHECK-NEXT: br label %[[PREHEADER]] +; CHECK: [[PREHEADER]]: +; CHECK-NEXT: [[DOTUS_PHI:%.*]] = phi i32 [ [[ADD_LCSSA]], %[[PREHEADER_SPLIT]] ], [ [[ADD_LCSSA_US]], %[[PREHEADER_SPLIT_US]] ] +; CHECK-NEXT: br label %[[LOOP1:.*]] +; CHECK: [[LOOP1]]: +; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ [[DOTUS_PHI]], %[[PREHEADER]] ], [ [[IV1_NEXT:%.*]], %[[BACKEDGE:.*]] ] +; CHECK-NEXT: [[IV1_NEXT]] = add i32 [[IV1]], -33 +; CHECK-NEXT: br label %[[LOOP2:.*]] +; CHECK: [[BACKEDGE]]: +; CHECK-NEXT: br i1 true, label %[[EXIT1:.*]], label %[[LOOP1]] +; CHECK: [[LOOP2]]: +; CHECK-NEXT: [[IV0:%.*]] = phi i32 [ [[IV1]], %[[LOOP1]] ], [ [[IV0_NEXT:%.*]], %[[LOOP2]] ] +; CHECK-NEXT: [[IV0_NEXT]] = add nsw i32 [[IV0]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[IV0_NEXT]], 0 +; CHECK-NEXT: br i1 [[CMP]], label %[[BACKEDGE]], label %[[LOOP2]] +; CHECK: [[EXIT1]]: +; CHECK-NEXT: ret void +; +entry: + %check = icmp eq ptr %p, null + br label %bb0 + +bb0: ; preds = %loop0, %entry + br i1 %check, label %loop0, label %latch + +latch: ; preds = %bb0 + br i1 %check, label %exit0, label %loop0 + +exit0: ; preds = %latch + ret void + +loop0: ; preds = %latch, %bb0 + %v = load atomic i32, ptr %p unordered, align 8 + %add = add i32 %v, 3 + br i1 true, label %preheader, label %bb0 + +preheader: ; preds = %loop0 + br label %loop1 + +loop1: ; preds = %backedge, %preheader + %iv1 = phi i32 [ %add, %preheader ], [ %iv1.next, %backedge ] + %iv1.next = add i32 %iv1, -33 + br label %loop2 + +backedge: ; preds = %loop2 + br i1 true, label %exit1, label %loop1 + +loop2: ; preds = %loop2, %loop1 + %iv0 = phi i32 [ %iv1, %loop1 ], [ %iv0.next, %loop2 ] + %iv0.next = add nsw i32 %iv0, 1 + %cmp = icmp sgt i32 %iv0.next, 0 + br i1 %cmp, label %backedge, label %loop2 + +exit1: ; preds = %backedge + ret void +}