From 51dee6b64fda7c49a68af78555135c8f51a308cf Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias@hieta.se>
Date: Tue, 19 Nov 2024 13:43:39 +0100
Subject: [PATCH 01/38] Bump version to 19.1.5

---
 cmake/Modules/LLVMVersion.cmake          | 2 +-
 libcxx/include/__config                  | 2 +-
 llvm/utils/gn/secondary/llvm/version.gni | 2 +-
 llvm/utils/lit/lit/__init__.py           | 2 +-
 llvm/utils/mlgo-utils/mlgo/__init__.py   | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake
index 6ccb934aef436..9b39550118c49 100644
--- a/cmake/Modules/LLVMVersion.cmake
+++ b/cmake/Modules/LLVMVersion.cmake
@@ -7,7 +7,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR)
   set(LLVM_VERSION_MINOR 1)
 endif()
 if(NOT DEFINED LLVM_VERSION_PATCH)
-  set(LLVM_VERSION_PATCH 4)
+  set(LLVM_VERSION_PATCH 5)
 endif()
 if(NOT DEFINED LLVM_VERSION_SUFFIX)
   set(LLVM_VERSION_SUFFIX)
diff --git a/libcxx/include/__config b/libcxx/include/__config
index a929db5d0f2d1..33e0043136fee 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -27,7 +27,7 @@
 // _LIBCPP_VERSION represents the version of libc++, which matches the version of LLVM.
 // Given a LLVM release LLVM XX.YY.ZZ (e.g. LLVM 17.0.1 == 17.00.01), _LIBCPP_VERSION is
 // defined to XXYYZZ.
-#  define _LIBCPP_VERSION 190104
+#  define _LIBCPP_VERSION 190105
 
 #  define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y
 #  define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y)
diff --git a/llvm/utils/gn/secondary/llvm/version.gni b/llvm/utils/gn/secondary/llvm/version.gni
index 0c2804f70a147..c32a040dcba67 100644
--- a/llvm/utils/gn/secondary/llvm/version.gni
+++ b/llvm/utils/gn/secondary/llvm/version.gni
@@ -1,4 +1,4 @@
 llvm_version_major = 19
 llvm_version_minor = 1
-llvm_version_patch = 4
+llvm_version_patch = 5
 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch"
diff --git a/llvm/utils/lit/lit/__init__.py b/llvm/utils/lit/lit/__init__.py
index 81b74db977b08..8557e5a061d1d 100644
--- a/llvm/utils/lit/lit/__init__.py
+++ b/llvm/utils/lit/lit/__init__.py
@@ -2,7 +2,7 @@
 
 __author__ = "Daniel Dunbar"
 __email__ = "daniel@minormatter.com"
-__versioninfo__ = (19, 1, 4)
+__versioninfo__ = (19, 1, 5)
 __version__ = ".".join(str(v) for v in __versioninfo__) + "dev"
 
 __all__ = []
diff --git a/llvm/utils/mlgo-utils/mlgo/__init__.py b/llvm/utils/mlgo-utils/mlgo/__init__.py
index 77fe60a0b1590..e2f8ca88d91ec 100644
--- a/llvm/utils/mlgo-utils/mlgo/__init__.py
+++ b/llvm/utils/mlgo-utils/mlgo/__init__.py
@@ -4,7 +4,7 @@
 
 from datetime import timezone, datetime
 
-__versioninfo__ = (19, 1, 4)
+__versioninfo__ = (19, 1, 5)
 __version__ = (
     ".".join(str(v) for v in __versioninfo__)
     + "dev"

From 02930b87faeb490505b22f588757a18744248b6f Mon Sep 17 00:00:00 2001
From: Manasij Mukherjee <manasij7479@gmail.com>
Date: Fri, 4 Oct 2024 15:15:30 -0600
Subject: [PATCH 02/38] [NVPTX] Promote v2i8 to v2i16 (#111189)

Promote v2i8 to v2i16, fixes a crash.
Re-enable a test in NVPTX/vector-returns.ll

Partial cherry-pick of fda2fea w/o the test which does not exist in release/19.x
https://github.com/llvm/llvm-project/issues/104864
---
 llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 6975412ce5d35..b2153a7afe736 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -229,6 +229,10 @@ static void ComputePTXValueVTs(const TargetLowering &TLI, const DataLayout &DL,
         // v*i8 are formally lowered as v4i8
         EltVT = MVT::v4i8;
         NumElts = (NumElts + 3) / 4;
+      } else if (EltVT.getSimpleVT() == MVT::i8 && NumElts == 2) {
+        // v2i8 is promoted to v2i16
+        NumElts = 1;
+        EltVT = MVT::v2i16;
       }
       for (unsigned j = 0; j != NumElts; ++j) {
         ValueVTs.push_back(EltVT);

From e032d7ad80f8d185626b39751f672932f92dc033 Mon Sep 17 00:00:00 2001
From: Anutosh Bhat <andersonbhat491@gmail.com>
Date: Tue, 19 Nov 2024 13:37:40 +0530
Subject: [PATCH 03/38] [clang-repl] Improve flags responsible for generating
 shared wasm binaries (#116735)

There are a couple changes in this PR that help getting clang-repl to
run in the browser. Using a jupyterlite instance for the example pasted
below

1) Updating flags responsible for generating shared wasm binaries that
need to be dynamically loaded Most Importantly as can be seen in the
changes `shared` and `allow-undefined` are crucial.

![image](https://github.com/user-attachments/assets/1183fd44-8951-496a-899a-e4af39a48447)

2) While exiting we encounter this.

![image](https://github.com/user-attachments/assets/9487a3f4-7200-471d-ba88-09e98ccbc47a)

Now as can be seen here

https://github.com/llvm/llvm-project/blob/cd418030de7ae75750bc4e48d1238baf03c675e5/clang/lib/Interpreter/Interpreter.cpp#L421-L430

We call cleanUP in the destructor. Now cleanUP through
IncrementalExecutor tries to deinitialize the JIT which wasn't even
intialized as runCtors in wasm.cpp is a no-op

https://github.com/llvm/llvm-project/blob/cd418030de7ae75750bc4e48d1238baf03c675e5/clang/lib/Interpreter/IncrementalExecutor.cpp#L94-L101

https://github.com/llvm/llvm-project/blob/cd418030de7ae75750bc4e48d1238baf03c675e5/clang/lib/Interpreter/Wasm.cpp#L107-L109
(cherry picked from commit 752dbd6112affa418e33910ac08bf9921f9c270b)
---
 clang/lib/Interpreter/IncrementalExecutor.h |  2 +-
 clang/lib/Interpreter/Interpreter.cpp       |  1 -
 clang/lib/Interpreter/Wasm.cpp              | 10 ++++++++--
 clang/lib/Interpreter/Wasm.h                |  1 +
 4 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/clang/lib/Interpreter/IncrementalExecutor.h b/clang/lib/Interpreter/IncrementalExecutor.h
index 7954cde36588b..dbd61f0b8b1eb 100644
--- a/clang/lib/Interpreter/IncrementalExecutor.h
+++ b/clang/lib/Interpreter/IncrementalExecutor.h
@@ -56,7 +56,7 @@ class IncrementalExecutor {
   virtual llvm::Error addModule(PartialTranslationUnit &PTU);
   virtual llvm::Error removeModule(PartialTranslationUnit &PTU);
   virtual llvm::Error runCtors() const;
-  llvm::Error cleanUp();
+  virtual llvm::Error cleanUp();
   llvm::Expected<llvm::orc::ExecutorAddr>
   getSymbolAddress(llvm::StringRef Name, SymbolNameKind NameKind) const;
 
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index b4882ab5d2236..c0b8bfebb4343 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -192,7 +192,6 @@ IncrementalCompilerBuilder::CreateCpp() {
 #ifdef __EMSCRIPTEN__
   Argv.push_back("-target");
   Argv.push_back("wasm32-unknown-emscripten");
-  Argv.push_back("-pie");
   Argv.push_back("-shared");
 #endif
   Argv.insert(Argv.end(), UserArgs.begin(), UserArgs.end());
diff --git a/clang/lib/Interpreter/Wasm.cpp b/clang/lib/Interpreter/Wasm.cpp
index 1001410aa0f27..79efbaa03982d 100644
--- a/clang/lib/Interpreter/Wasm.cpp
+++ b/clang/lib/Interpreter/Wasm.cpp
@@ -72,13 +72,13 @@ llvm::Error WasmIncrementalExecutor::addModule(PartialTranslationUnit &PTU) {
   OutputFile.close();
 
   std::vector<const char *> LinkerArgs = {"wasm-ld",
-                                          "-pie",
+                                          "-shared",
                                           "--import-memory",
                                           "--no-entry",
                                           "--export-all",
                                           "--experimental-pic",
-                                          "--no-export-dynamic",
                                           "--stack-first",
+                                          "--allow-undefined",
                                           OutputFileName.c_str(),
                                           "-o",
                                           OutputFileName.c_str()};
@@ -109,6 +109,12 @@ llvm::Error WasmIncrementalExecutor::runCtors() const {
   return llvm::Error::success();
 }
 
+llvm::Error WasmIncrementalExecutor::cleanUp() const {
+  // Can't call cleanUp through IncrementalExecutor as it
+  // tries to deinitialize JIT which hasn't been initialized
+  return llvm::Error::success();
+}
+
 WasmIncrementalExecutor::~WasmIncrementalExecutor() = default;
 
 } // namespace clang
diff --git a/clang/lib/Interpreter/Wasm.h b/clang/lib/Interpreter/Wasm.h
index b1fd88024f14d..4632613326d39 100644
--- a/clang/lib/Interpreter/Wasm.h
+++ b/clang/lib/Interpreter/Wasm.h
@@ -28,6 +28,7 @@ class WasmIncrementalExecutor : public IncrementalExecutor {
   llvm::Error addModule(PartialTranslationUnit &PTU) override;
   llvm::Error removeModule(PartialTranslationUnit &PTU) override;
   llvm::Error runCtors() const override;
+  llvm::Error cleanUp() override;
 
   ~WasmIncrementalExecutor() override;
 };

From fb6b195cae03ba6e5b50870031d710ca6886c5bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Sun, 20 Oct 2024 13:51:50 +0300
Subject: [PATCH 04/38] [compiler-rt] [test] Remove an unintended grep
 parameter

This parameter seems unintentional here; we're trying to grep
the input on stdin, from the earlier stage in the pipeline.

Since a recent update on Github Actions runners, the previous
form (grepping a file, while piping in data on stdin) would fail
running the test, with the test runner Python script throwing
an exception when evaluating it:

      File "D:\a\llvm-mingw\llvm-mingw\llvm-project\llvm\utils\lit\lit\TestRunner.py", line 935, in _executeShCmd
        out = procs[i].stdout.read()
              ^^^^^^^^^^^^^^^^^^^^^^
      File "C:\hostedtoolcache\windows\Python\3.12.7\x64\Lib\encodings\cp1252.py", line 23, in decode
        return codecs.charmap_decode(input,self.errors,decoding_table)[0]
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    TypeError: a bytes-like object is required, not 'NoneType'

(cherry picked from commit c2717a89b8437d041d532c7b2c535ca4f4b35872)
---
 compiler-rt/test/asan/TestCases/Windows/delay_dbghelp.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compiler-rt/test/asan/TestCases/Windows/delay_dbghelp.cpp b/compiler-rt/test/asan/TestCases/Windows/delay_dbghelp.cpp
index 9277fe0b23516..38e99cf685945 100644
--- a/compiler-rt/test/asan/TestCases/Windows/delay_dbghelp.cpp
+++ b/compiler-rt/test/asan/TestCases/Windows/delay_dbghelp.cpp
@@ -9,7 +9,7 @@
 // static build, there won't be any clang_rt DLLs.
 // RUN: not grep cl""ang_rt %t || \
 // RUN:   grep cl""ang_rt %t | xargs which | \
-// RUN:   xargs llvm-readobj --coff-imports | not grep dbghelp.dll %t
+// RUN:   xargs llvm-readobj --coff-imports | not grep dbghelp.dll
 
 extern "C" int puts(const char *);
 

From ea960400213ad6a619d40536ae8965fca70eac89 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Sat, 16 Nov 2024 20:55:33 -0800
Subject: [PATCH 05/38] [Mips] Change vsplat_imm_eq_1 to a ComplexPattern.
 (#116471)

Resolves a FIXME and avoids needing to workaround #116075.

Adding parentheses around the (vsplat_imm_eq_1) fixes the error cited in
the FIXME by changing the ComplexPattern from a leaf node to an
operator.

(cherry picked from commit 2f4572f5e7e2d7f4626e825404c11f07d191fb05)
---
 llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp   |  4 ++
 llvm/lib/Target/Mips/MipsISelDAGToDAG.h     |  3 ++
 llvm/lib/Target/Mips/MipsMSAInstrInfo.td    | 59 ++++++++-------------
 llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 12 +++++
 llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h   |  3 ++
 5 files changed, 43 insertions(+), 38 deletions(-)

diff --git a/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
index f6f32fde3b777..a9ffd2bedf21e 100644
--- a/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -220,6 +220,10 @@ bool MipsDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const {
   return false;
 }
 
+bool MipsDAGToDAGISel::selectVSplatImmEq1(SDValue N) const {
+  llvm_unreachable("Unimplemented function.");
+}
+
 /// Convert vector addition with vector subtraction if that allows to encode
 /// constant as an immediate and thus avoid extra 'ldi' instruction.
 /// add X, <-1, -1...> --> sub X, <1, 1...>
diff --git a/llvm/lib/Target/Mips/MipsISelDAGToDAG.h b/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
index 6135f96807854..3485300a782c9 100644
--- a/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -120,6 +120,9 @@ class MipsDAGToDAGISel : public SelectionDAGISel {
   /// starting at bit zero.
   virtual bool selectVSplatMaskR(SDValue N, SDValue &Imm) const;
 
+  /// Select constant vector splats whose value is 1.
+  virtual bool selectVSplatImmEq1(SDValue N) const;
+
   /// Convert vector addition with vector subtraction if that allows to encode
   /// constant as an immediate and thus avoid extra 'ldi' instruction.
   /// add X, <-1, -1...> --> sub X, <1, 1...>
diff --git a/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
index c4abccb24c6f3..f4c32c9dcd421 100644
--- a/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -198,14 +198,8 @@ def vsplati32 : PatFrag<(ops node:$e0),
                         (v4i32 (build_vector node:$e0, node:$e0,
                                              node:$e0, node:$e0))>;
 
-def vsplati64_imm_eq_1 : PatLeaf<(bitconvert (v4i32 (build_vector))), [{
-  APInt Imm;
-  SDNode *BV = N->getOperand(0).getNode();
-  EVT EltTy = N->getValueType(0).getVectorElementType();
-
-  return selectVSplat(BV, Imm, EltTy.getSizeInBits()) &&
-         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1;
-}]>;
+// Any build_vector that is a constant splat with a value that equals 1
+def vsplat_imm_eq_1 : ComplexPattern<vAny, 0, "selectVSplatImmEq1">;
 
 def vsplati64 : PatFrag<(ops node:$e0),
                         (v2i64 (build_vector node:$e0, node:$e0))>;
@@ -217,7 +211,7 @@ def vsplati64_splat_d : PatFrag<(ops node:$e0),
                                                                 node:$e0,
                                                                 node:$e0,
                                                                 node:$e0)),
-                                           vsplati64_imm_eq_1))))>;
+                                           (vsplat_imm_eq_1)))))>;
 
 def vsplatf32 : PatFrag<(ops node:$e0),
                         (v4f32 (build_vector node:$e0, node:$e0,
@@ -352,46 +346,35 @@ def vsplat_maskr_bits_uimm6
     : SplatComplexPattern<vsplat_uimm6, vAny, 1, "selectVSplatMaskR",
                           [build_vector, bitconvert]>;
 
-// Any build_vector that is a constant splat with a value that equals 1
-// FIXME: These should be a ComplexPattern but we can't use them because the
-//        ISel generator requires the uses to have a name, but providing a name
-//        causes other errors ("used in pattern but not operand list")
-def vsplat_imm_eq_1 : PatLeaf<(build_vector), [{
-  APInt Imm;
-  EVT EltTy = N->getValueType(0).getVectorElementType();
-
-  return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
-         Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 1;
-}]>;
 
 def vbclr_b : PatFrag<(ops node:$ws, node:$wt),
-                      (and node:$ws, (vnot (shl vsplat_imm_eq_1, node:$wt)))>;
+                      (and node:$ws, (vnot (shl (vsplat_imm_eq_1), node:$wt)))>;
 def vbclr_h : PatFrag<(ops node:$ws, node:$wt),
-                      (and node:$ws, (vnot (shl vsplat_imm_eq_1, node:$wt)))>;
+                      (and node:$ws, (vnot (shl (vsplat_imm_eq_1), node:$wt)))>;
 def vbclr_w : PatFrag<(ops node:$ws, node:$wt),
-                      (and node:$ws, (vnot (shl vsplat_imm_eq_1, node:$wt)))>;
+                      (and node:$ws, (vnot (shl (vsplat_imm_eq_1), node:$wt)))>;
 def vbclr_d : PatFrag<(ops node:$ws, node:$wt),
-                      (and node:$ws, (vnot (shl (v2i64 vsplati64_imm_eq_1),
+                      (and node:$ws, (vnot (shl (v2i64 (vsplat_imm_eq_1)),
                                                node:$wt)))>;
 
 def vbneg_b : PatFrag<(ops node:$ws, node:$wt),
-                      (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+                      (xor node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>;
 def vbneg_h : PatFrag<(ops node:$ws, node:$wt),
-                      (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+                      (xor node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>;
 def vbneg_w : PatFrag<(ops node:$ws, node:$wt),
-                      (xor node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+                      (xor node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>;
 def vbneg_d : PatFrag<(ops node:$ws, node:$wt),
-                      (xor node:$ws, (shl (v2i64 vsplati64_imm_eq_1),
+                      (xor node:$ws, (shl (v2i64 (vsplat_imm_eq_1)),
                                           node:$wt))>;
 
 def vbset_b : PatFrag<(ops node:$ws, node:$wt),
-                      (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+                      (or node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>;
 def vbset_h : PatFrag<(ops node:$ws, node:$wt),
-                      (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+                      (or node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>;
 def vbset_w : PatFrag<(ops node:$ws, node:$wt),
-                      (or node:$ws, (shl vsplat_imm_eq_1, node:$wt))>;
+                      (or node:$ws, (shl (vsplat_imm_eq_1), node:$wt))>;
 def vbset_d : PatFrag<(ops node:$ws, node:$wt),
-                      (or node:$ws, (shl (v2i64 vsplati64_imm_eq_1),
+                      (or node:$ws, (shl (v2i64 (vsplat_imm_eq_1)),
                                          node:$wt))>;
 
 def muladd : PatFrag<(ops node:$wd, node:$ws, node:$wt),
@@ -3842,7 +3825,7 @@ class MSAShiftPat<SDNode Node, ValueType VT, MSAInst Insn, dag Vec> :
          (VT (Insn VT:$ws, VT:$wt))>;
 
 class MSABitPat<SDNode Node, ValueType VT, MSAInst Insn, PatFrag Frag> :
-  MSAPat<(VT (Node VT:$ws, (shl vsplat_imm_eq_1, (Frag VT:$wt)))),
+  MSAPat<(VT (Node VT:$ws, (shl (vsplat_imm_eq_1), (Frag VT:$wt)))),
          (VT (Insn VT:$ws, VT:$wt))>;
 
 multiclass MSAShiftPats<SDNode Node, string Insn> {
@@ -3861,7 +3844,7 @@ multiclass MSABitPats<SDNode Node, string Insn> {
   def : MSABitPat<Node, v16i8, !cast<MSAInst>(Insn#_B), vsplati8imm7>;
   def : MSABitPat<Node, v8i16, !cast<MSAInst>(Insn#_H), vsplati16imm15>;
   def : MSABitPat<Node, v4i32, !cast<MSAInst>(Insn#_W), vsplati32imm31>;
-  def : MSAPat<(Node v2i64:$ws, (shl (v2i64 vsplati64_imm_eq_1),
+  def : MSAPat<(Node v2i64:$ws, (shl (v2i64 (vsplat_imm_eq_1)),
                                      (vsplati64imm63 v2i64:$wt))),
                (v2i64 (!cast<MSAInst>(Insn#_D) v2i64:$ws, v2i64:$wt))>;
 }
@@ -3872,16 +3855,16 @@ defm : MSAShiftPats<sra, "SRA">;
 defm : MSABitPats<xor, "BNEG">;
 defm : MSABitPats<or, "BSET">;
 
-def : MSAPat<(and v16i8:$ws, (vnot (shl vsplat_imm_eq_1,
+def : MSAPat<(and v16i8:$ws, (vnot (shl (vsplat_imm_eq_1),
                                         (vsplati8imm7 v16i8:$wt)))),
              (v16i8 (BCLR_B v16i8:$ws, v16i8:$wt))>;
-def : MSAPat<(and v8i16:$ws, (vnot (shl vsplat_imm_eq_1,
+def : MSAPat<(and v8i16:$ws, (vnot (shl (vsplat_imm_eq_1),
                                         (vsplati16imm15 v8i16:$wt)))),
              (v8i16 (BCLR_H v8i16:$ws, v8i16:$wt))>;
-def : MSAPat<(and v4i32:$ws, (vnot (shl vsplat_imm_eq_1,
+def : MSAPat<(and v4i32:$ws, (vnot (shl (vsplat_imm_eq_1),
                                         (vsplati32imm31 v4i32:$wt)))),
              (v4i32 (BCLR_W v4i32:$ws, v4i32:$wt))>;
-def : MSAPat<(and v2i64:$ws, (vnot (shl (v2i64 vsplati64_imm_eq_1),
+def : MSAPat<(and v2i64:$ws, (vnot (shl (v2i64 (vsplat_imm_eq_1)),
                                         (vsplati64imm63 v2i64:$wt)))),
              (v2i64 (BCLR_D v2i64:$ws, v2i64:$wt))>;
 
diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index 7ad300c6cccd4..66c034a889c60 100644
--- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -730,6 +730,18 @@ bool MipsSEDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N,
   return false;
 }
 
+// Select const vector splat of 1.
+bool MipsSEDAGToDAGISel::selectVSplatImmEq1(SDValue N) const {
+  APInt ImmValue;
+  EVT EltTy = N->getValueType(0).getVectorElementType();
+
+  if (N->getOpcode() == ISD::BITCAST)
+    N = N->getOperand(0);
+
+  return selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
+         ImmValue.getBitWidth() == EltTy.getSizeInBits() && ImmValue == 1;
+}
+
 bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
   unsigned Opcode = Node->getOpcode();
   SDLoc DL(Node);
diff --git a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
index 7b843b0e0b255..22d8e924ac534 100644
--- a/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -124,6 +124,9 @@ class MipsSEDAGToDAGISel : public MipsDAGToDAGISel {
   /// starting at bit zero.
   bool selectVSplatMaskR(SDValue N, SDValue &Imm) const override;
 
+  /// Select constant vector splats whose value is 1.
+  bool selectVSplatImmEq1(SDValue N) const override;
+
   bool trySelect(SDNode *Node) override;
 
   // Emits proper ABI for _mcount profiling calls.

From 3d12f45e50b68ac908ef05571e5cc52f4b966d94 Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333@gmail.com>
Date: Tue, 19 Nov 2024 21:24:40 +0800
Subject: [PATCH 06/38] [SDAG][ISel][TableGen][LoongArch] Report error for
 trivial bitcasts when there are predicate calls (#116075)

On loongarch64 with lsx extension, we select `VBITREV_W` for `v4i32 (xor
X, (shl splat(1), Y))`:

https://github.com/llvm/llvm-project/blob/8e6630391699116641cf390a10476295b7d4b95c/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td#L1583-L1584

And `vsplat_imm_eq_1` is defined as:

https://github.com/llvm/llvm-project/blob/8e6630391699116641cf390a10476295b7d4b95c/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td#L77-L87

For the `(bitconvert (v4i32 (build_vector)))` case, the pattern is
expected to be:
```
PATTERN: (xor:{ *:[v4i32] } v4i32:{ *:[v4i32] }:$vj, (shl:{ *:[v4i32] } (bitconvert:{ *:[v4i32] } (build_vector:{ *:[v4i32] }))<<P:Predicate_vsplat_imm_eq_1>>, v4i32:{ *:[v4i32] }:$vk))
RESULT:  (VBITREV_W:{ *:[v4i32] } v4i32:{ *:[v4i32] }:$vj, v4i32:{ *:[v4i32] }:$vk)
```

However, `simplifyTree` drops the `bitconvert` node and its predicates:

https://github.com/llvm/llvm-project/blob/8e6630391699116641cf390a10476295b7d4b95c/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp#L3036-L3062

Then llvm will match `vsplat_imm_eq_1` for any v4i32 splats and cause a
miscompilation:
```
PATTERN: (xor:{ *:[v4i32] } v4i32:{ *:[v4i32] }:$vj, (shl:{ *:[v4i32] } (build_vector:{ *:[v4i32] }), v4i32:{ *:[v4i32] }:$vk))
RESULT:  (VBITREV_W:{ *:[v4i32] } v4i32:{ *:[v4i32] }:$vj, v4i32:{ *:[v4i32] }:$vk)
```

This patch adds additional checks for predicates associated with the
trivial bitconvert node. Unused patterns in the LoongArch target are
also removed.

Fixes https://github.com/llvm/llvm-project/issues/116008.

(cherry picked from commit c727b48287cc96888f9e262f23d53cf635cf3b3d)
---
 .../Target/LoongArch/LoongArchLSXInstrInfo.td   |  6 ++----
 llvm/test/CodeGen/LoongArch/lsx/pr116008.ll     | 17 +++++++++++++++++
 .../TableGen/Common/CodeGenDAGPatterns.cpp      |  8 ++++++++
 3 files changed, 27 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/LoongArch/lsx/pr116008.ll

diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 0580683c3ce30..0233baecf6dd9 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -67,8 +67,7 @@ class VecCond<SDPatternOperator OpNode, ValueType TyNode,
   let usesCustomInserter = 1;
 }
 
-def vsplat_imm_eq_1 : PatFrags<(ops), [(build_vector),
-                                       (bitconvert (v4i32 (build_vector)))], [{
+def vsplat_imm_eq_1 : PatFrags<(ops), [(build_vector)], [{
   APInt Imm;
   EVT EltTy = N->getValueType(0).getVectorElementType();
 
@@ -109,8 +108,7 @@ def vsplati32_imm_eq_31 : PatFrags<(ops), [(build_vector)], [{
   return selectVSplat(N, Imm, EltTy.getSizeInBits()) &&
          Imm.getBitWidth() == EltTy.getSizeInBits() && Imm == 31;
 }]>;
-def vsplati64_imm_eq_63 : PatFrags<(ops), [(build_vector),
-                                           (bitconvert (v4i32 (build_vector)))], [{
+def vsplati64_imm_eq_63 : PatFrags<(ops), [(build_vector)], [{
   APInt Imm;
   EVT EltTy = N->getValueType(0).getVectorElementType();
 
diff --git a/llvm/test/CodeGen/LoongArch/lsx/pr116008.ll b/llvm/test/CodeGen/LoongArch/lsx/pr116008.ll
new file mode 100644
index 0000000000000..ba8ffc3493189
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/pr116008.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define <4 x i32> @xor_shl_splat_vec_one(i32 %x, <4 x i32> %y) nounwind {
+; CHECK-LABEL: xor_shl_splat_vec_one:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vreplgr2vr.w $vr1, $a0
+; CHECK-NEXT:    vsll.w $vr0, $vr1, $vr0
+; CHECK-NEXT:    vbitrevi.w $vr0, $vr0, 0
+; CHECK-NEXT:    ret
+entry:
+  %ins = insertelement <4 x i32> poison, i32 %x, i64 0
+  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
+  %shl = shl <4 x i32> %splat, %y
+  %xor = xor <4 x i32> %shl, splat (i32 1)
+  ret <4 x i32> %xor
+}
diff --git a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp
index a8cecca0d4a54..ca71569008d5e 100644
--- a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp
+++ b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp
@@ -3042,6 +3042,14 @@ static bool SimplifyTree(TreePatternNodePtr &N) {
       !N->getExtType(0).empty() &&
       N->getExtType(0) == N->getChild(0).getExtType(0) &&
       N->getName().empty()) {
+    if (!N->getPredicateCalls().empty()) {
+      std::string Str;
+      raw_string_ostream OS(Str);
+      OS << *N
+         << "\n trivial bitconvert node should not have predicate calls\n";
+      PrintFatalError(Str);
+      return false;
+    }
     N = N->getChildShared(0);
     SimplifyTree(N);
     return true;

From f9ae37c670d4bcf4713278ac94d2c8991a326f9e Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333@gmail.com>
Date: Tue, 19 Nov 2024 22:17:24 +0800
Subject: [PATCH 07/38] [InstCombine] Handle constant GEP expr in
 `SimplifyDemandedUseBits` (#116794)

Closes https://github.com/llvm/llvm-project/issues/116775.

(cherry picked from commit 03d8831fa8ef5b7e32172c718b550a454645faea)
---
 .../InstCombine/InstCombineSimplifyDemanded.cpp     |  2 +-
 llvm/test/Transforms/InstCombine/ptrmask.ll         | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 8a6ec3076ac62..b9d06b5936850 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1004,7 +1004,7 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Instruction *I,
             uint64_t MaskedGEPIndex = HighBitsGEPIndex | MaskedLowBitsGEPIndex;
 
             if (MaskedGEPIndex != GEPIndex) {
-              auto *GEP = cast<GetElementPtrInst>(II->getArgOperand(0));
+              auto *GEP = cast<GEPOperator>(II->getArgOperand(0));
               Builder.SetInsertPoint(I);
               Type *GEPIndexType =
                   DL.getIndexType(GEP->getPointerOperand()->getType());
diff --git a/llvm/test/Transforms/InstCombine/ptrmask.ll b/llvm/test/Transforms/InstCombine/ptrmask.ll
index 4631b81cd1ce1..cd998bac3f9f0 100644
--- a/llvm/test/Transforms/InstCombine/ptrmask.ll
+++ b/llvm/test/Transforms/InstCombine/ptrmask.ll
@@ -578,3 +578,16 @@ define ptr @ptrmask_is_useless_fail1(i64 %i, i64 %m) {
   %r = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 %m0)
   ret ptr %r
 }
+
+@GC_arrays = external global { i8, i8, i64 }
+
+define ptr @ptrmask_demandedbits_constantexpr() {
+; CHECK-LABEL: define ptr @ptrmask_demandedbits_constantexpr() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ALIGNED_RESULT:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i64(ptr nonnull @GC_arrays, i64 -8)
+; CHECK-NEXT:    ret ptr [[ALIGNED_RESULT]]
+;
+entry:
+  %aligned_result = call ptr @llvm.ptrmask.p0.i64(ptr getelementptr inbounds (i8, ptr @GC_arrays, i64 1), i64 -8)
+  ret ptr %aligned_result
+}

From e80925b5eb4c824fe97a055d49faa586de16c2b9 Mon Sep 17 00:00:00 2001
From: Alexey Karyakin <akaryaki@quicinc.com>
Date: Tue, 19 Nov 2024 09:27:01 -0600
Subject: [PATCH 08/38] [lld][Hexagon] Fix R_HEX_B22_PCREL range checks
 (#115925)

Range checks for R_HEX_B22_PCREL did not account for the fact that
offset is measured in instructions, not bytes.

Add a test for all range-checked relocations.
---
 lld/ELF/Arch/Hexagon.cpp          |  2 +-
 lld/test/ELF/hexagon-jump-error.s |  2 +-
 lld/test/ELF/hexagon.s            | 41 ++++++++++++++++++++++++++++++-
 3 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/lld/ELF/Arch/Hexagon.cpp b/lld/ELF/Arch/Hexagon.cpp
index 56cf96fd17704..8bcd28309f8b3 100644
--- a/lld/ELF/Arch/Hexagon.cpp
+++ b/lld/ELF/Arch/Hexagon.cpp
@@ -329,7 +329,7 @@ void Hexagon::relocate(uint8_t *loc, const Relocation &rel,
   case R_HEX_B22_PCREL:
   case R_HEX_GD_PLT_B22_PCREL:
   case R_HEX_PLT_B22_PCREL:
-    checkInt(loc, val, 22, rel);
+    checkInt(loc, val, 24, rel);
     or32le(loc, applyMask(0x1ff3ffe, val >> 2));
     break;
   case R_HEX_B22_PCREL_X:
diff --git a/lld/test/ELF/hexagon-jump-error.s b/lld/test/ELF/hexagon-jump-error.s
index fec873827e573..53860b5daf2b1 100644
--- a/lld/test/ELF/hexagon-jump-error.s
+++ b/lld/test/ELF/hexagon-jump-error.s
@@ -25,7 +25,7 @@ if (p0) jump #1f
 .section b15, "ax"
 1:
 
-# CHECK: relocation R_HEX_B22_PCREL out of range: 8388612 is not in [-2097152, 2097151]
+# CHECK: relocation R_HEX_B22_PCREL out of range: 8388612 is not in [-8388608, 8388607]
 jump #1f
 .space (1<<23)
 .section b22, "ax"
diff --git a/lld/test/ELF/hexagon.s b/lld/test/ELF/hexagon.s
index 8ef9b8eead8f1..b1576fb47d81a 100644
--- a/lld/test/ELF/hexagon.s
+++ b/lld/test/ELF/hexagon.s
@@ -1,7 +1,9 @@
 # REQUIRES: hexagon
 # RUN: llvm-mc -filetype=obj -triple=hexagon-unknown-elf %s -o %t.o
 # RUN: llvm-mc -filetype=obj -triple=hexagon-unknown-elf %S/Inputs/hexagon.s -o %t1.o
-# RUN: ld.lld %t.o %t1.o -o %t
+# RUN: ld.lld %t.o %t1.o -o %t --Ttext=0x200b4 --section-start=b_1000000=0x1000000 \
+# RUN:  --section-start=b_1000400=0x1000400 --section-start=b_1004000=0x1004000 \
+# RUN:  --section-start=b_1010000=0x1010000 --section-start=b_1800000=0x1800000
 # RUN: llvm-objdump --no-print-imm-hex -d %t | FileCheck %s
 
 # Note: 131584 == 0x20200
@@ -221,3 +223,40 @@ r0 = memw(r1+##_start)
 
 memw(r0+##_start) = r1
 # CHECK: memw(r0+##131644) = r1
+
+
+## Tests for maximum branch ranges reachable without trampolines.
+
+.section b_1000000, "ax"
+## The nop makes sure the first jump is within range.
+nop
+{ r0 = #0; jump #b_1000400 } // R_HEX_B9_PCREL
+if (r0==#0) jump:t #b_1004000 // R_HEX_B13_PCREL
+if (p0) jump #b_1010000 // R_HEX_B15_PCREL
+jump #b_1800000 // R_HEX_B22_PCREL
+
+.section b_1000400, "ax"
+nop
+
+.section b_1004000, "ax"
+nop
+
+.section b_1010000, "ax"
+nop
+
+.section b_1800000, "ax"
+nop
+
+## Make sure we got the right relocations.
+# RUN: llvm-readelf -r %t.o | FileCheck %s --check-prefix=REL
+# REL: R_HEX_B9_PCREL         00000000   b_1000400
+# REL: R_HEX_B13_PCREL        00000000   b_1004000
+# REL: R_HEX_B15_PCREL        00000000   b_1010000
+# REL: R_HEX_B22_PCREL        00000000   b_1800000
+
+# CHECK: 01000000 <b_1000000>:
+# CHECK-NEXT: 1000000: {{.*}} {  nop }
+# CHECK-NEXT: 1000004: {{.*}} {  r0 = #0 ; jump 0x1000400 }
+# CHECK-NEXT: 1000008: {{.*}} {  if (r0==#0) jump:t 0x1004000 }
+# CHECK-NEXT: 100000c: {{.*}} {  if (p0) jump:nt 0x1010000 }
+# CHECK-NEXT: 1000010: {{.*}} {  jump 0x1800000 }

From 5bd0474d1c45d58e472f25bf8292570ac78b5c15 Mon Sep 17 00:00:00 2001
From: DianQK <dianqk@dianqk.net>
Date: Wed, 20 Nov 2024 19:52:51 +0800
Subject: [PATCH 09/38] [LICM] allow MemoryAccess creation failure (#116813)

Fixes #116809.

After running some passes (SimpleLoopUnswitch, LoopInstSimplify, etc.),
MemorySSA might be outdated, and the instruction `I` may have become a
non-memory touching instruction.

LICM has already handled this, but it does not pass
`CreationMustSucceed=false` to `createDefinedAccess`.

(cherry picked from commit 18b02bbf441660683df7f3925946984203d49bab)
---
 llvm/include/llvm/Analysis/MemorySSAUpdater.h |  5 ++
 llvm/lib/Analysis/MemorySSAUpdater.cpp        | 13 ++++-
 llvm/lib/Transforms/Scalar/LICM.cpp           |  5 +-
 .../LICM/PR116813-memoryssa-outdated.ll       | 50 +++++++++++++++++++
 4 files changed, 70 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/LICM/PR116813-memoryssa-outdated.ll

diff --git a/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/llvm/include/llvm/Analysis/MemorySSAUpdater.h
index d4da3ef1146db..f598dedea75fd 100644
--- a/llvm/include/llvm/Analysis/MemorySSAUpdater.h
+++ b/llvm/include/llvm/Analysis/MemorySSAUpdater.h
@@ -192,6 +192,11 @@ class MemorySSAUpdater {
                                        const BasicBlock *BB,
                                        MemorySSA::InsertionPlace Point);
 
+  MemoryAccess *createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition,
+                                       const BasicBlock *BB,
+                                       MemorySSA::InsertionPlace Point,
+                                       bool CreationMustSucceed);
+
   /// Create a MemoryAccess in MemorySSA before an existing MemoryAccess.
   ///
   /// See createMemoryAccessInBB() for usage details.
diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp
index aa550f0b6a7bf..94061c949b7f8 100644
--- a/llvm/lib/Analysis/MemorySSAUpdater.cpp
+++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp
@@ -1404,8 +1404,17 @@ void MemorySSAUpdater::changeToUnreachable(const Instruction *I) {
 MemoryAccess *MemorySSAUpdater::createMemoryAccessInBB(
     Instruction *I, MemoryAccess *Definition, const BasicBlock *BB,
     MemorySSA::InsertionPlace Point) {
-  MemoryUseOrDef *NewAccess = MSSA->createDefinedAccess(I, Definition);
-  MSSA->insertIntoListsForBlock(NewAccess, BB, Point);
+  return createMemoryAccessInBB(I, Definition, BB, Point,
+                                /*CreationMustSucceed=*/true);
+}
+
+MemoryAccess *MemorySSAUpdater::createMemoryAccessInBB(
+    Instruction *I, MemoryAccess *Definition, const BasicBlock *BB,
+    MemorySSA::InsertionPlace Point, bool CreationMustSucceed) {
+  MemoryUseOrDef *NewAccess = MSSA->createDefinedAccess(
+      I, Definition, /*Template=*/nullptr, CreationMustSucceed);
+  if (NewAccess)
+    MSSA->insertIntoListsForBlock(NewAccess, BB, Point);
   return NewAccess;
 }
 
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 91ef2b4b7c183..ca03eff7a4e25 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -1464,8 +1464,11 @@ static Instruction *cloneInstructionInExitBlock(
 
   if (MSSAU.getMemorySSA()->getMemoryAccess(&I)) {
     // Create a new MemoryAccess and let MemorySSA set its defining access.
+    // After running some passes, MemorySSA might be outdated, and the
+    // instruction `I` may have become a non-memory touching instruction.
     MemoryAccess *NewMemAcc = MSSAU.createMemoryAccessInBB(
-        New, nullptr, New->getParent(), MemorySSA::Beginning);
+        New, nullptr, New->getParent(), MemorySSA::Beginning,
+        /*CreationMustSucceed=*/false);
     if (NewMemAcc) {
       if (auto *MemDef = dyn_cast<MemoryDef>(NewMemAcc))
         MSSAU.insertDef(MemDef, /*RenameUses=*/true);
diff --git a/llvm/test/Transforms/LICM/PR116813-memoryssa-outdated.ll b/llvm/test/Transforms/LICM/PR116813-memoryssa-outdated.ll
new file mode 100644
index 0000000000000..a040c3cc6947c
--- /dev/null
+++ b/llvm/test/Transforms/LICM/PR116813-memoryssa-outdated.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes='loop-mssa(simple-loop-unswitch<nontrivial>,licm)' -verify-memoryssa -S < %s | FileCheck %s
+
+; Check that running LICM after SimpleLoopUnswitch does not result in a crash.
+
+define i32 @foo(i1 %arg, ptr %arg1) {
+; CHECK-LABEL: define i32 @foo(
+; CHECK-SAME: i1 [[ARG:%.*]], ptr [[ARG1:%.*]]) {
+; CHECK-NEXT:  [[START:.*:]]
+; CHECK-NEXT:    [[ARG_FR:%.*]] = freeze i1 [[ARG]]
+; CHECK-NEXT:    br i1 [[ARG_FR]], label %[[START_SPLIT_US:.*]], label %[[START_SPLIT:.*]]
+; CHECK:       [[START_SPLIT_US]]:
+; CHECK-NEXT:    br label %[[LOOP_US:.*]]
+; CHECK:       [[LOOP_US]]:
+; CHECK-NEXT:    br label %[[BB0:.*]]
+; CHECK:       [[BB0]]:
+; CHECK-NEXT:    br label %[[BB1:.*]]
+; CHECK:       [[BB1]]:
+; CHECK-NEXT:    [[UNSWITCHED_SELECT_US:%.*]] = phi ptr [ [[ARG1]], %[[BB0]] ]
+; CHECK-NEXT:    [[I3_US:%.*]] = call i32 [[UNSWITCHED_SELECT_US]]()
+; CHECK-NEXT:    br i1 true, label %[[LOOP_US]], label %[[RET_SPLIT_US:.*]]
+; CHECK:       [[RET_SPLIT_US]]:
+; CHECK-NEXT:    [[I3_LCSSA_US:%.*]] = phi i32 [ [[I3_US]], %[[BB1]] ]
+; CHECK-NEXT:    br label %[[RET:.*]]
+; CHECK:       [[START_SPLIT]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    br label %[[BB2:.*]]
+; CHECK:       [[BB2]]:
+; CHECK-NEXT:    br i1 false, label %[[LOOP]], label %[[RET_SPLIT:.*]]
+; CHECK:       [[RET_SPLIT]]:
+; CHECK-NEXT:    [[I3_LE:%.*]] = call i32 @bar()
+; CHECK-NEXT:    br label %[[RET]]
+; CHECK:       [[RET]]:
+; CHECK-NEXT:    [[DOTUS_PHI:%.*]] = phi i32 [ [[I3_LE]], %[[RET_SPLIT]] ], [ [[I3_LCSSA_US]], %[[RET_SPLIT_US]] ]
+; CHECK-NEXT:    ret i32 [[DOTUS_PHI]]
+;
+start:
+  br label %loop
+
+loop:                                              ; preds = %loop, %bb
+  %i = select i1 %arg, ptr %arg1, ptr @bar
+  %i3 = call i32 %i()
+  br i1 %arg, label %loop, label %ret
+
+ret:                                              ; preds = %loop
+  ret i32 %i3
+}
+
+declare i32 @bar() nounwind willreturn memory(none)

From 336f87753b510aed840daf87f8d3a4996e6c8f15 Mon Sep 17 00:00:00 2001
From: wanglei <wanglei@loongson.cn>
Date: Thu, 21 Nov 2024 09:31:12 +0800
Subject: [PATCH 10/38] [LoongArch] Fix GOT usage for `non-dso_local` function
 calls in large code model

This commit fixes an issue in the large code model where non-dso_local
function calls did not use the GOT as expected in PIC mode. Instead,
direct PC-relative access was incorrectly applied, leading to linker
errors when building shared libraries.

For `ExternalSymbol`, it is not possible to determine whether it is
dso_local during pseudo-instruction expansion. We use target flags to
differentiate whether GOT should be used.

Cherry-picked from #117099, used for fix linker errors when bulding
shared libraries with large code model.
---
 .../LoongArch/LoongArchExpandPseudoInsts.cpp  |  2 +-
 llvm/test/CodeGen/LoongArch/code-models.ll    | 10 ++---
 .../LoongArch/machinelicm-address-pseudos.ll  | 20 +++++-----
 .../LoongArch/psabi-restricted-scheduling.ll  | 40 +++++++++----------
 llvm/test/CodeGen/LoongArch/tls-models.ll     | 20 +++++-----
 5 files changed, 46 insertions(+), 46 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
index c136f5b3e515d..e680dda7374d0 100644
--- a/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchExpandPseudoInsts.cpp
@@ -721,7 +721,7 @@ bool LoongArchExpandPseudo::expandFunctionCALL(
         IsTailCall ? LoongArch::PseudoJIRL_TAIL : LoongArch::PseudoJIRL_CALL;
     Register AddrReg = IsTailCall ? LoongArch::R19 : LoongArch::R1;
 
-    bool UseGOT = Func.isGlobal() && !Func.getGlobal()->isDSOLocal();
+    bool UseGOT = Func.getTargetFlags() == LoongArchII::MO_CALL_PLT;
     unsigned MO = UseGOT ? LoongArchII::MO_GOT_PC_HI : LoongArchII::MO_PCREL_LO;
     unsigned LAOpcode = UseGOT ? LoongArch::LDX_D : LoongArch::ADD_D;
     expandLargeAddressLoad(MBB, MBBI, NextMBBI, LAOpcode, MO, Func, AddrReg,
diff --git a/llvm/test/CodeGen/LoongArch/code-models.ll b/llvm/test/CodeGen/LoongArch/code-models.ll
index 4b2b72afaee17..4eb1e5e596fd3 100644
--- a/llvm/test/CodeGen/LoongArch/code-models.ll
+++ b/llvm/test/CodeGen/LoongArch/code-models.ll
@@ -82,11 +82,11 @@ define void @call_external_sym(ptr %dst) {
 ; LARGE-NEXT:    .cfi_offset 1, -8
 ; LARGE-NEXT:    ori $a2, $zero, 1000
 ; LARGE-NEXT:    move $a1, $zero
-; LARGE-NEXT:    pcalau12i $ra, %pc_hi20(memset)
-; LARGE-NEXT:    addi.d $t8, $zero, %pc_lo12(memset)
-; LARGE-NEXT:    lu32i.d $t8, %pc64_lo20(memset)
-; LARGE-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(memset)
-; LARGE-NEXT:    add.d $ra, $t8, $ra
+; LARGE-NEXT:    pcalau12i $ra, %got_pc_hi20(memset)
+; LARGE-NEXT:    addi.d $t8, $zero, %got_pc_lo12(memset)
+; LARGE-NEXT:    lu32i.d $t8, %got64_pc_lo20(memset)
+; LARGE-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(memset)
+; LARGE-NEXT:    ldx.d $ra, $t8, $ra
 ; LARGE-NEXT:    jirl $ra, $ra, 0
 ; LARGE-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
 ; LARGE-NEXT:    addi.d $sp, $sp, 16
diff --git a/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll b/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll
index ed1a24e82b4e4..29348fe0d641e 100644
--- a/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll
+++ b/llvm/test/CodeGen/LoongArch/machinelicm-address-pseudos.ll
@@ -282,11 +282,11 @@ define void @test_la_tls_ld(i32 signext %n) {
 ; LA64LARGE-NEXT:  .LBB3_1: # %loop
 ; LA64LARGE-NEXT:    # =>This Inner Loop Header: Depth=1
 ; LA64LARGE-NEXT:    move $a0, $s0
-; LA64LARGE-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LA64LARGE-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LA64LARGE-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LA64LARGE-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LA64LARGE-NEXT:    add.d $ra, $t8, $ra
+; LA64LARGE-NEXT:    pcalau12i $ra, %got_pc_hi20(__tls_get_addr)
+; LA64LARGE-NEXT:    addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr)
+; LA64LARGE-NEXT:    lu32i.d $t8, %got64_pc_lo20(__tls_get_addr)
+; LA64LARGE-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr)
+; LA64LARGE-NEXT:    ldx.d $ra, $t8, $ra
 ; LA64LARGE-NEXT:    jirl $ra, $ra, 0
 ; LA64LARGE-NEXT:    ld.w $zero, $a0, 0
 ; LA64LARGE-NEXT:    addi.w $s1, $s1, 1
@@ -448,11 +448,11 @@ define void @test_la_tls_gd(i32 signext %n) nounwind {
 ; LA64LARGE-NEXT:  .LBB5_1: # %loop
 ; LA64LARGE-NEXT:    # =>This Inner Loop Header: Depth=1
 ; LA64LARGE-NEXT:    move $a0, $s0
-; LA64LARGE-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LA64LARGE-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LA64LARGE-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LA64LARGE-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LA64LARGE-NEXT:    add.d $ra, $t8, $ra
+; LA64LARGE-NEXT:    pcalau12i $ra, %got_pc_hi20(__tls_get_addr)
+; LA64LARGE-NEXT:    addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr)
+; LA64LARGE-NEXT:    lu32i.d $t8, %got64_pc_lo20(__tls_get_addr)
+; LA64LARGE-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr)
+; LA64LARGE-NEXT:    ldx.d $ra, $t8, $ra
 ; LA64LARGE-NEXT:    jirl $ra, $ra, 0
 ; LA64LARGE-NEXT:    ld.w $zero, $a0, 0
 ; LA64LARGE-NEXT:    addi.w $s1, $s1, 1
diff --git a/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll
index 6a15d3a9cda30..75f494f32e476 100644
--- a/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll
+++ b/llvm/test/CodeGen/LoongArch/psabi-restricted-scheduling.ll
@@ -105,11 +105,11 @@ define void @foo() nounwind {
 ; LARGE_NO_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(gd)
 ; LARGE_NO_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(gd)
 ; LARGE_NO_SCH-NEXT:    add.d $a0, $t8, $a0
-; LARGE_NO_SCH-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    add.d $ra, $t8, $ra
+; LARGE_NO_SCH-NEXT:    pcalau12i $ra, %got_pc_hi20(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    ldx.d $ra, $t8, $ra
 ; LARGE_NO_SCH-NEXT:    jirl $ra, $ra, 0
 ; LARGE_NO_SCH-NEXT:    ld.d $zero, $a0, 0
 ; LARGE_NO_SCH-NEXT:    pcalau12i $a0, %ld_pc_hi20(ld)
@@ -117,11 +117,11 @@ define void @foo() nounwind {
 ; LARGE_NO_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(ld)
 ; LARGE_NO_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(ld)
 ; LARGE_NO_SCH-NEXT:    add.d $a0, $t8, $a0
-; LARGE_NO_SCH-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LARGE_NO_SCH-NEXT:    add.d $ra, $t8, $ra
+; LARGE_NO_SCH-NEXT:    pcalau12i $ra, %got_pc_hi20(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr)
+; LARGE_NO_SCH-NEXT:    ldx.d $ra, $t8, $ra
 ; LARGE_NO_SCH-NEXT:    jirl $ra, $ra, 0
 ; LARGE_NO_SCH-NEXT:    pcalau12i $a1, %ie_pc_hi20(ie)
 ; LARGE_NO_SCH-NEXT:    addi.d $t8, $zero, %ie_pc_lo12(ie)
@@ -162,11 +162,11 @@ define void @foo() nounwind {
 ; LARGE_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(gd)
 ; LARGE_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(gd)
 ; LARGE_SCH-NEXT:    add.d $a0, $t8, $a0
-; LARGE_SCH-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LARGE_SCH-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LARGE_SCH-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LARGE_SCH-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LARGE_SCH-NEXT:    add.d $ra, $t8, $ra
+; LARGE_SCH-NEXT:    pcalau12i $ra, %got_pc_hi20(__tls_get_addr)
+; LARGE_SCH-NEXT:    addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr)
+; LARGE_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(__tls_get_addr)
+; LARGE_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr)
+; LARGE_SCH-NEXT:    ldx.d $ra, $t8, $ra
 ; LARGE_SCH-NEXT:    jirl $ra, $ra, 0
 ; LARGE_SCH-NEXT:    ld.d $zero, $a0, 0
 ; LARGE_SCH-NEXT:    pcalau12i $a0, %ld_pc_hi20(ld)
@@ -174,11 +174,11 @@ define void @foo() nounwind {
 ; LARGE_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(ld)
 ; LARGE_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(ld)
 ; LARGE_SCH-NEXT:    add.d $a0, $t8, $a0
-; LARGE_SCH-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LARGE_SCH-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LARGE_SCH-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LARGE_SCH-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LARGE_SCH-NEXT:    add.d $ra, $t8, $ra
+; LARGE_SCH-NEXT:    pcalau12i $ra, %got_pc_hi20(__tls_get_addr)
+; LARGE_SCH-NEXT:    addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr)
+; LARGE_SCH-NEXT:    lu32i.d $t8, %got64_pc_lo20(__tls_get_addr)
+; LARGE_SCH-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr)
+; LARGE_SCH-NEXT:    ldx.d $ra, $t8, $ra
 ; LARGE_SCH-NEXT:    jirl $ra, $ra, 0
 ; LARGE_SCH-NEXT:    pcalau12i $a1, %ie_pc_hi20(ie)
 ; LARGE_SCH-NEXT:    addi.d $t8, $zero, %ie_pc_lo12(ie)
diff --git a/llvm/test/CodeGen/LoongArch/tls-models.ll b/llvm/test/CodeGen/LoongArch/tls-models.ll
index bb89794d1c843..04600ffeb37ee 100644
--- a/llvm/test/CodeGen/LoongArch/tls-models.ll
+++ b/llvm/test/CodeGen/LoongArch/tls-models.ll
@@ -55,11 +55,11 @@ define ptr @f1() nounwind {
 ; LA64LARGEPIC-NEXT:    lu32i.d $t8, %got64_pc_lo20(unspecified)
 ; LA64LARGEPIC-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(unspecified)
 ; LA64LARGEPIC-NEXT:    add.d $a0, $t8, $a0
-; LA64LARGEPIC-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    add.d $ra, $t8, $ra
+; LA64LARGEPIC-NEXT:    pcalau12i $ra, %got_pc_hi20(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    lu32i.d $t8, %got64_pc_lo20(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    ldx.d $ra, $t8, $ra
 ; LA64LARGEPIC-NEXT:    jirl $ra, $ra, 0
 ; LA64LARGEPIC-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
 ; LA64LARGEPIC-NEXT:    addi.d $sp, $sp, 16
@@ -169,11 +169,11 @@ define ptr @f2() nounwind {
 ; LA64LARGEPIC-NEXT:    lu32i.d $t8, %got64_pc_lo20(ld)
 ; LA64LARGEPIC-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(ld)
 ; LA64LARGEPIC-NEXT:    add.d $a0, $t8, $a0
-; LA64LARGEPIC-NEXT:    pcalau12i $ra, %pc_hi20(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    addi.d $t8, $zero, %pc_lo12(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    lu32i.d $t8, %pc64_lo20(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    lu52i.d $t8, $t8, %pc64_hi12(__tls_get_addr)
-; LA64LARGEPIC-NEXT:    add.d $ra, $t8, $ra
+; LA64LARGEPIC-NEXT:    pcalau12i $ra, %got_pc_hi20(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    addi.d $t8, $zero, %got_pc_lo12(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    lu32i.d $t8, %got64_pc_lo20(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    lu52i.d $t8, $t8, %got64_pc_hi12(__tls_get_addr)
+; LA64LARGEPIC-NEXT:    ldx.d $ra, $t8, $ra
 ; LA64LARGEPIC-NEXT:    jirl $ra, $ra, 0
 ; LA64LARGEPIC-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
 ; LA64LARGEPIC-NEXT:    addi.d $sp, $sp, 16

From 11be11b8773bf63abe2c8da72db3ee7c25af524b Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333@gmail.com>
Date: Thu, 21 Nov 2024 17:23:04 +0800
Subject: [PATCH 11/38] [SCEV] Fix sext handling for `getConstantMultiple`
 (#117093)

Counterexample: 219 is a multiple of 73. But `sext i8 219 to i16 =
65499` is not.
Fixes https://github.com/llvm/llvm-project/issues/116483.

(cherry picked from commit 458dfbd855806461b4508bf8845cafe0411dbfd4)
---
 llvm/lib/Analysis/ScalarEvolution.cpp         |  4 ++-
 .../test/Analysis/ScalarEvolution/pr116483.ll | 26 ++++++++++++++
 .../Transforms/IndVarSimplify/pr116483.ll     | 36 +++++++++++++++++++
 3 files changed, 65 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Analysis/ScalarEvolution/pr116483.ll
 create mode 100644 llvm/test/Transforms/IndVarSimplify/pr116483.ll

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 51cffac808768..412cfe73d3e55 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -6313,8 +6313,10 @@ APInt ScalarEvolution::getConstantMultipleImpl(const SCEV *S) {
     return getConstantMultiple(Z->getOperand()).zext(BitWidth);
   }
   case scSignExtend: {
+    // Only multiples that are a power of 2 will hold after sext.
     const SCEVSignExtendExpr *E = cast<SCEVSignExtendExpr>(S);
-    return getConstantMultiple(E->getOperand()).sext(BitWidth);
+    uint32_t TZ = getMinTrailingZeros(E->getOperand());
+    return GetShiftedByZeros(TZ);
   }
   case scMulExpr: {
     const SCEVMulExpr *M = cast<SCEVMulExpr>(S);
diff --git a/llvm/test/Analysis/ScalarEvolution/pr116483.ll b/llvm/test/Analysis/ScalarEvolution/pr116483.ll
new file mode 100644
index 0000000000000..cc2334e9c64f9
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/pr116483.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -disable-output "-passes=print<scalar-evolution>" < %s 2>&1 | FileCheck %s
+
+define i16 @test() {
+; CHECK-LABEL: 'test'
+; CHECK-NEXT:  Classifying expressions for: @test
+; CHECK-NEXT:    %xor = xor i32 0, 3
+; CHECK-NEXT:    --> %xor U: [3,4) S: [3,4)
+; CHECK-NEXT:    %mul = mul i32 %xor, 329
+; CHECK-NEXT:    --> (329 * %xor)<nuw><nsw> U: [987,988) S: [987,988)
+; CHECK-NEXT:    %conv = trunc i32 %mul to i16
+; CHECK-NEXT:    --> (329 * (trunc i32 %xor to i16))<nuw><nsw> U: [987,988) S: [987,988)
+; CHECK-NEXT:    %sext = shl i16 %conv, 8
+; CHECK-NEXT:    --> (18688 * (trunc i32 %xor to i16))<nuw> U: [-9472,-9471) S: [-9472,-9471)
+; CHECK-NEXT:    %conv1 = ashr i16 %sext, 8
+; CHECK-NEXT:    --> (sext i8 (73 * (trunc i32 %xor to i8))<nuw> to i16) U: [-37,-36) S: [-37,-36)
+; CHECK-NEXT:  Determining loop execution counts for: @test
+;
+entry:
+  %xor = xor i32 0, 3
+  %mul = mul i32 %xor, 329
+  %conv = trunc i32 %mul to i16
+  %sext = shl i16 %conv, 8
+  %conv1 = ashr i16 %sext, 8
+  ret i16 %conv1
+}
diff --git a/llvm/test/Transforms/IndVarSimplify/pr116483.ll b/llvm/test/Transforms/IndVarSimplify/pr116483.ll
new file mode 100644
index 0000000000000..ae108a525223e
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/pr116483.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=indvars < %s | FileCheck %s
+
+define i32 @test() {
+; CHECK-LABEL: define i32 @test() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 0, 3
+; CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[XOR]], 329
+; CHECK-NEXT:    [[CONV:%.*]] = trunc i32 [[MUL]] to i16
+; CHECK-NEXT:    [[SEXT:%.*]] = shl i16 [[CONV]], 8
+; CHECK-NEXT:    [[CONV1:%.*]] = ashr i16 [[SEXT]], 8
+; CHECK-NEXT:    br label %[[LOOP_BODY:.*]]
+; CHECK:       [[LOOP_BODY]]:
+; CHECK-NEXT:    br i1 true, label %[[EXIT:.*]], label %[[LOOP_BODY]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[CONV3:%.*]] = zext i16 [[CONV1]] to i32
+; CHECK-NEXT:    ret i32 [[CONV3]]
+;
+entry:
+  %xor = xor i32 0, 3
+  %mul = mul i32 %xor, 329
+  %conv = trunc i32 %mul to i16
+  %sext = shl i16 %conv, 8
+  %conv1 = ashr i16 %sext, 8
+  %conv3 = zext i16 %conv1 to i32
+  br label %loop.body
+
+loop.body:
+  %indvar = phi i32 [ %indvar.inc, %loop.body ], [ 1, %entry ]
+  %indvar.inc = add nuw i32 %indvar, 1
+  %exitcond = icmp eq i32 %indvar, %conv3
+  br i1 %exitcond, label %exit, label %loop.body
+
+exit:
+  ret i32 %conv3
+}

From 7e2da7d262380d5ebaf25dbadd7ad2440f6ce21f Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333@gmail.com>
Date: Mon, 18 Nov 2024 23:41:04 +0800
Subject: [PATCH 12/38] [ConstraintElim] Bail out on non-dedicated exits when
 adding exiting conditions (#116627)

This patch bails out non-dedicated exits to avoid adding exiting
conditions to invalid context.
Closes https://github.com/llvm/llvm-project/issues/116553.

(cherry picked from commit 52361d0368b79841be12156bf03cf8c1851e5df7)
---
 .../Scalar/ConstraintElimination.cpp          | 13 +++---
 .../induction-condition-in-loop-exit.ll       | 44 +++++++++++++++++++
 2 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
index 37022104d0a9b..d1c80aa671243 100644
--- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
+++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp
@@ -1033,9 +1033,9 @@ void State::addInfoForInductions(BasicBlock &BB) {
       DTN, CmpInst::ICMP_SLT, PN, B,
       ConditionTy(CmpInst::ICMP_SLE, StartValue, B)));
 
-  // Try to add condition from header to the exit blocks. When exiting either
-  // with EQ or NE in the header, we know that the induction value must be u<=
-  // B, as other exits may only exit earlier.
+  // Try to add condition from header to the dedicated exit blocks. When exiting
+  // either with EQ or NE in the header, we know that the induction value must
+  // be u<= B, as other exits may only exit earlier.
   assert(!StepOffset.isNegative() && "induction must be increasing");
   assert((Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) &&
          "unsupported predicate");
@@ -1043,8 +1043,11 @@ void State::addInfoForInductions(BasicBlock &BB) {
   SmallVector<BasicBlock *> ExitBBs;
   L->getExitBlocks(ExitBBs);
   for (BasicBlock *EB : ExitBBs) {
-    WorkList.emplace_back(FactOrCheck::getConditionFact(
-        DT.getNode(EB), CmpInst::ICMP_ULE, A, B, Precond));
+    // Bail out on non-dedicated exits.
+    if (DT.dominates(&BB, EB)) {
+      WorkList.emplace_back(FactOrCheck::getConditionFact(
+          DT.getNode(EB), CmpInst::ICMP_ULE, A, B, Precond));
+    }
   }
 }
 
diff --git a/llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll b/llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll
index 15e1d84372627..a04b06e1bf0a5 100644
--- a/llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll
+++ b/llvm/test/Transforms/ConstraintElimination/induction-condition-in-loop-exit.ll
@@ -763,3 +763,47 @@ exit.2:
   %t.2 = icmp ult i32 %iv, %N
   ret i1 %t.2
 }
+
+define i1 @test_non_dedicated_exit(i16 %n) {
+; CHECK-LABEL: define i1 @test_non_dedicated_exit(
+; CHECK-SAME: i16 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[COND:%.*]] = icmp slt i16 [[N]], 1
+; CHECK-NEXT:    br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP_PREHEADER:.*]]
+; CHECK:       [[LOOP_PREHEADER]]:
+; CHECK-NEXT:    [[SUB:%.*]] = add nsw i16 [[N]], -1
+; CHECK-NEXT:    [[EXT:%.*]] = zext nneg i16 [[SUB]] to i32
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ [[INDVAR_INC:%.*]], %[[LOOP_LATCH:.*]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVAR]], [[EXT]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[EXIT]], label %[[LOOP_LATCH]]
+; CHECK:       [[LOOP_LATCH]]:
+; CHECK-NEXT:    [[INDVAR_INC]] = add nuw nsw i32 [[INDVAR]], 1
+; CHECK-NEXT:    br label %[[LOOP]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i16 [[N]], 0
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %cond = icmp slt i16 %n, 1
+  br i1 %cond, label %exit, label %loop.preheader
+
+loop.preheader:
+  %sub = add nsw i16 %n, -1
+  %ext = zext nneg i16 %sub to i32
+  br label %loop
+
+loop:
+  %indvar = phi i32 [ %indvar.inc, %loop.latch ], [ 0, %loop.preheader ]
+  %exitcond = icmp eq i32 %indvar, %ext
+  br i1 %exitcond, label %exit, label %loop.latch
+
+loop.latch:
+  %indvar.inc = add nuw nsw i32 %indvar, 1
+  br label %loop
+
+exit:
+  %cmp = icmp sgt i16 %n, 0
+  ret i1 %cmp
+}

From 32cbe24de3f2ecb1c77899ff27bfe70bb033ecde Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Wed, 20 Nov 2024 15:10:19 +0000
Subject: [PATCH 13/38] [MachineLICM] Add test case showing load hoisted across
 memory barrier.

(cherry picked from commit a9b3ec154d7ab2d0896ac5c9f1e9a1266a37be80)
---
 .../AArch64/machine-licm-hoist-load.ll        | 29 +++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
index e8dafd5e8fbab..932a5af264a00 100644
--- a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
+++ b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
@@ -497,6 +497,35 @@ for.exit:                                 ; preds = %for.body
   ret i64 %spec.select
 }
 
+@a = external local_unnamed_addr global i32, align 4
+
+; FIXME: Load hoisted out of the loop across memory barriers.
+define i32 @load_between_memory_barriers() {
+; CHECK-LABEL: load_between_memory_barriers:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    adrp x8, :got:a
+; CHECK-NEXT:    ldr x8, [x8, :got_lo12:a]
+; CHECK-NEXT:    ldr w0, [x8]
+; CHECK-NEXT:  .LBB8_1: // %loop
+; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    //MEMBARRIER
+; CHECK-NEXT:    //MEMBARRIER
+; CHECK-NEXT:    cbz w0, .LBB8_1
+; CHECK-NEXT:  // %bb.2: // %exit
+; CHECK-NEXT:    ret
+  br label %loop
+
+loop:
+  fence syncscope("singlethread") acq_rel
+  %l = load i32, ptr @a, align 4
+  fence syncscope("singlethread") acq_rel
+  %c = icmp eq i32 %l, 0
+  br i1 %c, label %loop, label %exit
+
+exit:
+  ret i32 %l
+}
+
 declare i32 @bcmp(ptr, ptr, i64)
 declare i32 @memcmp(ptr, ptr, i64)
 declare void @func()

From 086d8e6bb5daf8de43880ba90258c49e0fabf2c9 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo@fhahn.com>
Date: Thu, 21 Nov 2024 10:25:04 +0000
Subject: [PATCH 14/38] [MachineLICM] Don't allow hoisting invariant loads
 across mem barrier. (#116987)

The improvements in 63917e1 / #70796 do not check for memory
barriers/unmodelled sideeffects, which means we may incorrectly hoist
loads across memory barriers.

Fix this by checking any machine instruction in the loop is a load-fold
barrier.

PR: https://github.com/llvm/llvm-project/pull/116987
(cherry picked from commit ef102b4a6333a304e36dc623d5381257a7ef1ed6)
---
 llvm/lib/CodeGen/MachineLICM.cpp                     | 2 +-
 llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll | 4 ++--
 llvm/test/CodeGen/Mips/lcb5.ll                       | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index f24ab187ef400..21a02a6f09478 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -1474,7 +1474,7 @@ void MachineLICMBase::InitializeLoadsHoistableLoops() {
       if (!AllowedToHoistLoads[Loop])
         continue;
       for (auto &MI : *MBB) {
-        if (!MI.mayStore() && !MI.isCall() &&
+        if (!MI.isLoadFoldBarrier() && !MI.mayStore() && !MI.isCall() &&
             !(MI.mayLoad() && MI.hasOrderedMemoryRef()))
           continue;
         for (MachineLoop *L = Loop; L != nullptr; L = L->getParentLoop())
diff --git a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
index 932a5af264a00..17f8263560430 100644
--- a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
+++ b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll
@@ -499,16 +499,16 @@ for.exit:                                 ; preds = %for.body
 
 @a = external local_unnamed_addr global i32, align 4
 
-; FIXME: Load hoisted out of the loop across memory barriers.
+; Make sure the load is not hoisted out of the loop across memory barriers.
 define i32 @load_between_memory_barriers() {
 ; CHECK-LABEL: load_between_memory_barriers:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, :got:a
 ; CHECK-NEXT:    ldr x8, [x8, :got_lo12:a]
-; CHECK-NEXT:    ldr w0, [x8]
 ; CHECK-NEXT:  .LBB8_1: // %loop
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    //MEMBARRIER
+; CHECK-NEXT:    ldr w0, [x8]
 ; CHECK-NEXT:    //MEMBARRIER
 ; CHECK-NEXT:    cbz w0, .LBB8_1
 ; CHECK-NEXT:  // %bb.2: // %exit
diff --git a/llvm/test/CodeGen/Mips/lcb5.ll b/llvm/test/CodeGen/Mips/lcb5.ll
index f320f6fc5660c..bb059f1ee8453 100644
--- a/llvm/test/CodeGen/Mips/lcb5.ll
+++ b/llvm/test/CodeGen/Mips/lcb5.ll
@@ -186,7 +186,7 @@ if.end:                                           ; preds = %if.then, %entry
 }
 
 ; ci:	.ent	z3
-; ci:	bteqz	$BB6_3
+; ci:	bteqz	$BB6_2
 ; ci:	.end	z3
 
 ; Function Attrs: nounwind optsize
@@ -210,7 +210,7 @@ if.end:                                           ; preds = %if.then, %entry
 
 ; ci:	.ent	z4
 ; ci:	btnez	$BB7_1  # 16 bit inst
-; ci:	jal	$BB7_3	# branch
+; ci:	jal	$BB7_2	# branch
 ; ci:	nop
 ; ci: $BB7_1:
 ; ci:	.p2align	2

From 0e7e5d9bdf3c130069a1d622dc9a2b71357fc1ee Mon Sep 17 00:00:00 2001
From: Yingwei Zheng <dtcxzyw2333@gmail.com>
Date: Tue, 19 Nov 2024 20:06:34 +0800
Subject: [PATCH 15/38] [InstCombine] Drop noundef attributes in `foldCttzCtlz`
 (#116718)

Closes https://github.com/llvm/llvm-project/issues/112068.

(cherry picked from commit a59976bea8ad76f18119a11391dc8ba3e6ba07d5)
---
 .../Transforms/InstCombine/InstCombineCalls.cpp  |  4 +++-
 .../Transforms/InstCombine/shift-cttz-ctlz.ll    | 16 ++++++++++++++++
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 9d2990c98ce27..3223fccbcf49a 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -506,8 +506,10 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) {
 
   // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
   if (II.hasOneUse() && match(Op1, m_Zero()) &&
-      match(II.user_back(), m_Shift(m_Value(), m_Specific(&II))))
+      match(II.user_back(), m_Shift(m_Value(), m_Specific(&II)))) {
+    II.dropUBImplyingAttrsAndMetadata();
     return IC.replaceOperand(II, 1, IC.Builder.getTrue());
+  }
 
   Constant *C;
 
diff --git a/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll b/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll
index 1c381d0839071..63caec9501325 100644
--- a/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll
+++ b/llvm/test/Transforms/InstCombine/shift-cttz-ctlz.ll
@@ -15,6 +15,22 @@ entry:
   ret i32 %res
 }
 
+; Make sure that noundef is dropped.
+
+define i32 @shl_cttz_false_noundef(i32 %x, i32 %y) {
+; CHECK-LABEL: define i32 @shl_cttz_false_noundef(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CTTZ:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 [[Y]], i1 true)
+; CHECK-NEXT:    [[RES:%.*]] = shl i32 [[X]], [[CTTZ]]
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+entry:
+  %cttz = call noundef i32 @llvm.cttz.i32(i32 %y, i1 false)
+  %res = shl i32 %x, %cttz
+  ret i32 %res
+}
+
 define i32 @shl_ctlz_false(i32 %x, i32 %y) {
 ; CHECK-LABEL: define i32 @shl_ctlz_false(
 ; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) {

From edded5af5494adfc53187719fa3f3b0be7a4a20e Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@outlook.com>
Date: Fri, 18 Oct 2024 13:44:57 -0700
Subject: [PATCH 16/38] [SLP][NFC]Add a test with the incorrect casting of the
 abs argument, NFC

(cherry picked from commit 825f9cb1b31aa91d23eba803003897490de74a20)
---
 .../abs-overflow-incorrect-minbws.ll          | 31 +++++++++++++++++++
 1 file changed, 31 insertions(+)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll

diff --git a/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll
new file mode 100644
index 0000000000000..a936b076138d0
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=slp-vectorizer < %s | FileCheck %s
+
+define i32 @test(i32 %n) {
+; CHECK-LABEL: define i32 @test(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[N]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i32> [[TMP1]], <i32 1, i32 2>
+; CHECK-NEXT:    [[TMP3:%.*]] = mul <2 x i32> [[TMP2]], <i32 273837369, i32 273837369>
+; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP3]], i1 false)
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i32 1
+; CHECK-NEXT:    [[RES1:%.*]] = add i32 [[TMP5]], [[TMP6]]
+; CHECK-NEXT:    ret i32 [[RES1]]
+;
+entry:
+  %n1 = add i32 %n, 1
+  %zn1 = zext nneg i32 %n1 to i64
+  %m1 = mul nuw nsw i64 %zn1, 273837369
+  %a1 = call i64 @llvm.abs.i64(i64 %m1, i1 true)
+  %t1 = trunc i64 %a1 to i32
+  %n2 = add i32 %n, 2
+  %zn2 = zext nneg i32 %n2 to i64
+  %m2 = mul nuw nsw i64 %zn2, 273837369
+  %a2 = call i64 @llvm.abs.i64(i64 %m2, i1 true)
+  %t2 = trunc i64 %a2 to i32
+  %res1 = add i32 %t1, %t2
+  ret i32 %res1
+}

From 9f72c9837c553063ab0cbacc1a472a73c0ec2a4b Mon Sep 17 00:00:00 2001
From: Alexey Bataev <a.bataev@outlook.com>
Date: Fri, 18 Oct 2024 13:54:30 -0700
Subject: [PATCH 17/38] [SLP]Check that operand of abs does not overflow before
 making it part of minbitwidth transformation

Need to check that the operand of the abs intrinsic can be safely
truncated before making it part of the minbitwidth transformation.

Fixes #112577

(cherry picked from commit 709abacdc350d63c61888607edb28ce272daa0a0)
---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp  | 16 ++++++++++++++++
 .../abs-overflow-incorrect-minbws.ll             |  6 ++++--
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index ab2b96cdc42db..746ba51a981fe 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -15440,9 +15440,25 @@ bool BoUpSLP::collectValuesToDemote(
                 MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL)));
       });
     };
+    auto AbsChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) {
+      assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!");
+      return all_of(E.Scalars, [&](Value *V) {
+        auto *I = cast<Instruction>(V);
+        unsigned SignBits = OrigBitWidth - BitWidth;
+        APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth - 1);
+        unsigned Op0SignBits =
+            ComputeNumSignBits(I->getOperand(0), *DL, 0, AC, nullptr, DT);
+        return SignBits <= Op0SignBits &&
+               ((SignBits != Op0SignBits &&
+                 !isKnownNonNegative(I->getOperand(0), SimplifyQuery(*DL))) ||
+                MaskedValueIsZero(I->getOperand(0), Mask, SimplifyQuery(*DL)));
+      });
+    };
     if (ID != Intrinsic::abs) {
       Operands.push_back(getOperandEntry(&E, 1));
       CallChecker = CompChecker;
+    } else {
+      CallChecker = AbsChecker;
     }
     InstructionCost BestCost =
         std::numeric_limits<InstructionCost::CostType>::max();
diff --git a/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll
index a936b076138d0..51b635837d3b5 100644
--- a/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll
+++ b/llvm/test/Transforms/SLPVectorizer/abs-overflow-incorrect-minbws.ll
@@ -8,8 +8,10 @@ define i32 @test(i32 %n) {
 ; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[N]], i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <2 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i32> [[TMP1]], <i32 1, i32 2>
-; CHECK-NEXT:    [[TMP3:%.*]] = mul <2 x i32> [[TMP2]], <i32 273837369, i32 273837369>
-; CHECK-NEXT:    [[TMP4:%.*]] = call <2 x i32> @llvm.abs.v2i32(<2 x i32> [[TMP3]], i1 false)
+; CHECK-NEXT:    [[TMP3:%.*]] = zext <2 x i32> [[TMP2]] to <2 x i64>
+; CHECK-NEXT:    [[TMP7:%.*]] = mul nuw nsw <2 x i64> [[TMP3]], <i64 273837369, i64 273837369>
+; CHECK-NEXT:    [[TMP8:%.*]] = call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP7]], i1 true)
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc <2 x i64> [[TMP8]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i32> [[TMP4]], i32 0
 ; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i32> [[TMP4]], i32 1
 ; CHECK-NEXT:    [[RES1:%.*]] = add i32 [[TMP5]], [[TMP6]]

From dc665fa5f5b8b572479ceac6bf32e0174de65f1e Mon Sep 17 00:00:00 2001
From: Zhaoxin Yang <yangzhaoxin@loongson.cn>
Date: Mon, 11 Nov 2024 16:46:22 +0800
Subject: [PATCH 18/38] [MC][LoongArch] Change default cpu in
 `MCSubtargetInfo`. (#114922)

The default value of this CPU affects the `FeatureBits` obtained by
`LoongArchTargetELFStreamer` when creating an ELF file, and it will
further affect the `Flags` field in the generated file.

So, the default CPU value should be consistent with the
`initializeSubtargetDependencies` in `LoongArchSubtarget.cpp`.
Otherwise, the `Flags` field may be unexpected.

(cherry picked from commit 75c2888209473884cb3fa5720899d8199dafb8cb)
---
 lld/test/ELF/emulation-loongarch.s                          | 2 +-
 lld/test/ELF/loongarch-interlink.test                       | 4 ++--
 .../Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp | 2 +-
 llvm/test/CodeGen/LoongArch/e_flags.ll                      | 6 ++++++
 4 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/lld/test/ELF/emulation-loongarch.s b/lld/test/ELF/emulation-loongarch.s
index 28b879f758468..cfa8df4d8e2fe 100644
--- a/lld/test/ELF/emulation-loongarch.s
+++ b/lld/test/ELF/emulation-loongarch.s
@@ -37,7 +37,7 @@
 # LA32-NEXT:   StringTableSectionIndex:
 # LA32-NEXT: }
 
-# RUN: llvm-mc -filetype=obj -triple=loongarch64 %s -o %t.o
+# RUN: llvm-mc -filetype=obj -triple=loongarch64 -mattr=+d %s -o %t.o
 # RUN: ld.lld %t.o -o %t
 # RUN: llvm-readobj --file-headers %t | FileCheck --check-prefix=LA64 %s
 # RUN: ld.lld -m elf64loongarch %t.o -o %t
diff --git a/lld/test/ELF/loongarch-interlink.test b/lld/test/ELF/loongarch-interlink.test
index 44e5d03409a47..15c8318512660 100644
--- a/lld/test/ELF/loongarch-interlink.test
+++ b/lld/test/ELF/loongarch-interlink.test
@@ -3,9 +3,9 @@
 
 # RUN: yaml2obj %t/blob.yaml -o %t/blob.o
 # RUN: yaml2obj %t/v0-lp64d.yaml -o %t/v0-lp64d.o
-# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnu %t/start.s -o %t/v1-lp64d.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnu --mattr=+d %t/start.s -o %t/v1-lp64d.o
 # RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnusf %t/start.s -o %t/v1-lp64s.o
-# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnu %t/bar.s -o %t/v1-b-lp64d.o
+# RUN: llvm-mc --filetype=obj --triple=loongarch64-unknown-gnu --mattr=+d %t/bar.s -o %t/v1-b-lp64d.o
 
 ## Check that binary input results in e_flags=0 output.
 # RUN: ld.lld -m elf64loongarch -b binary %t/blob.bin -o %t/blob.out
diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
index e40981f5b5cd5..0712cc01ea038 100644
--- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
+++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp
@@ -55,7 +55,7 @@ static MCInstrInfo *createLoongArchMCInstrInfo() {
 static MCSubtargetInfo *
 createLoongArchMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
   if (CPU.empty() || CPU == "generic")
-    CPU = TT.isArch64Bit() ? "la464" : "generic-la32";
+    CPU = TT.isArch64Bit() ? "generic-la64" : "generic-la32";
   return createLoongArchMCSubtargetInfoImpl(TT, CPU, /*TuneCPU*/ CPU, FS);
 }
 
diff --git a/llvm/test/CodeGen/LoongArch/e_flags.ll b/llvm/test/CodeGen/LoongArch/e_flags.ll
index 2feb9d832bca9..9b2dc87eb353d 100644
--- a/llvm/test/CodeGen/LoongArch/e_flags.ll
+++ b/llvm/test/CodeGen/LoongArch/e_flags.ll
@@ -1,3 +1,6 @@
+; RUN: llc --mtriple=loongarch32 --filetype=obj %s -o %t-la32s
+; RUN: llvm-readelf -h %t-la32s | FileCheck %s --check-prefixes=ILP32,ABI-S --match-full-lines
+
 ; RUN: llc --mtriple=loongarch32 -mattr=+d --filetype=obj %s -o %t-la32
 ; RUN: llvm-readelf -h %t-la32 | FileCheck %s --check-prefixes=ILP32,ABI-D --match-full-lines
 
@@ -10,6 +13,9 @@
 ; RUN: llc --mtriple=loongarch32 -mattr=+d --filetype=obj %s --target-abi=ilp32d -o %t-ilp32d
 ; RUN: llvm-readelf -h %t-ilp32d | FileCheck %s --check-prefixes=ILP32,ABI-D --match-full-lines
 
+; RUN: llc --mtriple=loongarch64 -mattr=+d --filetype=obj %s -o %t-la64d
+; RUN: llvm-readelf -h %t-la64d | FileCheck %s --check-prefixes=LP64,ABI-D --match-full-lines
+
 ; RUN: llc --mtriple=loongarch64 -mattr=+d --filetype=obj %s -o %t-la64
 ; RUN: llvm-readelf -h %t-la64 | FileCheck %s --check-prefixes=LP64,ABI-D --match-full-lines
 

From f64f76feab83859b37b7fa5de3d4bba9a446e72b Mon Sep 17 00:00:00 2001
From: AdityaK <hiraditya@msn.com>
Date: Tue, 10 Sep 2024 22:39:02 -0700
Subject: [PATCH 19/38] Bail out jump threading on indirect branches (#103688)

The bug was introduced by
https://github.com/llvm/llvm-project/pull/68473

Fixes: #102351
(cherry picked from commit 3c9022c965b85951f30af140da591f819acef8a0)
---
 llvm/lib/Transforms/Utils/Local.cpp           |  11 +-
 .../switch-branch-fold-indirectbr-102351.ll   | 104 ++++++++++++++++++
 2 files changed, 113 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll

diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 7192efe3f16b9..4eb8dc1d2d615 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -1028,7 +1028,14 @@ CanRedirectPredsOfEmptyBBToSucc(BasicBlock *BB, BasicBlock *Succ,
   if (!BB->hasNPredecessorsOrMore(2))
     return false;
 
-  // Get single common predecessors of both BB and Succ
+  if (any_of(BBPreds, [](const BasicBlock *Pred) {
+        return isa<PHINode>(Pred->begin()) &&
+               isa<IndirectBrInst>(Pred->getTerminator());
+      }))
+    return false;
+
+  // Get the single common predecessor of both BB and Succ. Return false
+  // when there are more than one common predecessors.
   for (BasicBlock *SuccPred : SuccPreds) {
     if (BBPreds.count(SuccPred)) {
       if (CommonPred)
@@ -1133,7 +1140,7 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
 
   bool BBKillable = CanPropagatePredecessorsForPHIs(BB, Succ, BBPreds);
 
-  // Even if we can not fold bB into Succ, we may be able to redirect the
+  // Even if we can not fold BB into Succ, we may be able to redirect the
   // predecessors of BB to Succ.
   bool BBPhisMergeable =
       BBKillable ||
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll b/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll
new file mode 100644
index 0000000000000..03aee68fa4248
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local noundef i32 @main() {
+; CHECK-LABEL: define dso_local noundef i32 @main() {
+; CHECK-NEXT:  [[BB:.*]]:
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [2 x ptr], align 16
+; CHECK-NEXT:    store ptr blockaddress(@main, %[[BB4:.*]]), ptr [[ALLOCA]], align 16, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr inbounds [2 x ptr], ptr [[ALLOCA]], i64 0, i64 1
+; CHECK-NEXT:    store ptr blockaddress(@main, %[[BB10:.*]]), ptr [[GETELEMENTPTR]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT:    br label %[[BB1:.*]]
+; CHECK:       [[BB1]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[PHI8:%.*]], %[[BB7:.*]] ]
+; CHECK-NEXT:    [[PHI2:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[PHI9:%.*]], %[[BB7]] ]
+; CHECK-NEXT:    switch i32 [[PHI]], label %[[BB7]] [
+; CHECK-NEXT:      i32 0, label %[[BB12:.*]]
+; CHECK-NEXT:      i32 1, label %[[BB4]]
+; CHECK-NEXT:      i32 2, label %[[BB6:.*]]
+; CHECK-NEXT:    ]
+; CHECK:       [[BB4]]:
+; CHECK-NEXT:    [[PHI5:%.*]] = phi i32 [ [[PHI13:%.*]], %[[BB12]] ], [ [[PHI2]], %[[BB1]] ]
+; CHECK-NEXT:    br label %[[BB7]]
+; CHECK:       [[BB6]]:
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @foo(i32 noundef [[PHI2]])
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[PHI2]], 1
+; CHECK-NEXT:    br label %[[BB12]]
+; CHECK:       [[BB7]]:
+; CHECK-NEXT:    [[PHI8]] = phi i32 [ [[PHI]], %[[BB1]] ], [ 2, %[[BB4]] ]
+; CHECK-NEXT:    [[PHI9]] = phi i32 [ [[PHI2]], %[[BB1]] ], [ [[PHI5]], %[[BB4]] ]
+; CHECK-NEXT:    br label %[[BB1]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       [[BB10]]:
+; CHECK-NEXT:    [[CALL11:%.*]] = call i32 @foo(i32 noundef [[PHI13]])
+; CHECK-NEXT:    ret i32 0
+; CHECK:       [[BB12]]:
+; CHECK-NEXT:    [[PHI13]] = phi i32 [ [[ADD]], %[[BB6]] ], [ [[PHI2]], %[[BB1]] ]
+; CHECK-NEXT:    [[SEXT:%.*]] = sext i32 [[PHI13]] to i64
+; CHECK-NEXT:    [[GETELEMENTPTR14:%.*]] = getelementptr inbounds [2 x ptr], ptr [[ALLOCA]], i64 0, i64 [[SEXT]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load ptr, ptr [[GETELEMENTPTR14]], align 8, !tbaa [[TBAA0]]
+; CHECK-NEXT:    indirectbr ptr [[LOAD]], [label %[[BB4]], label %bb10]
+;
+bb:
+  %alloca = alloca [2 x ptr], align 16
+  store ptr blockaddress(@main, %bb4), ptr %alloca, align 16, !tbaa !0
+  %getelementptr = getelementptr inbounds [2 x ptr], ptr %alloca, i64 0, i64 1
+  store ptr blockaddress(@main, %bb10), ptr %getelementptr, align 8, !tbaa !0
+  br label %bb1
+
+bb1:                                              ; preds = %bb7, %bb
+  %phi = phi i32 [ 0, %bb ], [ %phi8, %bb7 ]
+  %phi2 = phi i32 [ 0, %bb ], [ %phi9, %bb7 ]
+  switch i32 %phi, label %bb7 [
+  i32 0, label %bb3
+  i32 1, label %bb4
+  i32 2, label %bb6
+  ]
+
+bb3:                                              ; preds = %bb1
+  br label %bb12
+
+bb4:                                              ; preds = %bb12, %bb1
+  %phi5 = phi i32 [ %phi13, %bb12 ], [ %phi2, %bb1 ]
+  br label %bb7
+
+bb6:                                              ; preds = %bb1
+  %call = call i32 @foo(i32 noundef %phi2)
+  %add = add nsw i32 %phi2, 1
+  br label %bb12
+
+bb7:                                              ; preds = %bb4, %bb1
+  %phi8 = phi i32 [ %phi, %bb1 ], [ 2, %bb4 ]
+  %phi9 = phi i32 [ %phi2, %bb1 ], [ %phi5, %bb4 ]
+  br label %bb1, !llvm.loop !4
+
+bb10:                                             ; preds = %bb12
+  %call11 = call i32 @foo(i32 noundef %phi13)
+  ret i32 0
+
+bb12:                                             ; preds = %bb6, %bb3
+  %phi13 = phi i32 [ %add, %bb6 ], [ %phi2, %bb3 ]
+  %sext = sext i32 %phi13 to i64
+  %getelementptr14 = getelementptr inbounds [2 x ptr], ptr %alloca, i64 0, i64 %sext
+  %load = load ptr, ptr %getelementptr14, align 8, !tbaa !0
+  indirectbr ptr %load, [label %bb4, label %bb10]
+}
+
+declare i32 @foo(i32)
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"any pointer", !2, i64 0}
+!2 = !{!"omnipotent char", !3, i64 0}
+!3 = !{!"Simple C++ TBAA"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"int", !2, i64 0}
+;.
+; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
+; CHECK: [[META1]] = !{!"any pointer", [[META2:![0-9]+]], i64 0}
+; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
+; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
+; CHECK: [[LOOP4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
+; CHECK: [[META5]] = !{!"int", [[META2]], i64 0}
+;.

From 321f0dd2008160b674c010425133bebc586392e7 Mon Sep 17 00:00:00 2001
From: AdityaK <hiraditya@msn.com>
Date: Tue, 26 Nov 2024 14:57:28 -0800
Subject: [PATCH 20/38] Bail out jump threading on indirect branches only
 (#117778)

Remove check for PHI in pred as pointed out in #103688
Reduced the testcase to remove redundant phi in pred

Fixes: #102351
(cherry picked from commit 39601a6e5484de183bf525b7d0624e7890ccd8ab)
---
 llvm/lib/Transforms/Utils/Local.cpp           |   3 +-
 .../switch-branch-fold-indirectbr-102351.ll   | 141 ++++++++----------
 2 files changed, 60 insertions(+), 84 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp
index 4eb8dc1d2d615..f68cbf62b9825 100644
--- a/llvm/lib/Transforms/Utils/Local.cpp
+++ b/llvm/lib/Transforms/Utils/Local.cpp
@@ -1029,8 +1029,7 @@ CanRedirectPredsOfEmptyBBToSucc(BasicBlock *BB, BasicBlock *Succ,
     return false;
 
   if (any_of(BBPreds, [](const BasicBlock *Pred) {
-        return isa<PHINode>(Pred->begin()) &&
-               isa<IndirectBrInst>(Pred->getTerminator());
+        return isa<IndirectBrInst>(Pred->getTerminator());
       }))
     return false;
 
diff --git a/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll b/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll
index 03aee68fa4248..d3713be8358db 100644
--- a/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll
+++ b/llvm/test/Transforms/SimplifyCFG/switch-branch-fold-indirectbr-102351.ll
@@ -1,104 +1,81 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --prefix-filecheck-ir-name pref --version 5
 ; RUN: opt < %s -passes=simplifycfg -S | FileCheck %s
 
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define dso_local noundef i32 @main() {
-; CHECK-LABEL: define dso_local noundef i32 @main() {
+define i32 @foo.1(i32 %arg, ptr %arg1) {
+; CHECK-LABEL: define i32 @foo.1(
+; CHECK-SAME: i32 [[ARG:%.*]], ptr [[ARG1:%.*]]) {
 ; CHECK-NEXT:  [[BB:.*]]:
 ; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [2 x ptr], align 16
-; CHECK-NEXT:    store ptr blockaddress(@main, %[[BB4:.*]]), ptr [[ALLOCA]], align 16, !tbaa [[TBAA0:![0-9]+]]
+; CHECK-NEXT:    store ptr blockaddress(@foo.1, %[[BB8:.*]]), ptr [[ALLOCA]], align 16
 ; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr inbounds [2 x ptr], ptr [[ALLOCA]], i64 0, i64 1
-; CHECK-NEXT:    store ptr blockaddress(@main, %[[BB10:.*]]), ptr [[GETELEMENTPTR]], align 8, !tbaa [[TBAA0]]
-; CHECK-NEXT:    br label %[[BB1:.*]]
-; CHECK:       [[BB1]]:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[PHI8:%.*]], %[[BB7:.*]] ]
-; CHECK-NEXT:    [[PHI2:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[PHI9:%.*]], %[[BB7]] ]
-; CHECK-NEXT:    switch i32 [[PHI]], label %[[BB7]] [
-; CHECK-NEXT:      i32 0, label %[[BB12:.*]]
-; CHECK-NEXT:      i32 1, label %[[BB4]]
-; CHECK-NEXT:      i32 2, label %[[BB6:.*]]
+; CHECK-NEXT:    store ptr blockaddress(@foo.1, %[[BB16:.*]]), ptr [[GETELEMENTPTR]], align 8
+; CHECK-NEXT:    br label %[[PREFBB2:.*]]
+; CHECK:       [[PREFBB2]]:
+; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[PHI14:%.*]], %[[BB13:.*]] ]
+; CHECK-NEXT:    [[PHI3:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[PHI15:%.*]], %[[BB13]] ]
+; CHECK-NEXT:    switch i32 [[PHI]], label %[[BB13]] [
+; CHECK-NEXT:      i32 0, label %[[PREFBB18:.*]]
+; CHECK-NEXT:      i32 1, label %[[BB8]]
+; CHECK-NEXT:      i32 2, label %[[PREFBB11:.*]]
 ; CHECK-NEXT:    ]
-; CHECK:       [[BB4]]:
-; CHECK-NEXT:    [[PHI5:%.*]] = phi i32 [ [[PHI13:%.*]], %[[BB12]] ], [ [[PHI2]], %[[BB1]] ]
-; CHECK-NEXT:    br label %[[BB7]]
-; CHECK:       [[BB6]]:
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 @foo(i32 noundef [[PHI2]])
-; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[PHI2]], 1
-; CHECK-NEXT:    br label %[[BB12]]
-; CHECK:       [[BB7]]:
-; CHECK-NEXT:    [[PHI8]] = phi i32 [ [[PHI]], %[[BB1]] ], [ 2, %[[BB4]] ]
-; CHECK-NEXT:    [[PHI9]] = phi i32 [ [[PHI2]], %[[BB1]] ], [ [[PHI5]], %[[BB4]] ]
-; CHECK-NEXT:    br label %[[BB1]], !llvm.loop [[LOOP4:![0-9]+]]
-; CHECK:       [[BB10]]:
-; CHECK-NEXT:    [[CALL11:%.*]] = call i32 @foo(i32 noundef [[PHI13]])
+; CHECK:       [[BB8]]:
+; CHECK-NEXT:    [[PHI10:%.*]] = phi i32 [ [[ARG]], %[[PREFBB18]] ], [ [[PHI3]], %[[PREFBB2]] ]
+; CHECK-NEXT:    br label %[[BB13]]
+; CHECK:       [[PREFBB11]]:
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 @wombat(i32 noundef [[PHI3]])
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[PHI3]], 1
+; CHECK-NEXT:    br label %[[PREFBB18]]
+; CHECK:       [[BB13]]:
+; CHECK-NEXT:    [[PHI14]] = phi i32 [ [[PHI]], %[[PREFBB2]] ], [ 2, %[[BB8]] ]
+; CHECK-NEXT:    [[PHI15]] = phi i32 [ [[PHI3]], %[[PREFBB2]] ], [ [[PHI10]], %[[BB8]] ]
+; CHECK-NEXT:    br label %[[PREFBB2]]
+; CHECK:       [[BB16]]:
+; CHECK-NEXT:    [[CALL17:%.*]] = call i32 @wombat(i32 noundef [[ARG]])
 ; CHECK-NEXT:    ret i32 0
-; CHECK:       [[BB12]]:
-; CHECK-NEXT:    [[PHI13]] = phi i32 [ [[ADD]], %[[BB6]] ], [ [[PHI2]], %[[BB1]] ]
-; CHECK-NEXT:    [[SEXT:%.*]] = sext i32 [[PHI13]] to i64
-; CHECK-NEXT:    [[GETELEMENTPTR14:%.*]] = getelementptr inbounds [2 x ptr], ptr [[ALLOCA]], i64 0, i64 [[SEXT]]
-; CHECK-NEXT:    [[LOAD:%.*]] = load ptr, ptr [[GETELEMENTPTR14]], align 8, !tbaa [[TBAA0]]
-; CHECK-NEXT:    indirectbr ptr [[LOAD]], [label %[[BB4]], label %bb10]
+; CHECK:       [[PREFBB18]]:
+; CHECK-NEXT:    [[LOAD:%.*]] = load ptr, ptr [[ARG1]], align 8
+; CHECK-NEXT:    indirectbr ptr [[LOAD]], [label %[[BB8]], label %bb16]
 ;
 bb:
   %alloca = alloca [2 x ptr], align 16
-  store ptr blockaddress(@main, %bb4), ptr %alloca, align 16, !tbaa !0
+  store ptr blockaddress(@foo.1, %bb8), ptr %alloca, align 16
   %getelementptr = getelementptr inbounds [2 x ptr], ptr %alloca, i64 0, i64 1
-  store ptr blockaddress(@main, %bb10), ptr %getelementptr, align 8, !tbaa !0
-  br label %bb1
+  store ptr blockaddress(@foo.1, %bb16), ptr %getelementptr, align 8
+  br label %bb2
 
-bb1:                                              ; preds = %bb7, %bb
-  %phi = phi i32 [ 0, %bb ], [ %phi8, %bb7 ]
-  %phi2 = phi i32 [ 0, %bb ], [ %phi9, %bb7 ]
-  switch i32 %phi, label %bb7 [
-  i32 0, label %bb3
-  i32 1, label %bb4
-  i32 2, label %bb6
+bb2:                                              ; preds = %bb13, %bb
+  %phi = phi i32 [ 0, %bb ], [ %phi14, %bb13 ]
+  %phi3 = phi i32 [ 0, %bb ], [ %phi15, %bb13 ]
+  switch i32 %phi, label %bb13 [
+  i32 0, label %bb5
+  i32 1, label %bb8
+  i32 2, label %bb11
   ]
 
-bb3:                                              ; preds = %bb1
-  br label %bb12
+bb5:                                              ; preds = %bb2
+  br label %bb18
 
-bb4:                                              ; preds = %bb12, %bb1
-  %phi5 = phi i32 [ %phi13, %bb12 ], [ %phi2, %bb1 ]
-  br label %bb7
+bb8:                                              ; preds = %bb18, %bb2
+  %phi10 = phi i32 [ %arg, %bb18 ], [ %phi3, %bb2 ]
+  br label %bb13
 
-bb6:                                              ; preds = %bb1
-  %call = call i32 @foo(i32 noundef %phi2)
-  %add = add nsw i32 %phi2, 1
-  br label %bb12
+bb11:                                             ; preds = %bb2
+  %call = call i32 @wombat(i32 noundef %phi3)
+  %add = add nsw i32 %phi3, 1
+  br label %bb18
 
-bb7:                                              ; preds = %bb4, %bb1
-  %phi8 = phi i32 [ %phi, %bb1 ], [ 2, %bb4 ]
-  %phi9 = phi i32 [ %phi2, %bb1 ], [ %phi5, %bb4 ]
-  br label %bb1, !llvm.loop !4
+bb13:                                             ; preds = %bb8, %bb2
+  %phi14 = phi i32 [ %phi, %bb2 ], [ 2, %bb8 ]
+  %phi15 = phi i32 [ %phi3, %bb2 ], [ %phi10, %bb8 ]
+  br label %bb2
 
-bb10:                                             ; preds = %bb12
-  %call11 = call i32 @foo(i32 noundef %phi13)
+bb16:                                             ; preds = %bb18
+  %call17 = call i32 @wombat(i32 noundef %arg)
   ret i32 0
 
-bb12:                                             ; preds = %bb6, %bb3
-  %phi13 = phi i32 [ %add, %bb6 ], [ %phi2, %bb3 ]
-  %sext = sext i32 %phi13 to i64
-  %getelementptr14 = getelementptr inbounds [2 x ptr], ptr %alloca, i64 0, i64 %sext
-  %load = load ptr, ptr %getelementptr14, align 8, !tbaa !0
-  indirectbr ptr %load, [label %bb4, label %bb10]
+bb18:                                             ; preds = %bb11, %bb5
+  %load = load ptr, ptr %arg1, align 8
+  indirectbr ptr %load, [label %bb8, label %bb16]
 }
 
-declare i32 @foo(i32)
-
-!0 = !{!1, !1, i64 0}
-!1 = !{!"any pointer", !2, i64 0}
-!2 = !{!"omnipotent char", !3, i64 0}
-!3 = !{!"Simple C++ TBAA"}
-!4 = !{!5, !5, i64 0}
-!5 = !{!"int", !2, i64 0}
-;.
-; CHECK: [[TBAA0]] = !{[[META1:![0-9]+]], [[META1]], i64 0}
-; CHECK: [[META1]] = !{!"any pointer", [[META2:![0-9]+]], i64 0}
-; CHECK: [[META2]] = !{!"omnipotent char", [[META3:![0-9]+]], i64 0}
-; CHECK: [[META3]] = !{!"Simple C++ TBAA"}
-; CHECK: [[LOOP4]] = !{[[META5:![0-9]+]], [[META5]], i64 0}
-; CHECK: [[META5]] = !{!"int", [[META2]], i64 0}
-;.
+declare i32 @wombat(i32)

From e6bcdea700dc7a5b1cf3a3f5a4eaa40d1234e220 Mon Sep 17 00:00:00 2001
From: Anutosh Bhat <andersonbhat491@gmail.com>
Date: Fri, 29 Nov 2024 15:31:02 +0530
Subject: [PATCH 21/38] [clang-repl] Fix generation of wasm binaries while
 running clang-repl in browser (#117978)

Co-authored-by: Vassil Vassilev <v.g.vassilev@gmail.com>
(cherry picked from commit a174aa1e416c4e27945f5a8c646b119126dc8441)
---
 clang/lib/Interpreter/CMakeLists.txt  |  2 +
 clang/lib/Interpreter/Interpreter.cpp |  1 +
 clang/lib/Interpreter/Wasm.cpp        | 57 ++++++++++++++++++++-------
 3 files changed, 46 insertions(+), 14 deletions(-)

diff --git a/clang/lib/Interpreter/CMakeLists.txt b/clang/lib/Interpreter/CMakeLists.txt
index 0a2d60757c216..85efa4b0f984f 100644
--- a/clang/lib/Interpreter/CMakeLists.txt
+++ b/clang/lib/Interpreter/CMakeLists.txt
@@ -15,6 +15,7 @@ set(LLVM_LINK_COMPONENTS
 if (EMSCRIPTEN AND "lld" IN_LIST LLVM_ENABLE_PROJECTS)
   set(WASM_SRC Wasm.cpp)
   set(WASM_LINK lldWasm)
+  set(COMMON_LINK lldCommon)
 endif()
 
 add_clang_library(clangInterpreter
@@ -45,6 +46,7 @@ add_clang_library(clangInterpreter
   clangSema
   clangSerialization
   ${WASM_LINK}
+  ${COMMON_LINK}
   )
 
 if ((MINGW OR CYGWIN) AND BUILD_SHARED_LIBS)
diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp
index c0b8bfebb4343..985d0b7c0ef31 100644
--- a/clang/lib/Interpreter/Interpreter.cpp
+++ b/clang/lib/Interpreter/Interpreter.cpp
@@ -193,6 +193,7 @@ IncrementalCompilerBuilder::CreateCpp() {
   Argv.push_back("-target");
   Argv.push_back("wasm32-unknown-emscripten");
   Argv.push_back("-shared");
+  Argv.push_back("-fvisibility=default");
 #endif
   Argv.insert(Argv.end(), UserArgs.begin(), UserArgs.end());
 
diff --git a/clang/lib/Interpreter/Wasm.cpp b/clang/lib/Interpreter/Wasm.cpp
index 79efbaa03982d..aa10b160ccf84 100644
--- a/clang/lib/Interpreter/Wasm.cpp
+++ b/clang/lib/Interpreter/Wasm.cpp
@@ -23,6 +23,31 @@
 #include <string>
 
 namespace lld {
+enum Flavor {
+  Invalid,
+  Gnu,     // -flavor gnu
+  MinGW,   // -flavor gnu MinGW
+  WinLink, // -flavor link
+  Darwin,  // -flavor darwin
+  Wasm,    // -flavor wasm
+};
+
+using Driver = bool (*)(llvm::ArrayRef<const char *>, llvm::raw_ostream &,
+                        llvm::raw_ostream &, bool, bool);
+
+struct DriverDef {
+  Flavor f;
+  Driver d;
+};
+
+struct Result {
+  int retCode;
+  bool canRunAgain;
+};
+
+Result lldMain(llvm::ArrayRef<const char *> args, llvm::raw_ostream &stdoutOS,
+               llvm::raw_ostream &stderrOS, llvm::ArrayRef<DriverDef> drivers);
+
 namespace wasm {
 bool link(llvm::ArrayRef<const char *> args, llvm::raw_ostream &stdoutOS,
           llvm::raw_ostream &stderrOS, bool exitEarly, bool disableOutput);
@@ -51,13 +76,14 @@ llvm::Error WasmIncrementalExecutor::addModule(PartialTranslationUnit &PTU) {
   llvm::TargetMachine *TargetMachine = Target->createTargetMachine(
       PTU.TheModule->getTargetTriple(), "", "", TO, llvm::Reloc::Model::PIC_);
   PTU.TheModule->setDataLayout(TargetMachine->createDataLayout());
-  std::string OutputFileName = PTU.TheModule->getName().str() + ".wasm";
+  std::string ObjectFileName = PTU.TheModule->getName().str() + ".o";
+  std::string BinaryFileName = PTU.TheModule->getName().str() + ".wasm";
 
   std::error_code Error;
-  llvm::raw_fd_ostream OutputFile(llvm::StringRef(OutputFileName), Error);
+  llvm::raw_fd_ostream ObjectFileOutput(llvm::StringRef(ObjectFileName), Error);
 
   llvm::legacy::PassManager PM;
-  if (TargetMachine->addPassesToEmitFile(PM, OutputFile, nullptr,
+  if (TargetMachine->addPassesToEmitFile(PM, ObjectFileOutput, nullptr,
                                          llvm::CodeGenFileType::ObjectFile)) {
     return llvm::make_error<llvm::StringError>(
         "Wasm backend cannot produce object.", llvm::inconvertibleErrorCode());
@@ -69,27 +95,30 @@ llvm::Error WasmIncrementalExecutor::addModule(PartialTranslationUnit &PTU) {
                                                llvm::inconvertibleErrorCode());
   }
 
-  OutputFile.close();
+  ObjectFileOutput.close();
 
   std::vector<const char *> LinkerArgs = {"wasm-ld",
                                           "-shared",
                                           "--import-memory",
-                                          "--no-entry",
-                                          "--export-all",
                                           "--experimental-pic",
                                           "--stack-first",
                                           "--allow-undefined",
-                                          OutputFileName.c_str(),
+                                          ObjectFileName.c_str(),
                                           "-o",
-                                          OutputFileName.c_str()};
-  int Result =
-      lld::wasm::link(LinkerArgs, llvm::outs(), llvm::errs(), false, false);
-  if (!Result)
+                                          BinaryFileName.c_str()};
+
+  const lld::DriverDef WasmDriver = {lld::Flavor::Wasm, &lld::wasm::link};
+  std::vector<lld::DriverDef> WasmDriverArgs;
+  WasmDriverArgs.push_back(WasmDriver);
+  lld::Result Result =
+      lld::lldMain(LinkerArgs, llvm::outs(), llvm::errs(), WasmDriverArgs);
+
+  if (Result.retCode)
     return llvm::make_error<llvm::StringError>(
         "Failed to link incremental module", llvm::inconvertibleErrorCode());
 
   void *LoadedLibModule =
-      dlopen(OutputFileName.c_str(), RTLD_NOW | RTLD_GLOBAL);
+      dlopen(BinaryFileName.c_str(), RTLD_NOW | RTLD_GLOBAL);
   if (LoadedLibModule == nullptr) {
     llvm::errs() << dlerror() << '\n';
     return llvm::make_error<llvm::StringError>(
@@ -109,7 +138,7 @@ llvm::Error WasmIncrementalExecutor::runCtors() const {
   return llvm::Error::success();
 }
 
-llvm::Error WasmIncrementalExecutor::cleanUp() const {
+llvm::Error WasmIncrementalExecutor::cleanUp() {
   // Can't call cleanUp through IncrementalExecutor as it
   // tries to deinitialize JIT which hasn't been initialized
   return llvm::Error::success();
@@ -117,4 +146,4 @@ llvm::Error WasmIncrementalExecutor::cleanUp() const {
 
 WasmIncrementalExecutor::~WasmIncrementalExecutor() = default;
 
-} // namespace clang
+} // namespace clang
\ No newline at end of file

From cf55b9cb8af3ce3fa776e46e6ad71155a7d38c75 Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara@apple.com>
Date: Tue, 13 Aug 2024 00:39:14 -0700
Subject: [PATCH 22/38] [AArch64][Darwin][SME] Don't try to save VG to the
 stack for unwinding.

On Darwin we don't have any hardware that has SVE support, only SME.
Therefore we don't need to save VG for unwinders and can safely omit it.

This also fixes crashes introduced since this feature landed since Darwin's
compact unwind code can't handle the presence of VG anyway.

rdar://131072344
---
 .../Target/AArch64/AArch64FrameLowering.cpp   |  24 ++-
 .../Target/AArch64/AArch64ISelLowering.cpp    |  19 ++-
 .../CodeGen/AArch64/sme-darwin-no-sve-vg.ll   | 161 ++++++++++++++++++
 3 files changed, 189 insertions(+), 15 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 87e057a468afd..83d9dd1725973 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1394,6 +1394,18 @@ bool requiresGetVGCall(MachineFunction &MF) {
          !MF.getSubtarget<AArch64Subtarget>().hasSVE();
 }
 
+static bool requiresSaveVG(MachineFunction &MF) {
+  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+  // For Darwin platforms we don't save VG for non-SVE functions, even if SME
+  // is enabled with streaming mode changes.
+  if (!AFI->hasStreamingModeChanges())
+    return false;
+  auto &ST = MF.getSubtarget<AArch64Subtarget>();
+  if (ST.isTargetDarwin())
+    return ST.hasSVE();
+  return true;
+}
+
 bool isVGInstruction(MachineBasicBlock::iterator MBBI) {
   unsigned Opc = MBBI->getOpcode();
   if (Opc == AArch64::CNTD_XPiI || Opc == AArch64::RDSVLI_XI ||
@@ -1430,8 +1442,7 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
   // functions, we need to do this for both the streaming and non-streaming
   // vector length. Move past these instructions if necessary.
   MachineFunction &MF = *MBB.getParent();
-  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-  if (AFI->hasStreamingModeChanges())
+  if (requiresSaveVG(MF))
     while (isVGInstruction(MBBI))
       ++MBBI;
 
@@ -1937,7 +1948,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
          !IsSVECalleeSave(MBBI)) {
     // Move past instructions generated to calculate VG
-    if (AFI->hasStreamingModeChanges())
+    if (requiresSaveVG(MF))
       while (isVGInstruction(MBBI))
         ++MBBI;
 
@@ -3720,7 +3731,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
   // non-streaming VG value.
   const Function &F = MF.getFunction();
   SMEAttrs Attrs(F);
-  if (AFI->hasStreamingModeChanges()) {
+  if (requiresSaveVG(MF)) {
     if (Attrs.hasStreamingBody() && !Attrs.hasStreamingInterface())
       CSStackSize += 16;
     else
@@ -3873,7 +3884,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
   }
 
   // Insert VG into the list of CSRs, immediately before LR if saved.
-  if (AFI->hasStreamingModeChanges()) {
+  if (requiresSaveVG(MF)) {
     std::vector<CalleeSavedInfo> VGSaves;
     SMEAttrs Attrs(MF.getFunction());
 
@@ -4602,10 +4613,9 @@ MachineBasicBlock::iterator emitVGSaveRestore(MachineBasicBlock::iterator II,
 
 void AArch64FrameLowering::processFunctionBeforeFrameIndicesReplaced(
     MachineFunction &MF, RegScavenger *RS = nullptr) const {
-  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   for (auto &BB : MF)
     for (MachineBasicBlock::iterator II = BB.begin(); II != BB.end();) {
-      if (AFI->hasStreamingModeChanges())
+      if (requiresSaveVG(MF))
         II = emitVGSaveRestore(II, this);
       if (StackTaggingMergeSetTag)
         II = tryMergeAdjacentSTG(II, this, RS);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 62078822c89b1..ef2789e96213b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8732,10 +8732,11 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
 
   SDValue InGlue;
   if (RequiresSMChange) {
-
-    Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL,
-                        DAG.getVTList(MVT::Other, MVT::Glue), Chain);
-    InGlue = Chain.getValue(1);
+    if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
+      Chain = DAG.getNode(AArch64ISD::VG_SAVE, DL,
+                          DAG.getVTList(MVT::Other, MVT::Glue), Chain);
+      InGlue = Chain.getValue(1);
+    }
 
     SDValue NewChain = changeStreamingMode(
         DAG, DL, CalleeAttrs.hasStreamingInterface(), Chain, InGlue,
@@ -8914,11 +8915,13 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
     Result = changeStreamingMode(
         DAG, DL, !CalleeAttrs.hasStreamingInterface(), Result, InGlue,
         getSMCondition(CallerAttrs, CalleeAttrs), PStateSM);
-    InGlue = Result.getValue(1);
 
-    Result =
-        DAG.getNode(AArch64ISD::VG_RESTORE, DL,
-                    DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue});
+    if (!Subtarget->isTargetDarwin() || Subtarget->hasSVE()) {
+      InGlue = Result.getValue(1);
+      Result =
+          DAG.getNode(AArch64ISD::VG_RESTORE, DL,
+                      DAG.getVTList(MVT::Other, MVT::Glue), {Result, InGlue});
+    }
   }
 
   if (CallerAttrs.requiresEnablingZAAfterCall(CalleeAttrs))
diff --git a/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll b/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll
new file mode 100644
index 0000000000000..36a300fea25e5
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-darwin-no-sve-vg.ll
@@ -0,0 +1,161 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -o - %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "arm64-apple-macosx14.0.0"
+
+; Check we don't crash on Darwin and that we don't try to save VG
+; when only SME (and not SVE) is enabled.
+
+; Function Attrs: mustprogress norecurse nounwind ssp uwtable(sync)
+define noundef i32 @main() local_unnamed_addr #0 {
+; CHECK-LABEL: main:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    stp d15, d14, [sp, #-80]! ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 80
+; CHECK-NEXT:    stp d13, d12, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset b8, -24
+; CHECK-NEXT:    .cfi_offset b9, -32
+; CHECK-NEXT:    .cfi_offset b10, -40
+; CHECK-NEXT:    .cfi_offset b11, -48
+; CHECK-NEXT:    .cfi_offset b12, -56
+; CHECK-NEXT:    .cfi_offset b13, -64
+; CHECK-NEXT:    .cfi_offset b14, -72
+; CHECK-NEXT:    .cfi_offset b15, -80
+; CHECK-NEXT:    smstart sm
+; CHECK-NEXT:    bl __ZL9sme_crashv
+; CHECK-NEXT:    smstop sm
+; CHECK-NEXT:    mov w0, #0 ; =0x0
+; CHECK-NEXT:    ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp], #80 ; 16-byte Folded Reload
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+; CHECK-NEXT:    .cfi_restore b8
+; CHECK-NEXT:    .cfi_restore b9
+; CHECK-NEXT:    .cfi_restore b10
+; CHECK-NEXT:    .cfi_restore b11
+; CHECK-NEXT:    .cfi_restore b12
+; CHECK-NEXT:    .cfi_restore b13
+; CHECK-NEXT:    .cfi_restore b14
+; CHECK-NEXT:    .cfi_restore b15
+; CHECK-NEXT:    ret
+entry:
+  tail call fastcc void @_ZL9sme_crashv() #4
+  ret i32 0
+}
+
+; Function Attrs: mustprogress norecurse nounwind ssp uwtable(sync)
+define internal fastcc void @_ZL9sme_crashv() unnamed_addr #1 {
+; CHECK-LABEL: _ZL9sme_crashv:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    stp d15, d14, [sp, #-96]! ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-NEXT:    stp d13, d12, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d11, d10, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x28, x27, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x29, x30, [sp, #80] ; 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #80
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    .cfi_offset w27, -24
+; CHECK-NEXT:    .cfi_offset w28, -32
+; CHECK-NEXT:    .cfi_offset b8, -40
+; CHECK-NEXT:    .cfi_offset b9, -48
+; CHECK-NEXT:    .cfi_offset b10, -56
+; CHECK-NEXT:    .cfi_offset b11, -64
+; CHECK-NEXT:    .cfi_offset b12, -72
+; CHECK-NEXT:    .cfi_offset b13, -80
+; CHECK-NEXT:    .cfi_offset b14, -88
+; CHECK-NEXT:    .cfi_offset b15, -96
+; CHECK-NEXT:    .cfi_remember_state
+; CHECK-NEXT:    sub x9, sp, #160
+; CHECK-NEXT:    and sp, x9, #0xffffffffffffff00
+; CHECK-NEXT:  Lloh0:
+; CHECK-NEXT:    adrp x8, ___stack_chk_guard@GOTPAGE
+; CHECK-NEXT:  Lloh1:
+; CHECK-NEXT:    ldr x8, [x8, ___stack_chk_guard@GOTPAGEOFF]
+; CHECK-NEXT:  Lloh2:
+; CHECK-NEXT:    ldr x8, [x8]
+; CHECK-NEXT:    str x8, [sp, #152]
+; CHECK-NEXT:    mov z0.b, #0 ; =0x0
+; CHECK-NEXT:    stp q0, q0, [sp, #32]
+; CHECK-NEXT:    stp q0, q0, [sp]
+; CHECK-NEXT:    mov x8, sp
+; CHECK-NEXT:    ; InlineAsm Start
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    st1w { z0.s }, p0, [x8]
+; CHECK-EMPTY:
+; CHECK-NEXT:    ; InlineAsm End
+; CHECK-NEXT:    ldr x8, [sp, #152]
+; CHECK-NEXT:  Lloh3:
+; CHECK-NEXT:    adrp x9, ___stack_chk_guard@GOTPAGE
+; CHECK-NEXT:  Lloh4:
+; CHECK-NEXT:    ldr x9, [x9, ___stack_chk_guard@GOTPAGEOFF]
+; CHECK-NEXT:  Lloh5:
+; CHECK-NEXT:    ldr x9, [x9]
+; CHECK-NEXT:    cmp x9, x8
+; CHECK-NEXT:    b.ne LBB1_2
+; CHECK-NEXT:  ; %bb.1: ; %entry
+; CHECK-NEXT:    sub sp, x29, #80
+; CHECK-NEXT:    .cfi_def_cfa wsp, 96
+; CHECK-NEXT:    ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp x28, x27, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp], #96 ; 16-byte Folded Reload
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+; CHECK-NEXT:    .cfi_restore w27
+; CHECK-NEXT:    .cfi_restore w28
+; CHECK-NEXT:    .cfi_restore b8
+; CHECK-NEXT:    .cfi_restore b9
+; CHECK-NEXT:    .cfi_restore b10
+; CHECK-NEXT:    .cfi_restore b11
+; CHECK-NEXT:    .cfi_restore b12
+; CHECK-NEXT:    .cfi_restore b13
+; CHECK-NEXT:    .cfi_restore b14
+; CHECK-NEXT:    .cfi_restore b15
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  LBB1_2: ; %entry
+; CHECK-NEXT:    .cfi_restore_state
+; CHECK-NEXT:    smstop sm
+; CHECK-NEXT:    bl ___stack_chk_fail
+; CHECK-NEXT:    smstart sm
+; CHECK-NEXT:    .loh AdrpLdrGotLdr Lloh3, Lloh4, Lloh5
+; CHECK-NEXT:    .loh AdrpLdrGotLdr Lloh0, Lloh1, Lloh2
+entry:
+  %uu = alloca [16 x float], align 256
+  call void @llvm.lifetime.start.p0(i64 64, ptr nonnull %uu) #5
+  call void @llvm.memset.p0.i64(ptr noundef nonnull align 256 dereferenceable(64) %uu, i8 0, i64 64, i1 false)
+  call void asm sideeffect "ptrue p0.s\0Ast1w { z0.s }, p0, [$0]\0A", "r"(ptr nonnull %uu) #5
+  call void @llvm.lifetime.end.p0(i64 64, ptr nonnull %uu) #5
+  ret void
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2
+
+; Function Attrs: mustprogress nocallback nofree nounwind willreturn memory(argmem: write)
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2
+
+attributes #0 = { mustprogress norecurse nounwind ssp uwtable(sync) "stack-protector-buffer-size"="8" "target-cpu"="apple-a16" "target-features"="+sme,+sme-f64f64,+sme2" }
+attributes #1 = { mustprogress norecurse nounwind ssp uwtable(sync) "aarch64_pstate_sm_enabled" "stack-protector-buffer-size"="8" "target-cpu"="apple-a16" "target-features"="+sme,+sme-f64f64,+sme2" }
+attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #3 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: write) }
+attributes #4 = { "aarch64_pstate_sm_enabled" "no-builtin-calloc" "no-builtin-stpcpy" }
+attributes #5 = { nounwind }

From 4716c4752fd56692bcca775606a2650418eb3be8 Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin <kerry.mclaughlin@arm.com>
Date: Mon, 19 Aug 2024 10:17:10 +0100
Subject: [PATCH 23/38] [AArch64][SME] Return false from
 produceCompactUnwindFrame if VG save required. (#104588)

The compact unwind format requires all registers are stored in pairs, so
return false from produceCompactUnwindFrame if we require saving VG.
---
 .../Target/AArch64/AArch64FrameLowering.cpp   |  3 +-
 .../test/CodeGen/AArch64/sme-darwin-sve-vg.ll | 55 +++++++++++++++++++
 2 files changed, 57 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 83d9dd1725973..c09c1792e9898 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2859,7 +2859,8 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) {
   return Subtarget.isTargetMachO() &&
          !(Subtarget.getTargetLowering()->supportSwiftError() &&
            Attrs.hasAttrSomewhere(Attribute::SwiftError)) &&
-         MF.getFunction().getCallingConv() != CallingConv::SwiftTail;
+         MF.getFunction().getCallingConv() != CallingConv::SwiftTail &&
+         !requiresSaveVG(MF);
 }
 
 static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
diff --git a/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll b/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll
new file mode 100644
index 0000000000000..c32e9cbc05393
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-darwin-sve-vg.ll
@@ -0,0 +1,55 @@
+; RUN: llc -mtriple=aarch64-darwin -mattr=+sve -mattr=+sme -verify-machineinstrs < %s | FileCheck %s
+
+declare void @normal_callee();
+
+define void @locally_streaming_fn() #0 {
+; CHECK-LABEL: locally_streaming_fn:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    stp d15, d14, [sp, #-96]! ; 16-byte Folded Spill
+; CHECK-NEXT:    .cfi_def_cfa_offset 96
+; CHECK-NEXT:    rdsvl x9, #1
+; CHECK-NEXT:    stp d13, d12, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT:    lsr x9, x9, #3
+; CHECK-NEXT:    stp d11, d10, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp d9, d8, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT:    stp x30, x9, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT:    cntd x9
+; CHECK-NEXT:    str x9, [sp, #80] ; 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset vg, -16
+; CHECK-NEXT:    .cfi_offset w30, -32
+; CHECK-NEXT:    .cfi_offset b8, -40
+; CHECK-NEXT:    .cfi_offset b9, -48
+; CHECK-NEXT:    .cfi_offset b10, -56
+; CHECK-NEXT:    .cfi_offset b11, -64
+; CHECK-NEXT:    .cfi_offset b12, -72
+; CHECK-NEXT:    .cfi_offset b13, -80
+; CHECK-NEXT:    .cfi_offset b14, -88
+; CHECK-NEXT:    .cfi_offset b15, -96
+; CHECK-NEXT:    smstart sm
+; CHECK-NEXT:    .cfi_offset vg, -24
+; CHECK-NEXT:    smstop sm
+; CHECK-NEXT:    bl _normal_callee
+; CHECK-NEXT:    smstart sm
+; CHECK-NEXT:    .cfi_restore vg
+; CHECK-NEXT:    smstop sm
+; CHECK-NEXT:    ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldr x30, [sp, #64] ; 8-byte Folded Reload
+; CHECK-NEXT:    ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT:    ldp d15, d14, [sp], #96 ; 16-byte Folded Reload
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore b8
+; CHECK-NEXT:    .cfi_restore b9
+; CHECK-NEXT:    .cfi_restore b10
+; CHECK-NEXT:    .cfi_restore b11
+; CHECK-NEXT:    .cfi_restore b12
+; CHECK-NEXT:    .cfi_restore b13
+; CHECK-NEXT:    .cfi_restore b14
+; CHECK-NEXT:    .cfi_restore b15
+; CHECK-NEXT:    ret
+  call void @normal_callee()
+  ret void
+}
+
+attributes #0 = { "aarch64_pstate_sm_body" uwtable(async) }

From 876d0501d312b7303423fc4d3c388174a1465ca5 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen@arm.com>
Date: Tue, 15 Oct 2024 11:56:40 +0100
Subject: [PATCH 24/38] [AArch64][SME] Fix iterator to
 fixupCalleeSaveRestoreStackOffset (#110855)

The iterator passed to `fixupCalleeSaveRestoreStackOffset` may be
incorrect when it tries to skip over the instructions that get the
current value of 'vg', when there is a 'rdsvl' instruction straight
after the prologue. That's because it doesn't check that the instruction
is still a 'frame-setup' instruction.
---
 .../Target/AArch64/AArch64FrameLowering.cpp   |  9 ++---
 llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll  | 38 +++++++++++++++++++
 2 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index c09c1792e9898..c183ffd384c22 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1947,12 +1947,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   // pointer bump above.
   while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup) &&
          !IsSVECalleeSave(MBBI)) {
-    // Move past instructions generated to calculate VG
-    if (requiresSaveVG(MF))
-      while (isVGInstruction(MBBI))
-        ++MBBI;
-
-    if (CombineSPBump)
+    if (CombineSPBump &&
+        // Only fix-up frame-setup load/store instructions.
+        (!requiresSaveVG(MF) || !isVGInstruction(MBBI)))
       fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
                                         NeedsWinCFI, &HasWinCFI);
     ++MBBI;
diff --git a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
index fa8f92cb0a2c9..38666a05c20f8 100644
--- a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
+++ b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
@@ -1102,6 +1102,44 @@ define void @streaming_compatible_no_sve(i32 noundef %x) #4 {
   ret void
 }
 
+; The algorithm that fixes up the offsets of the callee-save/restore
+; instructions must jump over the instructions that instantiate the current
+; 'VG' value. We must make sure that it doesn't consider any RDSVL in
+; user-code as if it is part of the frame-setup when doing so.
+define void @test_rdsvl_right_after_prologue(i64 %x0) nounwind {
+; NO-SVE-CHECK-LABEL: test_rdsvl_right_after_prologue:
+; NO-SVE-CHECK:     // %bb.0:
+; NO-SVE-CHECK-NEXT: stp     d15, d14, [sp, #-96]!           // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: stp     d13, d12, [sp, #16]             // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: mov     x9, x0
+; NO-SVE-CHECK-NEXT: stp     d11, d10, [sp, #32]             // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: stp     d9, d8, [sp, #48]               // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: stp     x29, x30, [sp, #64]             // 16-byte Folded Spill
+; NO-SVE-CHECK-NEXT: bl      __arm_get_current_vg
+; NO-SVE-CHECK-NEXT: str     x0, [sp, #80]                   // 8-byte Folded Spill
+; NO-SVE-CHECK-NEXT: mov     x0, x9
+; NO-SVE-CHECK-NEXT: rdsvl   x8, #1
+; NO-SVE-CHECK-NEXT: add     x29, sp, #64
+; NO-SVE-CHECK-NEXT: lsr     x8, x8, #3
+; NO-SVE-CHECK-NEXT: mov     x1, x0
+; NO-SVE-CHECK-NEXT: smstart sm
+; NO-SVE-CHECK-NEXT: mov     x0, x8
+; NO-SVE-CHECK-NEXT: bl      bar
+; NO-SVE-CHECK-NEXT: smstop  sm
+; NO-SVE-CHECK-NEXT: ldp     x29, x30, [sp, #64]             // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ldp     d9, d8, [sp, #48]               // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ldp     d11, d10, [sp, #32]             // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ldp     d13, d12, [sp, #16]             // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ldp     d15, d14, [sp], #96             // 16-byte Folded Reload
+; NO-SVE-CHECK-NEXT: ret
+  %some_alloc = alloca i64, align 8
+  %rdsvl = tail call i64 @llvm.aarch64.sme.cntsd()
+  call void @bar(i64 %rdsvl, i64 %x0) "aarch64_pstate_sm_enabled"
+  ret void
+}
+
+declare void @bar(i64, i64)
+
 ; Ensure we still emit async unwind information with -fno-asynchronous-unwind-tables
 ; if the function contains a streaming-mode change.
 

From ab4b5a2db582958af1ee308a790cfdb42bd24720 Mon Sep 17 00:00:00 2001
From: Brian Cain <bcain@quicinc.com>
Date: Fri, 29 Nov 2024 16:42:44 -0600
Subject: [PATCH 25/38] [clang] recognize hexagon-*-ld.lld variants (#117338)

If we create a cross toolchain with a ${triple}-ld.lld symlink, clang
finds that symlink and when it uses it, it's not recognized as "lld".
Let's resolve that symlink and consider it when determining lld-ness.

For example, clang provides hexagon-link specific link arguments such as
`-mcpu=hexagonv65` and `-march=hexagon` when
hexagon-unknown-linux-musl-ld.lld is found. lld rejects this with the
following error:

hexagon-unknown-linux-musl-ld.lld: error: unknown emulation:
cpu=hexagonv65

(cherry picked from commit 2dc0de753b6df83e35f3d98e0e6a26c95e3399c0)
---
 clang/lib/Driver/ToolChains/Hexagon.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp
index 29781399cbab4..be7851adecea6 100644
--- a/clang/lib/Driver/ToolChains/Hexagon.cpp
+++ b/clang/lib/Driver/ToolChains/Hexagon.cpp
@@ -294,9 +294,10 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA,
   bool IncStartFiles = !Args.hasArg(options::OPT_nostartfiles);
   bool IncDefLibs = !Args.hasArg(options::OPT_nodefaultlibs);
   bool UseG0 = false;
-  const char *Exec = Args.MakeArgString(HTC.GetLinkerPath());
-  bool UseLLD = (llvm::sys::path::filename(Exec).equals_insensitive("ld.lld") ||
-                 llvm::sys::path::stem(Exec).equals_insensitive("ld.lld"));
+  bool UseLLD = false;
+  const char *Exec = Args.MakeArgString(HTC.GetLinkerPath(&UseLLD));
+  UseLLD = UseLLD || llvm::sys::path::filename(Exec).ends_with("ld.lld") ||
+           llvm::sys::path::stem(Exec).ends_with("ld.lld");
   bool UseShared = IsShared && !IsStatic;
   StringRef CpuVer = toolchains::HexagonToolChain::GetTargetCPUVersion(Args);
 

From c9e72b3945f1672f0507539ce903c9696b3479ba Mon Sep 17 00:00:00 2001
From: Tobias Hieta <tobias@hieta.se>
Date: Fri, 13 Dec 2024 14:49:03 +0100
Subject: [PATCH 26/38] Bump version to 19.1.6

---
 cmake/Modules/LLVMVersion.cmake          | 2 +-
 libcxx/include/__config                  | 2 +-
 llvm/utils/gn/secondary/llvm/version.gni | 2 +-
 llvm/utils/lit/lit/__init__.py           | 2 +-
 llvm/utils/mlgo-utils/mlgo/__init__.py   | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/cmake/Modules/LLVMVersion.cmake b/cmake/Modules/LLVMVersion.cmake
index 9b39550118c49..93d36736439b1 100644
--- a/cmake/Modules/LLVMVersion.cmake
+++ b/cmake/Modules/LLVMVersion.cmake
@@ -7,7 +7,7 @@ if(NOT DEFINED LLVM_VERSION_MINOR)
   set(LLVM_VERSION_MINOR 1)
 endif()
 if(NOT DEFINED LLVM_VERSION_PATCH)
-  set(LLVM_VERSION_PATCH 5)
+  set(LLVM_VERSION_PATCH 6)
 endif()
 if(NOT DEFINED LLVM_VERSION_SUFFIX)
   set(LLVM_VERSION_SUFFIX)
diff --git a/libcxx/include/__config b/libcxx/include/__config
index 33e0043136fee..e97669bca411e 100644
--- a/libcxx/include/__config
+++ b/libcxx/include/__config
@@ -27,7 +27,7 @@
 // _LIBCPP_VERSION represents the version of libc++, which matches the version of LLVM.
 // Given a LLVM release LLVM XX.YY.ZZ (e.g. LLVM 17.0.1 == 17.00.01), _LIBCPP_VERSION is
 // defined to XXYYZZ.
-#  define _LIBCPP_VERSION 190105
+#  define _LIBCPP_VERSION 190106
 
 #  define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y
 #  define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y)
diff --git a/llvm/utils/gn/secondary/llvm/version.gni b/llvm/utils/gn/secondary/llvm/version.gni
index c32a040dcba67..c46d2abdb8ef2 100644
--- a/llvm/utils/gn/secondary/llvm/version.gni
+++ b/llvm/utils/gn/secondary/llvm/version.gni
@@ -1,4 +1,4 @@
 llvm_version_major = 19
 llvm_version_minor = 1
-llvm_version_patch = 5
+llvm_version_patch = 6
 llvm_version = "$llvm_version_major.$llvm_version_minor.$llvm_version_patch"
diff --git a/llvm/utils/lit/lit/__init__.py b/llvm/utils/lit/lit/__init__.py
index 8557e5a061d1d..ee0a3b2240e1e 100644
--- a/llvm/utils/lit/lit/__init__.py
+++ b/llvm/utils/lit/lit/__init__.py
@@ -2,7 +2,7 @@
 
 __author__ = "Daniel Dunbar"
 __email__ = "daniel@minormatter.com"
-__versioninfo__ = (19, 1, 5)
+__versioninfo__ = (19, 1, 6)
 __version__ = ".".join(str(v) for v in __versioninfo__) + "dev"
 
 __all__ = []
diff --git a/llvm/utils/mlgo-utils/mlgo/__init__.py b/llvm/utils/mlgo-utils/mlgo/__init__.py
index e2f8ca88d91ec..cec9ca8b2f648 100644
--- a/llvm/utils/mlgo-utils/mlgo/__init__.py
+++ b/llvm/utils/mlgo-utils/mlgo/__init__.py
@@ -4,7 +4,7 @@
 
 from datetime import timezone, datetime
 
-__versioninfo__ = (19, 1, 5)
+__versioninfo__ = (19, 1, 6)
 __version__ = (
     ".".join(str(v) for v in __versioninfo__)
     + "dev"

From 3d21a9a8e4ecd4a88acfa80700cebfdcd35dde6c Mon Sep 17 00:00:00 2001
From: Tom Stellard <tstellar@redhat.com>
Date: Fri, 6 Dec 2024 18:34:59 -0800
Subject: [PATCH 27/38] [Clang][perf-training] Fix clean command in
 perf-helper.py (#118978)

The first path argument was always being ignored, and since most calls
to this command only passed one path, it wasn't actually doing anything
in most cases.

This bug was introduced by dd0356d741aefa25ece973d6cc4b55dcb73b84b4.

(cherry picked from commit 18af3fc1bf8855e1e166e64a9210ed07d610aa54)
---
 clang/utils/perf-training/perf-helper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/utils/perf-training/perf-helper.py b/clang/utils/perf-training/perf-helper.py
index 3ed42a187fd80..d76c6ede3fe5a 100644
--- a/clang/utils/perf-training/perf-helper.py
+++ b/clang/utils/perf-training/perf-helper.py
@@ -36,7 +36,7 @@ def clean(args):
             + "\tRemoves all files with extension from <path>."
         )
         return 1
-    for path in args[1:-1]:
+    for path in args[0:-1]:
         for filename in findFilesWithExtension(path, args[-1]):
             os.remove(filename)
     return 0

From d2953ab0a79f9f2d2877703dbc235b92541faec9 Mon Sep 17 00:00:00 2001
From: Sam Clegg <sbc@chromium.org>
Date: Mon, 9 Sep 2024 09:28:08 -0700
Subject: [PATCH 28/38] [lld][WebAssembly] Fix use of uninitialized stack data
 with --wasm64 (#107780)

In the case of `--wasm64` we were setting the type of the init expression
to be 64-bit but were only setting the low 32-bits of the value (by
assigning to Int32).

Fixes: https://github.com/emscripten-core/emscripten/issues/22538
(cherry picked from commit 5c8fd1eece8fff69871cef57a2363dc0f734a7d1)
---
 lld/wasm/SyntheticSections.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp
index f02f55519a251..72d08b849d8e8 100644
--- a/lld/wasm/SyntheticSections.cpp
+++ b/lld/wasm/SyntheticSections.cpp
@@ -587,8 +587,7 @@ void ElemSection::writeBody() {
     initExpr.Inst.Value.Global = WasmSym::tableBase->getGlobalIndex();
   } else {
     bool is64 = config->is64.value_or(false);
-    initExpr.Inst.Opcode = is64 ? WASM_OPCODE_I64_CONST : WASM_OPCODE_I32_CONST;
-    initExpr.Inst.Value.Int32 = config->tableBase;
+    initExpr = intConst(config->tableBase, is64);
   }
   writeInitExpr(os, initExpr);
 

From be6e2e72cb51f95c852e6d6567c8b2d066dfd19e Mon Sep 17 00:00:00 2001
From: estewart08 <ethan.stewart@amd.com>
Date: Mon, 19 Aug 2024 17:59:21 -0500
Subject: [PATCH 29/38] [offload] - Fix issue with standalone debug offload
 build (#104647)

Error: CommandLine Error: Option 'attributor-manifest-internal'
registered more than once

During the standalone debug build of offload the above error is seen at
app runtime when using a prebuilt llvm with LLVM_LINK_LLVM_DYLIB=ON.
This is caused by linking both libLLVM.so and various archives that are
found via llvm_map_components_to_libnames for jit support.
---
 offload/plugins-nextgen/common/CMakeLists.txt | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/offload/plugins-nextgen/common/CMakeLists.txt b/offload/plugins-nextgen/common/CMakeLists.txt
index 284f98875170c..aea20c6ec3143 100644
--- a/offload/plugins-nextgen/common/CMakeLists.txt
+++ b/offload/plugins-nextgen/common/CMakeLists.txt
@@ -11,13 +11,15 @@ add_dependencies(PluginCommon intrinsics_gen)
 
 # Only enable JIT for those targets that LLVM can support.
 set(supported_jit_targets AMDGPU NVPTX)
-foreach(target IN LISTS supported_jit_targets)
-  if("${target}" IN_LIST LLVM_TARGETS_TO_BUILD)
-	  target_compile_definitions(PluginCommon PRIVATE "LIBOMPTARGET_JIT_${target}")
-    llvm_map_components_to_libnames(llvm_libs ${target})
-    target_link_libraries(PluginCommon PRIVATE ${llvm_libs})
-  endif()
-endforeach()
+if (NOT LLVM_LINK_LLVM_DYLIB)
+  foreach(target IN LISTS supported_jit_targets)
+    if("${target}" IN_LIST LLVM_TARGETS_TO_BUILD)
+      target_compile_definitions(PluginCommon PRIVATE "LIBOMPTARGET_JIT_${target}")
+      llvm_map_components_to_libnames(llvm_libs ${target})
+      target_link_libraries(PluginCommon PRIVATE ${llvm_libs})
+    endif()
+  endforeach()
+endif()
 
 # Include the RPC server from the `libc` project if availible.
 if(TARGET llvmlibc_rpc_server AND ${LIBOMPTARGET_GPU_LIBC_SUPPORT})

From e3b2d0149ffadef53fe8165f55e648466c025416 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@gentoo.org>
Date: Sat, 30 Nov 2024 12:32:57 +0100
Subject: [PATCH 30/38] [offload] Include CheckCXXCompilerFlag in CMake

Include CheckCXXCompilerFlag before it is used in the top-level CMake
file.  This fixes standalone builds, but it is also cleaner than
assuming that some previous CMake file will include it for us.
---
 offload/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/offload/CMakeLists.txt b/offload/CMakeLists.txt
index 4cd97a6a5ff63..3ba892c76c8b8 100644
--- a/offload/CMakeLists.txt
+++ b/offload/CMakeLists.txt
@@ -127,6 +127,7 @@ include(LibomptargetGetDependencies)
 # Set up testing infrastructure.
 include(OpenMPTesting)
 
+include(CheckCXXCompilerFlag)
 check_cxx_compiler_flag(-Werror=global-constructors OFFLOAD_HAVE_WERROR_CTOR)
 
 # LLVM source tree is required at build time for libomptarget

From 5ce79b5045ae2e362fbd7b28351b10634d05b0f5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@gentoo.org>
Date: Sat, 30 Nov 2024 15:32:39 +0100
Subject: [PATCH 31/38] [offload] [test] Add "omp" test dependency only when
 present

Add the `omp` target dependency to tests only when the respective target
is present, i.e. when not building standalone against system libomp.
---
 offload/test/CMakeLists.txt | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/offload/test/CMakeLists.txt b/offload/test/CMakeLists.txt
index 3ac5d7907e2cc..c90ed26389faf 100644
--- a/offload/test/CMakeLists.txt
+++ b/offload/test/CMakeLists.txt
@@ -22,6 +22,11 @@ if(CUDAToolkit_FOUND)
   get_filename_component(CUDA_LIBDIR "${CUDA_cudart_static_LIBRARY}" DIRECTORY)
 endif()
 
+set(OMP_DEPEND)
+if(TARGET omp)
+  set(OMP_DEPEND omp)
+endif()
+
 string(REGEX MATCHALL "([^\ ]+\ |[^\ ]+$)" SYSTEM_TARGETS "${LIBOMPTARGET_SYSTEM_TARGETS}")
 foreach(CURRENT_TARGET IN LISTS SYSTEM_TARGETS)
   string(STRIP "${CURRENT_TARGET}" CURRENT_TARGET)
@@ -29,7 +34,7 @@ foreach(CURRENT_TARGET IN LISTS SYSTEM_TARGETS)
   add_offload_testsuite(check-libomptarget-${CURRENT_TARGET}
     "Running libomptarget tests"
     ${CMAKE_CURRENT_BINARY_DIR}/${CURRENT_TARGET}
-    DEPENDS omptarget omp ${LIBOMPTARGET_TESTED_PLUGINS}
+    DEPENDS omptarget ${OMP_DEPEND} ${LIBOMPTARGET_TESTED_PLUGINS}
     ARGS ${LIBOMPTARGET_LIT_ARG_LIST})
   list(APPEND LIBOMPTARGET_LIT_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CURRENT_TARGET})
 
@@ -43,12 +48,12 @@ add_offload_testsuite(check-libomptarget
   "Running libomptarget tests"
   ${LIBOMPTARGET_LIT_TESTSUITES}
   EXCLUDE_FROM_CHECK_ALL
-  DEPENDS omptarget omp ${LIBOMPTARGET_TESTED_PLUGINS}
+  DEPENDS omptarget ${OMP_DEPEND} ${LIBOMPTARGET_TESTED_PLUGINS}
   ARGS ${LIBOMPTARGET_LIT_ARG_LIST})
 
 add_offload_testsuite(check-offload
   "Running libomptarget tests"
   ${LIBOMPTARGET_LIT_TESTSUITES}
   EXCLUDE_FROM_CHECK_ALL
-  DEPENDS omptarget omp ${LIBOMPTARGET_TESTED_PLUGINS}
+  DEPENDS omptarget ${OMP_DEPEND} ${LIBOMPTARGET_TESTED_PLUGINS}
   ARGS ${LIBOMPTARGET_LIT_ARG_LIST})

From 909104bb5a0540a3bffaa4a816efc1c54be81969 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@gentoo.org>
Date: Sat, 30 Nov 2024 15:39:53 +0100
Subject: [PATCH 32/38] [offload] Define OPENMP_TEST*_FLAGS in standalone
 builds

Copy the `OPENMP_TEST_FLAGS` and `OPENMP_TEST_OPENMP_FLAGS` from openmp
for standalone builds, as required by the lit configs.
---
 offload/CMakeLists.txt | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/offload/CMakeLists.txt b/offload/CMakeLists.txt
index 3ba892c76c8b8..752434c6a28e1 100644
--- a/offload/CMakeLists.txt
+++ b/offload/CMakeLists.txt
@@ -283,6 +283,11 @@ if(OPENMP_STANDALONE_BUILD)
       ${LLVM_LIBRARY_DIRS}
     REQUIRED
   )
+
+  set(OPENMP_TEST_FLAGS "" CACHE STRING
+    "Extra compiler flags to send to the test compiler.")
+  set(OPENMP_TEST_OPENMP_FLAGS ${OPENMP_TEST_COMPILER_OPENMP_FLAGS} CACHE STRING
+    "OpenMP compiler flag to use for testing OpenMP runtime libraries.")
 endif()
 
 macro(pythonize_bool var)

From b70bf59262cbae12728bd866506debff5c89eb4e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@gentoo.org>
Date: Sat, 30 Nov 2024 15:44:37 +0100
Subject: [PATCH 33/38] [offload] Define LIBOMPTARGET_OPENMP_*_FOLDER in
 standalone builds

Define `LIBOMPTARGET_OPENMP_HEADER_FOLDER`
and `LIBOMPTARGET_OPENMP_HOST_RTL_FOLDER` needed for testing
in standalone builds.
---
 offload/CMakeLists.txt | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/offload/CMakeLists.txt b/offload/CMakeLists.txt
index 752434c6a28e1..959d6260bc749 100644
--- a/offload/CMakeLists.txt
+++ b/offload/CMakeLists.txt
@@ -284,10 +284,25 @@ if(OPENMP_STANDALONE_BUILD)
     REQUIRED
   )
 
+  find_path (
+    LIBOMP_INCLUDE_DIR
+    NAMES
+      omp.h
+    HINTS
+    ${COMPILER_RESOURCE_DIR}/include
+    ${CMAKE_INSTALL_PREFIX}/include
+  )
+
+  get_filename_component(LIBOMP_LIBRARY_DIR ${LIBOMP_STANDALONE} DIRECTORY)
+
   set(OPENMP_TEST_FLAGS "" CACHE STRING
     "Extra compiler flags to send to the test compiler.")
   set(OPENMP_TEST_OPENMP_FLAGS ${OPENMP_TEST_COMPILER_OPENMP_FLAGS} CACHE STRING
     "OpenMP compiler flag to use for testing OpenMP runtime libraries.")
+  set(LIBOMPTARGET_OPENMP_HEADER_FOLDER "${LIBOMP_INCLUDE_DIR}" CACHE STRING
+    "Path to folder containing omp.h")
+  set(LIBOMPTARGET_OPENMP_HOST_RTL_FOLDER "${LIBOMP_LIBRARY_DIR}" CACHE STRING
+    "Path to folder containing libomp.so, and libLLVMSupport.so with profiling enabled")
 endif()
 
 macro(pythonize_bool var)

From 2384a062fc1cc9d115c537f0cfe36ae6028e7bfd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@gentoo.org>
Date: Sun, 1 Dec 2024 16:26:46 +0100
Subject: [PATCH 34/38] [cmake] Use DetectTestCompiler from openmp directory

Fix the DetectTestCompiler project use to reference the openmp source
tree, since the respective files were not copied to offload, and there
is no point in duplicating them.

Fixes #90333
---
 offload/cmake/OpenMPTesting.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/offload/cmake/OpenMPTesting.cmake b/offload/cmake/OpenMPTesting.cmake
index 11eafeb764260..3e04a3423c4d6 100644
--- a/offload/cmake/OpenMPTesting.cmake
+++ b/offload/cmake/OpenMPTesting.cmake
@@ -124,7 +124,7 @@ if (${OPENMP_STANDALONE_BUILD})
   # project is built which is too late for detecting the compiler...
   file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/DetectTestCompiler)
   execute_process(
-    COMMAND ${CMAKE_COMMAND} -G${CMAKE_GENERATOR} ${CMAKE_CURRENT_LIST_DIR}/DetectTestCompiler
+    COMMAND ${CMAKE_COMMAND} -G${CMAKE_GENERATOR} ${CMAKE_CURRENT_SOURCE_DIR}/../openmp/cmake/DetectTestCompiler
       -DCMAKE_C_COMPILER=${OPENMP_TEST_C_COMPILER}
       -DCMAKE_CXX_COMPILER=${OPENMP_TEST_CXX_COMPILER}
     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/DetectTestCompiler

From 8069ce6ab1766a166b956192ad72c143be3b860e Mon Sep 17 00:00:00 2001
From: George Stagg <georgestagg@gmail.com>
Date: Wed, 4 Dec 2024 21:12:15 +0000
Subject: [PATCH 35/38] [WebAssembly] Support multiple `.init_array` fragments
 when writing Wasm objects (#111008)

(cherry picked from commit ac5dd455caaf286625f61b604291f2eaed9702f0)
---
 llvm/lib/MC/WasmObjectWriter.cpp      | 89 ++++++++++++++-------------
 llvm/test/MC/WebAssembly/init-array.s | 49 +++++++++++++++
 2 files changed, 96 insertions(+), 42 deletions(-)
 create mode 100644 llvm/test/MC/WebAssembly/init-array.s

diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp
index f25dc92fa235a..a66c5713ff8a6 100644
--- a/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/llvm/lib/MC/WasmObjectWriter.cpp
@@ -1853,49 +1853,54 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
     if (EmptyFrag.getKind() != MCFragment::FT_Data)
       report_fatal_error(".init_array section should be aligned");
 
-    const MCFragment &AlignFrag = *EmptyFrag.getNext();
-    if (AlignFrag.getKind() != MCFragment::FT_Align)
-      report_fatal_error(".init_array section should be aligned");
-    if (cast<MCAlignFragment>(AlignFrag).getAlignment() !=
-        Align(is64Bit() ? 8 : 4))
-      report_fatal_error(".init_array section should be aligned for pointers");
-
-    const MCFragment &Frag = *AlignFrag.getNext();
-    if (Frag.hasInstructions() || Frag.getKind() != MCFragment::FT_Data)
-      report_fatal_error("only data supported in .init_array section");
-
-    uint16_t Priority = UINT16_MAX;
-    unsigned PrefixLength = strlen(".init_array");
-    if (WS.getName().size() > PrefixLength) {
-      if (WS.getName()[PrefixLength] != '.')
+    const MCFragment *nextFrag = EmptyFrag.getNext();
+    while (nextFrag != nullptr) {
+      const MCFragment &AlignFrag = *nextFrag;
+      if (AlignFrag.getKind() != MCFragment::FT_Align)
+        report_fatal_error(".init_array section should be aligned");
+      if (cast<MCAlignFragment>(AlignFrag).getAlignment() !=
+          Align(is64Bit() ? 8 : 4))
         report_fatal_error(
-            ".init_array section priority should start with '.'");
-      if (WS.getName().substr(PrefixLength + 1).getAsInteger(10, Priority))
-        report_fatal_error("invalid .init_array section priority");
-    }
-    const auto &DataFrag = cast<MCDataFragment>(Frag);
-    const SmallVectorImpl<char> &Contents = DataFrag.getContents();
-    for (const uint8_t *
-             P = (const uint8_t *)Contents.data(),
-            *End = (const uint8_t *)Contents.data() + Contents.size();
-         P != End; ++P) {
-      if (*P != 0)
-        report_fatal_error("non-symbolic data in .init_array section");
-    }
-    for (const MCFixup &Fixup : DataFrag.getFixups()) {
-      assert(Fixup.getKind() ==
-             MCFixup::getKindForSize(is64Bit() ? 8 : 4, false));
-      const MCExpr *Expr = Fixup.getValue();
-      auto *SymRef = dyn_cast<MCSymbolRefExpr>(Expr);
-      if (!SymRef)
-        report_fatal_error("fixups in .init_array should be symbol references");
-      const auto &TargetSym = cast<const MCSymbolWasm>(SymRef->getSymbol());
-      if (TargetSym.getIndex() == InvalidIndex)
-        report_fatal_error("symbols in .init_array should exist in symtab");
-      if (!TargetSym.isFunction())
-        report_fatal_error("symbols in .init_array should be for functions");
-      InitFuncs.push_back(
-          std::make_pair(Priority, TargetSym.getIndex()));
+            ".init_array section should be aligned for pointers");
+
+      const MCFragment &Frag = *AlignFrag.getNext();
+      nextFrag = Frag.getNext();
+      if (Frag.hasInstructions() || Frag.getKind() != MCFragment::FT_Data)
+        report_fatal_error("only data supported in .init_array section");
+
+      uint16_t Priority = UINT16_MAX;
+      unsigned PrefixLength = strlen(".init_array");
+      if (WS.getName().size() > PrefixLength) {
+        if (WS.getName()[PrefixLength] != '.')
+          report_fatal_error(
+              ".init_array section priority should start with '.'");
+        if (WS.getName().substr(PrefixLength + 1).getAsInteger(10, Priority))
+          report_fatal_error("invalid .init_array section priority");
+      }
+      const auto &DataFrag = cast<MCDataFragment>(Frag);
+      const SmallVectorImpl<char> &Contents = DataFrag.getContents();
+      for (const uint8_t *
+               P = (const uint8_t *)Contents.data(),
+              *End = (const uint8_t *)Contents.data() + Contents.size();
+           P != End; ++P) {
+        if (*P != 0)
+          report_fatal_error("non-symbolic data in .init_array section");
+      }
+      for (const MCFixup &Fixup : DataFrag.getFixups()) {
+        assert(Fixup.getKind() ==
+               MCFixup::getKindForSize(is64Bit() ? 8 : 4, false));
+        const MCExpr *Expr = Fixup.getValue();
+        auto *SymRef = dyn_cast<MCSymbolRefExpr>(Expr);
+        if (!SymRef)
+          report_fatal_error(
+              "fixups in .init_array should be symbol references");
+        const auto &TargetSym = cast<const MCSymbolWasm>(SymRef->getSymbol());
+        if (TargetSym.getIndex() == InvalidIndex)
+          report_fatal_error("symbols in .init_array should exist in symtab");
+        if (!TargetSym.isFunction())
+          report_fatal_error("symbols in .init_array should be for functions");
+        InitFuncs.push_back(std::make_pair(Priority, TargetSym.getIndex()));
+      }
     }
   }
 
diff --git a/llvm/test/MC/WebAssembly/init-array.s b/llvm/test/MC/WebAssembly/init-array.s
new file mode 100644
index 0000000000000..e79fb453ec12a
--- /dev/null
+++ b/llvm/test/MC/WebAssembly/init-array.s
@@ -0,0 +1,49 @@
+# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj < %s | obj2yaml | FileCheck %s
+
+init1:
+	.functype	init1 () -> ()
+	end_function
+
+init2:
+	.functype	init2 () -> ()
+	end_function
+
+	.section	.init_array,"",@
+	.p2align	2, 0
+	.int32	init1
+
+	.section	.init_array,"",@
+	.p2align	2
+	.int32	init2
+
+# CHECK:        - Type:            FUNCTION
+# CHECK-NEXT:     FunctionTypes:   [ 0, 0 ]
+# CHECK-NEXT:   - Type:            CODE
+# CHECK-NEXT:     Functions:
+# CHECK-NEXT:       - Index:           0
+# CHECK-NEXT:         Locals:          []
+# CHECK-NEXT:         Body:            0B
+# CHECK-NEXT:       - Index:           1
+# CHECK-NEXT:         Locals:          []
+# CHECK-NEXT:         Body:            0B
+# CHECK-NEXT:   - Type:            CUSTOM
+# CHECK-NEXT:     Name:            linking
+# CHECK-NEXT:     Version:         2
+# CHECK-NEXT:     SymbolTable:
+# CHECK-NEXT:       - Index:           0
+# CHECK-NEXT:         Kind:            FUNCTION
+# CHECK-NEXT:         Name:            init1
+# CHECK-NEXT:         Flags:           [ BINDING_LOCAL ]
+# CHECK-NEXT:         Function:        0
+# CHECK-NEXT:       - Index:           1
+# CHECK-NEXT:         Kind:            FUNCTION
+# CHECK-NEXT:         Name:            init2
+# CHECK-NEXT:         Flags:           [ BINDING_LOCAL ]
+# CHECK-NEXT:         Function:        1
+# CHECK-NEXT:     InitFunctions:
+# CHECK-NEXT:       - Priority:        65535
+# CHECK-NEXT:         Symbol:          0
+# CHECK-NEXT:       - Priority:        65535
+# CHECK-NEXT:         Symbol:          1
+# CHECK-NEXT: ...
+#

From 8e9465bd150bcbe2d200efc7c0e31abf8ddebe58 Mon Sep 17 00:00:00 2001
From: George Stagg <george.stagg@posit.co>
Date: Tue, 10 Dec 2024 16:28:18 +0000
Subject: [PATCH 36/38] [WebAssembly] Handle symbols in `.init_array` sections
 (#119127)

Follow on from #111008.

(cherry picked from commit ed91843d435d0cd2c39ebb1a50f2907c621f07ed)
---
 llvm/lib/MC/WasmObjectWriter.cpp            | 22 ++++-
 llvm/test/MC/WebAssembly/init-array-label.s | 91 +++++++++++++++++++++
 2 files changed, 111 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/MC/WebAssembly/init-array-label.s

diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp
index a66c5713ff8a6..8526469245676 100644
--- a/llvm/lib/MC/WasmObjectWriter.cpp
+++ b/llvm/lib/MC/WasmObjectWriter.cpp
@@ -1326,6 +1326,22 @@ static bool isInSymtab(const MCSymbolWasm &Sym) {
   return true;
 }
 
+static bool isSectionReferenced(MCAssembler &Asm, MCSectionWasm &Section) {
+  StringRef SectionName = Section.getName();
+
+  for (const MCSymbol &S : Asm.symbols()) {
+    const auto &WS = static_cast<const MCSymbolWasm &>(S);
+    if (WS.isData() && WS.isInSection()) {
+      auto &RefSection = static_cast<MCSectionWasm &>(WS.getSection());
+      if (RefSection.getName() == SectionName) {
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
 void WasmObjectWriter::prepareImports(
     SmallVectorImpl<wasm::WasmImport> &Imports, MCAssembler &Asm) {
   // For now, always emit the memory import, since loads and stores are not
@@ -1482,8 +1498,10 @@ uint64_t WasmObjectWriter::writeOneObject(MCAssembler &Asm,
     LLVM_DEBUG(dbgs() << "Processing Section " << SectionName << "  group "
                       << Section.getGroup() << "\n";);
 
-    // .init_array sections are handled specially elsewhere.
-    if (SectionName.starts_with(".init_array"))
+    // .init_array sections are handled specially elsewhere, include them in
+    // data segments if and only if referenced by a symbol.
+    if (SectionName.starts_with(".init_array") &&
+        !isSectionReferenced(Asm, Section))
       continue;
 
     // Code is handled separately
diff --git a/llvm/test/MC/WebAssembly/init-array-label.s b/llvm/test/MC/WebAssembly/init-array-label.s
new file mode 100644
index 0000000000000..0b4a5ea2da0b5
--- /dev/null
+++ b/llvm/test/MC/WebAssembly/init-array-label.s
@@ -0,0 +1,91 @@
+# RUN: llvm-mc -triple=wasm32-unknown-unknown -filetype=obj < %s | obj2yaml | FileCheck %s
+
+init1:
+	.functype	init1 () -> ()
+	end_function
+
+init2:
+	.functype	init2 () -> ()
+	end_function
+
+	.section	.init_array.42,"",@
+	.p2align	2, 0x0
+	.int32	init1
+
+	.section	.init_array,"",@
+	.globl	p_init1
+	.p2align	2, 0x0
+p_init1:
+	.int32	init1
+	.size	p_init1, 4
+
+	.section	.init_array,"",@
+	.globl	p_init2
+	.p2align	2, 0x0
+p_init2:
+	.int32	init1
+	.int32	init2
+	.size	p_init2, 8
+
+# CHECK:        - Type:            FUNCTION
+# CHECK-NEXT:     FunctionTypes:   [ 0, 0 ]
+# CHECK-NEXT:   - Type:            DATACOUNT
+# CHECK-NEXT:     Count:           1
+# CHECK-NEXT:   - Type:            CODE
+# CHECK-NEXT:     Functions:
+# CHECK-NEXT:       - Index:           0
+# CHECK-NEXT:         Locals:          []
+# CHECK-NEXT:         Body:            0B
+# CHECK-NEXT:       - Index:           1
+# CHECK-NEXT:         Locals:          []
+# CHECK-NEXT:         Body:            0B
+# CHECK-NEXT:   - Type:            DATA
+# CHECK-NEXT:     Segments:
+# CHECK-NEXT:       - SectionOffset:   6
+# CHECK-NEXT:         InitFlags:       0
+# CHECK-NEXT:         Offset:
+# CHECK-NEXT:           Opcode:          I32_CONST
+# CHECK-NEXT:           Value:           0
+# CHECK-NEXT:         Content:         '000000000000000000000000'
+# CHECK-NEXT:   - Type:            CUSTOM
+# CHECK-NEXT:     Name:            linking
+# CHECK-NEXT:     Version:         2
+# CHECK-NEXT:     SymbolTable:
+# CHECK-NEXT:       - Index:           0
+# CHECK-NEXT:         Kind:            FUNCTION
+# CHECK-NEXT:         Name:            init1
+# CHECK-NEXT:         Flags:           [ BINDING_LOCAL ]
+# CHECK-NEXT:         Function:        0
+# CHECK-NEXT:       - Index:           1
+# CHECK-NEXT:         Kind:            FUNCTION
+# CHECK-NEXT:         Name:            init2
+# CHECK-NEXT:         Flags:           [ BINDING_LOCAL ]
+# CHECK-NEXT:         Function:        1
+# CHECK-NEXT:       - Index:           2
+# CHECK-NEXT:         Kind:            DATA
+# CHECK-NEXT:         Name:            p_init1
+# CHECK-NEXT:         Flags:           [  ]
+# CHECK-NEXT:         Segment:         0
+# CHECK-NEXT:         Size:            4
+# CHECK-NEXT:       - Index:           3
+# CHECK-NEXT:         Kind:            DATA
+# CHECK-NEXT:         Name:            p_init2
+# CHECK-NEXT:         Flags:           [  ]
+# CHECK-NEXT:         Segment:         0
+# CHECK-NEXT:         Offset:          4
+# CHECK-NEXT:         Size:            8
+# CHECK-NEXT:     SegmentInfo:
+# CHECK-NEXT:       - Index:           0
+# CHECK-NEXT:         Name:            .init_array
+# CHECK-NEXT:         Alignment:       2
+# CHECK-NEXT:         Flags:           [  ]
+# CHECK-NEXT:     InitFunctions:
+# CHECK-NEXT:       - Priority:        42
+# CHECK-NEXT:         Symbol:          0
+# CHECK-NEXT:       - Priority:        65535
+# CHECK-NEXT:         Symbol:          0
+# CHECK-NEXT:       - Priority:        65535
+# CHECK-NEXT:         Symbol:          0
+# CHECK-NEXT:       - Priority:        65535
+# CHECK-NEXT:         Symbol:          1
+# CHECK-NEXT: ...

From 657e03f8625c9e7326f241aea584ff5c43b30ba3 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me@antoniofrighetto.com>
Date: Wed, 4 Dec 2024 10:15:11 +0100
Subject: [PATCH 37/38] [AggressiveInstCombine] Use APInt and avoid truncation
 when folding loads

A miscompilation issue has been addressed with improved handling.

Fixes: https://github.com/llvm/llvm-project/issues/118467.
(cherry picked from commit f68b0e36997322eeda8fd199ea80deb1b49c5410)
---
 .../AggressiveInstCombine.cpp                 |  3 +-
 .../AggressiveInstCombine/AArch64/or-load.ll  | 20 +++----
 .../AggressiveInstCombine/X86/or-load.ll      | 52 +++++++++++++++----
 3 files changed, 53 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
index d5a38ec17a2a8..1d23ec8ced204 100644
--- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
+++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp
@@ -811,8 +811,7 @@ static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
     APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
     Load1Ptr = Load1Ptr->stripAndAccumulateConstantOffsets(
         DL, Offset1, /* AllowNonInbounds */ true);
-    Load1Ptr = Builder.CreatePtrAdd(Load1Ptr,
-                                    Builder.getInt32(Offset1.getZExtValue()));
+    Load1Ptr = Builder.CreatePtrAdd(Load1Ptr, Builder.getInt(Offset1));
   }
   // Generate wider load.
   NewLoad = Builder.CreateAlignedLoad(WiderType, Load1Ptr, LI1->getAlign(),
diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
index 1400ee7f703ca..10c4c9b0ca4c9 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/or-load.ll
@@ -1121,19 +1121,19 @@ entry:
 
 define i32 @loadCombine_4consecutive_metadata(ptr %p, ptr %pstr) {
 ; LE-LABEL: @loadCombine_4consecutive_metadata(
-; LE-NEXT:    [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1, !alias.scope !0
-; LE-NEXT:    store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0
+; LE-NEXT:    [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1, !alias.scope [[META0:![0-9]+]]
+; LE-NEXT:    store i32 25, ptr [[PSTR:%.*]], align 4, !noalias [[META0]]
 ; LE-NEXT:    ret i32 [[L1]]
 ;
 ; BE-LABEL: @loadCombine_4consecutive_metadata(
 ; BE-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
 ; BE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
 ; BE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; BE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope !0
-; BE-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope !0
-; BE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope !0
-; BE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope !0
-; BE-NEXT:    store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0
+; BE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope [[META0:![0-9]+]]
+; BE-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope [[META0]]
+; BE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope [[META0]]
+; BE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope [[META0]]
+; BE-NEXT:    store i32 25, ptr [[PSTR:%.*]], align 4, !noalias [[META0]]
 ; BE-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
 ; BE-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
 ; BE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
@@ -1869,7 +1869,7 @@ define i32 @loadCombine_4consecutive_badinsert2(ptr %p) {
 
 define i32 @loadCombine_4consecutive_badinsert3(ptr %p) {
 ; LE-LABEL: @loadCombine_4consecutive_badinsert3(
-; LE-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; LE-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
 ; LE-NEXT:    [[L1:%.*]] = load i32, ptr [[TMP1]], align 1
 ; LE-NEXT:    ret i32 [[L1]]
 ;
@@ -2088,7 +2088,7 @@ define i32 @loadCombine_4consecutive_badinsert6(ptr %p) {
 
 define void @nested_gep(ptr %p, ptr %dest) {
 ; LE-LABEL: @nested_gep(
-; LE-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 68
+; LE-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 68
 ; LE-NEXT:    [[LD2:%.*]] = load i64, ptr [[TMP1]], align 4
 ; LE-NEXT:    [[TRUNC:%.*]] = trunc i64 [[LD2]] to i32
 ; LE-NEXT:    store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4
@@ -2128,7 +2128,7 @@ define void @nested_gep(ptr %p, ptr %dest) {
 
 define void @bitcast_gep(ptr %p, ptr %dest) {
 ; LE-LABEL: @bitcast_gep(
-; LE-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 68
+; LE-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 68
 ; LE-NEXT:    [[LD2:%.*]] = load i64, ptr [[TMP1]], align 4
 ; LE-NEXT:    [[TRUNC:%.*]] = trunc i64 [[LD2]] to i32
 ; LE-NEXT:    store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4
diff --git a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
index 0aa6f9ecdf884..1b53c8f71222b 100644
--- a/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
+++ b/llvm/test/Transforms/AggressiveInstCombine/X86/or-load.ll
@@ -1205,19 +1205,19 @@ entry:
 
 define i32 @loadCombine_4consecutive_metadata(ptr %p, ptr %pstr) {
 ; LE-LABEL: @loadCombine_4consecutive_metadata(
-; LE-NEXT:    [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1, !alias.scope !0
-; LE-NEXT:    store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0
+; LE-NEXT:    [[L1:%.*]] = load i32, ptr [[P:%.*]], align 1, !alias.scope [[META0:![0-9]+]]
+; LE-NEXT:    store i32 25, ptr [[PSTR:%.*]], align 4, !noalias [[META0]]
 ; LE-NEXT:    ret i32 [[L1]]
 ;
 ; BE-LABEL: @loadCombine_4consecutive_metadata(
 ; BE-NEXT:    [[P1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
 ; BE-NEXT:    [[P2:%.*]] = getelementptr i8, ptr [[P]], i32 2
 ; BE-NEXT:    [[P3:%.*]] = getelementptr i8, ptr [[P]], i32 3
-; BE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope !0
-; BE-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope !0
-; BE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope !0
-; BE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope !0
-; BE-NEXT:    store i32 25, ptr [[PSTR:%.*]], align 4, !noalias !0
+; BE-NEXT:    [[L1:%.*]] = load i8, ptr [[P]], align 1, !alias.scope [[META0:![0-9]+]]
+; BE-NEXT:    [[L2:%.*]] = load i8, ptr [[P1]], align 1, !alias.scope [[META0]]
+; BE-NEXT:    [[L3:%.*]] = load i8, ptr [[P2]], align 1, !alias.scope [[META0]]
+; BE-NEXT:    [[L4:%.*]] = load i8, ptr [[P3]], align 1, !alias.scope [[META0]]
+; BE-NEXT:    store i32 25, ptr [[PSTR:%.*]], align 4, !noalias [[META0]]
 ; BE-NEXT:    [[E1:%.*]] = zext i8 [[L1]] to i32
 ; BE-NEXT:    [[E2:%.*]] = zext i8 [[L2]] to i32
 ; BE-NEXT:    [[E3:%.*]] = zext i8 [[L3]] to i32
@@ -2005,7 +2005,7 @@ define i32 @loadCombine_4consecutive_badinsert2(ptr %p) {
 
 define i32 @loadCombine_4consecutive_badinsert3(ptr %p) {
 ; LE-LABEL: @loadCombine_4consecutive_badinsert3(
-; LE-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 1
+; LE-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 1
 ; LE-NEXT:    [[L1:%.*]] = load i32, ptr [[TMP1]], align 1
 ; LE-NEXT:    ret i32 [[L1]]
 ;
@@ -2306,7 +2306,7 @@ define i64 @loadCombine_nonConstShift2(ptr %arg, i8 %b) {
 
 define void @nested_gep(ptr %p, ptr %dest) {
 ; LE-LABEL: @nested_gep(
-; LE-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 68
+; LE-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 68
 ; LE-NEXT:    [[LD2:%.*]] = load i64, ptr [[TMP1]], align 4
 ; LE-NEXT:    [[TRUNC:%.*]] = trunc i64 [[LD2]] to i32
 ; LE-NEXT:    store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4
@@ -2346,7 +2346,7 @@ define void @nested_gep(ptr %p, ptr %dest) {
 
 define void @bitcast_gep(ptr %p, ptr %dest) {
 ; LE-LABEL: @bitcast_gep(
-; LE-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i32 68
+; LE-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 68
 ; LE-NEXT:    [[LD2:%.*]] = load i64, ptr [[TMP1]], align 4
 ; LE-NEXT:    [[TRUNC:%.*]] = trunc i64 [[LD2]] to i32
 ; LE-NEXT:    store i32 [[TRUNC]], ptr [[DEST:%.*]], align 4
@@ -2382,3 +2382,35 @@ define void @bitcast_gep(ptr %p, ptr %dest) {
   store i32 %trunc, ptr %dest, align 4
   ret void
 }
+
+define i32 @loadcombine_consecutive_idx_64(ptr %data) {
+; LE-LABEL: @loadcombine_consecutive_idx_64(
+; LE-NEXT:  entry:
+; LE-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr [[DATA:%.*]], i64 2149675576
+; LE-NEXT:    [[VAL_2:%.*]] = load i16, ptr [[TMP0]], align 1
+; LE-NEXT:    [[TMP1:%.*]] = zext i16 [[VAL_2]] to i32
+; LE-NEXT:    ret i32 [[TMP1]]
+;
+; BE-LABEL: @loadcombine_consecutive_idx_64(
+; BE-NEXT:  entry:
+; BE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i8, ptr [[DATA:%.*]], i64 2149675577
+; BE-NEXT:    [[VAL:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; BE-NEXT:    [[CONV:%.*]] = zext i8 [[VAL]] to i32
+; BE-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw i8, ptr [[DATA]], i64 2149675576
+; BE-NEXT:    [[VAL_2:%.*]] = load i8, ptr [[ARRAYIDX_2]], align 1
+; BE-NEXT:    [[CONV_2:%.*]] = zext i8 [[VAL_2]] to i32
+; BE-NEXT:    [[SHL:%.*]] = shl nuw nsw i32 [[CONV]], 8
+; BE-NEXT:    [[OR:%.*]] = or disjoint i32 [[SHL]], [[CONV_2]]
+; BE-NEXT:    ret i32 [[OR]]
+;
+entry:
+  %arrayidx = getelementptr inbounds nuw i8, ptr %data, i64 2149675577
+  %val = load i8, ptr %arrayidx, align 1
+  %conv = zext i8 %val to i32
+  %arrayidx.2 = getelementptr inbounds nuw i8, ptr %data, i64 2149675576
+  %val.2 = load i8, ptr %arrayidx.2, align 1
+  %conv.2 = zext i8 %val.2 to i32
+  %shl = shl nuw nsw i32 %conv, 8
+  %or = or disjoint i32 %shl, %conv.2
+  ret i32 %or
+}

From e21dc4bd5474d04b8e62d7331362edcc5648d7e5 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Wed, 27 Nov 2024 11:47:22 +0100
Subject: [PATCH 38/38] [SimpleLoopUnswitch] Fix LCSSA phi node invalidation

Fixes https://github.com/llvm/llvm-project/issues/117537.

(cherry picked from commit fc5c89900f2a4b50e0f3a88ef7c89115d93684f4)
---
 .../Transforms/Scalar/SimpleLoopUnswitch.cpp  |  5 +-
 .../Transforms/SimpleLoopUnswitch/pr117537.ll | 92 +++++++++++++++++++
 2 files changed, 95 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Transforms/SimpleLoopUnswitch/pr117537.ll

diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
index c235d2fb2a5bd..f99f4487c5540 100644
--- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
+++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
@@ -1249,8 +1249,9 @@ static BasicBlock *buildClonedLoopBlocks(
       assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
 
       // Forget SCEVs based on exit phis in case SCEV looked through the phi.
-      if (SE && isa<PHINode>(I))
-        SE->forgetValue(&I);
+      if (SE)
+        if (auto *PN = dyn_cast<PHINode>(&I))
+          SE->forgetLcssaPhiWithNewPredecessor(&L, PN);
 
       BasicBlock::iterator InsertPt = MergeBB->getFirstInsertionPt();
 
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/pr117537.ll b/llvm/test/Transforms/SimpleLoopUnswitch/pr117537.ll
new file mode 100644
index 0000000000000..fd61cfab164d3
--- /dev/null
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/pr117537.ll
@@ -0,0 +1,92 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes='print<scalar-evolution>,simple-loop-unswitch<nontrivial>,print<scalar-evolution>' -verify-scev < %s 2>/dev/null | FileCheck %s
+
+; Make sure we don't assert due to insufficient SCEV invalidation.
+
+define void @test(ptr %p) {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr [[P:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CHECK:%.*]] = icmp eq ptr [[P]], null
+; CHECK-NEXT:    br i1 [[CHECK]], label %[[ENTRY_SPLIT_US:.*]], label %[[ENTRY_SPLIT:.*]]
+; CHECK:       [[ENTRY_SPLIT_US]]:
+; CHECK-NEXT:    br label %[[BB0_US:.*]]
+; CHECK:       [[BB0_US]]:
+; CHECK-NEXT:    br label %[[LOOP0_US:.*]]
+; CHECK:       [[LOOP0_US]]:
+; CHECK-NEXT:    [[V_US:%.*]] = load atomic i32, ptr [[P]] unordered, align 8
+; CHECK-NEXT:    [[ADD_US:%.*]] = add i32 [[V_US]], 3
+; CHECK-NEXT:    br i1 true, label %[[PREHEADER_SPLIT_US:.*]], label %[[BB0_US]]
+; CHECK:       [[PREHEADER_SPLIT_US]]:
+; CHECK-NEXT:    [[ADD_LCSSA_US:%.*]] = phi i32 [ [[ADD_US]], %[[LOOP0_US]] ]
+; CHECK-NEXT:    br label %[[PREHEADER:.*]]
+; CHECK:       [[ENTRY_SPLIT]]:
+; CHECK-NEXT:    br label %[[BB0:.*]]
+; CHECK:       [[BB0]]:
+; CHECK-NEXT:    br label %[[LATCH:.*]]
+; CHECK:       [[LATCH]]:
+; CHECK-NEXT:    br i1 false, label %[[EXIT0:.*]], label %[[LOOP0:.*]]
+; CHECK:       [[EXIT0]]:
+; CHECK-NEXT:    ret void
+; CHECK:       [[LOOP0]]:
+; CHECK-NEXT:    [[V:%.*]] = load atomic i32, ptr [[P]] unordered, align 8
+; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[V]], 3
+; CHECK-NEXT:    br i1 true, label %[[PREHEADER_SPLIT:.*]], label %[[BB0]]
+; CHECK:       [[PREHEADER_SPLIT]]:
+; CHECK-NEXT:    [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], %[[LOOP0]] ]
+; CHECK-NEXT:    br label %[[PREHEADER]]
+; CHECK:       [[PREHEADER]]:
+; CHECK-NEXT:    [[DOTUS_PHI:%.*]] = phi i32 [ [[ADD_LCSSA]], %[[PREHEADER_SPLIT]] ], [ [[ADD_LCSSA_US]], %[[PREHEADER_SPLIT_US]] ]
+; CHECK-NEXT:    br label %[[LOOP1:.*]]
+; CHECK:       [[LOOP1]]:
+; CHECK-NEXT:    [[IV1:%.*]] = phi i32 [ [[DOTUS_PHI]], %[[PREHEADER]] ], [ [[IV1_NEXT:%.*]], %[[BACKEDGE:.*]] ]
+; CHECK-NEXT:    [[IV1_NEXT]] = add i32 [[IV1]], -33
+; CHECK-NEXT:    br label %[[LOOP2:.*]]
+; CHECK:       [[BACKEDGE]]:
+; CHECK-NEXT:    br i1 true, label %[[EXIT1:.*]], label %[[LOOP1]]
+; CHECK:       [[LOOP2]]:
+; CHECK-NEXT:    [[IV0:%.*]] = phi i32 [ [[IV1]], %[[LOOP1]] ], [ [[IV0_NEXT:%.*]], %[[LOOP2]] ]
+; CHECK-NEXT:    [[IV0_NEXT]] = add nsw i32 [[IV0]], 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[IV0_NEXT]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label %[[BACKEDGE]], label %[[LOOP2]]
+; CHECK:       [[EXIT1]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %check = icmp eq ptr %p, null
+  br label %bb0
+
+bb0:                                              ; preds = %loop0, %entry
+  br i1 %check, label %loop0, label %latch
+
+latch:                                            ; preds = %bb0
+  br i1 %check, label %exit0, label %loop0
+
+exit0:                                            ; preds = %latch
+  ret void
+
+loop0:                                            ; preds = %latch, %bb0
+  %v = load atomic i32, ptr %p unordered, align 8
+  %add = add i32 %v, 3
+  br i1 true, label %preheader, label %bb0
+
+preheader:                                        ; preds = %loop0
+  br label %loop1
+
+loop1:                                            ; preds = %backedge, %preheader
+  %iv1 = phi i32 [ %add, %preheader ], [ %iv1.next, %backedge ]
+  %iv1.next = add i32 %iv1, -33
+  br label %loop2
+
+backedge:                                         ; preds = %loop2
+  br i1 true, label %exit1, label %loop1
+
+loop2:                                            ; preds = %loop2, %loop1
+  %iv0 = phi i32 [ %iv1, %loop1 ], [ %iv0.next, %loop2 ]
+  %iv0.next = add nsw i32 %iv0, 1
+  %cmp = icmp sgt i32 %iv0.next, 0
+  br i1 %cmp, label %backedge, label %loop2
+
+exit1:                                            ; preds = %backedge
+  ret void
+}