[clang] [libc] [llvm] AMDGPU: Add a subtarget feature for fine-grained remote memory support (PR #96442)

Tue Jul 2 10:01:25 PDT 2024

https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/96442

>From cd95b668b34f5d0834b16f441ab131003988266b Mon Sep 17 00:00:00 2001
From: martinboehme <mboehme at google.com>
Date: Wed, 26 Jun 2024 15:01:57 +0200
Subject: [PATCH 01/14] [clang][dataflow] Teach `AnalysisASTVisitor` that
 `typeid()` can be evaluated. (#96731)

We were previously treating the operand of `typeid()` as being
definitely
unevaluated, but it can be evaluated if it is a glvalue of polymorphic
type.

This patch includes a test that fails without the fix.
---
 .../clang/Analysis/FlowSensitive/ASTOps.h     |  6 ++-
 .../Analysis/FlowSensitive/TransferTest.cpp   | 43 +++++++++++++++++++
 2 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/ASTOps.h b/clang/include/clang/Analysis/FlowSensitive/ASTOps.h
index 925b99af9141a..f9c923a36ad22 100644
--- a/clang/include/clang/Analysis/FlowSensitive/ASTOps.h
+++ b/clang/include/clang/Analysis/FlowSensitive/ASTOps.h
@@ -113,7 +113,11 @@ class AnalysisASTVisitor : public RecursiveASTVisitor<Derived> {
   // nevertheless it appears in the Clang CFG, so we don't exclude it here.
   bool TraverseDecltypeTypeLoc(DecltypeTypeLoc) { return true; }
   bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc) { return true; }
-  bool TraverseCXXTypeidExpr(CXXTypeidExpr *) { return true; }
+  bool TraverseCXXTypeidExpr(CXXTypeidExpr *TIE) {
+    if (TIE->isPotentiallyEvaluated())
+      return RecursiveASTVisitor<Derived>::TraverseCXXTypeidExpr(TIE);
+    return true;
+  }
   bool TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *) {
     return true;
   }
diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
index e743eefa5d458..39e7001393e5e 100644
--- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
@@ -1637,6 +1637,49 @@ TEST(TransferTest, StructModeledFieldsWithAccessor) {
       });
 }
 
+TEST(TransferTest, StructModeledFieldsInTypeid) {
+  // Test that we model fields mentioned inside a `typeid()` expression only if
+  // that expression is potentially evaluated -- i.e. if the expression inside
+  // `typeid()` is a glvalue of polymorphic type (see
+  // `CXXTypeidExpr::isPotentiallyEvaluated()` and [expr.typeid]p3).
+  std::string Code = R"(
+    // Definitions needed for `typeid`.
+    namespace std {
+      class type_info {};
+      class bad_typeid {};
+    }  // namespace std
+
+    struct NonPolymorphic {};
+
+    struct Polymorphic {
+      virtual ~Polymorphic() = default;
+    };
+
+    struct S {
+      NonPolymorphic *NonPoly;
+      Polymorphic *Poly;
+    };
+
+    void target(S &s) {
+      typeid(*s.NonPoly);
+      typeid(*s.Poly);
+      // [[p]]
+    }
+  )";
+  runDataflow(
+      Code,
+      [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
+         ASTContext &ASTCtx) {
+        const Environment &Env = getEnvironmentAtAnnotation(Results, "p");
+        auto &SLoc = getLocForDecl<RecordStorageLocation>(ASTCtx, Env, "s");
+        std::vector<const ValueDecl *> Fields;
+        for (auto [Field, _] : SLoc.children())
+          Fields.push_back(Field);
+        EXPECT_THAT(Fields,
+                    UnorderedElementsAre(findValueDecl(ASTCtx, "Poly")));
+      });
+}
+
 TEST(TransferTest, StructModeledFieldsWithComplicatedInheritance) {
   std::string Code = R"(
     struct Base1 {

>From 78b2083b4adbac0690ad845ea85ba29dfa88dd6e Mon Sep 17 00:00:00 2001
From: martinboehme <mboehme at google.com>
Date: Wed, 26 Jun 2024 15:40:42 +0200
Subject: [PATCH 02/14] Revert "[clang][dataflow] Teach `AnalysisASTVisitor`
 that `typeid()` can be evaluated." (#96766)

Reverts llvm/llvm-project#96731

It causes CI failures.
---
 .../clang/Analysis/FlowSensitive/ASTOps.h     |  6 +--
 .../Analysis/FlowSensitive/TransferTest.cpp   | 43 -------------------
 2 files changed, 1 insertion(+), 48 deletions(-)

diff --git a/clang/include/clang/Analysis/FlowSensitive/ASTOps.h b/clang/include/clang/Analysis/FlowSensitive/ASTOps.h
index f9c923a36ad22..925b99af9141a 100644
--- a/clang/include/clang/Analysis/FlowSensitive/ASTOps.h
+++ b/clang/include/clang/Analysis/FlowSensitive/ASTOps.h
@@ -113,11 +113,7 @@ class AnalysisASTVisitor : public RecursiveASTVisitor<Derived> {
   // nevertheless it appears in the Clang CFG, so we don't exclude it here.
   bool TraverseDecltypeTypeLoc(DecltypeTypeLoc) { return true; }
   bool TraverseTypeOfExprTypeLoc(TypeOfExprTypeLoc) { return true; }
-  bool TraverseCXXTypeidExpr(CXXTypeidExpr *TIE) {
-    if (TIE->isPotentiallyEvaluated())
-      return RecursiveASTVisitor<Derived>::TraverseCXXTypeidExpr(TIE);
-    return true;
-  }
+  bool TraverseCXXTypeidExpr(CXXTypeidExpr *) { return true; }
   bool TraverseUnaryExprOrTypeTraitExpr(UnaryExprOrTypeTraitExpr *) {
     return true;
   }
diff --git a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
index 39e7001393e5e..e743eefa5d458 100644
--- a/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
+++ b/clang/unittests/Analysis/FlowSensitive/TransferTest.cpp
@@ -1637,49 +1637,6 @@ TEST(TransferTest, StructModeledFieldsWithAccessor) {
       });
 }
 
-TEST(TransferTest, StructModeledFieldsInTypeid) {
-  // Test that we model fields mentioned inside a `typeid()` expression only if
-  // that expression is potentially evaluated -- i.e. if the expression inside
-  // `typeid()` is a glvalue of polymorphic type (see
-  // `CXXTypeidExpr::isPotentiallyEvaluated()` and [expr.typeid]p3).
-  std::string Code = R"(
-    // Definitions needed for `typeid`.
-    namespace std {
-      class type_info {};
-      class bad_typeid {};
-    }  // namespace std
-
-    struct NonPolymorphic {};
-
-    struct Polymorphic {
-      virtual ~Polymorphic() = default;
-    };
-
-    struct S {
-      NonPolymorphic *NonPoly;
-      Polymorphic *Poly;
-    };
-
-    void target(S &s) {
-      typeid(*s.NonPoly);
-      typeid(*s.Poly);
-      // [[p]]
-    }
-  )";
-  runDataflow(
-      Code,
-      [](const llvm::StringMap<DataflowAnalysisState<NoopLattice>> &Results,
-         ASTContext &ASTCtx) {
-        const Environment &Env = getEnvironmentAtAnnotation(Results, "p");
-        auto &SLoc = getLocForDecl<RecordStorageLocation>(ASTCtx, Env, "s");
-        std::vector<const ValueDecl *> Fields;
-        for (auto [Field, _] : SLoc.children())
-          Fields.push_back(Field);
-        EXPECT_THAT(Fields,
-                    UnorderedElementsAre(findValueDecl(ASTCtx, "Poly")));
-      });
-}
-
 TEST(TransferTest, StructModeledFieldsWithComplicatedInheritance) {
   std::string Code = R"(
     struct Base1 {

>From fd278aadc31becd99c2845adf36860c03591f935 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Wed, 26 Jun 2024 11:24:48 -0500
Subject: [PATCH 03/14] [libc] Fix Fuscia builder failing on atomic warnings
 (#96791)

Summary:
This function uses atomics now, which emit warnings on some platforms
that don't support full lock-free atomics. These aren't specifically
wrong, and in the future we could investigate a libc configuration
specialized for single-threaded microprocessors, but for now we should
get the bot running again.
---
 libc/src/stdlib/rand.cpp  | 6 ++++++
 libc/src/stdlib/srand.cpp | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/libc/src/stdlib/rand.cpp b/libc/src/stdlib/rand.cpp
index ff3875c2f6959..8f2ae90336d51 100644
--- a/libc/src/stdlib/rand.cpp
+++ b/libc/src/stdlib/rand.cpp
@@ -13,6 +13,10 @@
 
 namespace LIBC_NAMESPACE {
 
+// Silence warnings on targets with slow atomics.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Watomic-alignment"
+
 // An implementation of the xorshift64star pseudo random number generator. This
 // is a good general purpose generator for most non-cryptographics applications.
 LLVM_LIBC_FUNCTION(int, rand, (void)) {
@@ -29,4 +33,6 @@ LLVM_LIBC_FUNCTION(int, rand, (void)) {
   }
 }
 
+#pragma GCC diagnostic pop
+
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdlib/srand.cpp b/libc/src/stdlib/srand.cpp
index 21166c7a6754e..681aad8fac4e8 100644
--- a/libc/src/stdlib/srand.cpp
+++ b/libc/src/stdlib/srand.cpp
@@ -12,8 +12,14 @@
 
 namespace LIBC_NAMESPACE {
 
+// Silence warnings on targets with slow atomics.
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Watomic-alignment"
+
 LLVM_LIBC_FUNCTION(void, srand, (unsigned int seed)) {
   rand_next.store(seed, cpp::MemoryOrder::RELAXED);
 }
 
+#pragma GCC diagnostic pop
+
 } // namespace LIBC_NAMESPACE

>From eb0a391745d2c0e73a40897cda8dc760694f1945 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 26 Jun 2024 09:53:13 -0700
Subject: [PATCH 04/14] [RISCV][GISel] Support fcmp and fclass for Zfh.
 (#96696)

---
 .../Target/RISCV/GISel/RISCVLegalizerInfo.cpp   | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index f033ea7250030..f9aac8ced5cde 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -404,13 +404,20 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
                 typeIs(1, s16)(Query));
       });
 
-  getActionDefinitionsBuilder(G_FCMP)
-      .legalIf(all(typeIs(0, sXLen), typeIsScalarFPArith(1, ST)))
-      .clampScalar(0, sXLen, sXLen);
+  auto &FCmpActions = getActionDefinitionsBuilder(G_FCMP).legalIf(
+      all(typeIs(0, sXLen), typeIsScalarFPArith(1, ST)));
+  // TODO: Fold this into typeIsScalarFPArith.
+  if (ST.hasStdExtZfh())
+    FCmpActions.legalFor({sXLen, s16});
+  FCmpActions.clampScalar(0, sXLen, sXLen);
 
   // TODO: Support vector version of G_IS_FPCLASS.
-  getActionDefinitionsBuilder(G_IS_FPCLASS)
-      .customIf(all(typeIs(0, s1), typeIsScalarFPArith(1, ST)));
+  auto &FClassActions =
+      getActionDefinitionsBuilder(G_IS_FPCLASS)
+          .customIf(all(typeIs(0, s1), typeIsScalarFPArith(1, ST)));
+  // TODO: Fold this into typeIsScalarFPArith.
+  if (ST.hasStdExtZfh())
+    FClassActions.customFor({s1, s16});
 
   getActionDefinitionsBuilder(G_FCONSTANT)
       .legalIf(typeIsScalarFPArith(0, ST))

>From 116db1f9963605d3b9ba6af44b970b5cdb99b0a2 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 26 Jun 2024 09:53:37 -0700
Subject: [PATCH 05/14] [RISCV][GISel] Support fptoi and itofp for Zfh.
 (#96707)

---
 .../Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 21 +++++++++++--------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index f9aac8ced5cde..4b634d2ac5384 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -423,16 +423,19 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
       .legalIf(typeIsScalarFPArith(0, ST))
       .lowerFor({s32, s64});
 
-  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
-      .legalIf(all(typeInSet(0, {s32, sXLen}), typeIsScalarFPArith(1, ST)))
-      .widenScalarToNextPow2(0)
-      .clampScalar(0, s32, sXLen)
-      .libcall();
+  auto &FPToIActions =
+      getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
+          .legalIf(all(typeInSet(0, {s32, sXLen}), typeIsScalarFPArith(1, ST)));
+  if (ST.hasStdExtZfh())
+    FPToIActions.legalFor({{s32, s16}, {sXLen, s16}});
+  FPToIActions.widenScalarToNextPow2(0).clampScalar(0, s32, sXLen).libcall();
 
-  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
-      .legalIf(all(typeIsScalarFPArith(0, ST), typeInSet(1, {s32, sXLen})))
-      .widenScalarToNextPow2(1)
-      .clampScalar(1, s32, sXLen);
+  auto &IToFPActions =
+      getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
+          .legalIf(all(typeIsScalarFPArith(0, ST), typeInSet(1, {s32, sXLen})));
+  if (ST.hasStdExtZfh())
+    IToFPActions.legalFor({{s16, s32}, {s16, sXLen}});
+  IToFPActions.widenScalarToNextPow2(1).clampScalar(1, s32, sXLen);
 
   // FIXME: We can do custom inline expansion like SelectionDAG.
   // FIXME: Legal with Zfa.

>From e15d28d8536bcf6977af0e92656fec8fe18da38f Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Wed, 26 Jun 2024 10:44:10 -0700
Subject: [PATCH 06/14] [RISCV] Add Zfh to typeIsScalarFPArith and simplify
 code. NFC

Now that we have Zfh supported and tested for every opcode that uses
typeIsScalarFPArith, we can fold Zfh into it.
---
 .../Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 40 +++++++------------
 1 file changed, 15 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index 4b634d2ac5384..f033ea7250030 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -404,38 +404,28 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
                 typeIs(1, s16)(Query));
       });
 
-  auto &FCmpActions = getActionDefinitionsBuilder(G_FCMP).legalIf(
-      all(typeIs(0, sXLen), typeIsScalarFPArith(1, ST)));
-  // TODO: Fold this into typeIsScalarFPArith.
-  if (ST.hasStdExtZfh())
-    FCmpActions.legalFor({sXLen, s16});
-  FCmpActions.clampScalar(0, sXLen, sXLen);
+  getActionDefinitionsBuilder(G_FCMP)
+      .legalIf(all(typeIs(0, sXLen), typeIsScalarFPArith(1, ST)))
+      .clampScalar(0, sXLen, sXLen);
 
   // TODO: Support vector version of G_IS_FPCLASS.
-  auto &FClassActions =
-      getActionDefinitionsBuilder(G_IS_FPCLASS)
-          .customIf(all(typeIs(0, s1), typeIsScalarFPArith(1, ST)));
-  // TODO: Fold this into typeIsScalarFPArith.
-  if (ST.hasStdExtZfh())
-    FClassActions.customFor({s1, s16});
+  getActionDefinitionsBuilder(G_IS_FPCLASS)
+      .customIf(all(typeIs(0, s1), typeIsScalarFPArith(1, ST)));
 
   getActionDefinitionsBuilder(G_FCONSTANT)
       .legalIf(typeIsScalarFPArith(0, ST))
       .lowerFor({s32, s64});
 
-  auto &FPToIActions =
-      getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
-          .legalIf(all(typeInSet(0, {s32, sXLen}), typeIsScalarFPArith(1, ST)));
-  if (ST.hasStdExtZfh())
-    FPToIActions.legalFor({{s32, s16}, {sXLen, s16}});
-  FPToIActions.widenScalarToNextPow2(0).clampScalar(0, s32, sXLen).libcall();
-
-  auto &IToFPActions =
-      getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
-          .legalIf(all(typeIsScalarFPArith(0, ST), typeInSet(1, {s32, sXLen})));
-  if (ST.hasStdExtZfh())
-    IToFPActions.legalFor({{s16, s32}, {s16, sXLen}});
-  IToFPActions.widenScalarToNextPow2(1).clampScalar(1, s32, sXLen);
+  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
+      .legalIf(all(typeInSet(0, {s32, sXLen}), typeIsScalarFPArith(1, ST)))
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, s32, sXLen)
+      .libcall();
+
+  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
+      .legalIf(all(typeIsScalarFPArith(0, ST), typeInSet(1, {s32, sXLen})))
+      .widenScalarToNextPow2(1)
+      .clampScalar(1, s32, sXLen);
 
   // FIXME: We can do custom inline expansion like SelectionDAG.
   // FIXME: Legal with Zfa.

>From 61674f27fc8b1ab41ae31e8139ad8c11a5355e69 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Wed, 26 Jun 2024 21:48:38 +0100
Subject: [PATCH 07/14] [ADT] Always use 32-bit size type for SmallVector with
 16-bit elements (#95536)

`SmallVector` has a special case to allow vector of char to exceed 4 GB
in
size on 64-bit hosts. Apply this special case only for 8-bit element
types, instead of all element types < 32 bits.

This makes `SmallVector<MCPhysReg>` more compact because `MCPhysReg` is
`uint16_t`.

---------

Co-authored-by: Nikita Popov <github at npopov.com>
---
 llvm/include/llvm/ADT/SmallVector.h |  3 +--
 llvm/lib/Support/SmallVector.cpp    | 12 ++++++++----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h
index 09676d792dfeb..db34b16ecf9e7 100644
--- a/llvm/include/llvm/ADT/SmallVector.h
+++ b/llvm/include/llvm/ADT/SmallVector.h
@@ -116,8 +116,7 @@ template <class Size_T> class SmallVectorBase {
 
 template <class T>
 using SmallVectorSizeType =
-    std::conditional_t<sizeof(T) < 4 && sizeof(void *) >= 8, uint64_t,
-                       uint32_t>;
+    std::conditional_t<sizeof(T) == 1, size_t, uint32_t>;
 
 /// Figure out the offset of the first element.
 template <class T, typename = void> struct SmallVectorAlignmentAndSize {
diff --git a/llvm/lib/Support/SmallVector.cpp b/llvm/lib/Support/SmallVector.cpp
index b6ce37842040b..e77b747984173 100644
--- a/llvm/lib/Support/SmallVector.cpp
+++ b/llvm/lib/Support/SmallVector.cpp
@@ -37,9 +37,7 @@ struct Struct32B {
 #pragma GCC diagnostic pop
 #endif
 }
-static_assert(sizeof(SmallVector<void *, 0>) ==
-                  sizeof(unsigned) * 2 + sizeof(void *),
-              "wasted space in SmallVector size 0");
+
 static_assert(alignof(SmallVector<Struct16B, 0>) >= alignof(Struct16B),
               "wrong alignment for 16-byte aligned T");
 static_assert(alignof(SmallVector<Struct32B, 0>) >= alignof(Struct32B),
@@ -48,13 +46,19 @@ static_assert(sizeof(SmallVector<Struct16B, 0>) >= alignof(Struct16B),
               "missing padding for 16-byte aligned T");
 static_assert(sizeof(SmallVector<Struct32B, 0>) >= alignof(Struct32B),
               "missing padding for 32-byte aligned T");
+
+static_assert(sizeof(SmallVector<void *, 0>) ==
+                  sizeof(unsigned) * 2 + sizeof(void *),
+              "wasted space in SmallVector size 0");
 static_assert(sizeof(SmallVector<void *, 1>) ==
                   sizeof(unsigned) * 2 + sizeof(void *) * 2,
               "wasted space in SmallVector size 1");
-
 static_assert(sizeof(SmallVector<char, 0>) ==
                   sizeof(void *) * 2 + sizeof(void *),
               "1 byte elements have word-sized type for size and capacity");
+static_assert(sizeof(SmallVector<int16_t, 0>) ==
+                  sizeof(unsigned) * 2 + sizeof(void *),
+              "2 byte elements have 32-bit type for size and capacity");
 
 /// Report that MinSize doesn't fit into this vector's size type. Throws
 /// std::length_error or calls report_fatal_error.

>From 0d1e0e52851bfa1aaffbf1e1c7205739adb2e903 Mon Sep 17 00:00:00 2001
From: Chelsea Cassanova <chelsea_cassanova at apple.com>
Date: Wed, 26 Jun 2024 15:24:51 -0700
Subject: [PATCH 08/14] Revert "[ADT] Always use 32-bit size type for
 SmallVector with 16-bit elements" (#96826)

Reverts llvm/llvm-project#95536, this is breaking macOS GreenDragon
buildbots on arm64 and x86_64
https://green.lab.llvm.org/job/llvm.org/view/LLDB/job/as-lldb-cmake/6522/console,
also breaks the Darwin LLVM buildbots:
https://lab.llvm.org/buildbot/#/builders/23/builds/398
---
 llvm/include/llvm/ADT/SmallVector.h |  3 ++-
 llvm/lib/Support/SmallVector.cpp    | 12 ++++--------
 2 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/ADT/SmallVector.h b/llvm/include/llvm/ADT/SmallVector.h
index db34b16ecf9e7..09676d792dfeb 100644
--- a/llvm/include/llvm/ADT/SmallVector.h
+++ b/llvm/include/llvm/ADT/SmallVector.h
@@ -116,7 +116,8 @@ template <class Size_T> class SmallVectorBase {
 
 template <class T>
 using SmallVectorSizeType =
-    std::conditional_t<sizeof(T) == 1, size_t, uint32_t>;
+    std::conditional_t<sizeof(T) < 4 && sizeof(void *) >= 8, uint64_t,
+                       uint32_t>;
 
 /// Figure out the offset of the first element.
 template <class T, typename = void> struct SmallVectorAlignmentAndSize {
diff --git a/llvm/lib/Support/SmallVector.cpp b/llvm/lib/Support/SmallVector.cpp
index e77b747984173..b6ce37842040b 100644
--- a/llvm/lib/Support/SmallVector.cpp
+++ b/llvm/lib/Support/SmallVector.cpp
@@ -37,7 +37,9 @@ struct Struct32B {
 #pragma GCC diagnostic pop
 #endif
 }
-
+static_assert(sizeof(SmallVector<void *, 0>) ==
+                  sizeof(unsigned) * 2 + sizeof(void *),
+              "wasted space in SmallVector size 0");
 static_assert(alignof(SmallVector<Struct16B, 0>) >= alignof(Struct16B),
               "wrong alignment for 16-byte aligned T");
 static_assert(alignof(SmallVector<Struct32B, 0>) >= alignof(Struct32B),
@@ -46,19 +48,13 @@ static_assert(sizeof(SmallVector<Struct16B, 0>) >= alignof(Struct16B),
               "missing padding for 16-byte aligned T");
 static_assert(sizeof(SmallVector<Struct32B, 0>) >= alignof(Struct32B),
               "missing padding for 32-byte aligned T");
-
-static_assert(sizeof(SmallVector<void *, 0>) ==
-                  sizeof(unsigned) * 2 + sizeof(void *),
-              "wasted space in SmallVector size 0");
 static_assert(sizeof(SmallVector<void *, 1>) ==
                   sizeof(unsigned) * 2 + sizeof(void *) * 2,
               "wasted space in SmallVector size 1");
+
 static_assert(sizeof(SmallVector<char, 0>) ==
                   sizeof(void *) * 2 + sizeof(void *),
               "1 byte elements have word-sized type for size and capacity");
-static_assert(sizeof(SmallVector<int16_t, 0>) ==
-                  sizeof(unsigned) * 2 + sizeof(void *),
-              "2 byte elements have 32-bit type for size and capacity");
 
 /// Report that MinSize doesn't fit into this vector's size type. Throws
 /// std::length_error or calls report_fatal_error.

>From a40e4736b5888f646d9779423301e7717ad1ba9a Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Thu, 27 Jun 2024 06:49:40 +0800
Subject: [PATCH 09/14] Revert "[SimplifyCFG] Forward indirect switch condition
 value if it can help fold the PHI (#95932)"

This reverts commit 0c56fd0a29ffb0425ca2ee2a4ff8f380880fdbfa.
This is breaking https://lab.llvm.org/buildbot/#/builders/72/builds/483.
---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     |   3 +-
 llvm/test/CodeGen/AArch64/arm64-jumptable.ll  |   2 +-
 .../ForwardSwitchConditionToPHI.ll            | 127 ------------------
 .../Hexagon/switch-to-lookup-table.ll         |   4 +-
 4 files changed, 4 insertions(+), 132 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 69daa6352f95e..a73976bba56f5 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5818,8 +5818,7 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
   for (auto &ForwardingNode : ForwardingNodes) {
     PHINode *Phi = ForwardingNode.first;
     SmallVectorImpl<int> &Indexes = ForwardingNode.second;
-    // Check if it helps to fold PHI.
-    if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
+    if (Indexes.size() < 2)
       continue;
 
     for (int Index : Indexes)
diff --git a/llvm/test/CodeGen/AArch64/arm64-jumptable.ll b/llvm/test/CodeGen/AArch64/arm64-jumptable.ll
index 40b8f721c0505..7d9adf92a6a95 100644
--- a/llvm/test/CodeGen/AArch64/arm64-jumptable.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-jumptable.ll
@@ -18,7 +18,7 @@ bb3:
 bb4:
   br label %exit.sink.split
 exit.sink.split:
-  %.sink = phi i32 [ 5, %bb4 ], [ %b, %bb1 ], [ 7, %bb3 ], [ %a, %entry ]
+  %.sink = phi i32 [ 5, %bb4 ], [ %b, %bb1 ], [ 3, %bb3 ], [ %a, %entry ]
   store i32 %.sink, ptr %to
   br label %exit
 exit:
diff --git a/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll b/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll
index 8ad455eb9e7f2..5c266f4867398 100644
--- a/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll
+++ b/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll
@@ -59,76 +59,6 @@ return:                                           ; preds = %entry, %sw.bb4, %sw
   ret i32 %retval.0
 }
 
-; We should not forward `%m` to 1, as this does not simplify the CFG.
-define i32 @forward_one(i32 %m) {
-; NO_FWD-LABEL: @forward_one(
-; NO_FWD-NEXT:  entry:
-; NO_FWD-NEXT:    switch i32 [[M:%.*]], label [[SW_BB4:%.*]] [
-; NO_FWD-NEXT:      i32 0, label [[RETURN:%.*]]
-; NO_FWD-NEXT:      i32 1, label [[SW_BB1:%.*]]
-; NO_FWD-NEXT:      i32 2, label [[SW_BB2:%.*]]
-; NO_FWD-NEXT:      i32 3, label [[SW_BB3:%.*]]
-; NO_FWD-NEXT:    ]
-; NO_FWD:       sw.bb1:
-; NO_FWD-NEXT:    br label [[RETURN]]
-; NO_FWD:       sw.bb2:
-; NO_FWD-NEXT:    br label [[RETURN]]
-; NO_FWD:       sw.bb3:
-; NO_FWD-NEXT:    br label [[RETURN]]
-; NO_FWD:       sw.bb4:
-; NO_FWD-NEXT:    br label [[RETURN]]
-; NO_FWD:       return:
-; NO_FWD-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ 4, [[SW_BB4]] ], [ 5, [[SW_BB3]] ], [ 6, [[SW_BB2]] ], [ 1, [[SW_BB1]] ], [ 8, [[ENTRY:%.*]] ]
-; NO_FWD-NEXT:    ret i32 [[RETVAL_0]]
-;
-; FWD-LABEL: @forward_one(
-; FWD-NEXT:  entry:
-; FWD-NEXT:    switch i32 [[M:%.*]], label [[SW_BB4:%.*]] [
-; FWD-NEXT:      i32 0, label [[RETURN:%.*]]
-; FWD-NEXT:      i32 1, label [[SW_BB1:%.*]]
-; FWD-NEXT:      i32 2, label [[SW_BB2:%.*]]
-; FWD-NEXT:      i32 3, label [[SW_BB3:%.*]]
-; FWD-NEXT:    ]
-; FWD:       sw.bb1:
-; FWD-NEXT:    br label [[RETURN]]
-; FWD:       sw.bb2:
-; FWD-NEXT:    br label [[RETURN]]
-; FWD:       sw.bb3:
-; FWD-NEXT:    br label [[RETURN]]
-; FWD:       sw.bb4:
-; FWD-NEXT:    br label [[RETURN]]
-; FWD:       return:
-; FWD-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ 4, [[SW_BB4]] ], [ 5, [[SW_BB3]] ], [ 6, [[SW_BB2]] ], [ 1, [[SW_BB1]] ], [ 8, [[ENTRY:%.*]] ]
-; FWD-NEXT:    ret i32 [[RETVAL_0]]
-;
-entry:
-  switch i32 %m, label %sw.bb4 [
-  i32 0, label %sw.bb0
-  i32 1, label %sw.bb1
-  i32 2, label %sw.bb2
-  i32 3, label %sw.bb3
-  ]
-
-sw.bb0:                                           ; preds = %entry
-  br label %return
-
-sw.bb1:                                           ; preds = %entry
-  br label %return
-
-sw.bb2:                                           ; preds = %entry
-  br label %return
-
-sw.bb3:                                           ; preds = %entry
-  br label %return
-
-sw.bb4:                                           ; preds = %entry
-  br label %return
-
-return:                                           ; preds = %entry, %sw.bb4, %sw.bb3, %sw.bb2, %sw.bb1
-  %retval.0 = phi i32 [ 4, %sw.bb4 ], [ 5, %sw.bb3 ], [ 6, %sw.bb2 ], [ 1, %sw.bb1 ], [ 8, %sw.bb0 ]
-  ret i32 %retval.0
-}
-
 ; If 1 incoming phi value is a case constant of a switch, convert it to the switch condition:
 ; https://bugs.llvm.org/show_bug.cgi?id=34471
 ; This then subsequently should allow squashing of the other trivial case blocks.
@@ -185,60 +115,3 @@ return:
   ret i32 %r
 }
 
-; We can replace `[ 1, %bb2 ]` with `[ %arg1, %bb2 ]`.
-define { i64, i64 } @PR95919(i64 noundef %arg, i64 noundef %arg1) {
-; NO_FWD-LABEL: @PR95919(
-; NO_FWD-NEXT:  bb:
-; NO_FWD-NEXT:    switch i64 [[ARG1:%.*]], label [[BB3:%.*]] [
-; NO_FWD-NEXT:      i64 0, label [[BB5:%.*]]
-; NO_FWD-NEXT:      i64 1, label [[BB2:%.*]]
-; NO_FWD-NEXT:    ]
-; NO_FWD:       bb2:
-; NO_FWD-NEXT:    br label [[BB5]]
-; NO_FWD:       bb3:
-; NO_FWD-NEXT:    [[I:%.*]] = udiv i64 [[ARG:%.*]], [[ARG1]]
-; NO_FWD-NEXT:    [[I4:%.*]] = shl nuw i64 [[I]], 1
-; NO_FWD-NEXT:    br label [[BB5]]
-; NO_FWD:       bb5:
-; NO_FWD-NEXT:    [[I6:%.*]] = phi i64 [ [[I4]], [[BB3]] ], [ [[ARG]], [[BB2]] ], [ undef, [[BB:%.*]] ]
-; NO_FWD-NEXT:    [[I7:%.*]] = phi i64 [ 1, [[BB3]] ], [ 1, [[BB2]] ], [ [[ARG1]], [[BB]] ]
-; NO_FWD-NEXT:    [[I8:%.*]] = insertvalue { i64, i64 } poison, i64 [[I7]], 0
-; NO_FWD-NEXT:    [[I9:%.*]] = insertvalue { i64, i64 } [[I8]], i64 [[I6]], 1
-; NO_FWD-NEXT:    ret { i64, i64 } [[I9]]
-;
-; FWD-LABEL: @PR95919(
-; FWD-NEXT:  bb:
-; FWD-NEXT:    [[SWITCH:%.*]] = icmp ult i64 [[ARG1:%.*]], 2
-; FWD-NEXT:    br i1 [[SWITCH]], label [[BB5:%.*]], label [[BB3:%.*]]
-; FWD:       bb3:
-; FWD-NEXT:    [[I:%.*]] = udiv i64 [[ARG:%.*]], [[ARG1]]
-; FWD-NEXT:    [[I4:%.*]] = shl nuw i64 [[I]], 1
-; FWD-NEXT:    br label [[BB5]]
-; FWD:       bb5:
-; FWD-NEXT:    [[I6:%.*]] = phi i64 [ [[I4]], [[BB3]] ], [ [[ARG]], [[BB:%.*]] ]
-; FWD-NEXT:    [[I7:%.*]] = phi i64 [ 1, [[BB3]] ], [ [[ARG1]], [[BB]] ]
-; FWD-NEXT:    [[I8:%.*]] = insertvalue { i64, i64 } poison, i64 [[I7]], 0
-; FWD-NEXT:    [[I9:%.*]] = insertvalue { i64, i64 } [[I8]], i64 [[I6]], 1
-; FWD-NEXT:    ret { i64, i64 } [[I9]]
-;
-bb:
-  switch i64 %arg1, label %bb3 [
-  i64 0, label %bb5
-  i64 1, label %bb2
-  ]
-
-bb2: ; preds = %bb
-  br label %bb5
-
-bb3: ; preds = %bb
-  %i = udiv i64 %arg, %arg1
-  %i4 = shl nuw i64 %i, 1
-  br label %bb5
-
-bb5: ; preds = %bb3, %bb2, %bb
-  %i6 = phi i64 [ %i4, %bb3 ], [ %arg, %bb2 ], [ undef, %bb ]
-  %i7 = phi i64 [ 1, %bb3 ], [ 1, %bb2 ], [ %arg1, %bb ]
-  %i8 = insertvalue { i64, i64 } poison, i64 %i7, 0
-  %i9 = insertvalue { i64, i64 } %i8, i64 %i6, 1
-  ret { i64, i64 } %i9
-}
diff --git a/llvm/test/Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll b/llvm/test/Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll
index b3b6abe314e6f..47dd24e194c39 100644
--- a/llvm/test/Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll
+++ b/llvm/test/Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll
@@ -43,7 +43,7 @@ define i32 @foo(i32 %x) #0 section ".tcm_text" {
 ; DISABLE:       sw.default:
 ; DISABLE-NEXT:    br label [[RETURN]]
 ; DISABLE:       return:
-; DISABLE-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ 19, [[SW_DEFAULT]] ], [ 33, [[SW_BB5]] ], [ 12, [[SW_BB4]] ], [ 22, [[SW_BB3]] ], [ 14, [[SW_BB2]] ], [ 20, [[SW_BB1]] ], [ 9, [[ENTRY:%.*]] ]
+; DISABLE-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ 19, [[SW_DEFAULT]] ], [ 5, [[SW_BB5]] ], [ 12, [[SW_BB4]] ], [ 22, [[SW_BB3]] ], [ 14, [[SW_BB2]] ], [ 20, [[SW_BB1]] ], [ 9, [[ENTRY:%.*]] ]
 ; DISABLE-NEXT:    ret i32 [[RETVAL_0]]
 ;
 entry:
@@ -81,7 +81,7 @@ sw.bb4:                                           ; preds = %entry
   br label %return
 
 sw.bb5:                                           ; preds = %entry
-  store i32 33, ptr %retval, align 4
+  store i32 5, ptr %retval, align 4
   br label %return
 
 sw.default:                                       ; preds = %entry

>From 4ee87b32a68172ee80f7f911b12ffc469eeb1887 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Wed, 26 Jun 2024 18:12:46 -0500
Subject: [PATCH 10/14] [libc] Remove atomic alignment diagnostics globally
 (#96803)

Summary:
These warnings mean that it will lower to a libcall. Previously we just
disabled it locally, which didn't work with GCC. This patch does it
globally in the compiler options if the compiler is clang.
---
 libc/src/stdlib/rand.cpp  | 6 ------
 libc/src/stdlib/srand.cpp | 6 ------
 2 files changed, 12 deletions(-)

diff --git a/libc/src/stdlib/rand.cpp b/libc/src/stdlib/rand.cpp
index 8f2ae90336d51..ff3875c2f6959 100644
--- a/libc/src/stdlib/rand.cpp
+++ b/libc/src/stdlib/rand.cpp
@@ -13,10 +13,6 @@
 
 namespace LIBC_NAMESPACE {
 
-// Silence warnings on targets with slow atomics.
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Watomic-alignment"
-
 // An implementation of the xorshift64star pseudo random number generator. This
 // is a good general purpose generator for most non-cryptographics applications.
 LLVM_LIBC_FUNCTION(int, rand, (void)) {
@@ -33,6 +29,4 @@ LLVM_LIBC_FUNCTION(int, rand, (void)) {
   }
 }
 
-#pragma GCC diagnostic pop
-
 } // namespace LIBC_NAMESPACE
diff --git a/libc/src/stdlib/srand.cpp b/libc/src/stdlib/srand.cpp
index 681aad8fac4e8..21166c7a6754e 100644
--- a/libc/src/stdlib/srand.cpp
+++ b/libc/src/stdlib/srand.cpp
@@ -12,14 +12,8 @@
 
 namespace LIBC_NAMESPACE {
 
-// Silence warnings on targets with slow atomics.
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Watomic-alignment"
-
 LLVM_LIBC_FUNCTION(void, srand, (unsigned int seed)) {
   rand_next.store(seed, cpp::MemoryOrder::RELAXED);
 }
 
-#pragma GCC diagnostic pop
-
 } // namespace LIBC_NAMESPACE

>From 728cba9d0522d4f8795617554399114d2ec29f9e Mon Sep 17 00:00:00 2001
From: DianQK <dianqk at dianqk.net>
Date: Thu, 27 Jun 2024 07:23:59 +0800
Subject: [PATCH 11/14] Reapply "[SimplifyCFG] Forward indirect switch
 condition value if it can help fold the PHI (#95932)"

This reverts commit c7adfb5e715334c9de176f434088bd9f89aa9eb3.
---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     |   3 +-
 llvm/test/CodeGen/AArch64/arm64-jumptable.ll  |   2 +-
 .../ForwardSwitchConditionToPHI.ll            | 127 ++++++++++++++++++
 .../Hexagon/switch-to-lookup-table.ll         |   4 +-
 4 files changed, 132 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index a73976bba56f5..69daa6352f95e 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -5818,7 +5818,8 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
   for (auto &ForwardingNode : ForwardingNodes) {
     PHINode *Phi = ForwardingNode.first;
     SmallVectorImpl<int> &Indexes = ForwardingNode.second;
-    if (Indexes.size() < 2)
+    // Check if it helps to fold PHI.
+    if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
       continue;
 
     for (int Index : Indexes)
diff --git a/llvm/test/CodeGen/AArch64/arm64-jumptable.ll b/llvm/test/CodeGen/AArch64/arm64-jumptable.ll
index 7d9adf92a6a95..40b8f721c0505 100644
--- a/llvm/test/CodeGen/AArch64/arm64-jumptable.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-jumptable.ll
@@ -18,7 +18,7 @@ bb3:
 bb4:
   br label %exit.sink.split
 exit.sink.split:
-  %.sink = phi i32 [ 5, %bb4 ], [ %b, %bb1 ], [ 3, %bb3 ], [ %a, %entry ]
+  %.sink = phi i32 [ 5, %bb4 ], [ %b, %bb1 ], [ 7, %bb3 ], [ %a, %entry ]
   store i32 %.sink, ptr %to
   br label %exit
 exit:
diff --git a/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll b/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll
index 5c266f4867398..8ad455eb9e7f2 100644
--- a/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll
+++ b/llvm/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll
@@ -59,6 +59,76 @@ return:                                           ; preds = %entry, %sw.bb4, %sw
   ret i32 %retval.0
 }
 
+; We should not forward `%m` to 1, as this does not simplify the CFG.
+define i32 @forward_one(i32 %m) {
+; NO_FWD-LABEL: @forward_one(
+; NO_FWD-NEXT:  entry:
+; NO_FWD-NEXT:    switch i32 [[M:%.*]], label [[SW_BB4:%.*]] [
+; NO_FWD-NEXT:      i32 0, label [[RETURN:%.*]]
+; NO_FWD-NEXT:      i32 1, label [[SW_BB1:%.*]]
+; NO_FWD-NEXT:      i32 2, label [[SW_BB2:%.*]]
+; NO_FWD-NEXT:      i32 3, label [[SW_BB3:%.*]]
+; NO_FWD-NEXT:    ]
+; NO_FWD:       sw.bb1:
+; NO_FWD-NEXT:    br label [[RETURN]]
+; NO_FWD:       sw.bb2:
+; NO_FWD-NEXT:    br label [[RETURN]]
+; NO_FWD:       sw.bb3:
+; NO_FWD-NEXT:    br label [[RETURN]]
+; NO_FWD:       sw.bb4:
+; NO_FWD-NEXT:    br label [[RETURN]]
+; NO_FWD:       return:
+; NO_FWD-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ 4, [[SW_BB4]] ], [ 5, [[SW_BB3]] ], [ 6, [[SW_BB2]] ], [ 1, [[SW_BB1]] ], [ 8, [[ENTRY:%.*]] ]
+; NO_FWD-NEXT:    ret i32 [[RETVAL_0]]
+;
+; FWD-LABEL: @forward_one(
+; FWD-NEXT:  entry:
+; FWD-NEXT:    switch i32 [[M:%.*]], label [[SW_BB4:%.*]] [
+; FWD-NEXT:      i32 0, label [[RETURN:%.*]]
+; FWD-NEXT:      i32 1, label [[SW_BB1:%.*]]
+; FWD-NEXT:      i32 2, label [[SW_BB2:%.*]]
+; FWD-NEXT:      i32 3, label [[SW_BB3:%.*]]
+; FWD-NEXT:    ]
+; FWD:       sw.bb1:
+; FWD-NEXT:    br label [[RETURN]]
+; FWD:       sw.bb2:
+; FWD-NEXT:    br label [[RETURN]]
+; FWD:       sw.bb3:
+; FWD-NEXT:    br label [[RETURN]]
+; FWD:       sw.bb4:
+; FWD-NEXT:    br label [[RETURN]]
+; FWD:       return:
+; FWD-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ 4, [[SW_BB4]] ], [ 5, [[SW_BB3]] ], [ 6, [[SW_BB2]] ], [ 1, [[SW_BB1]] ], [ 8, [[ENTRY:%.*]] ]
+; FWD-NEXT:    ret i32 [[RETVAL_0]]
+;
+entry:
+  switch i32 %m, label %sw.bb4 [
+  i32 0, label %sw.bb0
+  i32 1, label %sw.bb1
+  i32 2, label %sw.bb2
+  i32 3, label %sw.bb3
+  ]
+
+sw.bb0:                                           ; preds = %entry
+  br label %return
+
+sw.bb1:                                           ; preds = %entry
+  br label %return
+
+sw.bb2:                                           ; preds = %entry
+  br label %return
+
+sw.bb3:                                           ; preds = %entry
+  br label %return
+
+sw.bb4:                                           ; preds = %entry
+  br label %return
+
+return:                                           ; preds = %entry, %sw.bb4, %sw.bb3, %sw.bb2, %sw.bb1
+  %retval.0 = phi i32 [ 4, %sw.bb4 ], [ 5, %sw.bb3 ], [ 6, %sw.bb2 ], [ 1, %sw.bb1 ], [ 8, %sw.bb0 ]
+  ret i32 %retval.0
+}
+
 ; If 1 incoming phi value is a case constant of a switch, convert it to the switch condition:
 ; https://bugs.llvm.org/show_bug.cgi?id=34471
 ; This then subsequently should allow squashing of the other trivial case blocks.
@@ -115,3 +185,60 @@ return:
   ret i32 %r
 }
 
+; We can replace `[ 1, %bb2 ]` with `[ %arg1, %bb2 ]`.
+define { i64, i64 } @PR95919(i64 noundef %arg, i64 noundef %arg1) {
+; NO_FWD-LABEL: @PR95919(
+; NO_FWD-NEXT:  bb:
+; NO_FWD-NEXT:    switch i64 [[ARG1:%.*]], label [[BB3:%.*]] [
+; NO_FWD-NEXT:      i64 0, label [[BB5:%.*]]
+; NO_FWD-NEXT:      i64 1, label [[BB2:%.*]]
+; NO_FWD-NEXT:    ]
+; NO_FWD:       bb2:
+; NO_FWD-NEXT:    br label [[BB5]]
+; NO_FWD:       bb3:
+; NO_FWD-NEXT:    [[I:%.*]] = udiv i64 [[ARG:%.*]], [[ARG1]]
+; NO_FWD-NEXT:    [[I4:%.*]] = shl nuw i64 [[I]], 1
+; NO_FWD-NEXT:    br label [[BB5]]
+; NO_FWD:       bb5:
+; NO_FWD-NEXT:    [[I6:%.*]] = phi i64 [ [[I4]], [[BB3]] ], [ [[ARG]], [[BB2]] ], [ undef, [[BB:%.*]] ]
+; NO_FWD-NEXT:    [[I7:%.*]] = phi i64 [ 1, [[BB3]] ], [ 1, [[BB2]] ], [ [[ARG1]], [[BB]] ]
+; NO_FWD-NEXT:    [[I8:%.*]] = insertvalue { i64, i64 } poison, i64 [[I7]], 0
+; NO_FWD-NEXT:    [[I9:%.*]] = insertvalue { i64, i64 } [[I8]], i64 [[I6]], 1
+; NO_FWD-NEXT:    ret { i64, i64 } [[I9]]
+;
+; FWD-LABEL: @PR95919(
+; FWD-NEXT:  bb:
+; FWD-NEXT:    [[SWITCH:%.*]] = icmp ult i64 [[ARG1:%.*]], 2
+; FWD-NEXT:    br i1 [[SWITCH]], label [[BB5:%.*]], label [[BB3:%.*]]
+; FWD:       bb3:
+; FWD-NEXT:    [[I:%.*]] = udiv i64 [[ARG:%.*]], [[ARG1]]
+; FWD-NEXT:    [[I4:%.*]] = shl nuw i64 [[I]], 1
+; FWD-NEXT:    br label [[BB5]]
+; FWD:       bb5:
+; FWD-NEXT:    [[I6:%.*]] = phi i64 [ [[I4]], [[BB3]] ], [ [[ARG]], [[BB:%.*]] ]
+; FWD-NEXT:    [[I7:%.*]] = phi i64 [ 1, [[BB3]] ], [ [[ARG1]], [[BB]] ]
+; FWD-NEXT:    [[I8:%.*]] = insertvalue { i64, i64 } poison, i64 [[I7]], 0
+; FWD-NEXT:    [[I9:%.*]] = insertvalue { i64, i64 } [[I8]], i64 [[I6]], 1
+; FWD-NEXT:    ret { i64, i64 } [[I9]]
+;
+bb:
+  switch i64 %arg1, label %bb3 [
+  i64 0, label %bb5
+  i64 1, label %bb2
+  ]
+
+bb2: ; preds = %bb
+  br label %bb5
+
+bb3: ; preds = %bb
+  %i = udiv i64 %arg, %arg1
+  %i4 = shl nuw i64 %i, 1
+  br label %bb5
+
+bb5: ; preds = %bb3, %bb2, %bb
+  %i6 = phi i64 [ %i4, %bb3 ], [ %arg, %bb2 ], [ undef, %bb ]
+  %i7 = phi i64 [ 1, %bb3 ], [ 1, %bb2 ], [ %arg1, %bb ]
+  %i8 = insertvalue { i64, i64 } poison, i64 %i7, 0
+  %i9 = insertvalue { i64, i64 } %i8, i64 %i6, 1
+  ret { i64, i64 } %i9
+}
diff --git a/llvm/test/Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll b/llvm/test/Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll
index 47dd24e194c39..b3b6abe314e6f 100644
--- a/llvm/test/Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll
+++ b/llvm/test/Transforms/SimplifyCFG/Hexagon/switch-to-lookup-table.ll
@@ -43,7 +43,7 @@ define i32 @foo(i32 %x) #0 section ".tcm_text" {
 ; DISABLE:       sw.default:
 ; DISABLE-NEXT:    br label [[RETURN]]
 ; DISABLE:       return:
-; DISABLE-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ 19, [[SW_DEFAULT]] ], [ 5, [[SW_BB5]] ], [ 12, [[SW_BB4]] ], [ 22, [[SW_BB3]] ], [ 14, [[SW_BB2]] ], [ 20, [[SW_BB1]] ], [ 9, [[ENTRY:%.*]] ]
+; DISABLE-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ 19, [[SW_DEFAULT]] ], [ 33, [[SW_BB5]] ], [ 12, [[SW_BB4]] ], [ 22, [[SW_BB3]] ], [ 14, [[SW_BB2]] ], [ 20, [[SW_BB1]] ], [ 9, [[ENTRY:%.*]] ]
 ; DISABLE-NEXT:    ret i32 [[RETVAL_0]]
 ;
 entry:
@@ -81,7 +81,7 @@ sw.bb4:                                           ; preds = %entry
   br label %return
 
 sw.bb5:                                           ; preds = %entry
-  store i32 5, ptr %retval, align 4
+  store i32 33, ptr %retval, align 4
   br label %return
 
 sw.default:                                       ; preds = %entry

>From d2c1ec26ca8e06ae4ee624fd766a82ae8a8c5612 Mon Sep 17 00:00:00 2001
From: paperchalice <liujunchang97 at outlook.com>
Date: Thu, 27 Jun 2024 14:04:51 +0800
Subject: [PATCH 12/14] [CodeGen][NewPM] Port machine-branch-prob to new pass
 manager (#96389)

Like IR version `print<branch-prob>`, there is also a
`print<machine-branch-prob>`.
---
 llvm/test/CodeGen/Generic/MachineBranchProb.ll | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/llvm/test/CodeGen/Generic/MachineBranchProb.ll b/llvm/test/CodeGen/Generic/MachineBranchProb.ll
index 9a5ea0c5e13cc..fa39e7182cef6 100644
--- a/llvm/test/CodeGen/Generic/MachineBranchProb.ll
+++ b/llvm/test/CodeGen/Generic/MachineBranchProb.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -print-after=finalize-isel -o /dev/null 2>&1 | FileCheck %s
+; RUN: llc %s -stop-after=finalize-isel -o - | llc -passes='print<machine-branch-prob>' -x mir -filetype=null 2>&1 | FileCheck -check-prefix=NPM %s
 
 ; Hexagon runs passes that renumber the basic blocks, causing this test
 ; to fail.
@@ -27,6 +28,10 @@ entry:
 ; CHECK: bb.6.entry:
 ; CHECK: successors: %bb.1(0x2e8ba2d7), %bb.3(0x51745d29)
 
+; NPM: Printing analysis 'Machine Branch Probability Analysis' for machine function 'test2':
+; NPM: edge %bb.4 -> %bb.6 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+; NPM: edge %bb.5 -> %bb.6 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
+
 sw.bb:
 ; this call will prevent simplifyCFG from optimizing the block away in ARM/AArch64.
   tail call void @foo()

>From dac7f6f6bf7d5a37e87967e9f9a4a198b4afe664 Mon Sep 17 00:00:00 2001
From: paperchalice <liujunchang97 at outlook.com>
Date: Thu, 27 Jun 2024 15:00:17 +0800
Subject: [PATCH 13/14] Revert "[CodeGen][NewPM] Port machine-branch-prob to
 new pass manager" (#96858)

Reverts llvm/llvm-project#96389
Some ppc bots failed.
---
 .../CodeGen/MachineBranchProbabilityInfo.h    | 53 ++++---------------
 llvm/include/llvm/InitializePasses.h          |  2 +-
 .../llvm/Passes/MachinePassRegistry.def       |  4 --
 llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp    |  7 ++-
 llvm/lib/CodeGen/BranchFolding.cpp            |  9 ++--
 llvm/lib/CodeGen/EarlyIfConversion.cpp        | 10 ++--
 llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp | 10 ++--
 llvm/lib/CodeGen/IfConversion.cpp             |  6 +--
 .../CodeGen/LazyMachineBlockFrequencyInfo.cpp |  6 +--
 .../lib/CodeGen/MachineBlockFrequencyInfo.cpp |  6 +--
 llvm/lib/CodeGen/MachineBlockPlacement.cpp    | 12 ++---
 .../CodeGen/MachineBranchProbabilityInfo.cpp  | 44 +++------------
 llvm/lib/CodeGen/MachineSink.cpp              |  6 +--
 llvm/lib/CodeGen/MachineTraceMetrics.cpp      |  4 +-
 llvm/lib/CodeGen/TailDuplication.cpp          |  4 +-
 llvm/lib/Passes/PassBuilder.cpp               |  1 -
 .../AArch64/AArch64ConditionalCompares.cpp    |  6 +--
 .../lib/Target/Hexagon/HexagonEarlyIfConv.cpp |  7 ++-
 llvm/lib/Target/Hexagon/HexagonLoopAlign.cpp  |  4 +-
 .../Target/Hexagon/HexagonNewValueJump.cpp    |  6 +--
 .../Target/Hexagon/HexagonVLIWPacketizer.cpp  |  7 ++-
 llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp  |  4 +-
 .../Target/PowerPC/PPCReduceCRLogicals.cpp    |  4 +-
 .../test/CodeGen/Generic/MachineBranchProb.ll |  5 --
 24 files changed, 73 insertions(+), 154 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h b/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
index 12d33f96edd11..bd544421bc0ff 100644
--- a/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
@@ -14,13 +14,14 @@
 #define LLVM_CODEGEN_MACHINEBRANCHPROBABILITYINFO_H
 
 #include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/BranchProbability.h"
 
 namespace llvm {
 
-class MachineBranchProbabilityInfo {
+class MachineBranchProbabilityInfo : public ImmutablePass {
+  virtual void anchor();
+
   // Default weight value. Used when we don't have information about the edge.
   // TODO: DEFAULT_WEIGHT makes sense during static predication, when none of
   // the successors have a weight yet. But it doesn't make sense when providing
@@ -30,8 +31,13 @@ class MachineBranchProbabilityInfo {
   static const uint32_t DEFAULT_WEIGHT = 16;
 
 public:
-  bool invalidate(MachineFunction &, const PreservedAnalyses &PA,
-                  MachineFunctionAnalysisManager::Invalidator &);
+  static char ID;
+
+  MachineBranchProbabilityInfo();
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesAll();
+  }
 
   // Return edge probability.
   BranchProbability getEdgeProbability(const MachineBasicBlock *Src,
@@ -55,45 +61,6 @@ class MachineBranchProbabilityInfo {
                                     const MachineBasicBlock *Dst) const;
 };
 
-class MachineBranchProbabilityAnalysis
-    : public AnalysisInfoMixin<MachineBranchProbabilityAnalysis> {
-  friend AnalysisInfoMixin<MachineBranchProbabilityAnalysis>;
-
-  static AnalysisKey Key;
-
-public:
-  using Result = MachineBranchProbabilityInfo;
-
-  Result run(MachineFunction &, MachineFunctionAnalysisManager &);
-};
-
-class MachineBranchProbabilityPrinterPass
-    : public PassInfoMixin<MachineBranchProbabilityPrinterPass> {
-  raw_ostream &OS;
-
-public:
-  MachineBranchProbabilityPrinterPass(raw_ostream &OS) : OS(OS) {}
-  PreservedAnalyses run(MachineFunction &MF,
-                        MachineFunctionAnalysisManager &MFAM);
-};
-
-class MachineBranchProbabilityInfoWrapperPass : public ImmutablePass {
-  virtual void anchor();
-
-  MachineBranchProbabilityInfo MBPI;
-
-public:
-  static char ID;
-
-  MachineBranchProbabilityInfoWrapperPass();
-
-  void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.setPreservesAll();
-  }
-
-  MachineBranchProbabilityInfo &getMBPI() { return MBPI; }
-  const MachineBranchProbabilityInfo &getMBPI() const { return MBPI; }
-};
 }
 
 
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 421c09ada7a19..4ddb7112a47bb 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -182,7 +182,7 @@ void initializeMIRPrintingPassPass(PassRegistry&);
 void initializeMachineBlockFrequencyInfoPass(PassRegistry&);
 void initializeMachineBlockPlacementPass(PassRegistry&);
 void initializeMachineBlockPlacementStatsPass(PassRegistry&);
-void initializeMachineBranchProbabilityInfoWrapperPassPass(PassRegistry &);
+void initializeMachineBranchProbabilityInfoPass(PassRegistry&);
 void initializeMachineCFGPrinterPass(PassRegistry &);
 void initializeMachineCSEPass(PassRegistry&);
 void initializeMachineCombinerPass(PassRegistry&);
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index b1542ab139286..b43f41bb3097e 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -89,8 +89,6 @@ LOOP_PASS("loop-reduce", LoopStrengthReducePass())
 #ifndef MACHINE_FUNCTION_ANALYSIS
 #define MACHINE_FUNCTION_ANALYSIS(NAME, CREATE_PASS)
 #endif
-MACHINE_FUNCTION_ANALYSIS("machine-branch-prob",
-                          MachineBranchProbabilityAnalysis())
 MACHINE_FUNCTION_ANALYSIS("machine-dom-tree", MachineDominatorTreeAnalysis())
 MACHINE_FUNCTION_ANALYSIS("machine-post-dom-tree",
                           MachinePostDominatorTreeAnalysis())
@@ -132,8 +130,6 @@ MACHINE_FUNCTION_PASS("finalize-isel", FinalizeISelPass())
 MACHINE_FUNCTION_PASS("localstackalloc", LocalStackSlotAllocationPass())
 MACHINE_FUNCTION_PASS("no-op-machine-function", NoOpMachineFunctionPass())
 MACHINE_FUNCTION_PASS("print", PrintMIRPass())
-MACHINE_FUNCTION_PASS("print<machine-branch-prob>",
-                      MachineBranchProbabilityPrinterPass(dbgs()))
 MACHINE_FUNCTION_PASS("print<machine-dom-tree>",
                       MachineDominatorTreePrinterPass(dbgs()))
 MACHINE_FUNCTION_PASS("print<machine-post-dom-tree>",
diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index c52cbff689dc5..702941924e4f8 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -428,7 +428,7 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<MachineOptimizationRemarkEmitterPass>();
   AU.addRequired<GCModuleInfo>();
   AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
-  AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+  AU.addRequired<MachineBranchProbabilityInfo>();
 }
 
 bool AsmPrinter::doInitialization(Module &M) {
@@ -1444,9 +1444,8 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) {
             ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
             : nullptr;
     const MachineBranchProbabilityInfo *MBPI =
-        Features.BrProb
-            ? &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI()
-            : nullptr;
+        Features.BrProb ? &getAnalysis<MachineBranchProbabilityInfo>()
+                        : nullptr;
 
     if (Features.BBFreq || Features.BrProb) {
       for (const MachineBasicBlock &MBB : MF) {
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 1b6a6ee2bbc72..c6c48cfc320c9 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -99,7 +99,7 @@ namespace {
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.addRequired<MachineBlockFrequencyInfo>();
-      AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+      AU.addRequired<MachineBranchProbabilityInfo>();
       AU.addRequired<ProfileSummaryInfoWrapperPass>();
       AU.addRequired<TargetPassConfig>();
       MachineFunctionPass::getAnalysisUsage(AU);
@@ -131,10 +131,9 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
                          PassConfig->getEnableTailMerge();
   MBFIWrapper MBBFreqInfo(
       getAnalysis<MachineBlockFrequencyInfo>());
-  BranchFolder Folder(
-      EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo,
-      getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI(),
-      &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());
+  BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo,
+                      getAnalysis<MachineBranchProbabilityInfo>(),
+                      &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI());
   return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(),
                                  MF.getSubtarget().getRegisterInfo());
 }
diff --git a/llvm/lib/CodeGen/EarlyIfConversion.cpp b/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 5f3e85077cb56..0135f330bf445 100644
--- a/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -787,14 +787,14 @@ char &llvm::EarlyIfConverterID = EarlyIfConverter::ID;
 
 INITIALIZE_PASS_BEGIN(EarlyIfConverter, DEBUG_TYPE,
                       "Early If Converter", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
 INITIALIZE_PASS_END(EarlyIfConverter, DEBUG_TYPE,
                     "Early If Converter", false, false)
 
 void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+  AU.addRequired<MachineBranchProbabilityInfo>();
   AU.addRequired<MachineDominatorTreeWrapperPass>();
   AU.addPreserved<MachineDominatorTreeWrapperPass>();
   AU.addRequired<MachineLoopInfo>();
@@ -1142,12 +1142,12 @@ char &llvm::EarlyIfPredicatorID = EarlyIfPredicator::ID;
 INITIALIZE_PASS_BEGIN(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator",
                       false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
 INITIALIZE_PASS_END(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator", false,
                     false)
 
 void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+  AU.addRequired<MachineBranchProbabilityInfo>();
   AU.addRequired<MachineDominatorTreeWrapperPass>();
   AU.addPreserved<MachineDominatorTreeWrapperPass>();
   AU.addRequired<MachineLoopInfo>();
@@ -1222,7 +1222,7 @@ bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) {
   SchedModel.init(&STI);
   DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
   Loops = &getAnalysis<MachineLoopInfo>();
-  MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
+  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
 
   bool Changed = false;
   IfConv.runOnMachineFunction(MF);
diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index e067bd8961a23..383cb61aed410 100644
--- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -63,7 +63,7 @@ INITIALIZE_PASS_BEGIN(RegBankSelect, DEBUG_TYPE,
                       "Assign register bank of generic virtual registers",
                       false, false);
 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
 INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,
                     "Assign register bank of generic virtual registers", false,
@@ -86,7 +86,7 @@ void RegBankSelect::init(MachineFunction &MF) {
   TPC = &getAnalysis<TargetPassConfig>();
   if (OptMode != Mode::Fast) {
     MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
-    MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
+    MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
   } else {
     MBFI = nullptr;
     MBPI = nullptr;
@@ -100,7 +100,7 @@ void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {
     // We could preserve the information from these two analysis but
     // the APIs do not allow to do so yet.
     AU.addRequired<MachineBlockFrequencyInfo>();
-    AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+    AU.addRequired<MachineBranchProbabilityInfo>();
   }
   AU.addRequired<TargetPassConfig>();
   getSelectionDAGFallbackAnalysisUsage(AU);
@@ -955,10 +955,8 @@ uint64_t RegBankSelect::EdgeInsertPoint::frequency(const Pass &P) const {
   if (WasMaterialized)
     return MBFI->getBlockFreq(DstOrSplit).getFrequency();
 
-  auto *MBPIWrapper =
-      P.getAnalysisIfAvailable<MachineBranchProbabilityInfoWrapperPass>();
   const MachineBranchProbabilityInfo *MBPI =
-      MBPIWrapper ? &MBPIWrapper->getMBPI() : nullptr;
+      P.getAnalysisIfAvailable<MachineBranchProbabilityInfo>();
   if (!MBPI)
     return 1;
   // The basic block will be on the edge.
diff --git a/llvm/lib/CodeGen/IfConversion.cpp b/llvm/lib/CodeGen/IfConversion.cpp
index 02cb95c5d7664..e8e276a8558d8 100644
--- a/llvm/lib/CodeGen/IfConversion.cpp
+++ b/llvm/lib/CodeGen/IfConversion.cpp
@@ -210,7 +210,7 @@ namespace {
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.addRequired<MachineBlockFrequencyInfo>();
-      AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+      AU.addRequired<MachineBranchProbabilityInfo>();
       AU.addRequired<ProfileSummaryInfoWrapperPass>();
       MachineFunctionPass::getAnalysisUsage(AU);
     }
@@ -432,7 +432,7 @@ char IfConverter::ID = 0;
 char &llvm::IfConverterID = IfConverter::ID;
 
 INITIALIZE_PASS_BEGIN(IfConverter, DEBUG_TYPE, "If Converter", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
 INITIALIZE_PASS_END(IfConverter, DEBUG_TYPE, "If Converter", false, false)
 
@@ -445,7 +445,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
   TII = ST.getInstrInfo();
   TRI = ST.getRegisterInfo();
   MBFIWrapper MBFI(getAnalysis<MachineBlockFrequencyInfo>());
-  MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
+  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
   ProfileSummaryInfo *PSI =
       &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
   MRI = &MF.getRegInfo();
diff --git a/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
index 83b16fc883e8b..721b75900c8ef 100644
--- a/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
+++ b/llvm/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
@@ -23,7 +23,7 @@ using namespace llvm;
 
 INITIALIZE_PASS_BEGIN(LazyMachineBlockFrequencyInfoPass, DEBUG_TYPE,
                       "Lazy Machine Block Frequency Analysis", true, true)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
 INITIALIZE_PASS_END(LazyMachineBlockFrequencyInfoPass, DEBUG_TYPE,
                     "Lazy Machine Block Frequency Analysis", true, true)
@@ -43,7 +43,7 @@ void LazyMachineBlockFrequencyInfoPass::print(raw_ostream &OS,
 
 void LazyMachineBlockFrequencyInfoPass::getAnalysisUsage(
     AnalysisUsage &AU) const {
-  AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+  AU.addRequired<MachineBranchProbabilityInfo>();
   AU.setPreservesAll();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
@@ -62,7 +62,7 @@ LazyMachineBlockFrequencyInfoPass::calculateIfNotAvailable() const {
     return *MBFI;
   }
 
-  auto &MBPI = getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
+  auto &MBPI = getAnalysis<MachineBranchProbabilityInfo>();
   auto *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
   auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
   auto *MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
diff --git a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 2a68f18d9bc76..7ebecc6beb17d 100644
--- a/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -163,7 +163,7 @@ struct DOTGraphTraits<MachineBlockFrequencyInfo *>
 
 INITIALIZE_PASS_BEGIN(MachineBlockFrequencyInfo, DEBUG_TYPE,
                       "Machine Block Frequency Analysis", true, true)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
 INITIALIZE_PASS_END(MachineBlockFrequencyInfo, DEBUG_TYPE,
                     "Machine Block Frequency Analysis", true, true)
@@ -185,7 +185,7 @@ MachineBlockFrequencyInfo::MachineBlockFrequencyInfo(
 MachineBlockFrequencyInfo::~MachineBlockFrequencyInfo() = default;
 
 void MachineBlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+  AU.addRequired<MachineBranchProbabilityInfo>();
   AU.addRequired<MachineLoopInfo>();
   AU.setPreservesAll();
   MachineFunctionPass::getAnalysisUsage(AU);
@@ -209,7 +209,7 @@ void MachineBlockFrequencyInfo::calculate(
 
 bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) {
   MachineBranchProbabilityInfo &MBPI =
-      getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
+      getAnalysis<MachineBranchProbabilityInfo>();
   MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
   calculate(F, MBPI, MLI);
   return false;
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index a229475df8fee..1cb71f39efbe1 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -608,7 +608,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+    AU.addRequired<MachineBranchProbabilityInfo>();
     AU.addRequired<MachineBlockFrequencyInfo>();
     if (TailDupPlacement)
       AU.addRequired<MachinePostDominatorTreeWrapperPass>();
@@ -627,7 +627,7 @@ char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
 
 INITIALIZE_PASS_BEGIN(MachineBlockPlacement, DEBUG_TYPE,
                       "Branch Probability Basic Block Placement", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
 INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
@@ -3425,7 +3425,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
     return false;
 
   F = &MF;
-  MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
+  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
   MBFI = std::make_unique<MBFIWrapper>(
       getAnalysis<MachineBlockFrequencyInfo>());
   MLI = &getAnalysis<MachineLoopInfo>();
@@ -3726,7 +3726,7 @@ class MachineBlockPlacementStats : public MachineFunctionPass {
   bool runOnMachineFunction(MachineFunction &F) override;
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+    AU.addRequired<MachineBranchProbabilityInfo>();
     AU.addRequired<MachineBlockFrequencyInfo>();
     AU.setPreservesAll();
     MachineFunctionPass::getAnalysisUsage(AU);
@@ -3741,7 +3741,7 @@ char &llvm::MachineBlockPlacementStatsID = MachineBlockPlacementStats::ID;
 
 INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats",
                       "Basic Block Placement Stats", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
 INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats",
                     "Basic Block Placement Stats", false, false)
@@ -3754,7 +3754,7 @@ bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) {
   if (!isFunctionInPrintList(F.getName()))
     return false;
 
-  MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
+  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
   MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
 
   for (MachineBasicBlock &MBB : F) {
diff --git a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index 56ffffff62240..a84377d708558 100644
--- a/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -18,11 +18,9 @@
 
 using namespace llvm;
 
-INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfoWrapperPass,
-                      "machine-branch-prob",
+INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfo, "machine-branch-prob",
                       "Machine Branch Probability Analysis", false, true)
-INITIALIZE_PASS_END(MachineBranchProbabilityInfoWrapperPass,
-                    "machine-branch-prob",
+INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob",
                     "Machine Branch Probability Analysis", false, true)
 
 namespace llvm {
@@ -39,45 +37,15 @@ cl::opt<unsigned> ProfileLikelyProb(
     cl::init(51), cl::Hidden);
 } // namespace llvm
 
-MachineBranchProbabilityAnalysis::Result
-MachineBranchProbabilityAnalysis::run(MachineFunction &,
-                                      MachineFunctionAnalysisManager &) {
-  return MachineBranchProbabilityInfo();
-}
-
-PreservedAnalyses
-MachineBranchProbabilityPrinterPass::run(MachineFunction &MF,
-                                         MachineFunctionAnalysisManager &MFAM) {
-  OS << "Printing analysis 'Machine Branch Probability Analysis' for machine "
-        "function '"
-     << MF.getName() << "':\n";
-  auto &MBPI = MFAM.getResult<MachineBranchProbabilityAnalysis>(MF);
-  for (const MachineBasicBlock &MBB : MF) {
-    for (const MachineBasicBlock *Succ : MBB.successors())
-      MBPI.printEdgeProbability(OS << "  ", &MBB, Succ);
-  }
-  return PreservedAnalyses::all();
-}
-
-char MachineBranchProbabilityInfoWrapperPass::ID = 0;
+char MachineBranchProbabilityInfo::ID = 0;
 
-MachineBranchProbabilityInfoWrapperPass::
-    MachineBranchProbabilityInfoWrapperPass()
+MachineBranchProbabilityInfo::MachineBranchProbabilityInfo()
     : ImmutablePass(ID) {
   PassRegistry &Registry = *PassRegistry::getPassRegistry();
-  initializeMachineBranchProbabilityInfoWrapperPassPass(Registry);
+  initializeMachineBranchProbabilityInfoPass(Registry);
 }
 
-void MachineBranchProbabilityInfoWrapperPass::anchor() {}
-
-AnalysisKey MachineBranchProbabilityAnalysis::Key;
-
-bool MachineBranchProbabilityInfo::invalidate(
-    MachineFunction &, const PreservedAnalyses &PA,
-    MachineFunctionAnalysisManager::Invalidator &) {
-  auto PAC = PA.getChecker<MachineBranchProbabilityAnalysis>();
-  return !PAC.preservedWhenStateless();
-}
+void MachineBranchProbabilityInfo::anchor() {}
 
 BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(
     const MachineBasicBlock *Src,
diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp
index 4dabaabe3659f..76947a419c347 100644
--- a/llvm/lib/CodeGen/MachineSink.cpp
+++ b/llvm/lib/CodeGen/MachineSink.cpp
@@ -187,7 +187,7 @@ namespace {
       AU.addRequired<MachineDominatorTreeWrapperPass>();
       AU.addRequired<MachinePostDominatorTreeWrapperPass>();
       AU.addRequired<MachineCycleInfoWrapperPass>();
-      AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+      AU.addRequired<MachineBranchProbabilityInfo>();
       AU.addPreserved<MachineCycleInfoWrapperPass>();
       AU.addPreserved<MachineLoopInfo>();
       if (UseBlockFreqInfo)
@@ -273,7 +273,7 @@ char &llvm::MachineSinkingID = MachineSinking::ID;
 
 INITIALIZE_PASS_BEGIN(MachineSinking, DEBUG_TYPE,
                       "Machine code sinking", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineCycleInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
@@ -712,7 +712,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
   PDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
   CI = &getAnalysis<MachineCycleInfoWrapperPass>().getCycleInfo();
   MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
-  MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
+  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   RegClassInfo.runOnMachineFunction(MF);
   TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
diff --git a/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index 0f44777634e6d..3892f85b4d090 100644
--- a/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -46,7 +46,7 @@ char &llvm::MachineTraceMetricsID = MachineTraceMetrics::ID;
 
 INITIALIZE_PASS_BEGIN(MachineTraceMetrics, DEBUG_TYPE,
                       "Machine Trace Metrics", false, true)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
 INITIALIZE_PASS_END(MachineTraceMetrics, DEBUG_TYPE,
                     "Machine Trace Metrics", false, true)
@@ -57,7 +57,7 @@ MachineTraceMetrics::MachineTraceMetrics() : MachineFunctionPass(ID) {
 
 void MachineTraceMetrics::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.setPreservesAll();
-  AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+  AU.addRequired<MachineBranchProbabilityInfo>();
   AU.addRequired<MachineLoopInfo>();
   MachineFunctionPass::getAnalysisUsage(AU);
 }
diff --git a/llvm/lib/CodeGen/TailDuplication.cpp b/llvm/lib/CodeGen/TailDuplication.cpp
index 25f20d9c899bb..bf3d2088e196c 100644
--- a/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/llvm/lib/CodeGen/TailDuplication.cpp
@@ -40,7 +40,7 @@ class TailDuplicateBase : public MachineFunctionPass {
   bool runOnMachineFunction(MachineFunction &MF) override;
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+    AU.addRequired<MachineBranchProbabilityInfo>();
     AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
     AU.addRequired<ProfileSummaryInfoWrapperPass>();
     MachineFunctionPass::getAnalysisUsage(AU);
@@ -84,7 +84,7 @@ bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
 
-  auto MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
+  auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
   auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
   auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
                &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 17cc156846d36..8d2a713b7f78e 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -93,7 +93,6 @@
 #include "llvm/CodeGen/LocalStackSlotAllocation.h"
 #include "llvm/CodeGen/LowerEmuTLS.h"
 #include "llvm/CodeGen/MIRPrinter.h"
-#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
 #include "llvm/CodeGen/MachinePassManager.h"
diff --git a/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index 3c2201ec4c998..9a788123b1ffa 100644
--- a/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -794,7 +794,7 @@ char AArch64ConditionalCompares::ID = 0;
 
 INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp",
                       "AArch64 CCMP Pass", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineTraceMetrics)
 INITIALIZE_PASS_END(AArch64ConditionalCompares, "aarch64-ccmp",
@@ -805,7 +805,7 @@ FunctionPass *llvm::createAArch64ConditionalCompares() {
 }
 
 void AArch64ConditionalCompares::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+  AU.addRequired<MachineBranchProbabilityInfo>();
   AU.addRequired<MachineDominatorTreeWrapperPass>();
   AU.addPreserved<MachineDominatorTreeWrapperPass>();
   AU.addRequired<MachineLoopInfo>();
@@ -935,7 +935,7 @@ bool AArch64ConditionalCompares::runOnMachineFunction(MachineFunction &MF) {
   MRI = &MF.getRegInfo();
   DomTree = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
   Loops = &getAnalysis<MachineLoopInfo>();
-  MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
+  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
   Traces = &getAnalysis<MachineTraceMetrics>();
   MinInstr = nullptr;
   MinSize = MF.getFunction().hasMinSize();
diff --git a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index e99496da8a260..03f6882e6889f 100644
--- a/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -161,7 +161,7 @@ namespace {
     }
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+      AU.addRequired<MachineBranchProbabilityInfo>();
       AU.addRequired<MachineDominatorTreeWrapperPass>();
       AU.addPreserved<MachineDominatorTreeWrapperPass>();
       AU.addRequired<MachineLoopInfo>();
@@ -1056,9 +1056,8 @@ bool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction &MF) {
   MRI = &MF.getRegInfo();
   MDT = &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
   MLI = &getAnalysis<MachineLoopInfo>();
-  MBPI = EnableHexagonBP
-             ? &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI()
-             : nullptr;
+  MBPI = EnableHexagonBP ? &getAnalysis<MachineBranchProbabilityInfo>() :
+    nullptr;
 
   Deleted.clear();
   bool Changed = false;
diff --git a/llvm/lib/Target/Hexagon/HexagonLoopAlign.cpp b/llvm/lib/Target/Hexagon/HexagonLoopAlign.cpp
index 6aa95ff6fd8f5..c79b528ff2f3f 100644
--- a/llvm/lib/Target/Hexagon/HexagonLoopAlign.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonLoopAlign.cpp
@@ -74,7 +74,7 @@ class HexagonLoopAlign : public MachineFunctionPass {
   bool attemptToBalignSmallLoop(MachineFunction &MF, MachineBasicBlock &MBB);
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+    AU.addRequired<MachineBranchProbabilityInfo>();
     AU.addRequired<MachineBlockFrequencyInfo>();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
@@ -148,7 +148,7 @@ bool HexagonLoopAlign::attemptToBalignSmallLoop(MachineFunction &MF,
     return false;
 
   const MachineBranchProbabilityInfo *MBPI =
-      &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
+      &getAnalysis<MachineBranchProbabilityInfo>();
   const MachineBlockFrequencyInfo *MBFI =
       &getAnalysis<MachineBlockFrequencyInfo>();
 
diff --git a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 36c52dfe81d5a..f539717e42d59 100644
--- a/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -78,7 +78,7 @@ namespace {
     HexagonNewValueJump() : MachineFunctionPass(ID) {}
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+      AU.addRequired<MachineBranchProbabilityInfo>();
       MachineFunctionPass::getAnalysisUsage(AU);
     }
 
@@ -107,7 +107,7 @@ char HexagonNewValueJump::ID = 0;
 
 INITIALIZE_PASS_BEGIN(HexagonNewValueJump, "hexagon-nvj",
                       "Hexagon NewValueJump", false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
 INITIALIZE_PASS_END(HexagonNewValueJump, "hexagon-nvj",
                     "Hexagon NewValueJump", false, false)
 
@@ -459,7 +459,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
   QII = static_cast<const HexagonInstrInfo *>(MF.getSubtarget().getInstrInfo());
   QRI = static_cast<const HexagonRegisterInfo *>(
       MF.getSubtarget().getRegisterInfo());
-  MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
+  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
 
   if (DisableNewValueJumps ||
       !MF.getSubtarget<HexagonSubtarget>().useNewValueJumps())
diff --git a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index ef2677a6af46a..2d5352b08caed 100644
--- a/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -96,7 +96,7 @@ namespace {
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.setPreservesCFG();
       AU.addRequired<AAResultsWrapperPass>();
-      AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+      AU.addRequired<MachineBranchProbabilityInfo>();
       AU.addRequired<MachineDominatorTreeWrapperPass>();
       AU.addRequired<MachineLoopInfo>();
       AU.addPreserved<MachineDominatorTreeWrapperPass>();
@@ -125,7 +125,7 @@ char HexagonPacketizer::ID = 0;
 INITIALIZE_PASS_BEGIN(HexagonPacketizer, "hexagon-packetizer",
                       "Hexagon Packetizer", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_END(HexagonPacketizer, "hexagon-packetizer",
@@ -213,8 +213,7 @@ bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
   HRI = HST.getRegisterInfo();
   auto &MLI = getAnalysis<MachineLoopInfo>();
   auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
-  auto *MBPI =
-      &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
+  auto *MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
 
   if (EnableGenAllInsnClass)
     HII->genAllInsnTimingClasses(MF);
diff --git a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index 4ec01ab7b4565..b525606b1f8fd 100644
--- a/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -236,7 +236,7 @@ namespace {
     }
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+      AU.addRequired<MachineBranchProbabilityInfo>();
       MachineFunctionPass::getAnalysisUsage(AU);
     }
 
@@ -872,7 +872,7 @@ MipsDelaySlotFiller::selectSuccBB(MachineBasicBlock &B) const {
     return nullptr;
 
   // Select the successor with the larget edge weight.
-  auto &Prob = getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
+  auto &Prob = getAnalysis<MachineBranchProbabilityInfo>();
   MachineBasicBlock *S = *std::max_element(
       B.succ_begin(), B.succ_end(),
       [&](const MachineBasicBlock *Dst0, const MachineBasicBlock *Dst1) {
diff --git a/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp b/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
index 0bfcba9a52486..d1cc2ad5c481f 100644
--- a/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
+++ b/llvm/lib/Target/PowerPC/PPCReduceCRLogicals.cpp
@@ -426,7 +426,7 @@ class PPCReduceCRLogicals : public MachineFunctionPass {
   }
   CRLogicalOpInfo createCRLogicalOpInfo(MachineInstr &MI);
   void getAnalysisUsage(AnalysisUsage &AU) const override {
-    AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+    AU.addRequired<MachineBranchProbabilityInfo>();
     AU.addRequired<MachineDominatorTreeWrapperPass>();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
@@ -570,7 +570,7 @@ void PPCReduceCRLogicals::initialize(MachineFunction &MFParam) {
   MF = &MFParam;
   MRI = &MF->getRegInfo();
   TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
-  MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
+  MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
 
   AllCRLogicalOps.clear();
 }
diff --git a/llvm/test/CodeGen/Generic/MachineBranchProb.ll b/llvm/test/CodeGen/Generic/MachineBranchProb.ll
index fa39e7182cef6..9a5ea0c5e13cc 100644
--- a/llvm/test/CodeGen/Generic/MachineBranchProb.ll
+++ b/llvm/test/CodeGen/Generic/MachineBranchProb.ll
@@ -1,5 +1,4 @@
 ; RUN: llc < %s -print-after=finalize-isel -o /dev/null 2>&1 | FileCheck %s
-; RUN: llc %s -stop-after=finalize-isel -o - | llc -passes='print<machine-branch-prob>' -x mir -filetype=null 2>&1 | FileCheck -check-prefix=NPM %s
 
 ; Hexagon runs passes that renumber the basic blocks, causing this test
 ; to fail.
@@ -28,10 +27,6 @@ entry:
 ; CHECK: bb.6.entry:
 ; CHECK: successors: %bb.1(0x2e8ba2d7), %bb.3(0x51745d29)
 
-; NPM: Printing analysis 'Machine Branch Probability Analysis' for machine function 'test2':
-; NPM: edge %bb.4 -> %bb.6 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
-; NPM: edge %bb.5 -> %bb.6 probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
-
 sw.bb:
 ; this call will prevent simplifyCFG from optimizing the block away in ARM/AArch64.
   tail call void @foo()

>From 8a87e14737f121a931fe7f1b15f5263627fc883d Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sun, 23 Jun 2024 22:02:36 +0200
Subject: [PATCH 14/14] AMDGPU: Add a subtarget feature for fine-grained remote
 memory support

Atomic access to fine-grained remote memory does not work on all
subtargets. Add a feature for targets where this is expected to work.
---
 llvm/lib/Target/AMDGPU/AMDGPU.td      | 16 ++++++++++++++--
 llvm/lib/Target/AMDGPU/GCNSubtarget.h |  8 ++++++++
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 2f3890e0ff2ae..3f35db8883716 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -788,6 +788,16 @@ def FeatureFlatAtomicFaddF32Inst
   "Has flat_atomic_add_f32 instruction"
 >;
 
+def FeatureAgentScopeFineGrainedRemoteMemoryAtomics
+  : SubtargetFeature<"agent-scope-fine-grained-remote-memory-atomics",
+  "HasAgentScopeFineGrainedRemoteMemoryAtomics",
+  "true",
+  "Agent (device) scoped atomic operations, excluding those directly "
+  "supported by PCIe (i.e. integer atomic add, exchange, and "
+  "compare-and-swap), are functional for allocations in host or peer "
+  "device memory."
+>;
+
 def FeatureDefaultComponentZero : SubtargetFeature<"default-component-zero",
   "HasDefaultComponentZero",
   "true",
@@ -1207,7 +1217,8 @@ def FeatureGFX12 : GCNSubtargetFeatureGeneration<"GFX12",
    FeatureUnalignedBufferAccess, FeatureUnalignedDSAccess,
    FeatureTrue16BitInsts, FeatureDefaultComponentBroadcast,
    FeatureMaxHardClauseLength32,
-   FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts
+   FeatureAtomicFMinFMaxF32GlobalInsts, FeatureAtomicFMinFMaxF32FlatInsts,
+   FeatureAgentScopeFineGrainedRemoteMemoryAtomics
   ]
 >;
 
@@ -1415,7 +1426,8 @@ def FeatureISAVersion9_4_Common : FeatureSet<
    FeatureBackOffBarrier,
    FeatureKernargPreload,
    FeatureAtomicFMinFMaxF64GlobalInsts,
-   FeatureAtomicFMinFMaxF64FlatInsts
+   FeatureAtomicFMinFMaxF64FlatInsts,
+   FeatureAgentScopeFineGrainedRemoteMemoryAtomics
    ]>;
 
 def FeatureISAVersion9_4_0 : FeatureSet<
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 07ff855756ec9..9e2a316a9ed28 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -174,6 +174,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool HasAtomicBufferPkAddBF16Inst = false;
   bool HasFlatAtomicFaddF32Inst = false;
   bool HasDefaultComponentZero = false;
+  bool HasAgentScopeFineGrainedRemoteMemoryAtomics = false;
   bool HasDefaultComponentBroadcast = false;
   /// The maximum number of instructions that may be placed within an S_CLAUSE,
   /// which is one greater than the maximum argument to S_CLAUSE. A value of 0
@@ -871,6 +872,13 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
 
   bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
 
+  /// \return true if atomic operations targeting fine-grained memory work
+  /// correctly at device scope, in allocations in host or peer PCIe device
+  /// memory.
+  bool supportsAgentScopeFineGrainedRemoteMemoryAtomics() const {
+    return HasAgentScopeFineGrainedRemoteMemoryAtomics;
+  }
+
   bool hasDefaultComponentZero() const { return HasDefaultComponentZero; }
 
   bool hasDefaultComponentBroadcast() const {