[llvm] [X86, SimplifyCFG] Support hoisting load/store with conditional faulting (Part II) (PR #108812)

Phoebe Wang via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 26 06:02:28 PDT 2024


https://github.com/phoebewang updated https://github.com/llvm/llvm-project/pull/108812

>From 1077be1b780dfb905620c5b86adad4ed97ddd6eb Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Sat, 21 Sep 2024 20:24:34 +0800
Subject: [PATCH 1/8] [X86,SimplifyCFG] Support hoisting load/store with
 conditional faulting (Part II)

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     | 42 ++++++++++++++---
 .../X86/hoist-loads-stores-with-cf.ll         | 46 +++++++++++++++----
 2 files changed, 72 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 03db86a3baae7a..60e3b50b9b3534 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1663,18 +1663,29 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1,
 static void hoistConditionalLoadsStores(
     BranchInst *BI,
     SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
-    bool Invert) {
+    std::optional<bool> Invert) {
   auto &Context = BI->getParent()->getContext();
   auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
   auto *Cond = BI->getOperand(0);
   // Construct the condition if needed.
   BasicBlock *BB = BI->getParent();
-  IRBuilder<> Builder(SpeculatedConditionalLoadsStores.back());
-  Value *Mask = Builder.CreateBitCast(
-      Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
-      VCondTy);
+  IRBuilder<> Builder(Invert ? SpeculatedConditionalLoadsStores.back() : BI);
+  Value *Mask = nullptr;
+  Value *Mask0 = nullptr;
+  Value *Mask1 = nullptr;
+  if (Invert) {
+    Mask = Builder.CreateBitCast(
+        *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
+        VCondTy);
+  } else {
+    Mask0 = Builder.CreateBitCast(
+        Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
+    Mask1 = Builder.CreateBitCast(Cond, VCondTy);
+  }
   for (auto *I : SpeculatedConditionalLoadsStores) {
-    IRBuilder<> Builder(I);
+    IRBuilder<> Builder(Invert ? I : BI);
+    if (!Invert)
+      Mask = I->getParent() == BI->getSuccessor(0) ? Mask1 : Mask0;
     // We currently assume conditional faulting load/store is supported for
     // scalar types only when creating new instructions. This can be easily
     // extended for vector types in the future.
@@ -1771,6 +1782,25 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
     if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
       return false;
 
+  auto *BI = dyn_cast<BranchInst>(TI);
+  if (BI && HoistLoadsStoresWithCondFaulting &&
+      Options.HoistLoadsStoresWithCondFaulting) {
+    SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
+    for (auto *Succ : successors(BB)) {
+      for (Instruction &I : drop_end(*Succ)) {
+        if (!isSafeCheapLoadStore(&I, TTI) ||
+            SpeculatedConditionalLoadsStores.size() ==
+                HoistLoadsStoresWithCondFaultingThreshold)
+          return false;
+        SpeculatedConditionalLoadsStores.push_back(&I);
+      }
+    }
+
+    if (!SpeculatedConditionalLoadsStores.empty())
+      hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
+                                  std::nullopt);
+  }
+
   // The second of pair is a SkipFlags bitmask.
   using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
   SmallVector<SuccIterPair, 8> SuccIterPairs;
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
index 6ea0cf290ffc82..6a776abd6008d1 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
@@ -276,21 +276,19 @@ if.false:                                    ; preds = %if.true, %entry
 }
 
 ;; Both of successor 0 and successor 1 have a single predecessor.
-;; TODO: Support transform for this case.
 define void @single_predecessor(ptr %p, ptr %q, i32 %a) {
 ; CHECK-LABEL: @single_predecessor(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
-; CHECK:       common.ret:
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[TOBOOL]], true
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i1 [[TOBOOL]] to <1 x i1>
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 1>, ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP5]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
 ; CHECK-NEXT:    ret void
-; CHECK:       if.end:
-; CHECK-NEXT:    store i32 1, ptr [[Q:%.*]], align 4
-; CHECK-NEXT:    br label [[COMMON_RET:%.*]]
-; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Q]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], ptr [[P:%.*]], align 4
-; CHECK-NEXT:    br label [[COMMON_RET]]
 ;
 entry:
   %tobool = icmp ne i32 %a, 0
@@ -728,6 +726,34 @@ if.true:
   ret i32 %res
 }
 
+define void @diamondCFG(i32 %a, ptr %c, ptr %d) {
+; CHECK-LABEL: @diamondCFG(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[TOBOOL_NOT]], true
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i1 [[TOBOOL_NOT]] to <1 x i1>
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr [[D:%.*]], i32 4, <1 x i1> [[TMP2]])
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32 [[A]] to <1 x i32>
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[C:%.*]], i32 4, <1 x i1> [[TMP1]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tobool.not = icmp eq i32 %a, 0
+  br i1 %tobool.not, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 %a, ptr %c, align 4
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  store i32 0, ptr %d, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
+
 declare i32 @read_memory_only() readonly nounwind willreturn speculatable
 
 !llvm.dbg.cu = !{!0}

>From 9d73fdde331b875d4d210c90a811a4224080afd3 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Mon, 23 Sep 2024 17:45:41 +0800
Subject: [PATCH 2/8] Address review comments

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     | 26 +++++++++------
 .../X86/hoist-loads-stores-with-cf.ll         | 33 ++-----------------
 2 files changed, 18 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 60e3b50b9b3534..97080aa975147e 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1671,21 +1671,26 @@ static void hoistConditionalLoadsStores(
   BasicBlock *BB = BI->getParent();
   IRBuilder<> Builder(Invert ? SpeculatedConditionalLoadsStores.back() : BI);
   Value *Mask = nullptr;
-  Value *Mask0 = nullptr;
-  Value *Mask1 = nullptr;
+  Value *MaskFalse = nullptr;
+  Value *MaskTrue = nullptr;
   if (Invert) {
     Mask = Builder.CreateBitCast(
         *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
         VCondTy);
   } else {
-    Mask0 = Builder.CreateBitCast(
+    MaskFalse = Builder.CreateBitCast(
         Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
-    Mask1 = Builder.CreateBitCast(Cond, VCondTy);
+    MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
   }
+  auto PeekThroughBitcasts = [](Value *V) {
+    while (auto *BitCast = dyn_cast<BitCastInst>(V))
+      V = BitCast->getOperand(0);
+    return V;
+  };
   for (auto *I : SpeculatedConditionalLoadsStores) {
     IRBuilder<> Builder(Invert ? I : BI);
-    if (!Invert)
-      Mask = I->getParent() == BI->getSuccessor(0) ? Mask1 : Mask0;
+    if (!Mask)
+      Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
     // We currently assume conditional faulting load/store is supported for
     // scalar types only when creating new instructions. This can be easily
     // extended for vector types in the future.
@@ -1699,8 +1704,9 @@ static void hoistConditionalLoadsStores(
       Value *PassThru = nullptr;
       for (User *U : I->users())
         if ((PN = dyn_cast<PHINode>(U))) {
-          PassThru = Builder.CreateBitCast(PN->getIncomingValueForBlock(BB),
-                                           FixedVectorType::get(Ty, 1));
+          PassThru = Builder.CreateBitCast(
+              PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
+              FixedVectorType::get(Ty, 1));
           break;
         }
       MaskedLoadStore = Builder.CreateMaskedLoad(
@@ -1711,8 +1717,8 @@ static void hoistConditionalLoadsStores(
       I->replaceAllUsesWith(NewLoadStore);
     } else {
       // Handle Store.
-      auto *StoredVal =
-          Builder.CreateBitCast(Op0, FixedVectorType::get(Op0->getType(), 1));
+      auto *StoredVal = Builder.CreateBitCast(
+          PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
       MaskedLoadStore = Builder.CreateMaskedStore(
           StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
     }
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
index 6a776abd6008d1..d88366eb8116c2 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
@@ -284,10 +284,9 @@ define void @single_predecessor(ptr %p, ptr %q, i32 %a) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i1 [[TOBOOL]] to <1 x i1>
 ; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 1>, ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP2]])
-; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q]], i32 4, <1 x i1> [[TMP2]], <1 x i32> poison)
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
-; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP5]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP2]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -726,34 +725,6 @@ if.true:
   ret i32 %res
 }
 
-define void @diamondCFG(i32 %a, ptr %c, ptr %d) {
-; CHECK-LABEL: @diamondCFG(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[A:%.*]], 0
-; CHECK-NEXT:    [[TMP0:%.*]] = xor i1 [[TOBOOL_NOT]], true
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i1 [[TOBOOL_NOT]] to <1 x i1>
-; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr [[D:%.*]], i32 4, <1 x i1> [[TMP2]])
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32 [[A]] to <1 x i32>
-; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[C:%.*]], i32 4, <1 x i1> [[TMP1]])
-; CHECK-NEXT:    ret void
-;
-entry:
-  %tobool.not = icmp eq i32 %a, 0
-  br i1 %tobool.not, label %if.else, label %if.then
-
-if.then:                                          ; preds = %entry
-  store i32 %a, ptr %c, align 4
-  br label %if.end
-
-if.else:                                          ; preds = %entry
-  store i32 0, ptr %d, align 4
-  br label %if.end
-
-if.end:                                           ; preds = %if.else, %if.then
-  ret void
-}
-
 declare i32 @read_memory_only() readonly nounwind willreturn speculatable
 
 !llvm.dbg.cu = !{!0}

>From 15e25fb9527527e479ef0987600c45081833bcb4 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Mon, 23 Sep 2024 21:58:43 +0800
Subject: [PATCH 3/8] Revert one change

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp                     | 2 +-
 .../Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 97080aa975147e..97216d116b29c6 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1689,7 +1689,7 @@ static void hoistConditionalLoadsStores(
   };
   for (auto *I : SpeculatedConditionalLoadsStores) {
     IRBuilder<> Builder(Invert ? I : BI);
-    if (!Mask)
+    if (!Invert)
       Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
     // We currently assume conditional faulting load/store is supported for
     // scalar types only when creating new instructions. This can be easily
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
index d88366eb8116c2..bb9c567b386252 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
@@ -284,9 +284,9 @@ define void @single_predecessor(ptr %p, ptr %q, i32 %a) {
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
 ; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i1 [[TOBOOL]] to <1 x i1>
 ; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 1>, ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP2]])
-; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q]], i32 4, <1 x i1> [[TMP2]], <1 x i32> poison)
+; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
-; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP2]])
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
 ; CHECK-NEXT:    ret void
 ;
 entry:

>From 40e21304b32fbc80359d8cc3d1448c0c859283b0 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Mon, 23 Sep 2024 22:25:00 +0800
Subject: [PATCH 4/8] Return early after hoisting load/store

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 97216d116b29c6..a0aafc360ff8a8 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1660,7 +1660,7 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1,
 /// \endcode
 ///
 /// So we need to turn hoisted load/store into cload/cstore.
-static void hoistConditionalLoadsStores(
+static bool hoistConditionalLoadsStores(
     BranchInst *BI,
     SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
     std::optional<bool> Invert) {
@@ -1742,6 +1742,7 @@ static void hoistConditionalLoadsStores(
     MaskedLoadStore->copyMetadata(*I);
     I->eraseFromParent();
   }
+  return true;
 }
 
 static bool isSafeCheapLoadStore(const Instruction *I,
@@ -1803,8 +1804,8 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
     }
 
     if (!SpeculatedConditionalLoadsStores.empty())
-      hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
-                                  std::nullopt);
+      return hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
+                                         std::nullopt);
   }
 
   // The second of pair is a SkipFlags bitmask.

>From d6d50c4abf9636bd2f21b924193e0fa680f37ec6 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Wed, 25 Sep 2024 15:40:37 +0800
Subject: [PATCH 5/8] Limit TrueBB and FalseBB to no more than 1 successor

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     | 26 ++++++++++++-------
 .../X86/hoist-loads-stores-with-cf.ll         |  9 ++++---
 2 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index a0aafc360ff8a8..91ae73fae0ef71 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1702,13 +1702,14 @@ static bool hoistConditionalLoadsStores(
       auto *Ty = I->getType();
       PHINode *PN = nullptr;
       Value *PassThru = nullptr;
-      for (User *U : I->users())
-        if ((PN = dyn_cast<PHINode>(U))) {
-          PassThru = Builder.CreateBitCast(
-              PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
-              FixedVectorType::get(Ty, 1));
-          break;
-        }
+      if (Invert)
+        for (User *U : I->users())
+          if ((PN = dyn_cast<PHINode>(U))) {
+            PassThru = Builder.CreateBitCast(
+                PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
+                FixedVectorType::get(Ty, 1));
+            break;
+          }
       MaskedLoadStore = Builder.CreateMaskedLoad(
           FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
       Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
@@ -1794,11 +1795,16 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
       Options.HoistLoadsStoresWithCondFaulting) {
     SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
     for (auto *Succ : successors(BB)) {
-      for (Instruction &I : drop_end(*Succ)) {
-        if (!isSafeCheapLoadStore(&I, TTI) ||
+      for (Instruction &I : *Succ) {
+        if (I.isTerminator()) {
+          if (I.getNumSuccessors() > 1)
+            return false;
+          continue;
+        } else if (!isSafeCheapLoadStore(&I, TTI) ||
             SpeculatedConditionalLoadsStores.size() ==
-                HoistLoadsStoresWithCondFaultingThreshold)
+                HoistLoadsStoresWithCondFaultingThreshold) {
           return false;
+        }
         SpeculatedConditionalLoadsStores.push_back(&I);
       }
     }
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
index bb9c567b386252..c888b8182bdc44 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
@@ -276,7 +276,7 @@ if.false:                                    ; preds = %if.true, %entry
 }
 
 ;; Both of successor 0 and successor 1 have a single predecessor.
-define void @single_predecessor(ptr %p, ptr %q, i32 %a) {
+define i32 @single_predecessor(ptr %p, ptr %q, i32 %a) {
 ; CHECK-LABEL: @single_predecessor(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
@@ -287,7 +287,8 @@ define void @single_predecessor(ptr %p, ptr %q, i32 %a) {
 ; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
 ; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
 ; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
-; CHECK-NEXT:    ret void
+; CHECK-NEXT:    [[DOT:%.*]] = select i1 [[TOBOOL]], i32 2, i32 3
+; CHECK-NEXT:    ret i32 [[DOT]]
 ;
 entry:
   %tobool = icmp ne i32 %a, 0
@@ -295,12 +296,12 @@ entry:
 
 if.end:
   store i32 1, ptr %q
-  ret void
+  ret i32 2
 
 if.then:
   %0 = load i32, ptr %q
   store i32 %0, ptr %p
-  ret void
+  ret i32 3
 }
 
 ;; Hoist 6 stores.

>From de28ed92a001488b47fe8723c6182566a0828360 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Wed, 25 Sep 2024 15:58:04 +0800
Subject: [PATCH 6/8] clang-format

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 91ae73fae0ef71..066b899b57011a 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1801,8 +1801,8 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
             return false;
           continue;
         } else if (!isSafeCheapLoadStore(&I, TTI) ||
-            SpeculatedConditionalLoadsStores.size() ==
-                HoistLoadsStoresWithCondFaultingThreshold) {
+                   SpeculatedConditionalLoadsStores.size() ==
+                       HoistLoadsStoresWithCondFaultingThreshold) {
           return false;
         }
         SpeculatedConditionalLoadsStores.push_back(&I);

>From 8ec9409e9e46439bc4c29a846ecd81e9a69dc4e5 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Thu, 26 Sep 2024 17:24:17 +0800
Subject: [PATCH 7/8] Use `Invert.has_value()`

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 066b899b57011a..5d6a348c6dd943 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1669,11 +1669,12 @@ static bool hoistConditionalLoadsStores(
   auto *Cond = BI->getOperand(0);
   // Construct the condition if needed.
   BasicBlock *BB = BI->getParent();
-  IRBuilder<> Builder(Invert ? SpeculatedConditionalLoadsStores.back() : BI);
+  IRBuilder<> Builder(
+      Invert.has_value() ? SpeculatedConditionalLoadsStores.back() : BI);
   Value *Mask = nullptr;
   Value *MaskFalse = nullptr;
   Value *MaskTrue = nullptr;
-  if (Invert) {
+  if (Invert.has_value()) {
     Mask = Builder.CreateBitCast(
         *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
         VCondTy);
@@ -1688,8 +1689,8 @@ static bool hoistConditionalLoadsStores(
     return V;
   };
   for (auto *I : SpeculatedConditionalLoadsStores) {
-    IRBuilder<> Builder(Invert ? I : BI);
-    if (!Invert)
+    IRBuilder<> Builder(Invert.has_value() ? I : BI);
+    if (!Invert.has_value())
       Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
     // We currently assume conditional faulting load/store is supported for
     // scalar types only when creating new instructions. This can be easily
@@ -1702,7 +1703,7 @@ static bool hoistConditionalLoadsStores(
       auto *Ty = I->getType();
       PHINode *PN = nullptr;
       Value *PassThru = nullptr;
-      if (Invert)
+      if (Invert.has_value())
         for (User *U : I->users())
           if ((PN = dyn_cast<PHINode>(U))) {
             PassThru = Builder.CreateBitCast(

>From fc7df9a6b8ccb544bc31f4c30a2ad690275acf78 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Thu, 26 Sep 2024 21:01:45 +0800
Subject: [PATCH 8/8] Address review comment

---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 51 ++++++++++++-----------
 1 file changed, 27 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 5d6a348c6dd943..621633eab8637c 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1791,30 +1791,6 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
     if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
       return false;
 
-  auto *BI = dyn_cast<BranchInst>(TI);
-  if (BI && HoistLoadsStoresWithCondFaulting &&
-      Options.HoistLoadsStoresWithCondFaulting) {
-    SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
-    for (auto *Succ : successors(BB)) {
-      for (Instruction &I : *Succ) {
-        if (I.isTerminator()) {
-          if (I.getNumSuccessors() > 1)
-            return false;
-          continue;
-        } else if (!isSafeCheapLoadStore(&I, TTI) ||
-                   SpeculatedConditionalLoadsStores.size() ==
-                       HoistLoadsStoresWithCondFaultingThreshold) {
-          return false;
-        }
-        SpeculatedConditionalLoadsStores.push_back(&I);
-      }
-    }
-
-    if (!SpeculatedConditionalLoadsStores.empty())
-      return hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
-                                         std::nullopt);
-  }
-
   // The second of pair is a SkipFlags bitmask.
   using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
   SmallVector<SuccIterPair, 8> SuccIterPairs;
@@ -7859,6 +7835,33 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
       if (HoistCommon &&
           hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
         return requestResimplify();
+
+      if (BI && HoistLoadsStoresWithCondFaulting &&
+          Options.HoistLoadsStoresWithCondFaulting) {
+        SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
+        auto CanSpeculateConditionalLoadsStores = [&]() {
+          for (auto *Succ : successors(BB)) {
+            for (Instruction &I : *Succ) {
+              if (I.isTerminator()) {
+                if (I.getNumSuccessors() > 1)
+                  return false;
+                continue;
+              } else if (!isSafeCheapLoadStore(&I, TTI) ||
+                         SpeculatedConditionalLoadsStores.size() ==
+                             HoistLoadsStoresWithCondFaultingThreshold) {
+                return false;
+              }
+              SpeculatedConditionalLoadsStores.push_back(&I);
+            }
+          }
+          return !SpeculatedConditionalLoadsStores.empty();
+        };
+
+        if (CanSpeculateConditionalLoadsStores() &&
+            hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
+                                        std::nullopt))
+          return requestResimplify();
+      }
     } else {
       // If Successor #1 has multiple preds, we may be able to conditionally
       // execute Successor #0 if it branches to Successor #1.



More information about the llvm-commits mailing list