[llvm] [X86, SimplifyCFG] Support hoisting load/store with conditional faulting (Part II) (PR #108812)

Phoebe Wang via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 16 03:10:53 PDT 2024


https://github.com/phoebewang created https://github.com/llvm/llvm-project/pull/108812

This is a follow up of #96878 to support hoisting load/store for diamond CFG.

```
void test (int a, int *c, int *d) {
  if (a)
   *c = a;
  else
   *d = a;
}
```

>From 3a4796ccffe6d3ede7a4e19b7df55e9f88eab2a0 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Mon, 16 Sep 2024 17:52:53 +0800
Subject: [PATCH] [X86,SimplifyCFG] Support hoisting load/store with
 conditional faulting (Part II)

This is a follow up of #96878 to support hoisting load/store for diamond CFG.

```
void test (int a, int *c, int *d) {
  if (a)
   *c = a;
  else
   *d = a;
}
```
---
 llvm/lib/Transforms/Utils/SimplifyCFG.cpp     | 107 ++++++++++++++----
 .../X86/hoist-loads-stores-with-cf.ll         |  46 ++++++--
 2 files changed, 118 insertions(+), 35 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index f9db996cdc3583..420c59789813c1 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -283,7 +283,7 @@ class SimplifyCFGOpt {
   bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
                                              IRBuilder<> &Builder);
 
-  bool hoistCommonCodeFromSuccessors(BasicBlock *BB, bool EqTermsOnly);
+  bool hoistCommonCodeFromSuccessors(Instruction *TI, bool EqTermsOnly);
   bool hoistSuccIdenticalTerminatorToSwitchOrIf(
       Instruction *TI, Instruction *I1,
       SmallVectorImpl<Instruction *> &OtherSuccTIs);
@@ -1615,12 +1615,31 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1,
   return false;
 }
 
+static bool isSafeCheapLoadStore(const Instruction *I,
+                                 const TargetTransformInfo &TTI) {
+  // Not handle volatile or atomic.
+  if (auto *L = dyn_cast<LoadInst>(I)) {
+    if (!L->isSimple())
+      return false;
+  } else if (auto *S = dyn_cast<StoreInst>(I)) {
+    if (!S->isSimple())
+      return false;
+  } else
+    return false;
+
+  // llvm.masked.load/store use i32 for alignment while load/store use i64.
+  // That's why we have the alignment limitation.
+  // FIXME: Update the prototype of the intrinsics?
+  return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I)) &&
+         getLoadStoreAlignment(I) < Value::MaximumAlignment;
+}
+
 /// Hoist any common code in the successor blocks up into the block. This
 /// function guarantees that BB dominates all successors. If EqTermsOnly is
 /// given, only perform hoisting in case both blocks only contain a terminator.
 /// In that case, only the original BI will be replaced and selects for PHIs are
 /// added.
-bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
+bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
                                                    bool EqTermsOnly) {
   // This does very trivial matching, with limited scanning, to find identical
   // instructions in the two blocks. In particular, we don't want to get into
@@ -1628,6 +1647,7 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
   // such, we currently just scan for obviously identical instructions in an
   // identical order, possibly separated by the same number of non-identical
   // instructions.
+  BasicBlock *BB = TI->getParent();
   unsigned int SuccSize = succ_size(BB);
   if (SuccSize < 2)
     return false;
@@ -1639,7 +1659,63 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
     if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
       return false;
 
-  auto *TI = BB->getTerminator();
+  auto *BI = dyn_cast<BranchInst>(TI);
+  if (BI && HoistLoadsStoresWithCondFaulting &&
+      Options.HoistLoadsStoresWithCondFaulting) {
+    SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
+    for (auto *Succ : successors(BB)) {
+      for (Instruction &I : drop_end(*Succ)) {
+        if (!isSafeCheapLoadStore(&I, TTI) ||
+            SpeculatedConditionalLoadsStores.size() ==
+                HoistLoadsStoresWithCondFaultingThreshold)
+          return false;
+        SpeculatedConditionalLoadsStores.push_back(&I);
+      }
+    }
+
+    // TODO: Move below code to a function to share with #96878.
+    if (SpeculatedConditionalLoadsStores.empty())
+      return false;
+
+    auto &Context = BI->getParent()->getContext();
+    auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
+    auto *Cond = BI->getOperand(0);
+    IRBuilder<> Builder(BI);
+    Value *Mask1 = Builder.CreateBitCast(Cond, VCondTy);
+    Value *Mask0 = Builder.CreateBitCast(
+        Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
+    for (auto *I : SpeculatedConditionalLoadsStores) {
+      Value *Mask = I->getParent() == BI->getSuccessor(0) ? Mask1 : Mask0;
+      assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
+      auto *Op0 = I->getOperand(0);
+      Instruction *MaskedLoadStore = nullptr;
+      if (auto *LI = dyn_cast<LoadInst>(I)) {
+        // Handle Load.
+        auto *Ty = I->getType();
+        MaskedLoadStore = Builder.CreateMaskedLoad(FixedVectorType::get(Ty, 1),
+                                                   Op0, LI->getAlign(), Mask);
+        I->replaceAllUsesWith(Builder.CreateBitCast(MaskedLoadStore, Ty));
+      } else {
+        // Handle Store.
+        auto *StoredVal =
+            Builder.CreateBitCast(Op0, FixedVectorType::get(Op0->getType(), 1));
+        MaskedLoadStore = Builder.CreateMaskedStore(
+            StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
+      }
+      I->dropUBImplyingAttrsAndUnknownMetadata(
+          {LLVMContext::MD_range, LLVMContext::MD_annotation});
+      // FIXME: DIAssignID is not supported for masked store yet.
+      // (Verifier::visitDIAssignIDMetadata)
+      at::deleteAssignmentMarkers(I);
+      I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
+        return Node->getMetadataID() == Metadata::DIAssignIDKind;
+      });
+      MaskedLoadStore->copyMetadata(*I);
+      I->eraseFromParent();
+    }
+
+    return true;
+  }
 
   // The second of pair is a SkipFlags bitmask.
   using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
@@ -2998,25 +3074,6 @@ static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert,
   return BIEndProb < Likely;
 }
 
-static bool isSafeCheapLoadStore(const Instruction *I,
-                                 const TargetTransformInfo &TTI) {
-  // Not handle volatile or atomic.
-  if (auto *L = dyn_cast<LoadInst>(I)) {
-    if (!L->isSimple())
-      return false;
-  } else if (auto *S = dyn_cast<StoreInst>(I)) {
-    if (!S->isSimple())
-      return false;
-  } else
-    return false;
-
-  // llvm.masked.load/store use i32 for alignment while load/store use i64.
-  // That's why we have the alignment limitation.
-  // FIXME: Update the prototype of the intrinsics?
-  return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I)) &&
-         getLoadStoreAlignment(I) < Value::MaximumAlignment;
-}
-
 /// Speculate a conditional basic block flattening the CFG.
 ///
 /// Note that this is a very risky transform currently. Speculating
@@ -7436,7 +7493,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
     return requestResimplify();
 
   if (HoistCommon &&
-      hoistCommonCodeFromSuccessors(SI->getParent(), !Options.HoistCommonInsts))
+      hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
     return requestResimplify();
 
   return false;
@@ -7794,8 +7851,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
   // can hoist it up to the branching block.
   if (BI->getSuccessor(0)->getSinglePredecessor()) {
     if (BI->getSuccessor(1)->getSinglePredecessor()) {
-      if (HoistCommon && hoistCommonCodeFromSuccessors(
-                             BI->getParent(), !Options.HoistCommonInsts))
+      if (HoistCommon &&
+          hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
         return requestResimplify();
     } else {
       // If Successor #1 has multiple preds, we may be able to conditionally
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
index 047ca717da8009..87ff4ba2af9a41 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
@@ -278,21 +278,19 @@ if.false:                                    ; preds = %if.true, %entry
 }
 
 ;; Both of successor 0 and successor 1 have a single predecessor.
-;; TODO: Support transform for this case.
 define void @single_predecessor(ptr %p, ptr %q, i32 %a) {
 ; CHECK-LABEL: @single_predecessor(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
-; CHECK:       common.ret:
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[TOBOOL]] to <1 x i1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i1 [[TOBOOL]], true
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i1 [[TMP1]] to <1 x i1>
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 1>, ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q]], i32 4, <1 x i1> [[TMP2]], <1 x i32> poison)
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP5]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP2]])
 ; CHECK-NEXT:    ret void
-; CHECK:       if.end:
-; CHECK-NEXT:    store i32 1, ptr [[Q:%.*]], align 4
-; CHECK-NEXT:    br label [[COMMON_RET:%.*]]
-; CHECK:       if.then:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[Q]], align 4
-; CHECK-NEXT:    store i32 [[TMP0]], ptr [[P:%.*]], align 4
-; CHECK-NEXT:    br label [[COMMON_RET]]
 ;
 entry:
   %tobool = icmp ne i32 %a, 0
@@ -674,6 +672,34 @@ if.false:
   ret void
 }
 
+define void @diamondCFG(i32 %a, ptr %c, ptr %d) {
+; CHECK-LABEL: @diamondCFG(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i1 [[TOBOOL_NOT]] to <1 x i1>
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i1 [[TOBOOL_NOT]], true
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i1 [[TMP1]] to <1 x i1>
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr [[D:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32 [[A]] to <1 x i32>
+; CHECK-NEXT:    call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[C:%.*]], i32 4, <1 x i1> [[TMP2]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %tobool.not = icmp eq i32 %a, 0
+  br i1 %tobool.not, label %if.else, label %if.then
+
+if.then:                                          ; preds = %entry
+  store i32 %a, ptr %c, align 4
+  br label %if.end
+
+if.else:                                          ; preds = %entry
+  store i32 0, ptr %d, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  ret void
+}
+
 declare i32 @read_memory_only() readonly nounwind willreturn speculatable
 
 !llvm.dbg.cu = !{!0}



More information about the llvm-commits mailing list