[llvm] [X86, SimplifyCFG] Support hoisting load/store with conditional faulting (Part II) (PR #108812)
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 16 03:10:53 PDT 2024
https://github.com/phoebewang created https://github.com/llvm/llvm-project/pull/108812
This is a follow-up to #96878 to support hoisting loads/stores for a diamond CFG.
```
void test(int a, int *c, int *d) {
  if (a)
    *c = a;
  else
    *d = a;
}
```
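For reference, a minimal sketch of the IR this transform is expected to produce for the diamond above, mirroring the `@diamondCFG` test added in this patch (SSA value names are illustrative, not what the pass emits): the store in each arm is rewritten into an `llvm.masked.store` call guarded by one of two complementary `<1 x i1>` masks built from the branch condition, so the diamond flattens into straight-line code.

```llvm
declare void @llvm.masked.store.v1i32.p0(<1 x i32>, ptr, i32 immarg, <1 x i1>)

define void @diamondCFG(i32 %a, ptr %c, ptr %d) {
entry:
  %tobool.not = icmp eq i32 %a, 0
  ; Mask for the "else" arm (condition is false, i.e. %a == 0).
  %mask.else = bitcast i1 %tobool.not to <1 x i1>
  ; Complementary mask for the "then" arm.
  %not = xor i1 %tobool.not, true
  %mask.then = bitcast i1 %not to <1 x i1>
  ; else arm: *d = 0, executed only when %a == 0.
  call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr %d, i32 4, <1 x i1> %mask.else)
  ; then arm: *c = a, executed only when %a != 0.
  %a.vec = bitcast i32 %a to <1 x i32>
  call void @llvm.masked.store.v1i32.p0(<1 x i32> %a.vec, ptr %c, i32 4, <1 x i1> %mask.then)
  ret void
}
```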
From 3a4796ccffe6d3ede7a4e19b7df55e9f88eab2a0 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Mon, 16 Sep 2024 17:52:53 +0800
Subject: [PATCH] [X86,SimplifyCFG] Support hoisting load/store with
conditional faulting (Part II)
This is a follow-up to #96878 to support hoisting loads/stores for a diamond CFG.
```
void test(int a, int *c, int *d) {
  if (a)
    *c = a;
  else
    *d = a;
}
```
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 107 ++++++++++++++----
.../X86/hoist-loads-stores-with-cf.ll | 46 ++++++--
2 files changed, 118 insertions(+), 35 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index f9db996cdc3583..420c59789813c1 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -283,7 +283,7 @@ class SimplifyCFGOpt {
bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
IRBuilder<> &Builder);
- bool hoistCommonCodeFromSuccessors(BasicBlock *BB, bool EqTermsOnly);
+ bool hoistCommonCodeFromSuccessors(Instruction *TI, bool EqTermsOnly);
bool hoistSuccIdenticalTerminatorToSwitchOrIf(
Instruction *TI, Instruction *I1,
SmallVectorImpl<Instruction *> &OtherSuccTIs);
@@ -1615,12 +1615,31 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1,
return false;
}
+static bool isSafeCheapLoadStore(const Instruction *I,
+ const TargetTransformInfo &TTI) {
+ // Not handle volatile or atomic.
+ if (auto *L = dyn_cast<LoadInst>(I)) {
+ if (!L->isSimple())
+ return false;
+ } else if (auto *S = dyn_cast<StoreInst>(I)) {
+ if (!S->isSimple())
+ return false;
+ } else
+ return false;
+
+ // llvm.masked.load/store use i32 for alignment while load/store use i64.
+ // That's why we have the alignment limitation.
+ // FIXME: Update the prototype of the intrinsics?
+ return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I)) &&
+ getLoadStoreAlignment(I) < Value::MaximumAlignment;
+}
+
/// Hoist any common code in the successor blocks up into the block. This
/// function guarantees that BB dominates all successors. If EqTermsOnly is
/// given, only perform hoisting in case both blocks only contain a terminator.
/// In that case, only the original BI will be replaced and selects for PHIs are
/// added.
-bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
+bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
bool EqTermsOnly) {
// This does very trivial matching, with limited scanning, to find identical
// instructions in the two blocks. In particular, we don't want to get into
@@ -1628,6 +1647,7 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
// such, we currently just scan for obviously identical instructions in an
// identical order, possibly separated by the same number of non-identical
// instructions.
+ BasicBlock *BB = TI->getParent();
unsigned int SuccSize = succ_size(BB);
if (SuccSize < 2)
return false;
@@ -1639,7 +1659,63 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
return false;
- auto *TI = BB->getTerminator();
+ auto *BI = dyn_cast<BranchInst>(TI);
+ if (BI && HoistLoadsStoresWithCondFaulting &&
+ Options.HoistLoadsStoresWithCondFaulting) {
+ SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
+ for (auto *Succ : successors(BB)) {
+ for (Instruction &I : drop_end(*Succ)) {
+ if (!isSafeCheapLoadStore(&I, TTI) ||
+ SpeculatedConditionalLoadsStores.size() ==
+ HoistLoadsStoresWithCondFaultingThreshold)
+ return false;
+ SpeculatedConditionalLoadsStores.push_back(&I);
+ }
+ }
+
+ // TODO: Move below code to a function to share with #96878.
+ if (SpeculatedConditionalLoadsStores.empty())
+ return false;
+
+ auto &Context = BI->getParent()->getContext();
+ auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
+ auto *Cond = BI->getOperand(0);
+ IRBuilder<> Builder(BI);
+ Value *Mask1 = Builder.CreateBitCast(Cond, VCondTy);
+ Value *Mask0 = Builder.CreateBitCast(
+ Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
+ for (auto *I : SpeculatedConditionalLoadsStores) {
+ Value *Mask = I->getParent() == BI->getSuccessor(0) ? Mask1 : Mask0;
+ assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
+ auto *Op0 = I->getOperand(0);
+ Instruction *MaskedLoadStore = nullptr;
+ if (auto *LI = dyn_cast<LoadInst>(I)) {
+ // Handle Load.
+ auto *Ty = I->getType();
+ MaskedLoadStore = Builder.CreateMaskedLoad(FixedVectorType::get(Ty, 1),
+ Op0, LI->getAlign(), Mask);
+ I->replaceAllUsesWith(Builder.CreateBitCast(MaskedLoadStore, Ty));
+ } else {
+ // Handle Store.
+ auto *StoredVal =
+ Builder.CreateBitCast(Op0, FixedVectorType::get(Op0->getType(), 1));
+ MaskedLoadStore = Builder.CreateMaskedStore(
+ StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
+ }
+ I->dropUBImplyingAttrsAndUnknownMetadata(
+ {LLVMContext::MD_range, LLVMContext::MD_annotation});
+ // FIXME: DIAssignID is not supported for masked store yet.
+ // (Verifier::visitDIAssignIDMetadata)
+ at::deleteAssignmentMarkers(I);
+ I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
+ return Node->getMetadataID() == Metadata::DIAssignIDKind;
+ });
+ MaskedLoadStore->copyMetadata(*I);
+ I->eraseFromParent();
+ }
+
+ return true;
+ }
// The second of pair is a SkipFlags bitmask.
using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
@@ -2998,25 +3074,6 @@ static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert,
return BIEndProb < Likely;
}
-static bool isSafeCheapLoadStore(const Instruction *I,
- const TargetTransformInfo &TTI) {
- // Not handle volatile or atomic.
- if (auto *L = dyn_cast<LoadInst>(I)) {
- if (!L->isSimple())
- return false;
- } else if (auto *S = dyn_cast<StoreInst>(I)) {
- if (!S->isSimple())
- return false;
- } else
- return false;
-
- // llvm.masked.load/store use i32 for alignment while load/store use i64.
- // That's why we have the alignment limitation.
- // FIXME: Update the prototype of the intrinsics?
- return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I)) &&
- getLoadStoreAlignment(I) < Value::MaximumAlignment;
-}
-
/// Speculate a conditional basic block flattening the CFG.
///
/// Note that this is a very risky transform currently. Speculating
@@ -7436,7 +7493,7 @@ bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
return requestResimplify();
if (HoistCommon &&
- hoistCommonCodeFromSuccessors(SI->getParent(), !Options.HoistCommonInsts))
+ hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
return requestResimplify();
return false;
@@ -7794,8 +7851,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
// can hoist it up to the branching block.
if (BI->getSuccessor(0)->getSinglePredecessor()) {
if (BI->getSuccessor(1)->getSinglePredecessor()) {
- if (HoistCommon && hoistCommonCodeFromSuccessors(
- BI->getParent(), !Options.HoistCommonInsts))
+ if (HoistCommon &&
+ hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
return requestResimplify();
} else {
// If Successor #1 has multiple preds, we may be able to conditionally
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
index 047ca717da8009..87ff4ba2af9a41 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
@@ -278,21 +278,19 @@ if.false: ; preds = %if.true, %entry
}
;; Both of successor 0 and successor 1 have a single predecessor.
-;; TODO: Support transform for this case.
define void @single_predecessor(ptr %p, ptr %q, i32 %a) {
; CHECK-LABEL: @single_predecessor(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
-; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
-; CHECK: common.ret:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[TOBOOL]] to <1 x i1>
+; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[TOBOOL]], true
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i1 [[TMP1]] to <1 x i1>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 1>, ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q]], i32 4, <1 x i1> [[TMP2]], <1 x i32> poison)
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP5]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP2]])
; CHECK-NEXT: ret void
-; CHECK: if.end:
-; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4
-; CHECK-NEXT: br label [[COMMON_RET:%.*]]
-; CHECK: if.then:
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Q]], align 4
-; CHECK-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
-; CHECK-NEXT: br label [[COMMON_RET]]
;
entry:
%tobool = icmp ne i32 %a, 0
@@ -674,6 +672,34 @@ if.false:
ret void
}
+define void @diamondCFG(i32 %a, ptr %c, ptr %d) {
+; CHECK-LABEL: @diamondCFG(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[TOBOOL_NOT]] to <1 x i1>
+; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[TOBOOL_NOT]], true
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i1 [[TMP1]] to <1 x i1>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr [[D:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[A]] to <1 x i32>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[C:%.*]], i32 4, <1 x i1> [[TMP2]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %tobool.not = icmp eq i32 %a, 0
+ br i1 %tobool.not, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ store i32 %a, ptr %c, align 4
+ br label %if.end
+
+if.else: ; preds = %entry
+ store i32 0, ptr %d, align 4
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ ret void
+}
+
declare i32 @read_memory_only() readonly nounwind willreturn speculatable
!llvm.dbg.cu = !{!0}