[llvm] [X86, SimplifyCFG] Support hoisting load/store with conditional faulting (Part II) (PR #108812)
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Sun Nov 24 22:13:56 PST 2024
https://github.com/phoebewang updated https://github.com/llvm/llvm-project/pull/108812
>From 1077be1b780dfb905620c5b86adad4ed97ddd6eb Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Sat, 21 Sep 2024 20:24:34 +0800
Subject: [PATCH 01/10] [X86,SimplifyCFG] Support hoisting load/store with
conditional faulting (Part II)
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 42 ++++++++++++++---
.../X86/hoist-loads-stores-with-cf.ll | 46 +++++++++++++++----
2 files changed, 72 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 03db86a3baae7a..60e3b50b9b3534 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1663,18 +1663,29 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1,
static void hoistConditionalLoadsStores(
BranchInst *BI,
SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
- bool Invert) {
+ std::optional<bool> Invert) {
auto &Context = BI->getParent()->getContext();
auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
auto *Cond = BI->getOperand(0);
// Construct the condition if needed.
BasicBlock *BB = BI->getParent();
- IRBuilder<> Builder(SpeculatedConditionalLoadsStores.back());
- Value *Mask = Builder.CreateBitCast(
- Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
- VCondTy);
+ IRBuilder<> Builder(Invert ? SpeculatedConditionalLoadsStores.back() : BI);
+ Value *Mask = nullptr;
+ Value *Mask0 = nullptr;
+ Value *Mask1 = nullptr;
+ if (Invert) {
+ Mask = Builder.CreateBitCast(
+ *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
+ VCondTy);
+ } else {
+ Mask0 = Builder.CreateBitCast(
+ Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
+ Mask1 = Builder.CreateBitCast(Cond, VCondTy);
+ }
for (auto *I : SpeculatedConditionalLoadsStores) {
- IRBuilder<> Builder(I);
+ IRBuilder<> Builder(Invert ? I : BI);
+ if (!Invert)
+ Mask = I->getParent() == BI->getSuccessor(0) ? Mask1 : Mask0;
// We currently assume conditional faulting load/store is supported for
// scalar types only when creating new instructions. This can be easily
// extended for vector types in the future.
@@ -1771,6 +1782,25 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
return false;
+ auto *BI = dyn_cast<BranchInst>(TI);
+ if (BI && HoistLoadsStoresWithCondFaulting &&
+ Options.HoistLoadsStoresWithCondFaulting) {
+ SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
+ for (auto *Succ : successors(BB)) {
+ for (Instruction &I : drop_end(*Succ)) {
+ if (!isSafeCheapLoadStore(&I, TTI) ||
+ SpeculatedConditionalLoadsStores.size() ==
+ HoistLoadsStoresWithCondFaultingThreshold)
+ return false;
+ SpeculatedConditionalLoadsStores.push_back(&I);
+ }
+ }
+
+ if (!SpeculatedConditionalLoadsStores.empty())
+ hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
+ std::nullopt);
+ }
+
// The second of pair is a SkipFlags bitmask.
using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
SmallVector<SuccIterPair, 8> SuccIterPairs;
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
index 6ea0cf290ffc82..6a776abd6008d1 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
@@ -276,21 +276,19 @@ if.false: ; preds = %if.true, %entry
}
;; Both of successor 0 and successor 1 have a single predecessor.
-;; TODO: Support transform for this case.
define void @single_predecessor(ptr %p, ptr %q, i32 %a) {
; CHECK-LABEL: @single_predecessor(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
-; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
-; CHECK: common.ret:
+; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[TOBOOL]], true
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i1 [[TOBOOL]] to <1 x i1>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 1>, ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP2]])
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP5]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
; CHECK-NEXT: ret void
-; CHECK: if.end:
-; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4
-; CHECK-NEXT: br label [[COMMON_RET:%.*]]
-; CHECK: if.then:
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Q]], align 4
-; CHECK-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
-; CHECK-NEXT: br label [[COMMON_RET]]
;
entry:
%tobool = icmp ne i32 %a, 0
@@ -728,6 +726,34 @@ if.true:
ret i32 %res
}
+define void @diamondCFG(i32 %a, ptr %c, ptr %d) {
+; CHECK-LABEL: @diamondCFG(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[TOBOOL_NOT]], true
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i1 [[TOBOOL_NOT]] to <1 x i1>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr [[D:%.*]], i32 4, <1 x i1> [[TMP2]])
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[A]] to <1 x i32>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[C:%.*]], i32 4, <1 x i1> [[TMP1]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %tobool.not = icmp eq i32 %a, 0
+ br i1 %tobool.not, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ store i32 %a, ptr %c, align 4
+ br label %if.end
+
+if.else: ; preds = %entry
+ store i32 0, ptr %d, align 4
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ ret void
+}
+
declare i32 @read_memory_only() readonly nounwind willreturn speculatable
!llvm.dbg.cu = !{!0}
>From 9d73fdde331b875d4d210c90a811a4224080afd3 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Mon, 23 Sep 2024 17:45:41 +0800
Subject: [PATCH 02/10] Address review comments
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 26 +++++++++------
.../X86/hoist-loads-stores-with-cf.ll | 33 ++-----------------
2 files changed, 18 insertions(+), 41 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 60e3b50b9b3534..97080aa975147e 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1671,21 +1671,26 @@ static void hoistConditionalLoadsStores(
BasicBlock *BB = BI->getParent();
IRBuilder<> Builder(Invert ? SpeculatedConditionalLoadsStores.back() : BI);
Value *Mask = nullptr;
- Value *Mask0 = nullptr;
- Value *Mask1 = nullptr;
+ Value *MaskFalse = nullptr;
+ Value *MaskTrue = nullptr;
if (Invert) {
Mask = Builder.CreateBitCast(
*Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
VCondTy);
} else {
- Mask0 = Builder.CreateBitCast(
+ MaskFalse = Builder.CreateBitCast(
Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
- Mask1 = Builder.CreateBitCast(Cond, VCondTy);
+ MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
}
+ auto PeekThroughBitcasts = [](Value *V) {
+ while (auto *BitCast = dyn_cast<BitCastInst>(V))
+ V = BitCast->getOperand(0);
+ return V;
+ };
for (auto *I : SpeculatedConditionalLoadsStores) {
IRBuilder<> Builder(Invert ? I : BI);
- if (!Invert)
- Mask = I->getParent() == BI->getSuccessor(0) ? Mask1 : Mask0;
+ if (!Mask)
+ Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
// We currently assume conditional faulting load/store is supported for
// scalar types only when creating new instructions. This can be easily
// extended for vector types in the future.
@@ -1699,8 +1704,9 @@ static void hoistConditionalLoadsStores(
Value *PassThru = nullptr;
for (User *U : I->users())
if ((PN = dyn_cast<PHINode>(U))) {
- PassThru = Builder.CreateBitCast(PN->getIncomingValueForBlock(BB),
- FixedVectorType::get(Ty, 1));
+ PassThru = Builder.CreateBitCast(
+ PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
+ FixedVectorType::get(Ty, 1));
break;
}
MaskedLoadStore = Builder.CreateMaskedLoad(
@@ -1711,8 +1717,8 @@ static void hoistConditionalLoadsStores(
I->replaceAllUsesWith(NewLoadStore);
} else {
// Handle Store.
- auto *StoredVal =
- Builder.CreateBitCast(Op0, FixedVectorType::get(Op0->getType(), 1));
+ auto *StoredVal = Builder.CreateBitCast(
+ PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
MaskedLoadStore = Builder.CreateMaskedStore(
StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
}
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
index 6a776abd6008d1..d88366eb8116c2 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
@@ -284,10 +284,9 @@ define void @single_predecessor(ptr %p, ptr %q, i32 %a) {
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i1 [[TOBOOL]] to <1 x i1>
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 1>, ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP2]])
-; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q]], i32 4, <1 x i1> [[TMP2]], <1 x i32> poison)
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP5]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP2]])
; CHECK-NEXT: ret void
;
entry:
@@ -726,34 +725,6 @@ if.true:
ret i32 %res
}
-define void @diamondCFG(i32 %a, ptr %c, ptr %d) {
-; CHECK-LABEL: @diamondCFG(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[A:%.*]], 0
-; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[TOBOOL_NOT]], true
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i1 [[TOBOOL_NOT]] to <1 x i1>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr [[D:%.*]], i32 4, <1 x i1> [[TMP2]])
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[A]] to <1 x i32>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[C:%.*]], i32 4, <1 x i1> [[TMP1]])
-; CHECK-NEXT: ret void
-;
-entry:
- %tobool.not = icmp eq i32 %a, 0
- br i1 %tobool.not, label %if.else, label %if.then
-
-if.then: ; preds = %entry
- store i32 %a, ptr %c, align 4
- br label %if.end
-
-if.else: ; preds = %entry
- store i32 0, ptr %d, align 4
- br label %if.end
-
-if.end: ; preds = %if.else, %if.then
- ret void
-}
-
declare i32 @read_memory_only() readonly nounwind willreturn speculatable
!llvm.dbg.cu = !{!0}
>From 15e25fb9527527e479ef0987600c45081833bcb4 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Mon, 23 Sep 2024 21:58:43 +0800
Subject: [PATCH 03/10] Revert one change
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 2 +-
.../Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 97080aa975147e..97216d116b29c6 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1689,7 +1689,7 @@ static void hoistConditionalLoadsStores(
};
for (auto *I : SpeculatedConditionalLoadsStores) {
IRBuilder<> Builder(Invert ? I : BI);
- if (!Mask)
+ if (!Invert)
Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
// We currently assume conditional faulting load/store is supported for
// scalar types only when creating new instructions. This can be easily
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
index d88366eb8116c2..bb9c567b386252 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
@@ -284,9 +284,9 @@ define void @single_predecessor(ptr %p, ptr %q, i32 %a) {
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i1 [[TOBOOL]] to <1 x i1>
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 1>, ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP2]])
-; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q]], i32 4, <1 x i1> [[TMP2]], <1 x i32> poison)
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP2]])
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
; CHECK-NEXT: ret void
;
entry:
>From 40e21304b32fbc80359d8cc3d1448c0c859283b0 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Mon, 23 Sep 2024 22:25:00 +0800
Subject: [PATCH 04/10] Early return after hoist load/store
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 97216d116b29c6..a0aafc360ff8a8 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1660,7 +1660,7 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1,
/// \endcode
///
/// So we need to turn hoisted load/store into cload/cstore.
-static void hoistConditionalLoadsStores(
+static bool hoistConditionalLoadsStores(
BranchInst *BI,
SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
std::optional<bool> Invert) {
@@ -1742,6 +1742,7 @@ static void hoistConditionalLoadsStores(
MaskedLoadStore->copyMetadata(*I);
I->eraseFromParent();
}
+ return true;
}
static bool isSafeCheapLoadStore(const Instruction *I,
@@ -1803,8 +1804,8 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
}
if (!SpeculatedConditionalLoadsStores.empty())
- hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
- std::nullopt);
+ return hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
+ std::nullopt);
}
// The second of pair is a SkipFlags bitmask.
>From d6d50c4abf9636bd2f21b924193e0fa680f37ec6 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Wed, 25 Sep 2024 15:40:37 +0800
Subject: [PATCH 05/10] Limit TrueBB and FalseBB to have no more than 1 successor
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 26 ++++++++++++-------
.../X86/hoist-loads-stores-with-cf.ll | 9 ++++---
2 files changed, 21 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index a0aafc360ff8a8..91ae73fae0ef71 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1702,13 +1702,14 @@ static bool hoistConditionalLoadsStores(
auto *Ty = I->getType();
PHINode *PN = nullptr;
Value *PassThru = nullptr;
- for (User *U : I->users())
- if ((PN = dyn_cast<PHINode>(U))) {
- PassThru = Builder.CreateBitCast(
- PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
- FixedVectorType::get(Ty, 1));
- break;
- }
+ if (Invert)
+ for (User *U : I->users())
+ if ((PN = dyn_cast<PHINode>(U))) {
+ PassThru = Builder.CreateBitCast(
+ PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
+ FixedVectorType::get(Ty, 1));
+ break;
+ }
MaskedLoadStore = Builder.CreateMaskedLoad(
FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
@@ -1794,11 +1795,16 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
Options.HoistLoadsStoresWithCondFaulting) {
SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
for (auto *Succ : successors(BB)) {
- for (Instruction &I : drop_end(*Succ)) {
- if (!isSafeCheapLoadStore(&I, TTI) ||
+ for (Instruction &I : *Succ) {
+ if (I.isTerminator()) {
+ if (I.getNumSuccessors() > 1)
+ return false;
+ continue;
+ } else if (!isSafeCheapLoadStore(&I, TTI) ||
SpeculatedConditionalLoadsStores.size() ==
- HoistLoadsStoresWithCondFaultingThreshold)
+ HoistLoadsStoresWithCondFaultingThreshold) {
return false;
+ }
SpeculatedConditionalLoadsStores.push_back(&I);
}
}
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
index bb9c567b386252..c888b8182bdc44 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
@@ -276,7 +276,7 @@ if.false: ; preds = %if.true, %entry
}
;; Both of successor 0 and successor 1 have a single predecessor.
-define void @single_predecessor(ptr %p, ptr %q, i32 %a) {
+define i32 @single_predecessor(ptr %p, ptr %q, i32 %a) {
; CHECK-LABEL: @single_predecessor(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
@@ -287,7 +287,8 @@ define void @single_predecessor(ptr %p, ptr %q, i32 %a) {
; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
-; CHECK-NEXT: ret void
+; CHECK-NEXT: [[DOT:%.*]] = select i1 [[TOBOOL]], i32 2, i32 3
+; CHECK-NEXT: ret i32 [[DOT]]
;
entry:
%tobool = icmp ne i32 %a, 0
@@ -295,12 +296,12 @@ entry:
if.end:
store i32 1, ptr %q
- ret void
+ ret i32 2
if.then:
%0 = load i32, ptr %q
store i32 %0, ptr %p
- ret void
+ ret i32 3
}
;; Hoist 6 stores.
>From de28ed92a001488b47fe8723c6182566a0828360 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Wed, 25 Sep 2024 15:58:04 +0800
Subject: [PATCH 06/10] clang-format
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 91ae73fae0ef71..066b899b57011a 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1801,8 +1801,8 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
return false;
continue;
} else if (!isSafeCheapLoadStore(&I, TTI) ||
- SpeculatedConditionalLoadsStores.size() ==
- HoistLoadsStoresWithCondFaultingThreshold) {
+ SpeculatedConditionalLoadsStores.size() ==
+ HoistLoadsStoresWithCondFaultingThreshold) {
return false;
}
SpeculatedConditionalLoadsStores.push_back(&I);
>From 8ec9409e9e46439bc4c29a846ecd81e9a69dc4e5 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Thu, 26 Sep 2024 17:24:17 +0800
Subject: [PATCH 07/10] Use `Invert.has_value()`
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 066b899b57011a..5d6a348c6dd943 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1669,11 +1669,12 @@ static bool hoistConditionalLoadsStores(
auto *Cond = BI->getOperand(0);
// Construct the condition if needed.
BasicBlock *BB = BI->getParent();
- IRBuilder<> Builder(Invert ? SpeculatedConditionalLoadsStores.back() : BI);
+ IRBuilder<> Builder(
+ Invert.has_value() ? SpeculatedConditionalLoadsStores.back() : BI);
Value *Mask = nullptr;
Value *MaskFalse = nullptr;
Value *MaskTrue = nullptr;
- if (Invert) {
+ if (Invert.has_value()) {
Mask = Builder.CreateBitCast(
*Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
VCondTy);
@@ -1688,8 +1689,8 @@ static bool hoistConditionalLoadsStores(
return V;
};
for (auto *I : SpeculatedConditionalLoadsStores) {
- IRBuilder<> Builder(Invert ? I : BI);
- if (!Invert)
+ IRBuilder<> Builder(Invert.has_value() ? I : BI);
+ if (!Invert.has_value())
Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
// We currently assume conditional faulting load/store is supported for
// scalar types only when creating new instructions. This can be easily
@@ -1702,7 +1703,7 @@ static bool hoistConditionalLoadsStores(
auto *Ty = I->getType();
PHINode *PN = nullptr;
Value *PassThru = nullptr;
- if (Invert)
+ if (Invert.has_value())
for (User *U : I->users())
if ((PN = dyn_cast<PHINode>(U))) {
PassThru = Builder.CreateBitCast(
>From fc7df9a6b8ccb544bc31f4c30a2ad690275acf78 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Thu, 26 Sep 2024 21:01:45 +0800
Subject: [PATCH 08/10] Address review comment
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 51 ++++++++++++-----------
1 file changed, 27 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 5d6a348c6dd943..621633eab8637c 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1791,30 +1791,6 @@ bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
return false;
- auto *BI = dyn_cast<BranchInst>(TI);
- if (BI && HoistLoadsStoresWithCondFaulting &&
- Options.HoistLoadsStoresWithCondFaulting) {
- SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
- for (auto *Succ : successors(BB)) {
- for (Instruction &I : *Succ) {
- if (I.isTerminator()) {
- if (I.getNumSuccessors() > 1)
- return false;
- continue;
- } else if (!isSafeCheapLoadStore(&I, TTI) ||
- SpeculatedConditionalLoadsStores.size() ==
- HoistLoadsStoresWithCondFaultingThreshold) {
- return false;
- }
- SpeculatedConditionalLoadsStores.push_back(&I);
- }
- }
-
- if (!SpeculatedConditionalLoadsStores.empty())
- return hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
- std::nullopt);
- }
-
// The second of pair is a SkipFlags bitmask.
using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
SmallVector<SuccIterPair, 8> SuccIterPairs;
@@ -7859,6 +7835,33 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (HoistCommon &&
hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
return requestResimplify();
+
+ if (BI && HoistLoadsStoresWithCondFaulting &&
+ Options.HoistLoadsStoresWithCondFaulting) {
+ SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
+ auto CanSpeculateConditionalLoadsStores = [&]() {
+ for (auto *Succ : successors(BB)) {
+ for (Instruction &I : *Succ) {
+ if (I.isTerminator()) {
+ if (I.getNumSuccessors() > 1)
+ return false;
+ continue;
+ } else if (!isSafeCheapLoadStore(&I, TTI) ||
+ SpeculatedConditionalLoadsStores.size() ==
+ HoistLoadsStoresWithCondFaultingThreshold) {
+ return false;
+ }
+ SpeculatedConditionalLoadsStores.push_back(&I);
+ }
+ }
+ return !SpeculatedConditionalLoadsStores.empty();
+ };
+
+ if (CanSpeculateConditionalLoadsStores() &&
+ hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
+ std::nullopt))
+ return requestResimplify();
+ }
} else {
// If Successor #1 has multiple preds, we may be able to conditionally
// execute Successor #0 if it branches to Successor #1.
>From dfe6cc6986d7f46350b087b3d46aeb6bed1fa6e9 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Fri, 27 Sep 2024 16:20:18 +0800
Subject: [PATCH 09/10] Address review comments
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 21 +++++++----
.../X86/hoist-loads-stores-with-cf.ll | 37 +++++++++++++++++++
2 files changed, 50 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 621633eab8637c..8e0b5c5a578ba7 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1660,7 +1660,7 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1,
/// \endcode
///
/// So we need to turn hoisted load/store into cload/cstore.
-static bool hoistConditionalLoadsStores(
+static void hoistConditionalLoadsStores(
BranchInst *BI,
SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
std::optional<bool> Invert) {
@@ -1744,7 +1744,6 @@ static bool hoistConditionalLoadsStores(
MaskedLoadStore->copyMetadata(*I);
I->eraseFromParent();
}
- return true;
}
static bool isSafeCheapLoadStore(const Instruction *I,
@@ -3133,7 +3132,8 @@ static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
return HaveRewritablePHIs;
}
-static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert,
+static bool isProfitableToSpeculate(const BranchInst *BI,
+ std::optional<bool> Invert,
const TargetTransformInfo &TTI) {
// If the branch is non-unpredictable, and is predicted to *not* branch to
// the `then` block, then avoid speculating it.
@@ -3144,7 +3144,10 @@ static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert,
if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
return true;
- uint64_t EndWeight = Invert ? TWeight : FWeight;
+ if (!Invert.has_value())
+ return false;
+
+ uint64_t EndWeight = *Invert ? TWeight : FWeight;
BranchProbability BIEndProb =
BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
BranchProbability Likely = TTI.getPredictableBranchThreshold();
@@ -7837,7 +7840,8 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
return requestResimplify();
if (BI && HoistLoadsStoresWithCondFaulting &&
- Options.HoistLoadsStoresWithCondFaulting) {
+ Options.HoistLoadsStoresWithCondFaulting &&
+ isProfitableToSpeculate(BI, std::nullopt, TTI)) {
SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
auto CanSpeculateConditionalLoadsStores = [&]() {
for (auto *Succ : successors(BB)) {
@@ -7857,10 +7861,11 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
return !SpeculatedConditionalLoadsStores.empty();
};
- if (CanSpeculateConditionalLoadsStores() &&
- hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
- std::nullopt))
+ if (CanSpeculateConditionalLoadsStores()) {
+ hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
+ std::nullopt);
return requestResimplify();
+ }
}
} else {
// If Successor #1 has multiple preds, we may be able to conditionally
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
index c888b8182bdc44..b4a77fa5563cda 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
@@ -726,6 +726,43 @@ if.true:
ret i32 %res
}
+define i32 @multi_successors(i1 %c1, i32 %c2, ptr %p) {
+; CHECK-LABEL: @multi_successors(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[C1:%.*]], label [[ENTRY_IF:%.*]], label [[COMMON_RET:%.*]]
+; CHECK: entry.if:
+; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[P:%.*]], align 4
+; CHECK-NEXT: switch i32 [[C2:%.*]], label [[COMMON_RET]] [
+; CHECK-NEXT: i32 0, label [[SW_BB:%.*]]
+; CHECK-NEXT: i32 1, label [[SW_BB]]
+; CHECK-NEXT: ]
+; CHECK: common.ret:
+; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VAL]], [[ENTRY_IF]] ], [ 0, [[SW_BB]] ]
+; CHECK-NEXT: ret i32 [[COMMON_RET_OP]]
+; CHECK: sw.bb:
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+entry:
+ br i1 %c1, label %entry.if, label %entry.else
+
+entry.if: ; preds = %entry
+ %val = load i32, ptr %p, align 4
+ switch i32 %c2, label %return [
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb
+ ]
+
+entry.else: ; preds = %entry
+ ret i32 0
+
+sw.bb: ; preds = %entry.if, %entry.if
+ br label %return
+
+return: ; preds = %sw.bb, %entry.if
+ %ret = phi i32 [ %val, %entry.if ], [ 0, %sw.bb ]
+ ret i32 %ret
+}
+
declare i32 @read_memory_only() readonly nounwind willreturn speculatable
!llvm.dbg.cu = !{!0}
>From b1bda56b3f6514fb72bae2227270aeb7ff87a081 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Mon, 25 Nov 2024 14:13:33 +0800
Subject: [PATCH 10/10] Address review comments
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 5 +++++
.../Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll | 5 +++--
2 files changed, 8 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 2f02591eb7f9c6..52a678e3f0bcbc 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1661,6 +1661,11 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1,
/// \endcode
///
/// So we need to turn hoisted load/store into cload/cstore.
+///
+/// \param BI The branch instruction.
+/// \param SpeculatedConditionalLoadsStores The load/store instructions that
+/// will be speculated.
+/// \param Invert indicates whether FalseBB is speculated. Only used in triangle CFG.
static void hoistConditionalLoadsStores(
BranchInst *BI,
SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
index 1a826fbda91d95..5c9058b4823202 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
@@ -757,8 +757,9 @@ if.true:
ret i32 %res
}
-define i32 @multi_successors(i1 %c1, i32 %c2, ptr %p) {
-; CHECK-LABEL: @multi_successors(
+;; Do not transform if either BB has multiple successors.
+define i32 @not_multi_successors(i1 %c1, i32 %c2, ptr %p) {
+; CHECK-LABEL: @not_multi_successors(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[C1:%.*]], label [[ENTRY_IF:%.*]], label [[COMMON_RET:%.*]]
; CHECK: entry.if:
More information about the llvm-commits
mailing list