[llvm] 369be31 - [X86, SimplifyCFG] Support conditional faulting load or store only (#132032)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 21 06:19:50 PDT 2025
Author: Phoebe Wang
Date: 2025-03-21T21:19:46+08:00
New Revision: 369be311a7b8344699d12719a8fa557fe8934e46
URL: https://github.com/llvm/llvm-project/commit/369be311a7b8344699d12719a8fa557fe8934e46
DIFF: https://github.com/llvm/llvm-project/commit/369be311a7b8344699d12719a8fa557fe8934e46.diff
LOG: [X86,SimplifyCFG] Support conditional faulting load or store only (#132032)
This is to fix a bug when a target only supports conditional faulting
loads; see the test case hoist_store_without_cstore.
Split `-simplifycfg-hoist-loads-stores-with-cond-faulting` into
`-simplifycfg-hoist-loads-with-cond-faulting` and
`-simplifycfg-hoist-stores-with-cond-faulting` to control conditional
faulting load and store respectively.
Added:
Modified:
llvm/lib/Transforms/Utils/SimplifyCFG.cpp
llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 2de966e00542d..02f1d08759129 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -120,11 +120,13 @@ static cl::opt<bool>
HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
cl::desc("Hoist common instructions up to the parent block"));
-static cl::opt<bool> HoistLoadsStoresWithCondFaulting(
- "simplifycfg-hoist-loads-stores-with-cond-faulting", cl::Hidden,
- cl::init(true),
- cl::desc("Hoist loads/stores if the target supports "
- "conditional faulting"));
+static cl::opt<bool> HoistLoadsWithCondFaulting(
+ "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true),
+ cl::desc("Hoist loads if the target supports conditional faulting"));
+
+static cl::opt<bool> HoistStoresWithCondFaulting(
+ "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true),
+ cl::desc("Hoist stores if the target supports conditional faulting"));
static cl::opt<unsigned> HoistLoadsStoresWithCondFaultingThreshold(
"hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
@@ -1682,22 +1684,22 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1,
static void hoistConditionalLoadsStores(
BranchInst *BI,
SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
- std::optional<bool> Invert) {
+ std::optional<bool> Invert, Instruction *Sel) {
auto &Context = BI->getParent()->getContext();
auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
auto *Cond = BI->getOperand(0);
// Construct the condition if needed.
BasicBlock *BB = BI->getParent();
- IRBuilder<> Builder(
- Invert.has_value() ? SpeculatedConditionalLoadsStores.back() : BI);
Value *Mask = nullptr;
Value *MaskFalse = nullptr;
Value *MaskTrue = nullptr;
if (Invert.has_value()) {
+ IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
Mask = Builder.CreateBitCast(
*Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
VCondTy);
} else {
+ IRBuilder<> Builder(BI);
MaskFalse = Builder.CreateBitCast(
Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
@@ -1723,13 +1725,20 @@ static void hoistConditionalLoadsStores(
PHINode *PN = nullptr;
Value *PassThru = nullptr;
if (Invert.has_value())
- for (User *U : I->users())
+ for (User *U : I->users()) {
if ((PN = dyn_cast<PHINode>(U))) {
PassThru = Builder.CreateBitCast(
PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
FixedVectorType::get(Ty, 1));
- break;
+ } else if (auto *Ins = cast<Instruction>(U);
+ Sel && Ins->getParent() == BB) {
+ // This happens when store or/and a speculative instruction between
+ // load and store were hoisted to the BB. Make sure the masked load
+ // inserted before its use.
+ // We assume there's one of such use.
+ Builder.SetInsertPoint(Ins);
}
+ }
MaskedLoadStore = Builder.CreateMaskedLoad(
FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
@@ -1770,10 +1779,10 @@ static bool isSafeCheapLoadStore(const Instruction *I,
// Not handle volatile or atomic.
bool IsStore = false;
if (auto *L = dyn_cast<LoadInst>(I)) {
- if (!L->isSimple())
+ if (!L->isSimple() || !HoistLoadsWithCondFaulting)
return false;
} else if (auto *S = dyn_cast<StoreInst>(I)) {
- if (!S->isSimple())
+ if (!S->isSimple() || !HoistStoresWithCondFaulting)
return false;
IsStore = true;
} else
@@ -3214,8 +3223,7 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
unsigned SpeculatedInstructions = 0;
- bool HoistLoadsStores = HoistLoadsStoresWithCondFaulting &&
- Options.HoistLoadsStoresWithCondFaulting;
+ bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
Value *SpeculatedStoreValue = nullptr;
StoreInst *SpeculatedStore = nullptr;
@@ -3310,6 +3318,7 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
// If we get here, we can hoist the instruction and if-convert.
LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
+ Instruction *Sel = nullptr;
// Insert a select of the value of the speculated store.
if (SpeculatedStoreValue) {
IRBuilder<NoFolder> Builder(BI);
@@ -3320,6 +3329,7 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
std::swap(TrueV, FalseV);
Value *S = Builder.CreateSelect(
BrCond, TrueV, FalseV, "spec.store.select", BI);
+ Sel = cast<Instruction>(S);
SpeculatedStore->setOperand(0, S);
SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
SpeculatedStore->getDebugLoc());
@@ -3392,7 +3402,8 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
std::prev(ThenBB->end()));
if (!SpeculatedConditionalLoadsStores.empty())
- hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert);
+ hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
+ Sel);
// Insert selects and rewrite the PHI operands.
IRBuilder<NoFolder> Builder(BI);
@@ -8020,8 +8031,7 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
return requestResimplify();
- if (BI && HoistLoadsStoresWithCondFaulting &&
- Options.HoistLoadsStoresWithCondFaulting &&
+ if (BI && Options.HoistLoadsStoresWithCondFaulting &&
isProfitableToSpeculate(BI, std::nullopt, TTI)) {
SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
auto CanSpeculateConditionalLoadsStores = [&]() {
@@ -8044,7 +8054,7 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (CanSpeculateConditionalLoadsStores()) {
hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
- std::nullopt);
+ std::nullopt, nullptr);
return requestResimplify();
}
}
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
index 5c9058b482320..100806612dffc 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
@@ -1,24 +1,41 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s --check-prefixes=CHECK,LOADSTORE
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -simplifycfg-hoist-loads-with-cond-faulting=false -S | FileCheck %s --check-prefixes=CHECK,NONE,STOREONLY
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -simplifycfg-hoist-stores-with-cond-faulting=false -S | FileCheck %s --check-prefixes=CHECK,NONE,LOADONLY
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -simplifycfg-hoist-stores-with-cond-faulting=false -simplifycfg-hoist-loads-with-cond-faulting=false -S | FileCheck %s --check-prefixes=CHECK,NONE,NONEONLY
;; Basic case: check masked.load/store is generated for i16/i32/i64.
define void @basic(i1 %cond, ptr %b, ptr %p, ptr %q) {
-; CHECK-LABEL: @basic(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
-; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
-; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
-; CHECK-NEXT: [[TMP5:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[B:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> poison)
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
-; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 2, <1 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP8]], ptr [[P]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP6]] to <1 x i64>
-; CHECK-NEXT: call void @llvm.masked.store.v1i64.p0(<1 x i64> [[TMP9]], ptr [[Q]], i32 8, <1 x i1> [[TMP0]])
-; CHECK-NEXT: ret void
+; LOADSTORE-LABEL: @basic(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
+; LOADSTORE-NEXT: [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
+; LOADSTORE-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
+; LOADSTORE-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
+; LOADSTORE-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
+; LOADSTORE-NEXT: [[TMP5:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[B:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> poison)
+; LOADSTORE-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
+; LOADSTORE-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 2, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP8]], ptr [[P]], i32 4, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP6]] to <1 x i64>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i64.p0(<1 x i64> [[TMP9]], ptr [[Q]], i32 8, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: ret void
+;
+; NONE-LABEL: @basic(
+; NONE-NEXT: entry:
+; NONE-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_END:%.*]]
+; NONE: if.true:
+; NONE-NEXT: [[TMP0:%.*]] = load i16, ptr [[P:%.*]], align 2
+; NONE-NEXT: [[TMP1:%.*]] = load i32, ptr [[Q:%.*]], align 4
+; NONE-NEXT: [[TMP2:%.*]] = load i64, ptr [[B:%.*]], align 8
+; NONE-NEXT: store i16 [[TMP0]], ptr [[B]], align 2
+; NONE-NEXT: store i32 [[TMP1]], ptr [[P]], align 4
+; NONE-NEXT: store i64 [[TMP2]], ptr [[Q]], align 8
+; NONE-NEXT: br label [[IF_END]]
+; NONE: if.end:
+; NONE-NEXT: ret void
;
entry:
br i1 %cond, label %if.true, label %if.false
@@ -41,16 +58,27 @@ if.end:
;; Successor 1 branches to successor 0.
define void @succ1to0(ptr %p, ptr %q, i32 %a) {
-; CHECK-LABEL: @succ1to0(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
-; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[TOBOOL]], true
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
-; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to <1 x i32>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP4]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
-; CHECK-NEXT: ret void
+; LOADSTORE-LABEL: @succ1to0(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
+; LOADSTORE-NEXT: [[TMP0:%.*]] = xor i1 [[TOBOOL]], true
+; LOADSTORE-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; LOADSTORE-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
+; LOADSTORE-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
+; LOADSTORE-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to <1 x i32>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP4]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
+; LOADSTORE-NEXT: ret void
+;
+; NONE-LABEL: @succ1to0(
+; NONE-NEXT: entry:
+; NONE-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
+; NONE-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; NONE: if.end:
+; NONE-NEXT: ret void
+; NONE: if.then:
+; NONE-NEXT: [[TMP0:%.*]] = load i32, ptr [[Q:%.*]], align 4
+; NONE-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
+; NONE-NEXT: br label [[IF_END]]
;
entry:
%tobool = icmp ne i32 %a, 0
@@ -67,14 +95,45 @@ if.then:
;; Successor 1 branches to successor 0 and there is a phi node.
define i32 @succ1to0_phi(ptr %p) {
-; CHECK-LABEL: @succ1to0_phi(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
-; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
-; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
-; CHECK-NEXT: ret i32 [[TMP3]]
+; LOADSTORE-LABEL: @succ1to0_phi(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; LOADSTORE-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
+; LOADSTORE-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; LOADSTORE-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
+; LOADSTORE-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
+; LOADSTORE-NEXT: ret i32 [[TMP3]]
+;
+; STOREONLY-LABEL: @succ1to0_phi(
+; STOREONLY-NEXT: entry:
+; STOREONLY-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; STOREONLY-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; STOREONLY: if.false:
+; STOREONLY-NEXT: [[TMP0:%.*]] = load i32, ptr [[P]], align 4
+; STOREONLY-NEXT: br label [[IF_TRUE]]
+; STOREONLY: if.true:
+; STOREONLY-NEXT: [[RES:%.*]] = phi i32 [ [[TMP0]], [[IF_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; STOREONLY-NEXT: ret i32 [[RES]]
+;
+; LOADONLY-LABEL: @succ1to0_phi(
+; LOADONLY-NEXT: entry:
+; LOADONLY-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; LOADONLY-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
+; LOADONLY-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; LOADONLY-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
+; LOADONLY-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
+; LOADONLY-NEXT: ret i32 [[TMP3]]
+;
+; NONEONLY-LABEL: @succ1to0_phi(
+; NONEONLY-NEXT: entry:
+; NONEONLY-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; NONEONLY-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; NONEONLY: if.false:
+; NONEONLY-NEXT: [[TMP0:%.*]] = load i32, ptr [[P]], align 4
+; NONEONLY-NEXT: br label [[IF_TRUE]]
+; NONEONLY: if.true:
+; NONEONLY-NEXT: [[RES:%.*]] = phi i32 [ [[TMP0]], [[IF_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; NONEONLY-NEXT: ret i32 [[RES]]
;
entry:
%cond = icmp eq ptr %p, null
@@ -91,16 +150,28 @@ if.true:
;; Successor 0 branches to successor 1.
define void @succ0to1(i32 %a, ptr %b, ptr %p, ptr %q) {
-; CHECK-LABEL: @succ0to1(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
-; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i32> [[TMP1]] to i32
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <1 x i32>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4
-; CHECK-NEXT: ret void
+; LOADSTORE-LABEL: @succ0to1(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; LOADSTORE-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; LOADSTORE-NEXT: [[TMP1:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
+; LOADSTORE-NEXT: [[TMP2:%.*]] = bitcast <1 x i32> [[TMP1]] to i32
+; LOADSTORE-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <1 x i32>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: store i32 1, ptr [[Q:%.*]], align 4
+; LOADSTORE-NEXT: ret void
+;
+; NONE-LABEL: @succ0to1(
+; NONE-NEXT: entry:
+; NONE-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; NONE-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; NONE: if.false:
+; NONE-NEXT: store i32 1, ptr [[Q:%.*]], align 4
+; NONE-NEXT: ret void
+; NONE: if.true:
+; NONE-NEXT: [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
+; NONE-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
+; NONE-NEXT: br label [[IF_FALSE]]
;
entry:
%cond = icmp eq i32 %a, 0
@@ -121,16 +192,29 @@ if.end:
;; Load after store can be hoisted.
define i64 @load_after_store(i32 %a, ptr %b, ptr %p) {
-; CHECK-LABEL: @load_after_store(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
-; CHECK-NEXT: [[ZEXT:%.*]] = zext i16 [[TMP2]] to i64
-; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], i64 [[ZEXT]], i64 0
-; CHECK-NEXT: ret i64 [[SPEC_SELECT]]
+; LOADSTORE-LABEL: @load_after_store(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; LOADSTORE-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
+; LOADSTORE-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
+; LOADSTORE-NEXT: [[ZEXT:%.*]] = zext i16 [[TMP2]] to i64
+; LOADSTORE-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], i64 [[ZEXT]], i64 0
+; LOADSTORE-NEXT: ret i64 [[SPEC_SELECT]]
+;
+; NONE-LABEL: @load_after_store(
+; NONE-NEXT: entry:
+; NONE-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; NONE-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[COMMON_RET:%.*]]
+; NONE: common.ret:
+; NONE-NEXT: [[COMMON_RET_OP:%.*]] = phi i64 [ [[ZEXT:%.*]], [[IF_TRUE]] ], [ 0, [[ENTRY:%.*]] ]
+; NONE-NEXT: ret i64 [[COMMON_RET_OP]]
+; NONE: if.true:
+; NONE-NEXT: store i32 1, ptr [[B:%.*]], align 4
+; NONE-NEXT: [[TMP0:%.*]] = load i16, ptr [[P:%.*]], align 2
+; NONE-NEXT: [[ZEXT]] = zext i16 [[TMP0]] to i64
+; NONE-NEXT: br label [[COMMON_RET]]
;
entry:
%cond = icmp eq i32 %a, 0
@@ -148,15 +232,49 @@ if.end:
;; Speculatable memory read doesn't prevent the hoist.
define void @load_skip_speculatable_memory_read(i32 %a, ptr %p, ptr %q) {
-; CHECK-LABEL: @load_skip_speculatable_memory_read(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
-; CHECK-NEXT: [[READ:%.*]] = call i32 @read_memory_only()
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[READ]] to <1 x i32>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP1]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4
-; CHECK-NEXT: ret void
+; LOADSTORE-LABEL: @load_skip_speculatable_memory_read(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; LOADSTORE-NEXT: [[READ:%.*]] = call i32 @read_memory_only()
+; LOADSTORE-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; LOADSTORE-NEXT: [[TMP1:%.*]] = bitcast i32 [[READ]] to <1 x i32>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP1]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: store i32 1, ptr [[Q:%.*]], align 4
+; LOADSTORE-NEXT: ret void
+;
+; STOREONLY-LABEL: @load_skip_speculatable_memory_read(
+; STOREONLY-NEXT: entry:
+; STOREONLY-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; STOREONLY-NEXT: [[READ:%.*]] = call i32 @read_memory_only()
+; STOREONLY-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; STOREONLY-NEXT: [[TMP1:%.*]] = bitcast i32 [[READ]] to <1 x i32>
+; STOREONLY-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP1]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
+; STOREONLY-NEXT: store i32 1, ptr [[Q:%.*]], align 4
+; STOREONLY-NEXT: ret void
+;
+; LOADONLY-LABEL: @load_skip_speculatable_memory_read(
+; LOADONLY-NEXT: entry:
+; LOADONLY-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; LOADONLY-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; LOADONLY: if.false:
+; LOADONLY-NEXT: store i32 1, ptr [[Q:%.*]], align 4
+; LOADONLY-NEXT: ret void
+; LOADONLY: if.true:
+; LOADONLY-NEXT: [[READ:%.*]] = call i32 @read_memory_only()
+; LOADONLY-NEXT: store i32 [[READ]], ptr [[P:%.*]], align 4
+; LOADONLY-NEXT: br label [[IF_FALSE]]
+;
+; NONEONLY-LABEL: @load_skip_speculatable_memory_read(
+; NONEONLY-NEXT: entry:
+; NONEONLY-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; NONEONLY-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; NONEONLY: if.false:
+; NONEONLY-NEXT: store i32 1, ptr [[Q:%.*]], align 4
+; NONEONLY-NEXT: ret void
+; NONEONLY: if.true:
+; NONEONLY-NEXT: [[READ:%.*]] = call i32 @read_memory_only()
+; NONEONLY-NEXT: store i32 [[READ]], ptr [[P:%.*]], align 4
+; NONEONLY-NEXT: br label [[IF_FALSE]]
;
entry:
%cond = icmp eq i32 %a, 0
@@ -177,15 +295,49 @@ if.end:
;; Source of the load can be a GEP.
define i32 @load_from_gep(ptr %p) {
-; CHECK-LABEL: @load_from_gep(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16
-; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
-; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[ARRAYIDX]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
-; CHECK-NEXT: ret i32 [[TMP3]]
+; LOADSTORE-LABEL: @load_from_gep(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; LOADSTORE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16
+; LOADSTORE-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
+; LOADSTORE-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; LOADSTORE-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[ARRAYIDX]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
+; LOADSTORE-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
+; LOADSTORE-NEXT: ret i32 [[TMP3]]
+;
+; STOREONLY-LABEL: @load_from_gep(
+; STOREONLY-NEXT: entry:
+; STOREONLY-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; STOREONLY-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; STOREONLY: if.false:
+; STOREONLY-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16
+; STOREONLY-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; STOREONLY-NEXT: br label [[IF_TRUE]]
+; STOREONLY: if.true:
+; STOREONLY-NEXT: [[RES:%.*]] = phi i32 [ [[TMP0]], [[IF_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; STOREONLY-NEXT: ret i32 [[RES]]
+;
+; LOADONLY-LABEL: @load_from_gep(
+; LOADONLY-NEXT: entry:
+; LOADONLY-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; LOADONLY-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16
+; LOADONLY-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
+; LOADONLY-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; LOADONLY-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[ARRAYIDX]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
+; LOADONLY-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
+; LOADONLY-NEXT: ret i32 [[TMP3]]
+;
+; NONEONLY-LABEL: @load_from_gep(
+; NONEONLY-NEXT: entry:
+; NONEONLY-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; NONEONLY-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; NONEONLY: if.false:
+; NONEONLY-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16
+; NONEONLY-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; NONEONLY-NEXT: br label [[IF_TRUE]]
+; NONEONLY: if.true:
+; NONEONLY-NEXT: [[RES:%.*]] = phi i32 [ [[TMP0]], [[IF_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; NONEONLY-NEXT: ret i32 [[RES]]
;
entry:
%cond = icmp eq ptr %p, null
@@ -203,18 +355,30 @@ if.true:
;; Metadata range/annotation are kept.
define void @nondebug_metadata(i1 %cond, ptr %p, ptr %q) {
-; CHECK-LABEL: @nondebug_metadata(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
-; CHECK-NEXT: [[TMP1:%.*]] = call range(i16 0, 10) <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
-; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison), !annotation [[META5:![0-9]+]]
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
-; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP5]], ptr [[Q]], i32 4, <1 x i1> [[TMP0]]), !annotation [[META5]]
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP6]], ptr [[P]], i32 2, <1 x i1> [[TMP0]])
-; CHECK-NEXT: ret void
+; LOADSTORE-LABEL: @nondebug_metadata(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
+; LOADSTORE-NEXT: [[TMP1:%.*]] = call range(i16 0, 10) <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
+; LOADSTORE-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
+; LOADSTORE-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison), !annotation [[META5:![0-9]+]]
+; LOADSTORE-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
+; LOADSTORE-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP5]], ptr [[Q]], i32 4, <1 x i1> [[TMP0]]), !annotation [[META5]]
+; LOADSTORE-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP6]], ptr [[P]], i32 2, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: ret void
+;
+; NONE-LABEL: @nondebug_metadata(
+; NONE-NEXT: entry:
+; NONE-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; NONE: if.false:
+; NONE-NEXT: ret void
+; NONE: if.true:
+; NONE-NEXT: [[TMP0:%.*]] = load i16, ptr [[P:%.*]], align 2, !range [[RNG5:![0-9]+]]
+; NONE-NEXT: [[TMP1:%.*]] = load i32, ptr [[Q:%.*]], align 4, !annotation [[META6:![0-9]+]]
+; NONE-NEXT: store i16 [[TMP0]], ptr [[Q]], align 4, !annotation [[META6]]
+; NONE-NEXT: store i32 [[TMP1]], ptr [[P]], align 2
+; NONE-NEXT: br label [[IF_FALSE]]
;
entry:
br i1 %cond, label %if.true, label %if.false
@@ -231,12 +395,41 @@ if.true:
}
define i16 @debug_metadata_diassign(i1 %cond, i16 %a, ptr %p) {
-; CHECK-LABEL: @debug_metadata_diassign(
-; CHECK-NEXT: bb0:
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
-; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> splat (i16 7), ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], i16 3, i16 2
-; CHECK-NEXT: ret i16 [[SPEC_SELECT]]
+; LOADSTORE-LABEL: @debug_metadata_diassign(
+; LOADSTORE-NEXT: bb0:
+; LOADSTORE-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> splat (i16 7), ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], i16 3, i16 2
+; LOADSTORE-NEXT: ret i16 [[SPEC_SELECT]]
+;
+; STOREONLY-LABEL: @debug_metadata_diassign(
+; STOREONLY-NEXT: bb0:
+; STOREONLY-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
+; STOREONLY-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> splat (i16 7), ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
+; STOREONLY-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], i16 3, i16 2
+; STOREONLY-NEXT: ret i16 [[SPEC_SELECT]]
+;
+; LOADONLY-LABEL: @debug_metadata_diassign(
+; LOADONLY-NEXT: bb0:
+; LOADONLY-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; LOADONLY: if.true:
+; LOADONLY-NEXT: store i16 7, ptr [[P:%.*]], align 4, !DIAssignID [[DIASSIGNID7:![0-9]+]]
+; LOADONLY-NEXT: br label [[IF_FALSE]]
+; LOADONLY: if.false:
+; LOADONLY-NEXT: [[RET:%.*]] = phi i16 [ 2, [[BB0:%.*]] ], [ 3, [[IF_TRUE]] ]
+; LOADONLY-NEXT: #dbg_assign(i16 [[RET]], [[META8:![0-9]+]], !DIExpression(), [[DIASSIGNID7]], ptr [[P]], !DIExpression(), [[META11:![0-9]+]])
+; LOADONLY-NEXT: ret i16 [[RET]]
+;
+; NONEONLY-LABEL: @debug_metadata_diassign(
+; NONEONLY-NEXT: bb0:
+; NONEONLY-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; NONEONLY: if.true:
+; NONEONLY-NEXT: store i16 7, ptr [[P:%.*]], align 4, !DIAssignID [[DIASSIGNID7:![0-9]+]]
+; NONEONLY-NEXT: br label [[IF_FALSE]]
+; NONEONLY: if.false:
+; NONEONLY-NEXT: [[RET:%.*]] = phi i16 [ 2, [[BB0:%.*]] ], [ 3, [[IF_TRUE]] ]
+; NONEONLY-NEXT: #dbg_assign(i16 [[RET]], [[META8:![0-9]+]], !DIExpression(), [[DIASSIGNID7]], ptr [[P]], !DIExpression(), [[META11:![0-9]+]])
+; NONEONLY-NEXT: ret i16 [[RET]]
;
bb0:
br i1 %cond, label %if.true, label %if.false
@@ -253,14 +446,45 @@ if.false:
;; Not crash when working with opt controlled by simplifycfg-hoist-cond-stores.
define i32 @hoist_cond_stores(i1 %cond, ptr %p) {
-; CHECK-LABEL: @hoist_cond_stores(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: store i1 false, ptr [[P:%.*]], align 2
-; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[COND:%.*]], i1 false, i1 false
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr [[P]], i32 8, <1 x i1> [[TMP0]])
-; CHECK-NEXT: store i1 [[SPEC_STORE_SELECT]], ptr [[P]], align 2
-; CHECK-NEXT: ret i32 0
+; LOADSTORE-LABEL: @hoist_cond_stores(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: store i1 false, ptr [[P:%.*]], align 2
+; LOADSTORE-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
+; LOADSTORE-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[COND]], i1 false, i1 false
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr [[P]], i32 8, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: store i1 [[SPEC_STORE_SELECT]], ptr [[P]], align 2
+; LOADSTORE-NEXT: ret i32 0
+;
+; STOREONLY-LABEL: @hoist_cond_stores(
+; STOREONLY-NEXT: entry:
+; STOREONLY-NEXT: store i1 false, ptr [[P:%.*]], align 2
+; STOREONLY-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
+; STOREONLY-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[COND]], i1 false, i1 false
+; STOREONLY-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr [[P]], i32 8, <1 x i1> [[TMP0]])
+; STOREONLY-NEXT: store i1 [[SPEC_STORE_SELECT]], ptr [[P]], align 2
+; STOREONLY-NEXT: ret i32 0
+;
+; LOADONLY-LABEL: @hoist_cond_stores(
+; LOADONLY-NEXT: entry:
+; LOADONLY-NEXT: store i1 false, ptr [[P:%.*]], align 2
+; LOADONLY-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; LOADONLY: if.true:
+; LOADONLY-NEXT: store i32 0, ptr [[P]], align 8
+; LOADONLY-NEXT: store i1 false, ptr [[P]], align 2
+; LOADONLY-NEXT: br label [[IF_FALSE]]
+; LOADONLY: if.false:
+; LOADONLY-NEXT: ret i32 0
+;
+; NONEONLY-LABEL: @hoist_cond_stores(
+; NONEONLY-NEXT: entry:
+; NONEONLY-NEXT: store i1 false, ptr [[P:%.*]], align 2
+; NONEONLY-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; NONEONLY: if.true:
+; NONEONLY-NEXT: store i32 0, ptr [[P]], align 8
+; NONEONLY-NEXT: store i1 false, ptr [[P]], align 2
+; NONEONLY-NEXT: br label [[IF_FALSE]]
+; NONEONLY: if.false:
+; NONEONLY-NEXT: ret i32 0
;
entry:
store i1 false, ptr %p, align 2
@@ -277,18 +501,33 @@ if.false: ; preds = %if.true, %entry
;; Both of successor 0 and successor 1 have a single predecessor.
define i32 @single_predecessor(ptr %p, ptr %q, i32 %a) {
-; CHECK-LABEL: @single_predecessor(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
-; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[TOBOOL]], true
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i1 [[TOBOOL]] to <1 x i1>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP2]])
-; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
-; CHECK-NEXT: [[DOT:%.*]] = select i1 [[TOBOOL]], i32 2, i32 3
-; CHECK-NEXT: ret i32 [[DOT]]
+; LOADSTORE-LABEL: @single_predecessor(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
+; LOADSTORE-NEXT: [[TMP0:%.*]] = xor i1 [[TOBOOL]], true
+; LOADSTORE-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; LOADSTORE-NEXT: [[TMP2:%.*]] = bitcast i1 [[TOBOOL]] to <1 x i1>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP2]])
+; LOADSTORE-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
+; LOADSTORE-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
+; LOADSTORE-NEXT: [[DOT:%.*]] = select i1 [[TOBOOL]], i32 2, i32 3
+; LOADSTORE-NEXT: ret i32 [[DOT]]
+;
+; NONE-LABEL: @single_predecessor(
+; NONE-NEXT: entry:
+; NONE-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
+; NONE-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; NONE: common.ret:
+; NONE-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 2, [[IF_END]] ], [ 3, [[IF_THEN]] ]
+; NONE-NEXT: ret i32 [[COMMON_RET_OP]]
+; NONE: if.end:
+; NONE-NEXT: store i32 1, ptr [[Q:%.*]], align 4
+; NONE-NEXT: br label [[COMMON_RET:%.*]]
+; NONE: if.then:
+; NONE-NEXT: [[TMP0:%.*]] = load i32, ptr [[Q]], align 4
+; NONE-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
+; NONE-NEXT: br label [[COMMON_RET]]
;
entry:
%tobool = icmp ne i32 %a, 0
@@ -306,16 +545,55 @@ if.then:
;; Hoist 6 stores.
define void @threshold_6(i1 %cond, ptr %p1, ptr %p2, ptr %p3, ptr %p4, ptr %p5, ptr %p6) {
-; CHECK-LABEL: @threshold_6(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[P1:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 2), ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 3), ptr [[P3:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 4), ptr [[P4:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 5), ptr [[P5:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 6), ptr [[P6:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT: ret void
+; LOADSTORE-LABEL: @threshold_6(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[P1:%.*]], i32 4, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 2), ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 3), ptr [[P3:%.*]], i32 4, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 4), ptr [[P4:%.*]], i32 4, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 5), ptr [[P5:%.*]], i32 4, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 6), ptr [[P6:%.*]], i32 4, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: ret void
+;
+; STOREONLY-LABEL: @threshold_6(
+; STOREONLY-NEXT: entry:
+; STOREONLY-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
+; STOREONLY-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[P1:%.*]], i32 4, <1 x i1> [[TMP0]])
+; STOREONLY-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 2), ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP0]])
+; STOREONLY-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 3), ptr [[P3:%.*]], i32 4, <1 x i1> [[TMP0]])
+; STOREONLY-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 4), ptr [[P4:%.*]], i32 4, <1 x i1> [[TMP0]])
+; STOREONLY-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 5), ptr [[P5:%.*]], i32 4, <1 x i1> [[TMP0]])
+; STOREONLY-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 6), ptr [[P6:%.*]], i32 4, <1 x i1> [[TMP0]])
+; STOREONLY-NEXT: ret void
+;
+; LOADONLY-LABEL: @threshold_6(
+; LOADONLY-NEXT: entry:
+; LOADONLY-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; LOADONLY: if.true:
+; LOADONLY-NEXT: store i32 1, ptr [[P1:%.*]], align 4
+; LOADONLY-NEXT: store i32 2, ptr [[P2:%.*]], align 4
+; LOADONLY-NEXT: store i32 3, ptr [[P3:%.*]], align 4
+; LOADONLY-NEXT: store i32 4, ptr [[P4:%.*]], align 4
+; LOADONLY-NEXT: store i32 5, ptr [[P5:%.*]], align 4
+; LOADONLY-NEXT: store i32 6, ptr [[P6:%.*]], align 4
+; LOADONLY-NEXT: br label [[IF_FALSE]]
+; LOADONLY: if.false:
+; LOADONLY-NEXT: ret void
+;
+; NONEONLY-LABEL: @threshold_6(
+; NONEONLY-NEXT: entry:
+; NONEONLY-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; NONEONLY: if.true:
+; NONEONLY-NEXT: store i32 1, ptr [[P1:%.*]], align 4
+; NONEONLY-NEXT: store i32 2, ptr [[P2:%.*]], align 4
+; NONEONLY-NEXT: store i32 3, ptr [[P3:%.*]], align 4
+; NONEONLY-NEXT: store i32 4, ptr [[P4:%.*]], align 4
+; NONEONLY-NEXT: store i32 5, ptr [[P5:%.*]], align 4
+; NONEONLY-NEXT: store i32 6, ptr [[P6:%.*]], align 4
+; NONEONLY-NEXT: br label [[IF_FALSE]]
+; NONEONLY: if.false:
+; NONEONLY-NEXT: ret void
;
entry:
br i1 %cond, label %if.true, label %if.false
@@ -578,16 +856,49 @@ if.true:
;; Not hoist if the branch is predictable and the `then` BB is not likely to execute.
define void @not_likely_to_execute(ptr %p, ptr %q, i32 %a) {
-; CHECK-LABEL: @not_likely_to_execute(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
-; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]], !prof [[PROF6:![0-9]+]]
-; CHECK: if.end:
-; CHECK-NEXT: ret void
-; CHECK: if.then:
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Q:%.*]], align 4
-; CHECK-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
-; CHECK-NEXT: br label [[IF_END]]
+; LOADSTORE-LABEL: @not_likely_to_execute(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
+; LOADSTORE-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]], !prof [[PROF6:![0-9]+]]
+; LOADSTORE: if.end:
+; LOADSTORE-NEXT: ret void
+; LOADSTORE: if.then:
+; LOADSTORE-NEXT: [[TMP0:%.*]] = load i32, ptr [[Q:%.*]], align 4
+; LOADSTORE-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
+; LOADSTORE-NEXT: br label [[IF_END]]
+;
+; STOREONLY-LABEL: @not_likely_to_execute(
+; STOREONLY-NEXT: entry:
+; STOREONLY-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
+; STOREONLY-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]], !prof [[PROF7:![0-9]+]]
+; STOREONLY: if.end:
+; STOREONLY-NEXT: ret void
+; STOREONLY: if.then:
+; STOREONLY-NEXT: [[TMP0:%.*]] = load i32, ptr [[Q:%.*]], align 4
+; STOREONLY-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
+; STOREONLY-NEXT: br label [[IF_END]]
+;
+; LOADONLY-LABEL: @not_likely_to_execute(
+; LOADONLY-NEXT: entry:
+; LOADONLY-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
+; LOADONLY-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]], !prof [[PROF12:![0-9]+]]
+; LOADONLY: if.end:
+; LOADONLY-NEXT: ret void
+; LOADONLY: if.then:
+; LOADONLY-NEXT: [[TMP0:%.*]] = load i32, ptr [[Q:%.*]], align 4
+; LOADONLY-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
+; LOADONLY-NEXT: br label [[IF_END]]
+;
+; NONEONLY-LABEL: @not_likely_to_execute(
+; NONEONLY-NEXT: entry:
+; NONEONLY-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
+; NONEONLY-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]], !prof [[PROF12:![0-9]+]]
+; NONEONLY: if.end:
+; NONEONLY-NEXT: ret void
+; NONEONLY: if.then:
+; NONEONLY-NEXT: [[TMP0:%.*]] = load i32, ptr [[Q:%.*]], align 4
+; NONEONLY-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
+; NONEONLY-NEXT: br label [[IF_END]]
;
entry:
%tobool = icmp ne i32 %a, 0
@@ -671,18 +982,57 @@ if.false:
}
define i32 @succ_phi_has_3input(i1 %cond1, ptr %p, i1 %cond2) {
-; CHECK-LABEL: @succ_phi_has_3input(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 [[COND1:%.*]], label [[BB3:%.*]], label [[BB1:%.*]]
-; CHECK: bb1:
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND2:%.*]] to <1 x i1>
-; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[P:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> zeroinitializer)
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[TMP1]] to i64
-; CHECK-NEXT: br label [[BB3]]
-; CHECK: bb3:
-; CHECK-NEXT: [[Y:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB1]] ]
-; CHECK-NEXT: store i64 [[Y]], ptr [[P]], align 8
-; CHECK-NEXT: ret i32 0
+; LOADSTORE-LABEL: @succ_phi_has_3input(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: br i1 [[COND1:%.*]], label [[BB3:%.*]], label [[BB1:%.*]]
+; LOADSTORE: bb1:
+; LOADSTORE-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND2:%.*]] to <1 x i1>
+; LOADSTORE-NEXT: [[TMP1:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[P:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> zeroinitializer)
+; LOADSTORE-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[TMP1]] to i64
+; LOADSTORE-NEXT: br label [[BB3]]
+; LOADSTORE: bb3:
+; LOADSTORE-NEXT: [[Y:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB1]] ]
+; LOADSTORE-NEXT: store i64 [[Y]], ptr [[P]], align 8
+; LOADSTORE-NEXT: ret i32 0
+;
+; STOREONLY-LABEL: @succ_phi_has_3input(
+; STOREONLY-NEXT: entry:
+; STOREONLY-NEXT: [[COND2_NOT:%.*]] = xor i1 [[COND2:%.*]], true
+; STOREONLY-NEXT: [[BRMERGE:%.*]] = select i1 [[COND1:%.*]], i1 true, i1 [[COND2_NOT]]
+; STOREONLY-NEXT: br i1 [[BRMERGE]], label [[BB3:%.*]], label [[BB2:%.*]]
+; STOREONLY: bb2:
+; STOREONLY-NEXT: [[X:%.*]] = load i64, ptr [[P:%.*]], align 8
+; STOREONLY-NEXT: br label [[BB3]]
+; STOREONLY: bb3:
+; STOREONLY-NEXT: [[Y:%.*]] = phi i64 [ [[X]], [[BB2]] ], [ 0, [[ENTRY:%.*]] ]
+; STOREONLY-NEXT: store i64 [[Y]], ptr [[P]], align 8
+; STOREONLY-NEXT: ret i32 0
+;
+; LOADONLY-LABEL: @succ_phi_has_3input(
+; LOADONLY-NEXT: entry:
+; LOADONLY-NEXT: br i1 [[COND1:%.*]], label [[BB3:%.*]], label [[BB1:%.*]]
+; LOADONLY: bb1:
+; LOADONLY-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND2:%.*]] to <1 x i1>
+; LOADONLY-NEXT: [[TMP1:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[P:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> zeroinitializer)
+; LOADONLY-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[TMP1]] to i64
+; LOADONLY-NEXT: br label [[BB3]]
+; LOADONLY: bb3:
+; LOADONLY-NEXT: [[Y:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP2]], [[BB1]] ]
+; LOADONLY-NEXT: store i64 [[Y]], ptr [[P]], align 8
+; LOADONLY-NEXT: ret i32 0
+;
+; NONEONLY-LABEL: @succ_phi_has_3input(
+; NONEONLY-NEXT: entry:
+; NONEONLY-NEXT: [[COND2_NOT:%.*]] = xor i1 [[COND2:%.*]], true
+; NONEONLY-NEXT: [[BRMERGE:%.*]] = select i1 [[COND1:%.*]], i1 true, i1 [[COND2_NOT]]
+; NONEONLY-NEXT: br i1 [[BRMERGE]], label [[BB3:%.*]], label [[BB2:%.*]]
+; NONEONLY: bb2:
+; NONEONLY-NEXT: [[X:%.*]] = load i64, ptr [[P:%.*]], align 8
+; NONEONLY-NEXT: br label [[BB3]]
+; NONEONLY: bb3:
+; NONEONLY-NEXT: [[Y:%.*]] = phi i64 [ [[X]], [[BB2]] ], [ 0, [[ENTRY:%.*]] ]
+; NONEONLY-NEXT: store i64 [[Y]], ptr [[P]], align 8
+; NONEONLY-NEXT: ret i32 0
;
entry:
br i1 %cond1, label %bb3, label %bb1
@@ -701,16 +1051,28 @@ bb3: ; preds = %bb2, %bb1, %entry
}
define i32 @succ1to0_phi2(ptr %p, ptr %p2) {
-; CHECK-LABEL: @succ1to0_phi2(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
-; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
-; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to <1 x i32>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP4]], ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP1]])
-; CHECK-NEXT: ret i32 [[TMP3]]
+; LOADSTORE-LABEL: @succ1to0_phi2(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; LOADSTORE-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
+; LOADSTORE-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; LOADSTORE-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
+; LOADSTORE-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
+; LOADSTORE-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to <1 x i32>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP4]], ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP1]])
+; LOADSTORE-NEXT: ret i32 [[TMP3]]
+;
+; NONE-LABEL: @succ1to0_phi2(
+; NONE-NEXT: entry:
+; NONE-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; NONE-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; NONE: if.false:
+; NONE-NEXT: [[TMP0:%.*]] = load i32, ptr [[P]], align 4
+; NONE-NEXT: store i32 [[TMP0]], ptr [[P2:%.*]], align 4
+; NONE-NEXT: br label [[IF_TRUE]]
+; NONE: if.true:
+; NONE-NEXT: [[RES:%.*]] = phi i32 [ [[TMP0]], [[IF_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; NONE-NEXT: ret i32 [[RES]]
;
entry:
%cond = icmp eq ptr %p, null
@@ -727,19 +1089,33 @@ if.true:
}
define i32 @succ1to0_phi3(ptr %p, ptr %p2, i32 %x) {
-; CHECK-LABEL: @succ1to0_phi3(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
-; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[X:%.*]] to <1 x i32>
-; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> [[TMP2]])
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP5]], ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP1]])
-; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], i32 0, i32 [[TMP4]]
-; CHECK-NEXT: [[RES:%.*]] = add i32 [[SPEC_SELECT]], [[TMP4]]
-; CHECK-NEXT: ret i32 [[RES]]
+; LOADSTORE-LABEL: @succ1to0_phi3(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; LOADSTORE-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
+; LOADSTORE-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; LOADSTORE-NEXT: [[TMP2:%.*]] = bitcast i32 [[X:%.*]] to <1 x i32>
+; LOADSTORE-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
+; LOADSTORE-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
+; LOADSTORE-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP5]], ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP1]])
+; LOADSTORE-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], i32 [[X]], i32 [[TMP4]]
+; LOADSTORE-NEXT: [[RES:%.*]] = add i32 [[TMP4]], [[SPEC_SELECT]]
+; LOADSTORE-NEXT: ret i32 [[RES]]
+;
+; NONE-LABEL: @succ1to0_phi3(
+; NONE-NEXT: entry:
+; NONE-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; NONE-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; NONE: if.false:
+; NONE-NEXT: [[TMP0:%.*]] = load i32, ptr [[P]], align 4
+; NONE-NEXT: store i32 [[TMP0]], ptr [[P2:%.*]], align 4
+; NONE-NEXT: br label [[IF_TRUE]]
+; NONE: if.true:
+; NONE-NEXT: [[RES0:%.*]] = phi i32 [ [[TMP0]], [[IF_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; NONE-NEXT: [[RES1:%.*]] = phi i32 [ [[TMP0]], [[IF_FALSE]] ], [ [[X:%.*]], [[ENTRY]] ]
+; NONE-NEXT: [[RES:%.*]] = add i32 [[RES0]], [[RES1]]
+; NONE-NEXT: ret i32 [[RES]]
;
entry:
%cond = icmp eq ptr %p, null
@@ -795,6 +1171,63 @@ return: ; preds = %sw.bb, %entry.if
ret i32 %ret
}
+;; Check cond-faulting-load can work with hoisted store when no cond-faulting-store.
+define void @hoist_store_without_cstore(ptr %0, ptr %1, i1 %cmp) {
+; LOADSTORE-LABEL: @hoist_store_without_cstore(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: store i32 0, ptr [[TMP1:%.*]], align 8
+; LOADSTORE-NEXT: [[TMP2:%.*]] = bitcast i1 [[CMP:%.*]] to <1 x i1>
+; LOADSTORE-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[TMP0:%.*]], i32 4, <1 x i1> [[TMP2]], <1 x i32> poison)
+; LOADSTORE-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
+; LOADSTORE-NEXT: [[TMP5:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP5]], ptr [[TMP1]], i32 8, <1 x i1> [[TMP2]])
+; LOADSTORE-NEXT: ret void
+;
+; STOREONLY-LABEL: @hoist_store_without_cstore(
+; STOREONLY-NEXT: entry:
+; STOREONLY-NEXT: store i32 0, ptr [[TMP1:%.*]], align 8
+; STOREONLY-NEXT: br i1 [[CMP:%.*]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]]
+; STOREONLY: if.then1:
+; STOREONLY-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0:%.*]], align 4
+; STOREONLY-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8
+; STOREONLY-NEXT: br label [[IF_END]]
+; STOREONLY: if.end:
+; STOREONLY-NEXT: ret void
+;
+; LOADONLY-LABEL: @hoist_store_without_cstore(
+; LOADONLY-NEXT: entry:
+; LOADONLY-NEXT: store i32 0, ptr [[TMP1:%.*]], align 8
+; LOADONLY-NEXT: [[TMP2:%.*]] = bitcast i1 [[CMP:%.*]] to <1 x i1>
+; LOADONLY-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[TMP0:%.*]], i32 4, <1 x i1> [[TMP2]], <1 x i32> poison)
+; LOADONLY-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
+; LOADONLY-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[CMP]], i32 [[TMP4]], i32 0
+; LOADONLY-NEXT: store i32 [[SPEC_STORE_SELECT]], ptr [[TMP1]], align 8
+; LOADONLY-NEXT: ret void
+;
+; NONEONLY-LABEL: @hoist_store_without_cstore(
+; NONEONLY-NEXT: entry:
+; NONEONLY-NEXT: store i32 0, ptr [[TMP1:%.*]], align 8
+; NONEONLY-NEXT: br i1 [[CMP:%.*]], label [[IF_THEN1:%.*]], label [[IF_END:%.*]]
+; NONEONLY: if.then1:
+; NONEONLY-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0:%.*]], align 4
+; NONEONLY-NEXT: store i32 [[TMP2]], ptr [[TMP1]], align 8
+; NONEONLY-NEXT: br label [[IF_END]]
+; NONEONLY: if.end:
+; NONEONLY-NEXT: ret void
+;
+entry:
+ store i32 0, ptr %1, align 8
+ br i1 %cmp, label %if.then1, label %if.end
+
+if.then1: ; preds = %entry
+ %2 = load i32, ptr %0, align 4
+ store i32 %2, ptr %1, align 8
+ br label %if.end
+
+if.end: ; preds = %if.then1, %entry
+ ret void
+}
+
declare i32 @read_memory_only() readonly nounwind willreturn speculatable
!llvm.dbg.cu = !{!0}
More information about the llvm-commits
mailing list