[llvm] 87c86aa - [X86,SimplifyCFG] Support hoisting load/store with conditional faulting (Part I) (#96878)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 28 19:42:48 PDT 2024
Author: Shengchen Kan
Date: 2024-08-29T10:42:44+08:00
New Revision: 87c86aa6b93aea3d1603c1759a17fb6b5ba6e814
URL: https://github.com/llvm/llvm-project/commit/87c86aa6b93aea3d1603c1759a17fb6b5ba6e814
DIFF: https://github.com/llvm/llvm-project/commit/87c86aa6b93aea3d1603c1759a17fb6b5ba6e814.diff
LOG: [X86,SimplifyCFG] Support hoisting load/store with conditional faulting (Part I) (#96878)
This is the SimplifyCFG part of
https://github.com/llvm/llvm-project/pull/95515

In this PR, we support hoisting loads/stores with conditional faulting in
`SimplifyCFGOpt::speculativelyExecuteBB` to eliminate conditional
branches.
This is for cases like:
```
void test(int a, int *b) {
  if (a)
    *b = a;
}
```
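On a target that supports conditional faulting (e.g. X86 with `-mattr=+cf`), the guarded store can be speculated as a masked store on a 1-element vector, so the branch disappears. The following before/after IR is an illustrative sketch distilled from the tests added below; the value names are hypothetical:
```
; before
define void @test(i32 %a, ptr %b) {
entry:
  %tobool = icmp ne i32 %a, 0
  br i1 %tobool, label %if.then, label %if.end

if.then:
  store i32 %a, ptr %b, align 4
  br label %if.end

if.end:
  ret void
}

; after: the store is guarded by the condition instead of a branch
define void @test(i32 %a, ptr %b) {
entry:
  %tobool = icmp ne i32 %a, 0
  %mask = bitcast i1 %tobool to <1 x i1>
  %val = bitcast i32 %a to <1 x i32>
  call void @llvm.masked.store.v1i32.p0(<1 x i32> %val, ptr %b, i32 4, <1 x i1> %mask)
  ret void
}

declare void @llvm.masked.store.v1i32.p0(<1 x i32>, ptr, i32 immarg, <1 x i1>)
```
The backend is then free to lower the 1-element masked store to a conditionally faulting store rather than a compare-and-branch sequence.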
In follow-up patches, we will support the hoist in
`SimplifyCFGOpt::hoistCommonCodeFromSuccessors`, for cases like:
```
void test(int a, int *c, int *d) {
  if (a)
    *c = a;
  else
    *d = a;
}
```
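Under the same scheme, both stores above would be expected to become masked stores guarded by the condition and its negation. A hypothetical sketch of the result (this part is not implemented by this patch):
```
  %cond = icmp ne i32 %a, 0
  %mask = bitcast i1 %cond to <1 x i1>
  %val = bitcast i32 %a to <1 x i32>
  call void @llvm.masked.store.v1i32.p0(<1 x i32> %val, ptr %c, i32 4, <1 x i1> %mask)
  %cond.not = xor i1 %cond, true
  %mask.not = bitcast i1 %cond.not to <1 x i1>
  call void @llvm.masked.store.v1i32.p0(<1 x i32> %val, ptr %d, i32 4, <1 x i1> %mask.not)
```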
Added:
llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops-with-cf.ll
llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
Modified:
llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
llvm/lib/Passes/PassBuilder.cpp
llvm/lib/Passes/PassBuilderPipelines.cpp
llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
llvm/lib/Transforms/Utils/SimplifyCFG.cpp
llvm/test/Other/new-pm-print-pipeline.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
index 2ea9d64f03cb64..ee3cc950cdb503 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
@@ -27,6 +27,7 @@ struct SimplifyCFGOptions {
bool ConvertSwitchToLookupTable = false;
bool NeedCanonicalLoop = true;
bool HoistCommonInsts = false;
+ bool HoistLoadsStoresWithCondFaulting = false;
bool SinkCommonInsts = false;
bool SimplifyCondBranch = true;
bool SpeculateBlocks = true;
@@ -59,6 +60,10 @@ struct SimplifyCFGOptions {
HoistCommonInsts = B;
return *this;
}
+ SimplifyCFGOptions &hoistLoadsStoresWithCondFaulting(bool B) {
+ HoistLoadsStoresWithCondFaulting = B;
+ return *this;
+ }
SimplifyCFGOptions &sinkCommonInsts(bool B) {
SinkCommonInsts = B;
return *this;
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 17eed97fd950c9..63173c4abb8191 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -848,6 +848,8 @@ Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
Result.needCanonicalLoops(Enable);
} else if (ParamName == "hoist-common-insts") {
Result.hoistCommonInsts(Enable);
+ } else if (ParamName == "hoist-loads-stores-with-cond-faulting") {
+ Result.hoistLoadsStoresWithCondFaulting(Enable);
} else if (ParamName == "sink-common-insts") {
Result.sinkCommonInsts(Enable);
} else if (ParamName == "speculate-unpredictables") {
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 1184123c7710f0..9c3d49cabbd38c 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1534,9 +1534,11 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
// LoopSink (and other loop passes since the last simplifyCFG) might have
// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
- OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
- .convertSwitchRangeToICmp(true)
- .speculateUnpredictables(true)));
+ OptimizePM.addPass(
+ SimplifyCFGPass(SimplifyCFGOptions()
+ .convertSwitchRangeToICmp(true)
+ .speculateUnpredictables(true)
+ .hoistLoadsStoresWithCondFaulting(true)));
// Add the core optimizing pipeline.
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 11de37f7a7c109..daa82a8c368e2b 100644
--- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -73,6 +73,11 @@ static cl::opt<bool> UserHoistCommonInsts(
"hoist-common-insts", cl::Hidden, cl::init(false),
cl::desc("hoist common instructions (default = false)"));
+static cl::opt<bool> UserHoistLoadsStoresWithCondFaulting(
+ "hoist-loads-stores-with-cond-faulting", cl::Hidden, cl::init(false),
+ cl::desc("Hoist loads/stores if the target supports conditional faulting "
+ "(default = false)"));
+
static cl::opt<bool> UserSinkCommonInsts(
"sink-common-insts", cl::Hidden, cl::init(false),
cl::desc("Sink common instructions (default = false)"));
@@ -326,6 +331,9 @@ static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) {
Options.NeedCanonicalLoop = UserKeepLoops;
if (UserHoistCommonInsts.getNumOccurrences())
Options.HoistCommonInsts = UserHoistCommonInsts;
+ if (UserHoistLoadsStoresWithCondFaulting.getNumOccurrences())
+ Options.HoistLoadsStoresWithCondFaulting =
+ UserHoistLoadsStoresWithCondFaulting;
if (UserSinkCommonInsts.getNumOccurrences())
Options.SinkCommonInsts = UserSinkCommonInsts;
if (UserSpeculateUnpredictables.getNumOccurrences())
@@ -354,6 +362,8 @@ void SimplifyCFGPass::printPipeline(
<< "switch-to-lookup;";
OS << (Options.NeedCanonicalLoop ? "" : "no-") << "keep-loops;";
OS << (Options.HoistCommonInsts ? "" : "no-") << "hoist-common-insts;";
+ OS << (Options.HoistLoadsStoresWithCondFaulting ? "" : "no-")
+ << "hoist-loads-stores-with-cond-faulting;";
OS << (Options.SinkCommonInsts ? "" : "no-") << "sink-common-insts;";
OS << (Options.SpeculateBlocks ? "" : "no-") << "speculate-blocks;";
OS << (Options.SimplifyCondBranch ? "" : "no-") << "simplify-cond-branch;";
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 92e2d189aff6ff..15de40c7b09962 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -117,6 +117,18 @@ static cl::opt<bool>
HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
cl::desc("Hoist common instructions up to the parent block"));
+static cl::opt<bool> HoistLoadsStoresWithCondFaulting(
+ "simplifycfg-hoist-loads-stores-with-cond-faulting", cl::Hidden,
+ cl::init(true),
+ cl::desc("Hoist loads/stores if the target supports "
+ "conditional faulting"));
+
+static cl::opt<unsigned> HoistLoadsStoresWithCondFaultingThreshold(
+ "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
+ cl::desc("Control the maximal conditonal load/store that we are willing "
+ "to speculatively execute to eliminate conditional branch "
+ "(default = 6)"));
+
static cl::opt<unsigned>
HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
cl::init(20),
@@ -2986,6 +2998,25 @@ static bool isProfitableToSpeculate(const BranchInst *BI, bool Invert,
return BIEndProb < Likely;
}
+static bool isSafeCheapLoadStore(const Instruction *I,
+ const TargetTransformInfo &TTI) {
+ // Don't handle volatile or atomic loads/stores.
+ if (auto *L = dyn_cast<LoadInst>(I)) {
+ if (!L->isSimple())
+ return false;
+ } else if (auto *S = dyn_cast<StoreInst>(I)) {
+ if (!S->isSimple())
+ return false;
+ } else
+ return false;
+
+ // llvm.masked.load/store use i32 for alignment while load/store use i64.
+ // That's why we have the alignment limitation.
+ // FIXME: Update the prototype of the intrinsics?
+ return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I)) &&
+ getLoadStoreAlignment(I) < Value::MaximumAlignment;
+}
+
/// Speculate a conditional basic block flattening the CFG.
///
/// Note that this is a very risky transform currently. Speculating
@@ -3060,6 +3091,9 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
unsigned SpeculatedInstructions = 0;
+ bool HoistLoadsStores = HoistLoadsStoresWithCondFaulting &&
+ Options.HoistLoadsStoresWithCondFaulting;
+ SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
Value *SpeculatedStoreValue = nullptr;
StoreInst *SpeculatedStore = nullptr;
EphemeralValueTracker EphTracker;
@@ -3088,22 +3122,33 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
// Only speculatively execute a single instruction (not counting the
// terminator) for now.
- ++SpeculatedInstructions;
+ bool IsSafeCheapLoadStore = HoistLoadsStores &&
+ isSafeCheapLoadStore(&I, TTI) &&
+ SpeculatedConditionalLoadsStores.size() <
+ HoistLoadsStoresWithCondFaultingThreshold;
+ // Don't count the load/store against the cost if the target supports
+ // conditional faulting, because it's cheap to speculate.
+ if (IsSafeCheapLoadStore)
+ SpeculatedConditionalLoadsStores.push_back(&I);
+ else
+ ++SpeculatedInstructions;
+
if (SpeculatedInstructions > 1)
return false;
// Don't hoist the instruction if it's unsafe or expensive.
- if (!isSafeToSpeculativelyExecute(&I) &&
- !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
- &I, BB, ThenBB, EndBB))))
+ if (!IsSafeCheapLoadStore && !isSafeToSpeculativelyExecute(&I) &&
+ !(HoistCondStores && !SpeculatedStoreValue &&
+ (SpeculatedStoreValue =
+ isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
return false;
- if (!SpeculatedStoreValue &&
+ if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
computeSpeculationCost(&I, TTI) >
PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
return false;
// Store the store speculation candidate.
- if (SpeculatedStoreValue)
+ if (!SpeculatedStore && SpeculatedStoreValue)
SpeculatedStore = cast<StoreInst>(&I);
// Do not hoist the instruction if any of its operands are defined but not
@@ -3130,11 +3175,11 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
// Check that we can insert the selects and that it's not too expensive to do
// so.
- bool Convert = SpeculatedStore != nullptr;
+ bool Convert =
+ SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
InstructionCost Cost = 0;
Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
- SpeculatedInstructions,
- Cost, TTI);
+ SpeculatedInstructions, Cost, TTI);
if (!Convert || Cost > Budget)
return false;
@@ -3222,6 +3267,107 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
std::prev(ThenBB->end()));
+ // If the target supports conditional faulting,
+ // we look for the following pattern:
+ // \code
+ // BB:
+ // ...
+ // %cond = icmp ult %x, %y
+ // br i1 %cond, label %TrueBB, label %FalseBB
+ // FalseBB:
+ // store i32 1, ptr %q, align 4
+ // ...
+ // TrueBB:
+ // %maskedloadstore = load i32, ptr %b, align 4
+ // store i32 %maskedloadstore, ptr %p, align 4
+ // ...
+ // \endcode
+ //
+ // and transform it into:
+ //
+ // \code
+ // BB:
+ // ...
+ // %cond = icmp ult %x, %y
+ // %maskedloadstore = cload i32, ptr %b, %cond
+ // cstore i32 %maskedloadstore, ptr %p, %cond
+ // cstore i32 1, ptr %q, ~%cond
+ // br i1 %cond, label %TrueBB, label %FalseBB
+ // FalseBB:
+ // ...
+ // TrueBB:
+ // ...
+ // \endcode
+ //
+ // where cload/cstore are represented by llvm.masked.load/store intrinsics,
+ // e.g.
+ //
+ // \code
+ // %vcond = bitcast i1 %cond to <1 x i1>
+ // %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
+ // (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
+ // %maskedloadstore = bitcast <1 x i32> %v0 to i32
+ // call void @llvm.masked.store.v1i32.p0
+ // (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
+ // %cond.not = xor i1 %cond, true
+ // %vcond.not = bitcast i1 %cond.not to <1 x i1>
+ // call void @llvm.masked.store.v1i32.p0
+ // (<1 x i32> <i32 1>, ptr %q, i32 4, <1 x i1> %vcond.not)
+ // \endcode
+ //
+ // So we need to turn hoisted load/store into cload/cstore.
+ auto &Context = BI->getParent()->getContext();
+ auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
+ auto *Cond = BI->getOperand(0);
+ Value *Mask = nullptr;
+ // Construct the condition if needed.
+ if (!SpeculatedConditionalLoadsStores.empty()) {
+ IRBuilder<> Builder(SpeculatedConditionalLoadsStores.back());
+ Mask = Builder.CreateBitCast(
+ Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
+ VCondTy);
+ }
+ for (auto *I : SpeculatedConditionalLoadsStores) {
+ IRBuilder<> Builder(I);
+ // We currently assume conditional faulting load/store is supported for
+ // scalar types only when creating new instructions. This can be easily
+ // extended for vector types in the future.
+ assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
+ auto *Op0 = I->getOperand(0);
+ Instruction *MaskedLoadStore = nullptr;
+ if (auto *LI = dyn_cast<LoadInst>(I)) {
+ // Handle Load.
+ auto *Ty = I->getType();
+ MaskedLoadStore = Builder.CreateMaskedLoad(FixedVectorType::get(Ty, 1),
+ Op0, LI->getAlign(), Mask);
+ I->replaceAllUsesWith(Builder.CreateBitCast(MaskedLoadStore, Ty));
+ } else {
+ // Handle Store.
+ auto *StoredVal =
+ Builder.CreateBitCast(Op0, FixedVectorType::get(Op0->getType(), 1));
+ MaskedLoadStore = Builder.CreateMaskedStore(
+ StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
+ }
+ // For non-debug metadata, only !annotation, !range, !nonnull and !align are
+ // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
+ //
+ // !nonnull, !align : Pointer types are not supported, so no need to keep them.
+ // !range: Load type is changed from scalar to vector, but the metadata on
+ // vector specifies a per-element range, so the semantics stay the
+ // same. Keep it.
+ // !annotation: Does not impact semantics. Keep it.
+ I->dropUBImplyingAttrsAndUnknownMetadata(
+ {LLVMContext::MD_range, LLVMContext::MD_annotation});
+ // FIXME: DIAssignID is not supported for masked store yet.
+ // (Verifier::visitDIAssignIDMetadata)
+ at::deleteAssignmentMarkers(I);
+ I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
+ return Node->getMetadataID() == Metadata::DIAssignIDKind;
+ });
+ MaskedLoadStore->copyMetadata(*I);
+ I->eraseFromParent();
+ }
+
// Insert selects and rewrite the PHI operands.
IRBuilder<NoFolder> Builder(BI);
for (PHINode &PN : EndBB->phis()) {
diff --git a/llvm/test/Other/new-pm-print-pipeline.ll b/llvm/test/Other/new-pm-print-pipeline.ll
index f2e80814f347ad..12f88d60d66cec 100644
--- a/llvm/test/Other/new-pm-print-pipeline.ll
+++ b/llvm/test/Other/new-pm-print-pipeline.ll
@@ -49,8 +49,8 @@
; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(print<stack-lifetime><may>,print<stack-lifetime><must>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-17
; CHECK-17: function(print<stack-lifetime><may>,print<stack-lifetime><must>)
-; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-18
-; CHECK-18: function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;no-switch-range-to-icmp;switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)
+; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;switch-to-lookup;keep-loops;hoist-common-insts;hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-18
+; CHECK-18: function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;no-switch-range-to-icmp;switch-to-lookup;keep-loops;hoist-common-insts;hoist-loads-stores-with-cond-faulting;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-hoist-loads-stores-with-cond-faulting;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)
; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only>,loop-vectorize<interleave-forced-only;vectorize-forced-only>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-19
; CHECK-19: function(loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,loop-vectorize<interleave-forced-only;vectorize-forced-only;>)
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops-with-cf.ll b/llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops-with-cf.ll
new file mode 100644
index 00000000000000..405a26de3d6afa
--- /dev/null
+++ b/llvm/test/Transforms/PhaseOrdering/X86/masked-memory-ops-with-cf.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -O1 -S | FileCheck %s
+
+;; Test that masked.load/store.v1* is generated in simplifycfg and does not fall back to branch+load/store in later passes.
+define void @basic(i1 %cond, ptr %b, ptr %p, ptr %q) {
+; CHECK-LABEL: @basic(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
+; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[B:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> poison)
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
+; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 2, <1 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP8]], ptr [[P]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP6]] to <1 x i64>
+; CHECK-NEXT: call void @llvm.masked.store.v1i64.p0(<1 x i64> [[TMP9]], ptr [[Q]], i32 8, <1 x i1> [[TMP0]])
+; CHECK-NEXT: ret void
+;
+entry:
+ br i1 %cond, label %if.true, label %if.false
+
+if.false:
+ br label %if.end
+
+if.true:
+ %pv = load i16, ptr %p, align 2
+ %qv = load i32, ptr %q, align 4
+ %bv = load i64, ptr %b, align 8
+ store i16 %pv, ptr %b, align 2
+ store i32 %qv, ptr %p, align 4
+ store i64 %bv, ptr %q, align 8
+ br label %if.false
+
+if.end:
+ ret void
+}
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
new file mode 100644
index 00000000000000..047ca717da8009
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
@@ -0,0 +1,694 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s
+
+;; Basic case: check masked.load/store is generated for i16/i32/i64.
+define void @basic(i1 %cond, ptr %b, ptr %p, ptr %q) {
+; CHECK-LABEL: @basic(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
+; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[B:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> poison)
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
+; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 2, <1 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP8]], ptr [[P]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP6]] to <1 x i64>
+; CHECK-NEXT: call void @llvm.masked.store.v1i64.p0(<1 x i64> [[TMP9]], ptr [[Q]], i32 8, <1 x i1> [[TMP0]])
+; CHECK-NEXT: ret void
+;
+entry:
+ br i1 %cond, label %if.true, label %if.false
+
+if.false:
+ br label %if.end
+
+if.true:
+ %0 = load i16, ptr %p, align 2
+ %1 = load i32, ptr %q, align 4
+ %2 = load i64, ptr %b, align 8
+ store i16 %0, ptr %b, align 2
+ store i32 %1, ptr %p, align 4
+ store i64 %2, ptr %q, align 8
+ br label %if.false
+
+if.end:
+ ret void
+}
+
+;; Successor 1 branches to successor 0.
+define void @succ1to0(ptr %p, ptr %q, i32 %a) {
+; CHECK-LABEL: @succ1to0(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[TOBOOL]], true
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to <1 x i32>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP4]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %tobool = icmp ne i32 %a, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.end:
+ ret void
+
+if.then:
+ %0 = load i32, ptr %q
+ store i32 %0, ptr %p
+ br label %if.end
+}
+
+;; Successor 1 branches to successor 0 and there is a phi node.
+define i32 @succ1to0_phi(ptr %p) {
+; CHECK-LABEL: @succ1to0_phi(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], i32 0, i32 [[TMP3]]
+; CHECK-NEXT: ret i32 [[SPEC_SELECT]]
+;
+entry:
+ %cond = icmp eq ptr %p, null
+ br i1 %cond, label %if.true, label %if.false
+
+if.false:
+ %0 = load i32, ptr %p
+ br label %if.true
+
+if.true:
+ %res = phi i32 [ %0, %if.false ], [ 0, %entry ]
+ ret i32 %res
+}
+
+;; Successor 0 branches to successor 1.
+define void @succ0to1(i32 %a, ptr %b, ptr %p, ptr %q) {
+; CHECK-LABEL: @succ0to1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i32> [[TMP1]] to i32
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <1 x i32>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %cond = icmp eq i32 %a, 0
+ br i1 %cond, label %if.true, label %if.false
+
+if.false:
+ store i32 1, ptr %q
+ br label %if.end
+
+if.true:
+ %0 = load i32, ptr %b
+ store i32 %0, ptr %p
+ br label %if.false
+
+if.end:
+ ret void
+}
+
+;; Load after store can be hoisted.
+define i64 @load_after_store(i32 %a, ptr %b, ptr %p) {
+; CHECK-LABEL: @load_after_store(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 1>, ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i16 [[TMP2]] to i64
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], i64 [[ZEXT]], i64 0
+; CHECK-NEXT: ret i64 [[SPEC_SELECT]]
+;
+entry:
+ %cond = icmp eq i32 %a, 0
+ br i1 %cond, label %if.true, label %if.end
+
+if.true:
+ store i32 1, ptr %b
+ %0 = load i16, ptr %p
+ %zext = zext i16 %0 to i64
+ ret i64 %zext
+
+if.end:
+ ret i64 0
+}
+
+;; Speculatable memory read doesn't prevent the hoist.
+define void @load_skip_speculatable_memory_read(i32 %a, ptr %p, ptr %q) {
+; CHECK-LABEL: @load_skip_speculatable_memory_read(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: [[READ:%.*]] = call i32 @read_memory_only()
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[READ]] to <1 x i32>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP1]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %cond = icmp eq i32 %a, 0
+ br i1 %cond, label %if.true, label %if.false
+
+if.false:
+ store i32 1, ptr %q
+ br label %if.end
+
+if.true:
+ %read = call i32 @read_memory_only()
+ store i32 %read, ptr %p
+ br label %if.false
+
+if.end:
+ ret void
+}
+
+;; Source of the load can be a GEP.
+define i32 @load_from_gep(ptr %p) {
+; CHECK-LABEL: @load_from_gep(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 16
+; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[ARRAYIDX]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], i32 0, i32 [[TMP3]]
+; CHECK-NEXT: ret i32 [[SPEC_SELECT]]
+;
+entry:
+ %cond = icmp eq ptr %p, null
+ br i1 %cond, label %if.true, label %if.false
+
+if.false:
+ %arrayidx = getelementptr inbounds i8, ptr %p, i64 16
+ %0 = load i32, ptr %arrayidx
+ br label %if.true
+
+if.true:
+ %res = phi i32 [ %0, %if.false ], [ 0, %entry ]
+ ret i32 %res
+}
+
+;; Metadata range/annotation are kept.
+define void @nondebug_metadata(i1 %cond, ptr %p, ptr %q) {
+; CHECK-LABEL: @nondebug_metadata(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
+; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison), !range [[RNG5:![0-9]+]]
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison), !annotation [[META6:![0-9]+]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
+; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP5]], ptr [[Q]], i32 4, <1 x i1> [[TMP0]]), !annotation [[META6]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP6]], ptr [[P]], i32 2, <1 x i1> [[TMP0]])
+; CHECK-NEXT: ret void
+;
+entry:
+ br i1 %cond, label %if.true, label %if.false
+
+if.false:
+ ret void
+
+if.true:
+ %0 = load i16, ptr %p, align 2, !range !{i16 0, i16 10}
+ %1 = load i32, ptr %q, align 4, !annotation !11
+ store i16 %0, ptr %q, align 4, !annotation !11
+ store i32 %1, ptr %p, align 2
+ br label %if.false
+}
+
+define i16 @debug_metadata_diassign(i1 %cond, i16 %a, ptr %p) {
+; CHECK-LABEL: @debug_metadata_diassign(
+; CHECK-NEXT: bb0:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
+; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> <i16 7>, ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], i16 3, i16 2
+; CHECK-NEXT: ret i16 [[SPEC_SELECT]]
+;
+bb0:
+ br i1 %cond, label %if.true, label %if.false
+
+if.true:
+ store i16 7, ptr %p, align 4, !DIAssignID !9
+ br label %if.false
+
+if.false:
+ %ret = phi i16 [ 2, %bb0 ], [ 3, %if.true ]
+ call void @llvm.dbg.assign(metadata i16 %ret, metadata !8, metadata !DIExpression(), metadata !9, metadata ptr %p, metadata !DIExpression()), !dbg !7
+ ret i16 %ret
+}
+
+;; Don't crash when combined with the optimization controlled by simplifycfg-hoist-cond-stores.
+define i32 @hoist_cond_stores(i1 %cond, ptr %p) {
+; CHECK-LABEL: @hoist_cond_stores(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i1 false, ptr [[P:%.*]], align 2
+; CHECK-NEXT: [[SPEC_STORE_SELECT:%.*]] = select i1 [[COND:%.*]], i1 false, i1 false
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> zeroinitializer, ptr [[P]], i32 8, <1 x i1> [[TMP0]])
+; CHECK-NEXT: store i1 [[SPEC_STORE_SELECT]], ptr [[P]], align 2
+; CHECK-NEXT: ret i32 0
+;
+entry:
+ store i1 false, ptr %p, align 2
+ br i1 %cond, label %if.true, label %if.false
+
+if.true: ; preds = %entry
+ store i32 0, ptr %p, align 8
+ store i1 false, ptr %p, align 2
+ br label %if.false
+
+if.false: ; preds = %if.true, %entry
+ ret i32 0
+}
+
+;; Both successor 0 and successor 1 have a single predecessor.
+;; TODO: Support transform for this case.
+define void @single_predecessor(ptr %p, ptr %q, i32 %a) {
+; CHECK-LABEL: @single_predecessor(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
+; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK: common.ret:
+; CHECK-NEXT: ret void
+; CHECK: if.end:
+; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Q]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+entry:
+ %tobool = icmp ne i32 %a, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.end:
+ store i32 1, ptr %q
+ ret void
+
+if.then:
+ %0 = load i32, ptr %q
+ store i32 %0, ptr %p
+ ret void
+}
+
+;; Hoist 6 stores.
+define void @threshold_6(i1 %cond, ptr %p1, ptr %p2, ptr %p3, ptr %p4, ptr %p5, ptr %p6) {
+; CHECK-LABEL: @threshold_6(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 1>, ptr [[P1:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 2>, ptr [[P2:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 3>, ptr [[P3:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 4>, ptr [[P4:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 5>, ptr [[P5:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> <i32 6>, ptr [[P6:%.*]], i32 4, <1 x i1> [[TMP0]])
+; CHECK-NEXT: ret void
+;
+entry:
+ br i1 %cond, label %if.true, label %if.false
+
+if.true:
+ store i32 1, ptr %p1, align 4
+ store i32 2, ptr %p2, align 4
+ store i32 3, ptr %p3, align 4
+ store i32 4, ptr %p4, align 4
+ store i32 5, ptr %p5, align 4
+ store i32 6, ptr %p6, align 4
+ br label %if.false
+
+if.false:
+ ret void
+}
+
+;; Don't hoist 7 stores (above the threshold).
+define void @threshold_7(i1 %cond, ptr %p1, ptr %p2, ptr %p3, ptr %p4, ptr %p5, ptr %p6, ptr %p7) {
+; CHECK-LABEL: @threshold_7(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK: if.true:
+; CHECK-NEXT: store i32 1, ptr [[P1:%.*]], align 4
+; CHECK-NEXT: store i32 2, ptr [[P2:%.*]], align 4
+; CHECK-NEXT: store i32 3, ptr [[P3:%.*]], align 4
+; CHECK-NEXT: store i32 4, ptr [[P4:%.*]], align 4
+; CHECK-NEXT: store i32 5, ptr [[P5:%.*]], align 4
+; CHECK-NEXT: store i32 6, ptr [[P6:%.*]], align 4
+; CHECK-NEXT: store i32 7, ptr [[P7:%.*]], align 4
+; CHECK-NEXT: br label [[IF_FALSE]]
+; CHECK: if.false:
+; CHECK-NEXT: ret void
+;
+entry:
+ br i1 %cond, label %if.true, label %if.false
+
+if.true:
+ store i32 1, ptr %p1, align 4
+ store i32 2, ptr %p2, align 4
+ store i32 3, ptr %p3, align 4
+ store i32 4, ptr %p4, align 4
+ store i32 5, ptr %p5, align 4
+ store i32 6, ptr %p6, align 4
+ store i32 7, ptr %p7, align 4
+ br label %if.false
+
+if.false:
+ ret void
+}
+
+;; Don't hoist if the instructions to be hoisted are expensive.
+define i32 @not_cheap_to_hoist(i32 %a, ptr %b, ptr %p, ptr %q, i32 %v0, i32 %v1, i32 %v2, i1 %cc) {
+; CHECK-LABEL: @not_cheap_to_hoist(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; CHECK-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK: common.ret:
+; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ [[VVVV:%.*]], [[IF_FALSE]] ], [ 0, [[IF_TRUE]] ]
+; CHECK-NEXT: ret i32 [[COMMON_RET_OP]]
+; CHECK: if.false:
+; CHECK-NEXT: store i64 1, ptr [[P:%.*]], align 8
+; CHECK-NEXT: store i16 2, ptr [[Q:%.*]], align 2
+; CHECK-NEXT: [[V:%.*]] = udiv i32 [[A]], 12345
+; CHECK-NEXT: [[VV:%.*]] = mul i32 [[V]], [[V0:%.*]]
+; CHECK-NEXT: [[VVV:%.*]] = mul i32 [[VV]], [[V1:%.*]]
+; CHECK-NEXT: [[VVVV]] = select i1 [[CC:%.*]], i32 [[V2:%.*]], i32 [[VVV]]
+; CHECK-NEXT: br label [[COMMON_RET:%.*]]
+; CHECK: if.true:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], ptr [[P]], align 4
+; CHECK-NEXT: br label [[COMMON_RET]]
+;
+entry:
+ %cond = icmp eq i32 %a, 0
+ br i1 %cond, label %if.true, label %if.false
+
+if.false:
+ store i64 1, ptr %p
+ store i16 2, ptr %q
+
+ %v = udiv i32 %a, 12345
+ %vv = mul i32 %v, %v0
+ %vvv = mul i32 %vv, %v1
+ %vvvv = select i1 %cc, i32 %v2, i32 %vvv
+ ret i32 %vvvv
+
+if.true:
+ %0 = load i32, ptr %b
+ store i32 %0, ptr %p
+ br label %if.end
+
+if.end:
+ ret i32 0
+}
+
+;; Don't hoist if there is more than one predecessor.
+define void @not_single_predecessor(ptr %p, ptr %q, i32 %a) {
+; CHECK-LABEL: @not_single_predecessor(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
+; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK: if.end:
+; CHECK-NEXT: br label [[IF_THEN]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Q:%.*]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
+; CHECK-NEXT: br label [[IF_END]]
+;
+entry:
+ %tobool = icmp ne i32 %a, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.end:
+ br label %if.then
+
+if.then:
+ %1 = load i32, ptr %q
+ store i32 %1, ptr %p
+ br label %if.end
+}
+
+;; Don't hoist because i8 is not supported by conditional faulting.
+define void @not_supported_type(i8 %a, ptr %b, ptr %p, ptr %q) {
+; CHECK-LABEL: @not_supported_type(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[A:%.*]], 0
+; CHECK-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK: if.false:
+; CHECK-NEXT: store i8 1, ptr [[Q:%.*]], align 1
+; CHECK-NEXT: br label [[IF_END:%.*]]
+; CHECK: if.true:
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[B:%.*]], align 1
+; CHECK-NEXT: store i8 [[TMP0]], ptr [[P:%.*]], align 1
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %cond = icmp eq i8 %a, 0
+ br i1 %cond, label %if.true, label %if.false
+
+if.false:
+ store i8 1, ptr %q
+ br label %if.end
+
+if.true:
+ %0 = load i8, ptr %b
+ store i8 %0, ptr %p
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+;; Don't hoist if the terminator is not a br.
+define void @not_br_terminator(i32 %a, ptr %b, ptr %p, ptr %q) {
+; CHECK-LABEL: @not_br_terminator(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: switch i32 [[A:%.*]], label [[IF_END:%.*]] [
+; CHECK-NEXT: i32 1, label [[IF_FALSE:%.*]]
+; CHECK-NEXT: i32 2, label [[IF_TRUE:%.*]]
+; CHECK-NEXT: ]
+; CHECK: if.false:
+; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.true:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
+; CHECK-NEXT: br label [[IF_FALSE]]
+; CHECK: if.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ switch i32 %a, label %if.end [
+ i32 1, label %if.false
+ i32 2, label %if.true
+ ]
+
+if.false:
+ store i32 1, ptr %q, align 4
+ br label %if.end
+
+if.true:
+ %0 = load i32, ptr %b, align 4
+ store i32 %0, ptr %p, align 4
+ br label %if.false
+
+if.end:
+ ret void
+}
+
+;; Don't hoist if the instruction to be hoisted is atomic.
+define void @not_atomic(i1 %cond, ptr %p) {
+; CHECK-LABEL: @not_atomic(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK: if.false:
+; CHECK-NEXT: store atomic i32 1, ptr [[P:%.*]] seq_cst, align 4
+; CHECK-NEXT: br label [[IF_TRUE]]
+; CHECK: if.true:
+; CHECK-NEXT: ret void
+;
+entry:
+ br i1 %cond, label %if.true, label %if.false
+
+if.false:
+ store atomic i32 1, ptr %p seq_cst, align 4
+ br label %if.true
+
+if.true:
+ ret void
+}
+
+;; Don't hoist if the instruction to be hoisted is volatile.
+define void @not_volatile(i1 %cond, ptr %p) {
+; CHECK-LABEL: @not_volatile(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK: if.false:
+; CHECK-NEXT: [[TMP0:%.*]] = load volatile i32, ptr [[P:%.*]], align 4
+; CHECK-NEXT: br label [[IF_TRUE]]
+; CHECK: if.true:
+; CHECK-NEXT: ret void
+;
+entry:
+ br i1 %cond, label %if.true, label %if.false
+
+if.false:
+ %0 = load volatile i32, ptr %p, align 4
+ br label %if.true
+
+if.true:
+ ret void
+}
+
+;; Don't hoist if there is an instruction with side effects in the same BB.
+define void @not_hoistable_sideeffect(i1 %cond, ptr %p, ptr %q) {
+; CHECK-LABEL: @not_hoistable_sideeffect(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK: if.false:
+; CHECK-NEXT: [[RMW:%.*]] = atomicrmw xchg ptr [[Q:%.*]], double 4.000000e+00 seq_cst, align 8
+; CHECK-NEXT: store i32 1, ptr [[P:%.*]], align 4
+; CHECK-NEXT: br label [[IF_TRUE]]
+; CHECK: if.true:
+; CHECK-NEXT: ret void
+;
+entry:
+ br i1 %cond, label %if.true, label %if.false
+
+if.false:
+ %rmw = atomicrmw xchg ptr %q, double 4.0 seq_cst
+ store i32 1, ptr %p, align 4
+ br label %if.true
+
+if.true:
+ ret void
+}
+
+;; Don't hoist if the branch is predictable and the `then` BB is not likely to execute.
+define void @not_likely_to_execute(ptr %p, ptr %q, i32 %a) {
+; CHECK-LABEL: @not_likely_to_execute(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
+; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]], !prof [[PROF7:![0-9]+]]
+; CHECK: if.end:
+; CHECK-NEXT: ret void
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[Q:%.*]], align 4
+; CHECK-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
+; CHECK-NEXT: br label [[IF_END]]
+;
+entry:
+ %tobool = icmp ne i32 %a, 0
+ br i1 %tobool, label %if.then, label %if.end, !prof !10
+
+if.end:
+ ret void
+
+if.then:
+ %0 = load i32, ptr %q
+ store i32 %0, ptr %p
+ br label %if.end
+}
+
+;; The optimization hoist-loads-stores-with-cond-faulting is run in codegen,
+;; after sroa has optimized the alloca away, so we don't need to do the
+;; transform for this case. But in the future, it will probably be moved before sroa.
+define void @not_alloca(ptr %p, ptr %q, i32 %a) {
+; CHECK-LABEL: @not_alloca(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[P_ADDR:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: [[Q_ADDR:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store ptr [[P:%.*]], ptr [[P_ADDR]], align 8
+; CHECK-NEXT: store ptr [[Q:%.*]], ptr [[Q_ADDR]], align 8
+; CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK: if.then:
+; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[Q_ADDR]], align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[P_ADDR]], align 8
+; CHECK-NEXT: store i32 [[TMP2]], ptr [[TMP3]], align 4
+; CHECK-NEXT: br label [[IF_END]]
+; CHECK: if.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %p.addr = alloca ptr
+ %q.addr = alloca ptr
+ %a.addr = alloca i32
+ store ptr %p, ptr %p.addr
+ store ptr %q, ptr %q.addr
+ store i32 %a, ptr %a.addr
+ %0 = load i32, ptr %a.addr
+ %tobool = icmp ne i32 %0, 0
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then:
+ %1 = load ptr, ptr %q.addr
+ %2 = load i32, ptr %1
+ %3 = load ptr, ptr %p.addr
+ store i32 %2, ptr %3
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+;; Don't transform if the alignment is 2^32.
+define void @not_maximum_alignment(i1 %cond, ptr %p) {
+; CHECK-LABEL: @not_maximum_alignment(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK: if.true:
+; CHECK-NEXT: store i32 0, ptr [[P:%.*]], align 4294967296
+; CHECK-NEXT: br label [[IF_FALSE]]
+; CHECK: if.false:
+; CHECK-NEXT: ret void
+;
+entry:
+ br i1 %cond, label %if.true, label %if.false
+
+if.true:
+ store i32 0, ptr %p, align 4294967296
+ br label %if.false
+
+if.false:
+ ret void
+}
+
+declare i32 @read_memory_only() readonly nounwind willreturn speculatable
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3}
+!llvm.ident = !{!4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "clang")
+!1 = !DIFile(filename: "foo.c", directory: "/tmp")
+!2 = !{i32 2, !"Dwarf Version", i32 4}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{!"clang"}
+!5 = !DIBasicType(name: "int", size: 16, encoding: DW_ATE_signed)
+!6 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 4, unit: !0)
+!7 = !DILocation(line: 5, column: 7, scope: !6)
+!8 = !DILocalVariable(name: "a", scope: !6, line: 6, type: !5)
+!9 = distinct !DIAssignID()
+!10 = !{!"branch_weights", i32 1, i32 99}
+!11 = !{ !"auto-init" }