[llvm] [X86,SimplifyCFG] Support conditional faulting load or store only (PR #132032)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 19 06:56:24 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: Phoebe Wang (phoebewang)
This fixes a bug that appears when a target supports only conditional faulting loads; see the test case hoist_store_without_cstore.
It also adds the option `disable-cload-cstore` to emulate the load-only and store-only configurations.
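For context, the new flag is a bitmask: bit 0 (value 1) disables conditional-faulting loads, bit 1 (value 2) disables conditional-faulting stores, and 3 disables both. A minimal sketch of how the store-only mode can be exercised (the RUN invocation mirrors the updated test; the function body is illustrative, not the actual test input):

```llvm
; RUN: opt -mtriple=x86_64 -mattr=+cf \
; RUN:   -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' \
; RUN:   -disable-cload-cstore=1 -S < %s
; With bit 0 set, only the store may be turned into @llvm.masked.store;
; the load is left to ordinary speculation (branch/phi/select) instead of
; becoming @llvm.masked.load.
define void @sketch(i1 %cond, ptr %p, ptr %q) {
entry:
  br i1 %cond, label %if.true, label %if.end

if.true:
  %v = load i32, ptr %p, align 4
  store i32 %v, ptr %q, align 4
  br label %if.end

if.end:
  ret void
}
```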
---
Patch is 60.97 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/132032.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Utils/SimplifyCFG.cpp (+24-9)
- (modified) llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll (+748-166)
``````````diff
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 17c5fa6503f20..8a8baf9073f48 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -132,6 +132,11 @@ static cl::opt<unsigned> HoistLoadsStoresWithCondFaultingThreshold(
"to speculatively execute to eliminate conditional branch "
"(default = 6)"));
+static cl::opt<unsigned> DisableCloadCstore(
+    "disable-cload-cstore", cl::Hidden, cl::init(0),
+    cl::desc("Bitmask to disable conditional-faulting loads (1), "
+             "stores (2), or both (3)"));
+
static cl::opt<unsigned>
HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
cl::init(20),
@@ -1682,22 +1687,22 @@ static bool areIdenticalUpToCommutativity(const Instruction *I1,
static void hoistConditionalLoadsStores(
BranchInst *BI,
SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
- std::optional<bool> Invert) {
+ std::optional<bool> Invert, Instruction *Sel) {
auto &Context = BI->getParent()->getContext();
auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
auto *Cond = BI->getOperand(0);
// Construct the condition if needed.
BasicBlock *BB = BI->getParent();
- IRBuilder<> Builder(
- Invert.has_value() ? SpeculatedConditionalLoadsStores.back() : BI);
Value *Mask = nullptr;
Value *MaskFalse = nullptr;
Value *MaskTrue = nullptr;
if (Invert.has_value()) {
+ IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
Mask = Builder.CreateBitCast(
*Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
VCondTy);
} else {
+ IRBuilder<> Builder(BI);
MaskFalse = Builder.CreateBitCast(
Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
@@ -1723,13 +1728,20 @@ static void hoistConditionalLoadsStores(
PHINode *PN = nullptr;
Value *PassThru = nullptr;
if (Invert.has_value())
- for (User *U : I->users())
+ for (User *U : I->users()) {
if ((PN = dyn_cast<PHINode>(U))) {
PassThru = Builder.CreateBitCast(
PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
FixedVectorType::get(Ty, 1));
- break;
+ } else if (auto *Ins = cast<Instruction>(U);
+ Sel && Ins->getParent() == BB) {
+          // This happens when the store and/or a speculative instruction
+          // between the load and store were hoisted into BB. Make sure the
+          // masked load is inserted before its use.
+          // We assume there is only one such use.
+ Builder.SetInsertPoint(Ins);
}
+ }
MaskedLoadStore = Builder.CreateMaskedLoad(
FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
@@ -1769,10 +1781,10 @@ static bool isSafeCheapLoadStore(const Instruction *I,
const TargetTransformInfo &TTI) {
// Not handle volatile or atomic.
if (auto *L = dyn_cast<LoadInst>(I)) {
- if (!L->isSimple())
+ if (!L->isSimple() || (DisableCloadCstore & 1))
return false;
} else if (auto *S = dyn_cast<StoreInst>(I)) {
- if (!S->isSimple())
+ if (!S->isSimple() || (DisableCloadCstore & 2))
return false;
} else
return false;
@@ -3308,6 +3320,7 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
// If we get here, we can hoist the instruction and if-convert.
LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
+ Instruction *Sel = nullptr;
// Insert a select of the value of the speculated store.
if (SpeculatedStoreValue) {
IRBuilder<NoFolder> Builder(BI);
@@ -3318,6 +3331,7 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
std::swap(TrueV, FalseV);
Value *S = Builder.CreateSelect(
BrCond, TrueV, FalseV, "spec.store.select", BI);
+ Sel = cast<Instruction>(S);
SpeculatedStore->setOperand(0, S);
SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
SpeculatedStore->getDebugLoc());
@@ -3390,7 +3404,8 @@ bool SimplifyCFGOpt::speculativelyExecuteBB(BranchInst *BI,
std::prev(ThenBB->end()));
if (!SpeculatedConditionalLoadsStores.empty())
- hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert);
+ hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
+ Sel);
// Insert selects and rewrite the PHI operands.
IRBuilder<NoFolder> Builder(BI);
@@ -8042,7 +8057,7 @@ bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
if (CanSpeculateConditionalLoadsStores()) {
hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
- std::nullopt);
+ std::nullopt, nullptr);
return requestResimplify();
}
}
diff --git a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
index 5c9058b482320..eacabde9f3d61 100644
--- a/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
+++ b/llvm/test/Transforms/SimplifyCFG/X86/hoist-loads-stores-with-cf.ll
@@ -1,24 +1,42 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -S | FileCheck %s --check-prefixes=CHECK,LOADSTORE
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -disable-cload-cstore=0 -S | FileCheck %s --check-prefixes=CHECK,LOADSTORE
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -disable-cload-cstore=1 -S | FileCheck %s --check-prefixes=CHECK,NONE,STOREONLY
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -disable-cload-cstore=2 -S | FileCheck %s --check-prefixes=CHECK,NONE,LOADONLY
+; RUN: opt < %s -mtriple=x86_64 -mattr=+cf -passes='simplifycfg<hoist-loads-stores-with-cond-faulting>' -simplifycfg-require-and-preserve-domtree=1 -disable-cload-cstore=3 -S | FileCheck %s --check-prefixes=NONE,NONEONLY
;; Basic case: check masked.load/store is generated for i16/i32/i64.
define void @basic(i1 %cond, ptr %b, ptr %p, ptr %q) {
-; CHECK-LABEL: @basic(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
-; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
-; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
-; CHECK-NEXT: [[TMP5:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[B:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> poison)
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
-; CHECK-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 2, <1 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP8]], ptr [[P]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP6]] to <1 x i64>
-; CHECK-NEXT: call void @llvm.masked.store.v1i64.p0(<1 x i64> [[TMP9]], ptr [[Q]], i32 8, <1 x i1> [[TMP0]])
-; CHECK-NEXT: ret void
+; LOADSTORE-LABEL: @basic(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND:%.*]] to <1 x i1>
+; LOADSTORE-NEXT: [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
+; LOADSTORE-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
+; LOADSTORE-NEXT: [[TMP3:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
+; LOADSTORE-NEXT: [[TMP4:%.*]] = bitcast <1 x i32> [[TMP3]] to i32
+; LOADSTORE-NEXT: [[TMP5:%.*]] = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr [[B:%.*]], i32 8, <1 x i1> [[TMP0]], <1 x i64> poison)
+; LOADSTORE-NEXT: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to i64
+; LOADSTORE-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <1 x i16>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i16.p0(<1 x i16> [[TMP7]], ptr [[B]], i32 2, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: [[TMP8:%.*]] = bitcast i32 [[TMP4]] to <1 x i32>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP8]], ptr [[P]], i32 4, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP6]] to <1 x i64>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i64.p0(<1 x i64> [[TMP9]], ptr [[Q]], i32 8, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: ret void
+;
+; NONE-LABEL: @basic(
+; NONE-NEXT: entry:
+; NONE-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_END:%.*]]
+; NONE: if.true:
+; NONE-NEXT: [[TMP0:%.*]] = load i16, ptr [[P:%.*]], align 2
+; NONE-NEXT: [[TMP1:%.*]] = load i32, ptr [[Q:%.*]], align 4
+; NONE-NEXT: [[TMP2:%.*]] = load i64, ptr [[B:%.*]], align 8
+; NONE-NEXT: store i16 [[TMP0]], ptr [[B]], align 2
+; NONE-NEXT: store i32 [[TMP1]], ptr [[P]], align 4
+; NONE-NEXT: store i64 [[TMP2]], ptr [[Q]], align 8
+; NONE-NEXT: br label [[IF_END]]
+; NONE: if.end:
+; NONE-NEXT: ret void
;
entry:
br i1 %cond, label %if.true, label %if.false
@@ -41,16 +59,27 @@ if.end:
;; Successor 1 branches to successor 0.
define void @succ1to0(ptr %p, ptr %q, i32 %a) {
-; CHECK-LABEL: @succ1to0(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
-; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[TOBOOL]], true
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
-; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to <1 x i32>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP4]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
-; CHECK-NEXT: ret void
+; LOADSTORE-LABEL: @succ1to0(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
+; LOADSTORE-NEXT: [[TMP0:%.*]] = xor i1 [[TOBOOL]], true
+; LOADSTORE-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; LOADSTORE-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[Q:%.*]], i32 4, <1 x i1> [[TMP1]], <1 x i32> poison)
+; LOADSTORE-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
+; LOADSTORE-NEXT: [[TMP4:%.*]] = bitcast i32 [[TMP3]] to <1 x i32>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP4]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP1]])
+; LOADSTORE-NEXT: ret void
+;
+; NONE-LABEL: @succ1to0(
+; NONE-NEXT: entry:
+; NONE-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[A:%.*]], 0
+; NONE-NEXT: br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; NONE: if.end:
+; NONE-NEXT: ret void
+; NONE: if.then:
+; NONE-NEXT: [[TMP0:%.*]] = load i32, ptr [[Q:%.*]], align 4
+; NONE-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
+; NONE-NEXT: br label [[IF_END]]
;
entry:
%tobool = icmp ne i32 %a, 0
@@ -67,14 +96,45 @@ if.then:
;; Successor 1 branches to successor 0 and there is a phi node.
define i32 @succ1to0_phi(ptr %p) {
-; CHECK-LABEL: @succ1to0_phi(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
-; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
-; CHECK-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
-; CHECK-NEXT: ret i32 [[TMP3]]
+; LOADSTORE-LABEL: @succ1to0_phi(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; LOADSTORE-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
+; LOADSTORE-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; LOADSTORE-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
+; LOADSTORE-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
+; LOADSTORE-NEXT: ret i32 [[TMP3]]
+;
+; STOREONLY-LABEL: @succ1to0_phi(
+; STOREONLY-NEXT: entry:
+; STOREONLY-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; STOREONLY-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; STOREONLY: if.false:
+; STOREONLY-NEXT: [[TMP0:%.*]] = load i32, ptr [[P]], align 4
+; STOREONLY-NEXT: br label [[IF_TRUE]]
+; STOREONLY: if.true:
+; STOREONLY-NEXT: [[RES:%.*]] = phi i32 [ [[TMP0]], [[IF_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; STOREONLY-NEXT: ret i32 [[RES]]
+;
+; LOADONLY-LABEL: @succ1to0_phi(
+; LOADONLY-NEXT: entry:
+; LOADONLY-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; LOADONLY-NEXT: [[TMP0:%.*]] = xor i1 [[COND]], true
+; LOADONLY-NEXT: [[TMP1:%.*]] = bitcast i1 [[TMP0]] to <1 x i1>
+; LOADONLY-NEXT: [[TMP2:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[P]], i32 4, <1 x i1> [[TMP1]], <1 x i32> zeroinitializer)
+; LOADONLY-NEXT: [[TMP3:%.*]] = bitcast <1 x i32> [[TMP2]] to i32
+; LOADONLY-NEXT: ret i32 [[TMP3]]
+;
+; NONEONLY-LABEL: @succ1to0_phi(
+; NONEONLY-NEXT: entry:
+; NONEONLY-NEXT: [[COND:%.*]] = icmp eq ptr [[P:%.*]], null
+; NONEONLY-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; NONEONLY: if.false:
+; NONEONLY-NEXT: [[TMP0:%.*]] = load i32, ptr [[P]], align 4
+; NONEONLY-NEXT: br label [[IF_TRUE]]
+; NONEONLY: if.true:
+; NONEONLY-NEXT: [[RES:%.*]] = phi i32 [ [[TMP0]], [[IF_FALSE]] ], [ 0, [[ENTRY:%.*]] ]
+; NONEONLY-NEXT: ret i32 [[RES]]
;
entry:
%cond = icmp eq ptr %p, null
@@ -91,16 +151,28 @@ if.true:
;; Successor 0 branches to successor 1.
define void @succ0to1(i32 %a, ptr %b, ptr %p, ptr %q) {
-; CHECK-LABEL: @succ0to1(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
-; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i32> [[TMP1]] to i32
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <1 x i32>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4
-; CHECK-NEXT: ret void
+; LOADSTORE-LABEL: @succ0to1(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; LOADSTORE-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; LOADSTORE-NEXT: [[TMP1:%.*]] = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]], <1 x i32> poison)
+; LOADSTORE-NEXT: [[TMP2:%.*]] = bitcast <1 x i32> [[TMP1]] to i32
+; LOADSTORE-NEXT: [[TMP3:%.*]] = bitcast i32 [[TMP2]] to <1 x i32>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP3]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: store i32 1, ptr [[Q:%.*]], align 4
+; LOADSTORE-NEXT: ret void
+;
+; NONE-LABEL: @succ0to1(
+; NONE-NEXT: entry:
+; NONE-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; NONE-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; NONE: if.false:
+; NONE-NEXT: store i32 1, ptr [[Q:%.*]], align 4
+; NONE-NEXT: ret void
+; NONE: if.true:
+; NONE-NEXT: [[TMP0:%.*]] = load i32, ptr [[B:%.*]], align 4
+; NONE-NEXT: store i32 [[TMP0]], ptr [[P:%.*]], align 4
+; NONE-NEXT: br label [[IF_FALSE]]
;
entry:
%cond = icmp eq i32 %a, 0
@@ -121,16 +193,29 @@ if.end:
;; Load after store can be hoisted.
define i64 @load_after_store(i32 %a, ptr %b, ptr %p) {
-; CHECK-LABEL: @load_after_store(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT: [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
-; CHECK-NEXT: [[ZEXT:%.*]] = zext i16 [[TMP2]] to i64
-; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], i64 [[ZEXT]], i64 0
-; CHECK-NEXT: ret i64 [[SPEC_SELECT]]
+; LOADSTORE-LABEL: @load_after_store(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; LOADSTORE-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> splat (i32 1), ptr [[B:%.*]], i32 4, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: [[TMP1:%.*]] = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr [[P:%.*]], i32 2, <1 x i1> [[TMP0]], <1 x i16> poison)
+; LOADSTORE-NEXT: [[TMP2:%.*]] = bitcast <1 x i16> [[TMP1]] to i16
+; LOADSTORE-NEXT: [[ZEXT:%.*]] = zext i16 [[TMP2]] to i64
+; LOADSTORE-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[COND]], i64 [[ZEXT]], i64 0
+; LOADSTORE-NEXT: ret i64 [[SPEC_SELECT]]
+;
+; NONE-LABEL: @load_after_store(
+; NONE-NEXT: entry:
+; NONE-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; NONE-NEXT: br i1 [[COND]], label [[IF_TRUE:%.*]], label [[COMMON_RET:%.*]]
+; NONE: common.ret:
+; NONE-NEXT: [[COMMON_RET_OP:%.*]] = phi i64 [ [[ZEXT:%.*]], [[IF_TRUE]] ], [ 0, [[ENTRY:%.*]] ]
+; NONE-NEXT: ret i64 [[COMMON_RET_OP]]
+; NONE: if.true:
+; NONE-NEXT: store i32 1, ptr [[B:%.*]], align 4
+; NONE-NEXT: [[TMP0:%.*]] = load i16, ptr [[P:%.*]], align 2
+; NONE-NEXT: [[ZEXT]] = zext i16 [[TMP0]] to i64
+; NONE-NEXT: br label [[COMMON_RET]]
;
entry:
%cond = icmp eq i32 %a, 0
@@ -148,15 +233,49 @@ if.end:
;; Speculatable memory read doesn't prevent the hoist.
define void @load_skip_speculatable_memory_read(i32 %a, ptr %p, ptr %q) {
-; CHECK-LABEL: @load_skip_speculatable_memory_read(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
-; CHECK-NEXT: [[READ:%.*]] = call i32 @read_memory_only()
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[READ]] to <1 x i32>
-; CHECK-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP1]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
-; CHECK-NEXT: store i32 1, ptr [[Q:%.*]], align 4
-; CHECK-NEXT: ret void
+; LOADSTORE-LABEL: @load_skip_speculatable_memory_read(
+; LOADSTORE-NEXT: entry:
+; LOADSTORE-NEXT: [[COND:%.*]] = icmp eq i32 [[A:%.*]], 0
+; LOADSTORE-NEXT: [[READ:%.*]] = call i32 @read_memory_only()
+; LOADSTORE-NEXT: [[TMP0:%.*]] = bitcast i1 [[COND]] to <1 x i1>
+; LOADSTORE-NEXT: [[TMP1:%.*]] = bitcast i32 [[READ]] to <1 x i32>
+; LOADSTORE-NEXT: call void @llvm.masked.store.v1i32.p0(<1 x i32> [[TMP1]], ptr [[P:%.*]], i32 4, <1 x i1> [[TMP0]])
+; LOADSTORE-NEXT: store i32 1, ptr [[Q:%.*]], align 4
+; LOADSTORE-NEXT: ret void
+;
+; STOREONLY-LABEL: @load_skip_speculatable_memory_read(
+; STOREONLY-NEXT: entry:
+; STOREONLY-NEXT: [[COND...
[truncated]
``````````
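To make the new `Sel` parameter concrete: when conditional stores are disabled, speculativelyExecuteBB hoists the store through a `spec.store.select` of the stored value, and the masked load that feeds that select must be materialized before it. A conceptual sketch of the hoisted form (only `spec.store.select` is a name from the patch; the other value names are placeholders):

```llvm
; Hoisted form when only conditional-faulting loads are usable (sketch).
%mask = bitcast i1 %cond to <1 x i1>
%v.vec = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr %p, i32 4, <1 x i1> %mask, <1 x i32> poison)
%v = bitcast <1 x i32> %v.vec to i32
; Before this patch the masked load could be emitted after the select that
; consumes it, i.e. a use before its definition; passing Sel (or the
; in-block user) as the insertion point keeps the definition first.
%spec.store.select = select i1 %cond, i32 %v, i32 %old
store i32 %spec.store.select, ptr %q, align 4
```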
https://github.com/llvm/llvm-project/pull/132032