[llvm] [SimplifyCFG] Increase budget for FoldTwoEntryPHINode() if the branch is unpredictable. (PR #98495)
Tianqing Wang via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 21 23:54:15 PDT 2024
https://github.com/tianqingw updated https://github.com/llvm/llvm-project/pull/98495
>From fe7c50ae94d49e69e7844882b79ec47afcde72d4 Mon Sep 17 00:00:00 2001
From: Tianqing Wang <tianqing.wang at intel.com>
Date: Thu, 11 Jul 2024 23:16:09 +0800
Subject: [PATCH 1/6] [SimplifyCFG] Increase budget for FoldTwoEntryPHINode()
if the branch is unpredictable.
The `!unpredictable` metadata has been present for a long time, but
its usage in optimizations is still limited. This patch teaches
`FoldTwoEntryPHINode()` to be more aggressive with an unpredictable
branch to reduce mispredictions.
A TTI interface `getBranchMispredictPenalty()` is added to distinguish
between different hardware to ensure we don't go too far for simpler
cores. For simplicity, only a naive x86 implementation is included for
the time being.
---
.../llvm/Analysis/TargetTransformInfo.h | 9 ++
.../llvm/Analysis/TargetTransformInfoImpl.h | 2 +
llvm/lib/Analysis/TargetTransformInfo.cpp | 4 +
.../lib/Target/X86/X86TargetTransformInfo.cpp | 5 +
llvm/lib/Target/X86/X86TargetTransformInfo.h | 2 +
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 9 +-
.../two-entry-phi-fold-unpredictable.ll | 96 +++++++++++++++++++
7 files changed, 124 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-unpredictable.ll
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index cf378008e4c7c..bdcaeab033791 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -419,6 +419,11 @@ class TargetTransformInfo {
/// this factor, it is very likely to be predicted correctly.
BranchProbability getPredictableBranchThreshold() const;
+ // Returns an integer indicating how aggressive the target wants for
+ // eliminating unpredictable branches. A zero return value means extra
+ // optimization applied to them should be minimal.
+ unsigned getBranchMispredictPenalty() const;
+
/// Return true if branch divergence exists.
///
/// Branch divergence has a significantly negative impact on GPU performance
@@ -1832,6 +1837,7 @@ class TargetTransformInfo::Concept {
ArrayRef<const Value *> Operands,
TargetCostKind CostKind) = 0;
virtual BranchProbability getPredictableBranchThreshold() = 0;
+ virtual unsigned getBranchMispredictPenalty() = 0;
virtual bool hasBranchDivergence(const Function *F = nullptr) = 0;
virtual bool isSourceOfDivergence(const Value *V) = 0;
virtual bool isAlwaysUniform(const Value *V) = 0;
@@ -2243,6 +2249,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
BranchProbability getPredictableBranchThreshold() override {
return Impl.getPredictableBranchThreshold();
}
+ unsigned getBranchMispredictPenalty() override {
+ return Impl.getBranchMispredictPenalty();
+ }
bool hasBranchDivergence(const Function *F = nullptr) override {
return Impl.hasBranchDivergence(F);
}
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 47fde08735c0c..a87470c6446c0 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -99,6 +99,8 @@ class TargetTransformInfoImplBase {
return BranchProbability(99, 100);
}
+ unsigned getBranchMispredictPenalty() const { return 0; }
+
bool hasBranchDivergence(const Function *F = nullptr) const { return false; }
bool isSourceOfDivergence(const Value *V) const { return false; }
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 693f7a5bb7af5..a1fcf5482d334 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -279,6 +279,10 @@ BranchProbability TargetTransformInfo::getPredictableBranchThreshold() const {
: TTIImpl->getPredictableBranchThreshold();
}
+unsigned TargetTransformInfo::getBranchMispredictPenalty() const {
+ return TTIImpl->getBranchMispredictPenalty();
+}
+
bool TargetTransformInfo::hasBranchDivergence(const Function *F) const {
return TTIImpl->hasBranchDivergence(F);
}
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 32a3683355b72..984586f4ae5f6 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -6756,3 +6756,8 @@ InstructionCost X86TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
return AM.Scale != 0;
return -1;
}
+
+unsigned X86TTIImpl::getBranchMispredictPenalty() const {
+ // TODO: Hook MispredictPenalty of SchedMachineModel into this.
+ return 14;
+}
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 5eccb1aea308d..d2b5c093e7003 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -294,6 +294,8 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
bool supportsEfficientVectorElementLoadStore() const;
bool enableInterleavedAccessVectorization();
+ unsigned getBranchMispredictPenalty() const;
+
private:
bool supportsGather() const;
InstructionCost getGSVectorCost(unsigned Opcode, TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 8f717cb43bcb4..73687b5c31c64 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3508,7 +3508,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// jump to one specific 'then' block (if we have two of them).
// It isn't beneficial to speculatively execute the code
// from the block that we know is predictably not entered.
- if (!DomBI->getMetadata(LLVMContext::MD_unpredictable)) {
+ bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
+ if (!IsUnpredictable) {
uint64_t TWeight, FWeight;
if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
(TWeight + FWeight) != 0) {
@@ -3549,8 +3550,10 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// that need to be moved to the dominating block.
SmallPtrSet<Instruction *, 4> AggressiveInsts;
InstructionCost Cost = 0;
- InstructionCost Budget =
- TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
+ unsigned Threshold = TwoEntryPHINodeFoldingThreshold;
+ if (IsUnpredictable)
+ Threshold += TTI.getBranchMispredictPenalty();
+ InstructionCost Budget = Threshold * TargetTransformInfo::TCC_Basic;
bool Changed = false;
for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
diff --git a/llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-unpredictable.ll b/llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-unpredictable.ll
new file mode 100644
index 0000000000000..88aa8a619207d
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-unpredictable.ll
@@ -0,0 +1,96 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; Two-entry phi nodes with unpredictable conditions may get increased budget for folding.
+; RUN: opt < %s -S -passes=simplifycfg | FileCheck --check-prefix=CHECK-NOFOLD %s
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s -S -passes=simplifycfg | FileCheck --check-prefix=CHECK-FOLD %s
+
+define { <2 x float>, <2 x float> } @foo(float %speed, <2 x float> %velocity.coerce0, <2 x float> %velocity.coerce1) {
+; CHECK-NOFOLD-LABEL: define { <2 x float>, <2 x float> } @foo(
+; CHECK-NOFOLD-SAME: float [[SPEED:%.*]], <2 x float> [[VELOCITY_COERCE0:%.*]], <2 x float> [[VELOCITY_COERCE1:%.*]]) {
+; CHECK-NOFOLD-NEXT: [[ENTRY:.*]]:
+; CHECK-NOFOLD-NEXT: [[CMP:%.*]] = fcmp fast ogt float [[SPEED]], 0x3F747AE140000000
+; CHECK-NOFOLD-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]], !unpredictable [[META0:![0-9]+]]
+; CHECK-NOFOLD: [[IF_THEN]]:
+; CHECK-NOFOLD-NEXT: [[VELOCITY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VELOCITY_COERCE0]], i64 0
+; CHECK-NOFOLD-NEXT: [[MUL_I_I_I_I:%.*]] = fmul fast float [[VELOCITY_SROA_0_0_VEC_EXTRACT]], [[VELOCITY_SROA_0_0_VEC_EXTRACT]]
+; CHECK-NOFOLD-NEXT: [[VELOCITY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VELOCITY_COERCE0]], i64 1
+; CHECK-NOFOLD-NEXT: [[MUL8_I_I_I_I:%.*]] = fmul fast float [[VELOCITY_SROA_0_4_VEC_EXTRACT]], [[VELOCITY_SROA_0_4_VEC_EXTRACT]]
+; CHECK-NOFOLD-NEXT: [[ADD_I_I_I_I:%.*]] = fadd fast float [[MUL8_I_I_I_I]], [[MUL_I_I_I_I]]
+; CHECK-NOFOLD-NEXT: [[VELOCITY_SROA_14_8_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VELOCITY_COERCE1]], i64 0
+; CHECK-NOFOLD-NEXT: [[MUL13_I_I_I_I:%.*]] = fmul fast float [[VELOCITY_SROA_14_8_VEC_EXTRACT]], [[VELOCITY_SROA_14_8_VEC_EXTRACT]]
+; CHECK-NOFOLD-NEXT: [[ADD14_I_I_I_I:%.*]] = fadd fast float [[ADD_I_I_I_I]], [[MUL13_I_I_I_I]]
+; CHECK-NOFOLD-NEXT: [[TMP0:%.*]] = tail call fast noundef float @llvm.sqrt.f32(float [[ADD14_I_I_I_I]])
+; CHECK-NOFOLD-NEXT: [[MUL_I_I_I:%.*]] = fdiv fast float 0x3FEFD70A40000000, [[TMP0]]
+; CHECK-NOFOLD-NEXT: [[SUB_I:%.*]] = fmul fast float [[MUL_I_I_I]], [[VELOCITY_SROA_0_0_VEC_EXTRACT]]
+; CHECK-NOFOLD-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[SUB_I]], i64 0
+; CHECK-NOFOLD-NEXT: [[SUB8_I:%.*]] = fmul fast float [[MUL_I_I_I]], [[VELOCITY_SROA_0_4_VEC_EXTRACT]]
+; CHECK-NOFOLD-NEXT: [[VELOCITY_SROA_0_4_VEC_INSERT25:%.*]] = insertelement <2 x float> [[TMP1]], float [[SUB8_I]], i64 1
+; CHECK-NOFOLD-NEXT: [[SUB13_I:%.*]] = fmul fast float [[MUL_I_I_I]], [[VELOCITY_SROA_14_8_VEC_EXTRACT]]
+; CHECK-NOFOLD-NEXT: [[VELOCITY_SROA_14_8_VEC_INSERT35:%.*]] = insertelement <2 x float> [[VELOCITY_COERCE1]], float [[SUB13_I]], i64 0
+; CHECK-NOFOLD-NEXT: br label %[[IF_END]]
+; CHECK-NOFOLD: [[IF_END]]:
+; CHECK-NOFOLD-NEXT: [[VELOCITY_SROA_0_0:%.*]] = phi nsz <2 x float> [ [[VELOCITY_SROA_0_4_VEC_INSERT25]], %[[IF_THEN]] ], [ zeroinitializer, %[[ENTRY]] ]
+; CHECK-NOFOLD-NEXT: [[VELOCITY_SROA_14_0:%.*]] = phi nsz <2 x float> [ [[VELOCITY_SROA_14_8_VEC_INSERT35]], %[[IF_THEN]] ], [ zeroinitializer, %[[ENTRY]] ]
+; CHECK-NOFOLD-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> [[VELOCITY_SROA_0_0]], 0
+; CHECK-NOFOLD-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[VELOCITY_SROA_14_0]], 1
+; CHECK-NOFOLD-NEXT: ret { <2 x float>, <2 x float> } [[DOTFCA_1_INSERT]]
+;
+; CHECK-FOLD-LABEL: define { <2 x float>, <2 x float> } @foo(
+; CHECK-FOLD-SAME: float [[SPEED:%.*]], <2 x float> [[VELOCITY_COERCE0:%.*]], <2 x float> [[VELOCITY_COERCE1:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-FOLD-NEXT: [[ENTRY:.*:]]
+; CHECK-FOLD-NEXT: [[CMP:%.*]] = fcmp fast ogt float [[SPEED]], 0x3F747AE140000000
+; CHECK-FOLD-NEXT: [[VELOCITY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VELOCITY_COERCE0]], i64 0
+; CHECK-FOLD-NEXT: [[MUL_I_I_I_I:%.*]] = fmul fast float [[VELOCITY_SROA_0_0_VEC_EXTRACT]], [[VELOCITY_SROA_0_0_VEC_EXTRACT]]
+; CHECK-FOLD-NEXT: [[VELOCITY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VELOCITY_COERCE0]], i64 1
+; CHECK-FOLD-NEXT: [[MUL8_I_I_I_I:%.*]] = fmul fast float [[VELOCITY_SROA_0_4_VEC_EXTRACT]], [[VELOCITY_SROA_0_4_VEC_EXTRACT]]
+; CHECK-FOLD-NEXT: [[ADD_I_I_I_I:%.*]] = fadd fast float [[MUL8_I_I_I_I]], [[MUL_I_I_I_I]]
+; CHECK-FOLD-NEXT: [[VELOCITY_SROA_14_8_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VELOCITY_COERCE1]], i64 0
+; CHECK-FOLD-NEXT: [[MUL13_I_I_I_I:%.*]] = fmul fast float [[VELOCITY_SROA_14_8_VEC_EXTRACT]], [[VELOCITY_SROA_14_8_VEC_EXTRACT]]
+; CHECK-FOLD-NEXT: [[ADD14_I_I_I_I:%.*]] = fadd fast float [[ADD_I_I_I_I]], [[MUL13_I_I_I_I]]
+; CHECK-FOLD-NEXT: [[TMP0:%.*]] = tail call fast float @llvm.sqrt.f32(float [[ADD14_I_I_I_I]])
+; CHECK-FOLD-NEXT: [[MUL_I_I_I:%.*]] = fdiv fast float 0x3FEFD70A40000000, [[TMP0]]
+; CHECK-FOLD-NEXT: [[SUB_I:%.*]] = fmul fast float [[MUL_I_I_I]], [[VELOCITY_SROA_0_0_VEC_EXTRACT]]
+; CHECK-FOLD-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[SUB_I]], i64 0
+; CHECK-FOLD-NEXT: [[SUB8_I:%.*]] = fmul fast float [[MUL_I_I_I]], [[VELOCITY_SROA_0_4_VEC_EXTRACT]]
+; CHECK-FOLD-NEXT: [[VELOCITY_SROA_0_4_VEC_INSERT25:%.*]] = insertelement <2 x float> [[TMP1]], float [[SUB8_I]], i64 1
+; CHECK-FOLD-NEXT: [[SUB13_I:%.*]] = fmul fast float [[MUL_I_I_I]], [[VELOCITY_SROA_14_8_VEC_EXTRACT]]
+; CHECK-FOLD-NEXT: [[VELOCITY_SROA_14_8_VEC_INSERT35:%.*]] = insertelement <2 x float> [[VELOCITY_COERCE1]], float [[SUB13_I]], i64 0
+; CHECK-FOLD-NEXT: [[VELOCITY_SROA_0_0:%.*]] = select nsz i1 [[CMP]], <2 x float> [[VELOCITY_SROA_0_4_VEC_INSERT25]], <2 x float> zeroinitializer, !unpredictable [[META0:![0-9]+]]
+; CHECK-FOLD-NEXT: [[VELOCITY_SROA_14_0:%.*]] = select nsz i1 [[CMP]], <2 x float> [[VELOCITY_SROA_14_8_VEC_INSERT35]], <2 x float> zeroinitializer, !unpredictable [[META0]]
+; CHECK-FOLD-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> [[VELOCITY_SROA_0_0]], 0
+; CHECK-FOLD-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[VELOCITY_SROA_14_0]], 1
+; CHECK-FOLD-NEXT: ret { <2 x float>, <2 x float> } [[DOTFCA_1_INSERT]]
+;
+entry:
+ %cmp = fcmp fast ogt float %speed, 0x3F747AE140000000
+ br i1 %cmp, label %if.then, label %if.end, !unpredictable !0
+
+if.then:
+ %velocity.sroa.0.0.vec.extract = extractelement <2 x float> %velocity.coerce0, i64 0
+ %mul.i.i.i.i = fmul fast float %velocity.sroa.0.0.vec.extract, %velocity.sroa.0.0.vec.extract
+ %velocity.sroa.0.4.vec.extract = extractelement <2 x float> %velocity.coerce0, i64 1
+ %mul8.i.i.i.i = fmul fast float %velocity.sroa.0.4.vec.extract, %velocity.sroa.0.4.vec.extract
+ %add.i.i.i.i = fadd fast float %mul8.i.i.i.i, %mul.i.i.i.i
+ %velocity.sroa.14.8.vec.extract = extractelement <2 x float> %velocity.coerce1, i64 0
+ %mul13.i.i.i.i = fmul fast float %velocity.sroa.14.8.vec.extract, %velocity.sroa.14.8.vec.extract
+ %add14.i.i.i.i = fadd fast float %add.i.i.i.i, %mul13.i.i.i.i
+ %0 = tail call fast noundef float @llvm.sqrt.f32(float %add14.i.i.i.i)
+ %mul.i.i.i = fdiv fast float 0x3FEFD70A40000000, %0
+ %sub.i = fmul fast float %mul.i.i.i, %velocity.sroa.0.0.vec.extract
+ %1 = insertelement <2 x float> poison, float %sub.i, i64 0
+ %sub8.i = fmul fast float %mul.i.i.i, %velocity.sroa.0.4.vec.extract
+ %velocity.sroa.0.4.vec.insert25 = insertelement <2 x float> %1, float %sub8.i, i64 1
+ %sub13.i = fmul fast float %mul.i.i.i, %velocity.sroa.14.8.vec.extract
+ %velocity.sroa.14.8.vec.insert35 = insertelement <2 x float> %velocity.coerce1, float %sub13.i, i64 0
+ br label %if.end
+
+if.end:
+ %velocity.sroa.0.0 = phi nsz <2 x float> [ %velocity.sroa.0.4.vec.insert25, %if.then ], [ zeroinitializer, %entry ]
+ %velocity.sroa.14.0 = phi nsz <2 x float> [ %velocity.sroa.14.8.vec.insert35, %if.then ], [ zeroinitializer, %entry ]
+ %.fca.0.insert = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> %velocity.sroa.0.0, 0
+ %.fca.1.insert = insertvalue { <2 x float>, <2 x float> } %.fca.0.insert, <2 x float> %velocity.sroa.14.0, 1
+ ret { <2 x float>, <2 x float> } %.fca.1.insert
+}
+
+declare float @llvm.sqrt.f32(float)
+
+!0 = !{}
>From 645ba1fe958cc10fdd6e2c69f3f0f80d67a72e17 Mon Sep 17 00:00:00 2001
From: Tianqing Wang <tianqing.wang at intel.com>
Date: Wed, 17 Jul 2024 15:58:29 +0800
Subject: [PATCH 2/6] Add debug log.
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 73687b5c31c64..049226af047c5 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3623,8 +3623,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
[](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
return Changed;
- LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond
- << " T: " << IfTrue->getName()
+ LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
+ if (IsUnpredictable) dbgs() << " (unpredictable)";
+ dbgs() << " T: " << IfTrue->getName()
<< " F: " << IfFalse->getName() << "\n");
// If we can still promote the PHI nodes after this gauntlet of tests,
>From da55feb74075271d0cd7a36575b9ee5c36400dda Mon Sep 17 00:00:00 2001
From: Tianqing Wang <tianqing.wang at intel.com>
Date: Fri, 19 Jul 2024 02:06:56 +0800
Subject: [PATCH 3/6] Returns InstructionCost for getBranchMispredictPenalty().
---
llvm/include/llvm/Analysis/TargetTransformInfo.h | 13 +++++++------
.../include/llvm/Analysis/TargetTransformInfoImpl.h | 2 +-
llvm/lib/Analysis/TargetTransformInfo.cpp | 2 +-
llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 2 +-
llvm/lib/Target/X86/X86TargetTransformInfo.h | 2 +-
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 5 ++---
6 files changed, 13 insertions(+), 13 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index bdcaeab033791..2411b2b31d293 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -419,10 +419,11 @@ class TargetTransformInfo {
/// this factor, it is very likely to be predicted correctly.
BranchProbability getPredictableBranchThreshold() const;
- // Returns an integer indicating how aggressive the target wants for
- // eliminating unpredictable branches. A zero return value means extra
- // optimization applied to them should be minimal.
- unsigned getBranchMispredictPenalty() const;
+ /// Returns estimated penalty of a branch misprediction in latency. Indicates
+ /// how aggressive the target wants for eliminating unpredictable branches. A
+ /// zero return value means extra optimization applied to them should be
+ /// minimal.
+ InstructionCost getBranchMispredictPenalty() const;
/// Return true if branch divergence exists.
///
@@ -1837,7 +1838,7 @@ class TargetTransformInfo::Concept {
ArrayRef<const Value *> Operands,
TargetCostKind CostKind) = 0;
virtual BranchProbability getPredictableBranchThreshold() = 0;
- virtual unsigned getBranchMispredictPenalty() = 0;
+ virtual InstructionCost getBranchMispredictPenalty() = 0;
virtual bool hasBranchDivergence(const Function *F = nullptr) = 0;
virtual bool isSourceOfDivergence(const Value *V) = 0;
virtual bool isAlwaysUniform(const Value *V) = 0;
@@ -2249,7 +2250,7 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
BranchProbability getPredictableBranchThreshold() override {
return Impl.getPredictableBranchThreshold();
}
- unsigned getBranchMispredictPenalty() override {
+ InstructionCost getBranchMispredictPenalty() override {
return Impl.getBranchMispredictPenalty();
}
bool hasBranchDivergence(const Function *F = nullptr) override {
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index a87470c6446c0..00efa474a91b5 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -99,7 +99,7 @@ class TargetTransformInfoImplBase {
return BranchProbability(99, 100);
}
- unsigned getBranchMispredictPenalty() const { return 0; }
+ InstructionCost getBranchMispredictPenalty() const { return 0; }
bool hasBranchDivergence(const Function *F = nullptr) const { return false; }
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index a1fcf5482d334..6a0fa98089ba5 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -279,7 +279,7 @@ BranchProbability TargetTransformInfo::getPredictableBranchThreshold() const {
: TTIImpl->getPredictableBranchThreshold();
}
-unsigned TargetTransformInfo::getBranchMispredictPenalty() const {
+InstructionCost TargetTransformInfo::getBranchMispredictPenalty() const {
return TTIImpl->getBranchMispredictPenalty();
}
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 984586f4ae5f6..dc3ac80bdf5cf 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -6757,7 +6757,7 @@ InstructionCost X86TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
return -1;
}
-unsigned X86TTIImpl::getBranchMispredictPenalty() const {
+InstructionCost X86TTIImpl::getBranchMispredictPenalty() const {
// TODO: Hook MispredictPenalty of SchedMachineModel into this.
return 14;
}
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index d2b5c093e7003..b619090e8e1e0 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -294,7 +294,7 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
bool supportsEfficientVectorElementLoadStore() const;
bool enableInterleavedAccessVectorization();
- unsigned getBranchMispredictPenalty() const;
+ InstructionCost getBranchMispredictPenalty() const;
private:
bool supportsGather() const;
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 049226af047c5..aa0cf7dd4fa6a 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3550,10 +3550,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// that need to be moved to the dominating block.
SmallPtrSet<Instruction *, 4> AggressiveInsts;
InstructionCost Cost = 0;
- unsigned Threshold = TwoEntryPHINodeFoldingThreshold;
+ InstructionCost Budget = TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
if (IsUnpredictable)
- Threshold += TTI.getBranchMispredictPenalty();
- InstructionCost Budget = Threshold * TargetTransformInfo::TCC_Basic;
+ Budget += TTI.getBranchMispredictPenalty();
bool Changed = false;
for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
>From b008d5f9be21d53d6f65f6749bfaa09675682b50 Mon Sep 17 00:00:00 2001
From: Tianqing Wang <tianqing.wang at intel.com>
Date: Fri, 19 Jul 2024 02:12:23 +0800
Subject: [PATCH 4/6] clang-format.
---
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index aa0cf7dd4fa6a..3ab1564e93e49 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3550,7 +3550,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
// that need to be moved to the dominating block.
SmallPtrSet<Instruction *, 4> AggressiveInsts;
InstructionCost Cost = 0;
- InstructionCost Budget = TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
+ InstructionCost Budget =
+ TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
if (IsUnpredictable)
Budget += TTI.getBranchMispredictPenalty();
>From fa967f83a252aa6c983a518a59c0cbeba767bb10 Mon Sep 17 00:00:00 2001
From: Tianqing Wang <tianqing.wang at intel.com>
Date: Mon, 22 Jul 2024 13:33:01 +0800
Subject: [PATCH 5/6] Canonicalize the test.
---
.../two-entry-phi-fold-unpredictable.ll | 164 +++++++++---------
1 file changed, 84 insertions(+), 80 deletions(-)
diff --git a/llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-unpredictable.ll b/llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-unpredictable.ll
index 88aa8a619207d..0bce8e3ed7dd3 100644
--- a/llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-unpredictable.ll
+++ b/llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-unpredictable.ll
@@ -3,92 +3,96 @@
; RUN: opt < %s -S -passes=simplifycfg | FileCheck --check-prefix=CHECK-NOFOLD %s
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s -S -passes=simplifycfg | FileCheck --check-prefix=CHECK-FOLD %s
-define { <2 x float>, <2 x float> } @foo(float %speed, <2 x float> %velocity.coerce0, <2 x float> %velocity.coerce1) {
+define { <2 x float>, <2 x float> } @foo(float %arg, <2 x float> %arg1, <2 x float> %arg2) {
; CHECK-NOFOLD-LABEL: define { <2 x float>, <2 x float> } @foo(
-; CHECK-NOFOLD-SAME: float [[SPEED:%.*]], <2 x float> [[VELOCITY_COERCE0:%.*]], <2 x float> [[VELOCITY_COERCE1:%.*]]) {
-; CHECK-NOFOLD-NEXT: [[ENTRY:.*]]:
-; CHECK-NOFOLD-NEXT: [[CMP:%.*]] = fcmp fast ogt float [[SPEED]], 0x3F747AE140000000
-; CHECK-NOFOLD-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]], !unpredictable [[META0:![0-9]+]]
-; CHECK-NOFOLD: [[IF_THEN]]:
-; CHECK-NOFOLD-NEXT: [[VELOCITY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VELOCITY_COERCE0]], i64 0
-; CHECK-NOFOLD-NEXT: [[MUL_I_I_I_I:%.*]] = fmul fast float [[VELOCITY_SROA_0_0_VEC_EXTRACT]], [[VELOCITY_SROA_0_0_VEC_EXTRACT]]
-; CHECK-NOFOLD-NEXT: [[VELOCITY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VELOCITY_COERCE0]], i64 1
-; CHECK-NOFOLD-NEXT: [[MUL8_I_I_I_I:%.*]] = fmul fast float [[VELOCITY_SROA_0_4_VEC_EXTRACT]], [[VELOCITY_SROA_0_4_VEC_EXTRACT]]
-; CHECK-NOFOLD-NEXT: [[ADD_I_I_I_I:%.*]] = fadd fast float [[MUL8_I_I_I_I]], [[MUL_I_I_I_I]]
-; CHECK-NOFOLD-NEXT: [[VELOCITY_SROA_14_8_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VELOCITY_COERCE1]], i64 0
-; CHECK-NOFOLD-NEXT: [[MUL13_I_I_I_I:%.*]] = fmul fast float [[VELOCITY_SROA_14_8_VEC_EXTRACT]], [[VELOCITY_SROA_14_8_VEC_EXTRACT]]
-; CHECK-NOFOLD-NEXT: [[ADD14_I_I_I_I:%.*]] = fadd fast float [[ADD_I_I_I_I]], [[MUL13_I_I_I_I]]
-; CHECK-NOFOLD-NEXT: [[TMP0:%.*]] = tail call fast noundef float @llvm.sqrt.f32(float [[ADD14_I_I_I_I]])
-; CHECK-NOFOLD-NEXT: [[MUL_I_I_I:%.*]] = fdiv fast float 0x3FEFD70A40000000, [[TMP0]]
-; CHECK-NOFOLD-NEXT: [[SUB_I:%.*]] = fmul fast float [[MUL_I_I_I]], [[VELOCITY_SROA_0_0_VEC_EXTRACT]]
-; CHECK-NOFOLD-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[SUB_I]], i64 0
-; CHECK-NOFOLD-NEXT: [[SUB8_I:%.*]] = fmul fast float [[MUL_I_I_I]], [[VELOCITY_SROA_0_4_VEC_EXTRACT]]
-; CHECK-NOFOLD-NEXT: [[VELOCITY_SROA_0_4_VEC_INSERT25:%.*]] = insertelement <2 x float> [[TMP1]], float [[SUB8_I]], i64 1
-; CHECK-NOFOLD-NEXT: [[SUB13_I:%.*]] = fmul fast float [[MUL_I_I_I]], [[VELOCITY_SROA_14_8_VEC_EXTRACT]]
-; CHECK-NOFOLD-NEXT: [[VELOCITY_SROA_14_8_VEC_INSERT35:%.*]] = insertelement <2 x float> [[VELOCITY_COERCE1]], float [[SUB13_I]], i64 0
-; CHECK-NOFOLD-NEXT: br label %[[IF_END]]
-; CHECK-NOFOLD: [[IF_END]]:
-; CHECK-NOFOLD-NEXT: [[VELOCITY_SROA_0_0:%.*]] = phi nsz <2 x float> [ [[VELOCITY_SROA_0_4_VEC_INSERT25]], %[[IF_THEN]] ], [ zeroinitializer, %[[ENTRY]] ]
-; CHECK-NOFOLD-NEXT: [[VELOCITY_SROA_14_0:%.*]] = phi nsz <2 x float> [ [[VELOCITY_SROA_14_8_VEC_INSERT35]], %[[IF_THEN]] ], [ zeroinitializer, %[[ENTRY]] ]
-; CHECK-NOFOLD-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> [[VELOCITY_SROA_0_0]], 0
-; CHECK-NOFOLD-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[VELOCITY_SROA_14_0]], 1
-; CHECK-NOFOLD-NEXT: ret { <2 x float>, <2 x float> } [[DOTFCA_1_INSERT]]
+; CHECK-NOFOLD-SAME: float [[ARG:%.*]], <2 x float> [[ARG1:%.*]], <2 x float> [[ARG2:%.*]]) {
+; CHECK-NOFOLD-NEXT: [[BB:.*]]:
+; CHECK-NOFOLD-NEXT: [[I:%.*]] = fcmp fast ogt float [[ARG]], 0x3F747AE140000000
+; CHECK-NOFOLD-NEXT: br i1 [[I]], label %[[BB3:.*]], label %[[BB20:.*]], !unpredictable [[META0:![0-9]+]]
+; CHECK-NOFOLD: [[BB3]]:
+; CHECK-NOFOLD-NEXT: [[I4:%.*]] = extractelement <2 x float> [[ARG1]], i64 0
+; CHECK-NOFOLD-NEXT: [[I5:%.*]] = fmul fast float [[I4]], [[I4]]
+; CHECK-NOFOLD-NEXT: [[I6:%.*]] = extractelement <2 x float> [[ARG1]], i64 1
+; CHECK-NOFOLD-NEXT: [[I7:%.*]] = fmul fast float [[I6]], [[I6]]
+; CHECK-NOFOLD-NEXT: [[I8:%.*]] = fadd fast float [[I7]], [[I5]]
+; CHECK-NOFOLD-NEXT: [[I9:%.*]] = extractelement <2 x float> [[ARG2]], i64 0
+; CHECK-NOFOLD-NEXT: [[I10:%.*]] = fmul fast float [[I9]], [[I9]]
+; CHECK-NOFOLD-NEXT: [[I11:%.*]] = fadd fast float [[I8]], [[I10]]
+; CHECK-NOFOLD-NEXT: [[I12:%.*]] = tail call fast noundef float @llvm.sqrt.f32(float [[I11]])
+; CHECK-NOFOLD-NEXT: [[I13:%.*]] = fdiv fast float 0x3FEFD70A40000000, [[I12]]
+; CHECK-NOFOLD-NEXT: [[I14:%.*]] = fmul fast float [[I13]], [[I4]]
+; CHECK-NOFOLD-NEXT: [[I15:%.*]] = insertelement <2 x float> poison, float [[I14]], i64 0
+; CHECK-NOFOLD-NEXT: [[I16:%.*]] = fmul fast float [[I13]], [[I6]]
+; CHECK-NOFOLD-NEXT: [[I17:%.*]] = insertelement <2 x float> [[I15]], float [[I16]], i64 1
+; CHECK-NOFOLD-NEXT: [[I18:%.*]] = fmul fast float [[I13]], [[I9]]
+; CHECK-NOFOLD-NEXT: [[I19:%.*]] = insertelement <2 x float> [[ARG2]], float [[I18]], i64 0
+; CHECK-NOFOLD-NEXT: br label %[[BB20]]
+; CHECK-NOFOLD: [[BB20]]:
+; CHECK-NOFOLD-NEXT: [[I21:%.*]] = phi nsz <2 x float> [ [[I17]], %[[BB3]] ], [ zeroinitializer, %[[BB]] ]
+; CHECK-NOFOLD-NEXT: [[I22:%.*]] = phi nsz <2 x float> [ [[I19]], %[[BB3]] ], [ zeroinitializer, %[[BB]] ]
+; CHECK-NOFOLD-NEXT: [[I23:%.*]] = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> [[I21]], 0
+; CHECK-NOFOLD-NEXT: [[I24:%.*]] = insertvalue { <2 x float>, <2 x float> } [[I23]], <2 x float> [[I22]], 1
+; CHECK-NOFOLD-NEXT: ret { <2 x float>, <2 x float> } [[I24]]
;
; CHECK-FOLD-LABEL: define { <2 x float>, <2 x float> } @foo(
-; CHECK-FOLD-SAME: float [[SPEED:%.*]], <2 x float> [[VELOCITY_COERCE0:%.*]], <2 x float> [[VELOCITY_COERCE1:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-FOLD-NEXT: [[ENTRY:.*:]]
-; CHECK-FOLD-NEXT: [[CMP:%.*]] = fcmp fast ogt float [[SPEED]], 0x3F747AE140000000
-; CHECK-FOLD-NEXT: [[VELOCITY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VELOCITY_COERCE0]], i64 0
-; CHECK-FOLD-NEXT: [[MUL_I_I_I_I:%.*]] = fmul fast float [[VELOCITY_SROA_0_0_VEC_EXTRACT]], [[VELOCITY_SROA_0_0_VEC_EXTRACT]]
-; CHECK-FOLD-NEXT: [[VELOCITY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VELOCITY_COERCE0]], i64 1
-; CHECK-FOLD-NEXT: [[MUL8_I_I_I_I:%.*]] = fmul fast float [[VELOCITY_SROA_0_4_VEC_EXTRACT]], [[VELOCITY_SROA_0_4_VEC_EXTRACT]]
-; CHECK-FOLD-NEXT: [[ADD_I_I_I_I:%.*]] = fadd fast float [[MUL8_I_I_I_I]], [[MUL_I_I_I_I]]
-; CHECK-FOLD-NEXT: [[VELOCITY_SROA_14_8_VEC_EXTRACT:%.*]] = extractelement <2 x float> [[VELOCITY_COERCE1]], i64 0
-; CHECK-FOLD-NEXT: [[MUL13_I_I_I_I:%.*]] = fmul fast float [[VELOCITY_SROA_14_8_VEC_EXTRACT]], [[VELOCITY_SROA_14_8_VEC_EXTRACT]]
-; CHECK-FOLD-NEXT: [[ADD14_I_I_I_I:%.*]] = fadd fast float [[ADD_I_I_I_I]], [[MUL13_I_I_I_I]]
-; CHECK-FOLD-NEXT: [[TMP0:%.*]] = tail call fast float @llvm.sqrt.f32(float [[ADD14_I_I_I_I]])
-; CHECK-FOLD-NEXT: [[MUL_I_I_I:%.*]] = fdiv fast float 0x3FEFD70A40000000, [[TMP0]]
-; CHECK-FOLD-NEXT: [[SUB_I:%.*]] = fmul fast float [[MUL_I_I_I]], [[VELOCITY_SROA_0_0_VEC_EXTRACT]]
-; CHECK-FOLD-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[SUB_I]], i64 0
-; CHECK-FOLD-NEXT: [[SUB8_I:%.*]] = fmul fast float [[MUL_I_I_I]], [[VELOCITY_SROA_0_4_VEC_EXTRACT]]
-; CHECK-FOLD-NEXT: [[VELOCITY_SROA_0_4_VEC_INSERT25:%.*]] = insertelement <2 x float> [[TMP1]], float [[SUB8_I]], i64 1
-; CHECK-FOLD-NEXT: [[SUB13_I:%.*]] = fmul fast float [[MUL_I_I_I]], [[VELOCITY_SROA_14_8_VEC_EXTRACT]]
-; CHECK-FOLD-NEXT: [[VELOCITY_SROA_14_8_VEC_INSERT35:%.*]] = insertelement <2 x float> [[VELOCITY_COERCE1]], float [[SUB13_I]], i64 0
-; CHECK-FOLD-NEXT: [[VELOCITY_SROA_0_0:%.*]] = select nsz i1 [[CMP]], <2 x float> [[VELOCITY_SROA_0_4_VEC_INSERT25]], <2 x float> zeroinitializer, !unpredictable [[META0:![0-9]+]]
-; CHECK-FOLD-NEXT: [[VELOCITY_SROA_14_0:%.*]] = select nsz i1 [[CMP]], <2 x float> [[VELOCITY_SROA_14_8_VEC_INSERT35]], <2 x float> zeroinitializer, !unpredictable [[META0]]
-; CHECK-FOLD-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> [[VELOCITY_SROA_0_0]], 0
-; CHECK-FOLD-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue { <2 x float>, <2 x float> } [[DOTFCA_0_INSERT]], <2 x float> [[VELOCITY_SROA_14_0]], 1
-; CHECK-FOLD-NEXT: ret { <2 x float>, <2 x float> } [[DOTFCA_1_INSERT]]
+; CHECK-FOLD-SAME: float [[ARG:%.*]], <2 x float> [[ARG1:%.*]], <2 x float> [[ARG2:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-FOLD-NEXT: [[BB:.*]]:
+; CHECK-FOLD-NEXT: [[I:%.*]] = fcmp fast ogt float [[ARG]], 0x3F747AE140000000
+; CHECK-FOLD-NEXT: br i1 [[I]], label %[[BB3:.*]], label %[[BB20:.*]], !unpredictable [[META0:![0-9]+]]
+; CHECK-FOLD: [[BB3]]:
+; CHECK-FOLD-NEXT: [[I4:%.*]] = extractelement <2 x float> [[ARG1]], i64 0
+; CHECK-FOLD-NEXT: [[I5:%.*]] = fmul fast float [[I4]], [[I4]]
+; CHECK-FOLD-NEXT: [[I6:%.*]] = extractelement <2 x float> [[ARG1]], i64 1
+; CHECK-FOLD-NEXT: [[I7:%.*]] = fmul fast float [[I6]], [[I6]]
+; CHECK-FOLD-NEXT: [[I8:%.*]] = fadd fast float [[I7]], [[I5]]
+; CHECK-FOLD-NEXT: [[I9:%.*]] = extractelement <2 x float> [[ARG2]], i64 0
+; CHECK-FOLD-NEXT: [[I10:%.*]] = fmul fast float [[I9]], [[I9]]
+; CHECK-FOLD-NEXT: [[I11:%.*]] = fadd fast float [[I8]], [[I10]]
+; CHECK-FOLD-NEXT: [[I12:%.*]] = tail call fast noundef float @llvm.sqrt.f32(float [[I11]])
+; CHECK-FOLD-NEXT: [[I13:%.*]] = fdiv fast float 0x3FEFD70A40000000, [[I12]]
+; CHECK-FOLD-NEXT: [[I14:%.*]] = fmul fast float [[I13]], [[I4]]
+; CHECK-FOLD-NEXT: [[I15:%.*]] = insertelement <2 x float> poison, float [[I14]], i64 0
+; CHECK-FOLD-NEXT: [[I16:%.*]] = fmul fast float [[I13]], [[I6]]
+; CHECK-FOLD-NEXT: [[I17:%.*]] = insertelement <2 x float> [[I15]], float [[I16]], i64 1
+; CHECK-FOLD-NEXT: [[I18:%.*]] = fmul fast float [[I13]], [[I9]]
+; CHECK-FOLD-NEXT: [[I19:%.*]] = insertelement <2 x float> [[ARG2]], float [[I18]], i64 0
+; CHECK-FOLD-NEXT: br label %[[BB20]]
+; CHECK-FOLD: [[BB20]]:
+; CHECK-FOLD-NEXT: [[I21:%.*]] = phi nsz <2 x float> [ [[I17]], %[[BB3]] ], [ zeroinitializer, %[[BB]] ]
+; CHECK-FOLD-NEXT: [[I22:%.*]] = phi nsz <2 x float> [ [[I19]], %[[BB3]] ], [ zeroinitializer, %[[BB]] ]
+; CHECK-FOLD-NEXT: [[I23:%.*]] = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> [[I21]], 0
+; CHECK-FOLD-NEXT: [[I24:%.*]] = insertvalue { <2 x float>, <2 x float> } [[I23]], <2 x float> [[I22]], 1
+; CHECK-FOLD-NEXT: ret { <2 x float>, <2 x float> } [[I24]]
;
-entry:
- %cmp = fcmp fast ogt float %speed, 0x3F747AE140000000
- br i1 %cmp, label %if.then, label %if.end, !unpredictable !0
+bb:
+ %i = fcmp fast ogt float %arg, 0x3F747AE140000000
+ br i1 %i, label %bb3, label %bb20, !unpredictable !0
-if.then:
- %velocity.sroa.0.0.vec.extract = extractelement <2 x float> %velocity.coerce0, i64 0
- %mul.i.i.i.i = fmul fast float %velocity.sroa.0.0.vec.extract, %velocity.sroa.0.0.vec.extract
- %velocity.sroa.0.4.vec.extract = extractelement <2 x float> %velocity.coerce0, i64 1
- %mul8.i.i.i.i = fmul fast float %velocity.sroa.0.4.vec.extract, %velocity.sroa.0.4.vec.extract
- %add.i.i.i.i = fadd fast float %mul8.i.i.i.i, %mul.i.i.i.i
- %velocity.sroa.14.8.vec.extract = extractelement <2 x float> %velocity.coerce1, i64 0
- %mul13.i.i.i.i = fmul fast float %velocity.sroa.14.8.vec.extract, %velocity.sroa.14.8.vec.extract
- %add14.i.i.i.i = fadd fast float %add.i.i.i.i, %mul13.i.i.i.i
- %0 = tail call fast noundef float @llvm.sqrt.f32(float %add14.i.i.i.i)
- %mul.i.i.i = fdiv fast float 0x3FEFD70A40000000, %0
- %sub.i = fmul fast float %mul.i.i.i, %velocity.sroa.0.0.vec.extract
- %1 = insertelement <2 x float> poison, float %sub.i, i64 0
- %sub8.i = fmul fast float %mul.i.i.i, %velocity.sroa.0.4.vec.extract
- %velocity.sroa.0.4.vec.insert25 = insertelement <2 x float> %1, float %sub8.i, i64 1
- %sub13.i = fmul fast float %mul.i.i.i, %velocity.sroa.14.8.vec.extract
- %velocity.sroa.14.8.vec.insert35 = insertelement <2 x float> %velocity.coerce1, float %sub13.i, i64 0
- br label %if.end
+bb3: ; preds = %bb
+ %i4 = extractelement <2 x float> %arg1, i64 0
+ %i5 = fmul fast float %i4, %i4
+ %i6 = extractelement <2 x float> %arg1, i64 1
+ %i7 = fmul fast float %i6, %i6
+ %i8 = fadd fast float %i7, %i5
+ %i9 = extractelement <2 x float> %arg2, i64 0
+ %i10 = fmul fast float %i9, %i9
+ %i11 = fadd fast float %i8, %i10
+ %i12 = tail call fast noundef float @llvm.sqrt.f32(float %i11)
+ %i13 = fdiv fast float 0x3FEFD70A40000000, %i12
+ %i14 = fmul fast float %i13, %i4
+ %i15 = insertelement <2 x float> poison, float %i14, i64 0
+ %i16 = fmul fast float %i13, %i6
+ %i17 = insertelement <2 x float> %i15, float %i16, i64 1
+ %i18 = fmul fast float %i13, %i9
+ %i19 = insertelement <2 x float> %arg2, float %i18, i64 0
+ br label %bb20
-if.end:
- %velocity.sroa.0.0 = phi nsz <2 x float> [ %velocity.sroa.0.4.vec.insert25, %if.then ], [ zeroinitializer, %entry ]
- %velocity.sroa.14.0 = phi nsz <2 x float> [ %velocity.sroa.14.8.vec.insert35, %if.then ], [ zeroinitializer, %entry ]
- %.fca.0.insert = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> %velocity.sroa.0.0, 0
- %.fca.1.insert = insertvalue { <2 x float>, <2 x float> } %.fca.0.insert, <2 x float> %velocity.sroa.14.0, 1
- ret { <2 x float>, <2 x float> } %.fca.1.insert
+bb20: ; preds = %bb3, %bb
+ %i21 = phi nsz <2 x float> [ %i17, %bb3 ], [ zeroinitializer, %bb ]
+ %i22 = phi nsz <2 x float> [ %i19, %bb3 ], [ zeroinitializer, %bb ]
+ %i23 = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> %i21, 0
+ %i24 = insertvalue { <2 x float>, <2 x float> } %i23, <2 x float> %i22, 1
+ ret { <2 x float>, <2 x float> } %i24
}
declare float @llvm.sqrt.f32(float)
>From 7a8e32baab9fa05d7b8121d0178195ef59ddd470 Mon Sep 17 00:00:00 2001
From: Tianqing Wang <tianqing.wang at intel.com>
Date: Mon, 22 Jul 2024 14:53:57 +0800
Subject: [PATCH 6/6] Add "speculate-unpredictables" option to SimplifyCFGOptions.
---
.../Transforms/Utils/SimplifyCFGOptions.h | 5 +++++
llvm/lib/Passes/PassBuilder.cpp | 2 ++
llvm/lib/Passes/PassBuilderPipelines.cpp | 12 +++++-----
.../lib/Transforms/Scalar/SimplifyCFGPass.cpp | 9 +++++++-
llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 8 ++++---
llvm/test/Other/new-pm-print-pipeline.ll | 4 ++--
.../two-entry-phi-fold-unpredictable.ll | 22 +++++++++----------
7 files changed, 40 insertions(+), 22 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
index 8008fc6e8422d..2ea9d64f03cb6 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
@@ -30,6 +30,7 @@ struct SimplifyCFGOptions {
bool SinkCommonInsts = false;
bool SimplifyCondBranch = true;
bool SpeculateBlocks = true;
+ bool SpeculateUnpredictables = false;
AssumptionCache *AC = nullptr;
@@ -75,6 +76,10 @@ struct SimplifyCFGOptions {
SpeculateBlocks = B;
return *this;
}
+ SimplifyCFGOptions &speculateUnpredictables(bool B) {
+ SpeculateUnpredictables = B;
+ return *this;
+ }
};
} // namespace llvm
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index a9d3f8ec3a4ec..ade331166f994 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -845,6 +845,8 @@ Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
Result.hoistCommonInsts(Enable);
} else if (ParamName == "sink-common-insts") {
Result.sinkCommonInsts(Enable);
+ } else if (ParamName == "speculate-unpredictables") {
+ Result.speculateUnpredictables(Enable);
} else if (Enable && ParamName.consume_front("bonus-inst-threshold=")) {
APInt BonusInstThreshold;
if (ParamName.getAsInteger(0, BonusInstThreshold))
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 4fd5ee1946bb7..010f7247d96fd 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1505,8 +1505,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
// LoopSink (and other loop passes since the last simplifyCFG) might have
// resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
- OptimizePM.addPass(
- SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
+ OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
+ .convertSwitchRangeToICmp(true)
+ .speculateUnpredictables(true)));
// Add the core optimizing pipeline.
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
@@ -2024,9 +2025,10 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
LateFPM.addPass(DivRemPairsPass());
// Delete basic blocks, which optimization passes may have killed.
- LateFPM.addPass(SimplifyCFGPass(
- SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(
- true)));
+ LateFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
+ .convertSwitchRangeToICmp(true)
+ .hoistCommonInsts(true)
+ .speculateUnpredictables(true)));
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
// Drop bodies of available eternally objects to improve GlobalDCE.
diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index b7baf34f27c21..11de37f7a7c10 100644
--- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -77,6 +77,9 @@ static cl::opt<bool> UserSinkCommonInsts(
"sink-common-insts", cl::Hidden, cl::init(false),
cl::desc("Sink common instructions (default = false)"));
+static cl::opt<bool> UserSpeculateUnpredictables(
+ "speculate-unpredictables", cl::Hidden, cl::init(false),
+ cl::desc("Speculate unpredictable branches (default = false)"));
STATISTIC(NumSimpl, "Number of blocks simplified");
@@ -325,6 +328,8 @@ static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) {
Options.HoistCommonInsts = UserHoistCommonInsts;
if (UserSinkCommonInsts.getNumOccurrences())
Options.SinkCommonInsts = UserSinkCommonInsts;
+ if (UserSpeculateUnpredictables.getNumOccurrences())
+ Options.SpeculateUnpredictables = UserSpeculateUnpredictables;
}
SimplifyCFGPass::SimplifyCFGPass() {
@@ -351,7 +356,9 @@ void SimplifyCFGPass::printPipeline(
OS << (Options.HoistCommonInsts ? "" : "no-") << "hoist-common-insts;";
OS << (Options.SinkCommonInsts ? "" : "no-") << "sink-common-insts;";
OS << (Options.SpeculateBlocks ? "" : "no-") << "speculate-blocks;";
- OS << (Options.SimplifyCondBranch ? "" : "no-") << "simplify-cond-branch";
+ OS << (Options.SimplifyCondBranch ? "" : "no-") << "simplify-cond-branch;";
+ OS << (Options.SpeculateUnpredictables ? "" : "no-")
+ << "speculate-unpredictables";
OS << '>';
}
diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 3ab1564e93e49..f23e28888931d 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3476,7 +3476,8 @@ static bool FoldCondBranchOnValueKnownInPredecessor(BranchInst *BI,
/// Given a BB that starts with the specified two-entry PHI node,
/// see if we can eliminate it.
static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
- DomTreeUpdater *DTU, const DataLayout &DL) {
+ DomTreeUpdater *DTU, const DataLayout &DL,
+ bool SpeculateUnpredictables) {
// Ok, this is a two entry PHI node. Check to see if this is a simple "if
// statement", which has a very simple dominance structure. Basically, we
// are trying to find the condition that is being branched on, which
@@ -3552,7 +3553,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
InstructionCost Cost = 0;
InstructionCost Budget =
TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
- if (IsUnpredictable)
+ if (SpeculateUnpredictables && IsUnpredictable)
Budget += TTI.getBranchMispredictPenalty();
bool Changed = false;
@@ -7818,7 +7819,8 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
// eliminate it, do so now.
if (auto *PN = dyn_cast<PHINode>(BB->begin()))
if (PN->getNumIncomingValues() == 2)
- if (FoldTwoEntryPHINode(PN, TTI, DTU, DL))
+ if (FoldTwoEntryPHINode(PN, TTI, DTU, DL,
+ Options.SpeculateUnpredictables))
return true;
}
diff --git a/llvm/test/Other/new-pm-print-pipeline.ll b/llvm/test/Other/new-pm-print-pipeline.ll
index a524c9991f1bf..f2e80814f347a 100644
--- a/llvm/test/Other/new-pm-print-pipeline.ll
+++ b/llvm/test/Other/new-pm-print-pipeline.ll
@@ -49,8 +49,8 @@
; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(print<stack-lifetime><may>,print<stack-lifetime><must>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-17
; CHECK-17: function(print<stack-lifetime><may>,print<stack-lifetime><must>)
-; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-18
-; CHECK-18: function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;no-switch-range-to-icmp;switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch>)
+; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-18
+; CHECK-18: function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;no-switch-range-to-icmp;switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)
; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only>,loop-vectorize<interleave-forced-only;vectorize-forced-only>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-19
; CHECK-19: function(loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,loop-vectorize<interleave-forced-only;vectorize-forced-only;>)
diff --git a/llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-unpredictable.ll b/llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-unpredictable.ll
index 0bce8e3ed7dd3..82566d47b0328 100644
--- a/llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-unpredictable.ll
+++ b/llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-unpredictable.ll
@@ -1,11 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; Two-entry phi nodes with unpredictable conditions may get increased budget for folding.
; RUN: opt < %s -S -passes=simplifycfg | FileCheck --check-prefix=CHECK-NOFOLD %s
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s -S -passes=simplifycfg | FileCheck --check-prefix=CHECK-FOLD %s
+; RUN: opt < %s -S -passes='simplifycfg<speculate-unpredictables>' | FileCheck --check-prefix=CHECK-NOFOLD %s
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s -S -passes=simplifycfg | FileCheck --check-prefix=CHECK-NOFOLD %s
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s -S -passes='simplifycfg<speculate-unpredictables>' | FileCheck --check-prefix=CHECK-FOLD %s
-define { <2 x float>, <2 x float> } @foo(float %arg, <2 x float> %arg1, <2 x float> %arg2) {
+define { <2 x float>, <2 x float> } @foo(float %arg, <2 x float> %arg1, <2 x float> %arg2) #0 {
; CHECK-NOFOLD-LABEL: define { <2 x float>, <2 x float> } @foo(
-; CHECK-NOFOLD-SAME: float [[ARG:%.*]], <2 x float> [[ARG1:%.*]], <2 x float> [[ARG2:%.*]]) {
+; CHECK-NOFOLD-SAME: float [[ARG:%.*]], <2 x float> [[ARG1:%.*]], <2 x float> [[ARG2:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NOFOLD-NEXT: [[BB:.*]]:
; CHECK-NOFOLD-NEXT: [[I:%.*]] = fcmp fast ogt float [[ARG]], 0x3F747AE140000000
; CHECK-NOFOLD-NEXT: br i1 [[I]], label %[[BB3:.*]], label %[[BB20:.*]], !unpredictable [[META0:![0-9]+]]
@@ -36,10 +38,8 @@ define { <2 x float>, <2 x float> } @foo(float %arg, <2 x float> %arg1, <2 x flo
;
; CHECK-FOLD-LABEL: define { <2 x float>, <2 x float> } @foo(
; CHECK-FOLD-SAME: float [[ARG:%.*]], <2 x float> [[ARG1:%.*]], <2 x float> [[ARG2:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-FOLD-NEXT: [[BB:.*]]:
+; CHECK-FOLD-NEXT: [[BB:.*:]]
; CHECK-FOLD-NEXT: [[I:%.*]] = fcmp fast ogt float [[ARG]], 0x3F747AE140000000
-; CHECK-FOLD-NEXT: br i1 [[I]], label %[[BB3:.*]], label %[[BB20:.*]], !unpredictable [[META0:![0-9]+]]
-; CHECK-FOLD: [[BB3]]:
; CHECK-FOLD-NEXT: [[I4:%.*]] = extractelement <2 x float> [[ARG1]], i64 0
; CHECK-FOLD-NEXT: [[I5:%.*]] = fmul fast float [[I4]], [[I4]]
; CHECK-FOLD-NEXT: [[I6:%.*]] = extractelement <2 x float> [[ARG1]], i64 1
@@ -48,7 +48,7 @@ define { <2 x float>, <2 x float> } @foo(float %arg, <2 x float> %arg1, <2 x flo
; CHECK-FOLD-NEXT: [[I9:%.*]] = extractelement <2 x float> [[ARG2]], i64 0
; CHECK-FOLD-NEXT: [[I10:%.*]] = fmul fast float [[I9]], [[I9]]
; CHECK-FOLD-NEXT: [[I11:%.*]] = fadd fast float [[I8]], [[I10]]
-; CHECK-FOLD-NEXT: [[I12:%.*]] = tail call fast noundef float @llvm.sqrt.f32(float [[I11]])
+; CHECK-FOLD-NEXT: [[I12:%.*]] = tail call fast float @llvm.sqrt.f32(float [[I11]])
; CHECK-FOLD-NEXT: [[I13:%.*]] = fdiv fast float 0x3FEFD70A40000000, [[I12]]
; CHECK-FOLD-NEXT: [[I14:%.*]] = fmul fast float [[I13]], [[I4]]
; CHECK-FOLD-NEXT: [[I15:%.*]] = insertelement <2 x float> poison, float [[I14]], i64 0
@@ -56,10 +56,8 @@ define { <2 x float>, <2 x float> } @foo(float %arg, <2 x float> %arg1, <2 x flo
; CHECK-FOLD-NEXT: [[I17:%.*]] = insertelement <2 x float> [[I15]], float [[I16]], i64 1
; CHECK-FOLD-NEXT: [[I18:%.*]] = fmul fast float [[I13]], [[I9]]
; CHECK-FOLD-NEXT: [[I19:%.*]] = insertelement <2 x float> [[ARG2]], float [[I18]], i64 0
-; CHECK-FOLD-NEXT: br label %[[BB20]]
-; CHECK-FOLD: [[BB20]]:
-; CHECK-FOLD-NEXT: [[I21:%.*]] = phi nsz <2 x float> [ [[I17]], %[[BB3]] ], [ zeroinitializer, %[[BB]] ]
-; CHECK-FOLD-NEXT: [[I22:%.*]] = phi nsz <2 x float> [ [[I19]], %[[BB3]] ], [ zeroinitializer, %[[BB]] ]
+; CHECK-FOLD-NEXT: [[I21:%.*]] = select nsz i1 [[I]], <2 x float> [[I17]], <2 x float> zeroinitializer, !unpredictable [[META0:![0-9]+]]
+; CHECK-FOLD-NEXT: [[I22:%.*]] = select nsz i1 [[I]], <2 x float> [[I19]], <2 x float> zeroinitializer, !unpredictable [[META0]]
; CHECK-FOLD-NEXT: [[I23:%.*]] = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> [[I21]], 0
; CHECK-FOLD-NEXT: [[I24:%.*]] = insertvalue { <2 x float>, <2 x float> } [[I23]], <2 x float> [[I22]], 1
; CHECK-FOLD-NEXT: ret { <2 x float>, <2 x float> } [[I24]]
@@ -97,4 +95,6 @@ bb20: ; preds = %bb3, %bb
declare float @llvm.sqrt.f32(float)
+attributes #0 = { nounwind }
+
!0 = !{}
More information about the llvm-commits mailing list