[llvm] 3d494bf - [SimplifyCFG] Increase budget for FoldTwoEntryPHINode() if the branch is unpredictable. (#98495)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 22 16:47:25 PDT 2024


Author: Tianqing Wang
Date: 2024-07-23T07:47:21+08:00
New Revision: 3d494bfc7ff73cb0a8dbe16ac41db6f47910eef1

URL: https://github.com/llvm/llvm-project/commit/3d494bfc7ff73cb0a8dbe16ac41db6f47910eef1
DIFF: https://github.com/llvm/llvm-project/commit/3d494bfc7ff73cb0a8dbe16ac41db6f47910eef1.diff

LOG: [SimplifyCFG] Increase budget for FoldTwoEntryPHINode() if the branch is unpredictable. (#98495)

The `!unpredictable` metadata has been present for a long time, but
it's usage in optimizations is still limited. This patch teaches
`FoldTwoEntryPHINode()` to be more aggressive with an unpredictable
branch to reduce mispredictions.

A TTI interface `getBranchMispredictPenalty()` is added to distinguish
between different hardwares to ensure we don't go too far for simpler
cores. For simplicity, only a naive x86 implementation is included for
the time being.

Added: 
    llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-unpredictable.ll

Modified: 
    llvm/include/llvm/Analysis/TargetTransformInfo.h
    llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
    llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
    llvm/lib/Analysis/TargetTransformInfo.cpp
    llvm/lib/Passes/PassBuilder.cpp
    llvm/lib/Passes/PassBuilderPipelines.cpp
    llvm/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/lib/Target/X86/X86TargetTransformInfo.h
    llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
    llvm/lib/Transforms/Utils/SimplifyCFG.cpp
    llvm/test/Other/new-pm-print-pipeline.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index cf378008e4c7c..2411b2b31d293 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -419,6 +419,12 @@ class TargetTransformInfo {
   /// this factor, it is very likely to be predicted correctly.
   BranchProbability getPredictableBranchThreshold() const;
 
+  /// Returns estimated penalty of a branch misprediction in latency. Indicates
+  /// how aggressive the target wants for eliminating unpredictable branches. A
+  /// zero return value means extra optimization applied to them should be
+  /// minimal.
+  InstructionCost getBranchMispredictPenalty() const;
+
   /// Return true if branch divergence exists.
   ///
   /// Branch divergence has a significantly negative impact on GPU performance
@@ -1832,6 +1838,7 @@ class TargetTransformInfo::Concept {
                                              ArrayRef<const Value *> Operands,
                                              TargetCostKind CostKind) = 0;
   virtual BranchProbability getPredictableBranchThreshold() = 0;
+  virtual InstructionCost getBranchMispredictPenalty() = 0;
   virtual bool hasBranchDivergence(const Function *F = nullptr) = 0;
   virtual bool isSourceOfDivergence(const Value *V) = 0;
   virtual bool isAlwaysUniform(const Value *V) = 0;
@@ -2243,6 +2250,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
   BranchProbability getPredictableBranchThreshold() override {
     return Impl.getPredictableBranchThreshold();
   }
+  InstructionCost getBranchMispredictPenalty() override {
+    return Impl.getBranchMispredictPenalty();
+  }
   bool hasBranchDivergence(const Function *F = nullptr) override {
     return Impl.hasBranchDivergence(F);
   }

diff  --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 47fde08735c0c..00efa474a91b5 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -99,6 +99,8 @@ class TargetTransformInfoImplBase {
     return BranchProbability(99, 100);
   }
 
+  InstructionCost getBranchMispredictPenalty() const { return 0; }
+
   bool hasBranchDivergence(const Function *F = nullptr) const { return false; }
 
   bool isSourceOfDivergence(const Value *V) const { return false; }

diff  --git a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
index 8008fc6e8422d..2ea9d64f03cb6 100644
--- a/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
+++ b/llvm/include/llvm/Transforms/Utils/SimplifyCFGOptions.h
@@ -30,6 +30,7 @@ struct SimplifyCFGOptions {
   bool SinkCommonInsts = false;
   bool SimplifyCondBranch = true;
   bool SpeculateBlocks = true;
+  bool SpeculateUnpredictables = false;
 
   AssumptionCache *AC = nullptr;
 
@@ -75,6 +76,10 @@ struct SimplifyCFGOptions {
     SpeculateBlocks = B;
     return *this;
   }
+  SimplifyCFGOptions &speculateUnpredictables(bool B) {
+    SpeculateUnpredictables = B;
+    return *this;
+  }
 };
 
 } // namespace llvm

diff  --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 693f7a5bb7af5..6a0fa98089ba5 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -279,6 +279,10 @@ BranchProbability TargetTransformInfo::getPredictableBranchThreshold() const {
              : TTIImpl->getPredictableBranchThreshold();
 }
 
+InstructionCost TargetTransformInfo::getBranchMispredictPenalty() const {
+  return TTIImpl->getBranchMispredictPenalty();
+}
+
 bool TargetTransformInfo::hasBranchDivergence(const Function *F) const {
   return TTIImpl->hasBranchDivergence(F);
 }

diff  --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 63d2f633784e9..5dbb1e2f49871 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -845,6 +845,8 @@ Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
       Result.hoistCommonInsts(Enable);
     } else if (ParamName == "sink-common-insts") {
       Result.sinkCommonInsts(Enable);
+    } else if (ParamName == "speculate-unpredictables") {
+      Result.speculateUnpredictables(Enable);
     } else if (Enable && ParamName.consume_front("bonus-inst-threshold=")) {
       APInt BonusInstThreshold;
       if (ParamName.getAsInteger(0, BonusInstThreshold))

diff  --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
index 935504b070d2e..6f36bdad780ae 100644
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -1515,8 +1515,9 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
 
   // LoopSink (and other loop passes since the last simplifyCFG) might have
   // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
-  OptimizePM.addPass(
-      SimplifyCFGPass(SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
+  OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
+                                         .convertSwitchRangeToICmp(true)
+                                         .speculateUnpredictables(true)));
 
   // Add the core optimizing pipeline.
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM),
@@ -2034,9 +2035,10 @@ PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
   LateFPM.addPass(DivRemPairsPass());
 
   // Delete basic blocks, which optimization passes may have killed.
-  LateFPM.addPass(SimplifyCFGPass(
-      SimplifyCFGOptions().convertSwitchRangeToICmp(true).hoistCommonInsts(
-          true)));
+  LateFPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
+                                      .convertSwitchRangeToICmp(true)
+                                      .hoistCommonInsts(true)
+                                      .speculateUnpredictables(true)));
   MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
 
   // Drop bodies of available eternally objects to improve GlobalDCE.

diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 32a3683355b72..dc3ac80bdf5cf 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -6756,3 +6756,8 @@ InstructionCost X86TTIImpl::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
     return AM.Scale != 0;
   return -1;
 }
+
+InstructionCost X86TTIImpl::getBranchMispredictPenalty() const {
+  // TODO: Hook MispredictPenalty of SchedMachineModel into this.
+  return 14;
+}

diff  --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 5eccb1aea308d..b619090e8e1e0 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -294,6 +294,8 @@ class X86TTIImpl : public BasicTTIImplBase<X86TTIImpl> {
   bool supportsEfficientVectorElementLoadStore() const;
   bool enableInterleavedAccessVectorization();
 
+  InstructionCost getBranchMispredictPenalty() const;
+
 private:
   bool supportsGather() const;
   InstructionCost getGSVectorCost(unsigned Opcode, TTI::TargetCostKind CostKind,

diff  --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index b7baf34f27c21..11de37f7a7c10 100644
--- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -77,6 +77,9 @@ static cl::opt<bool> UserSinkCommonInsts(
     "sink-common-insts", cl::Hidden, cl::init(false),
     cl::desc("Sink common instructions (default = false)"));
 
+static cl::opt<bool> UserSpeculateUnpredictables(
+    "speculate-unpredictables", cl::Hidden, cl::init(false),
+    cl::desc("Speculate unpredictable branches (default = false)"));
 
 STATISTIC(NumSimpl, "Number of blocks simplified");
 
@@ -325,6 +328,8 @@ static void applyCommandLineOverridesToOptions(SimplifyCFGOptions &Options) {
     Options.HoistCommonInsts = UserHoistCommonInsts;
   if (UserSinkCommonInsts.getNumOccurrences())
     Options.SinkCommonInsts = UserSinkCommonInsts;
+  if (UserSpeculateUnpredictables.getNumOccurrences())
+    Options.SpeculateUnpredictables = UserSpeculateUnpredictables;
 }
 
 SimplifyCFGPass::SimplifyCFGPass() {
@@ -351,7 +356,9 @@ void SimplifyCFGPass::printPipeline(
   OS << (Options.HoistCommonInsts ? "" : "no-") << "hoist-common-insts;";
   OS << (Options.SinkCommonInsts ? "" : "no-") << "sink-common-insts;";
   OS << (Options.SpeculateBlocks ? "" : "no-") << "speculate-blocks;";
-  OS << (Options.SimplifyCondBranch ? "" : "no-") << "simplify-cond-branch";
+  OS << (Options.SimplifyCondBranch ? "" : "no-") << "simplify-cond-branch;";
+  OS << (Options.SpeculateUnpredictables ? "" : "no-")
+     << "speculate-unpredictables";
   OS << '>';
 }
 

diff  --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index 8f717cb43bcb4..f23e28888931d 100644
--- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -3476,7 +3476,8 @@ static bool FoldCondBranchOnValueKnownInPredecessor(BranchInst *BI,
 /// Given a BB that starts with the specified two-entry PHI node,
 /// see if we can eliminate it.
 static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
-                                DomTreeUpdater *DTU, const DataLayout &DL) {
+                                DomTreeUpdater *DTU, const DataLayout &DL,
+                                bool SpeculateUnpredictables) {
   // Ok, this is a two entry PHI node.  Check to see if this is a simple "if
   // statement", which has a very simple dominance structure.  Basically, we
   // are trying to find the condition that is being branched on, which
@@ -3508,7 +3509,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
   // jump to one specific 'then' block (if we have two of them).
   // It isn't beneficial to speculatively execute the code
   // from the block that we know is predictably not entered.
-  if (!DomBI->getMetadata(LLVMContext::MD_unpredictable)) {
+  bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
+  if (!IsUnpredictable) {
     uint64_t TWeight, FWeight;
     if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
         (TWeight + FWeight) != 0) {
@@ -3551,6 +3553,8 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
   InstructionCost Cost = 0;
   InstructionCost Budget =
       TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
+  if (SpeculateUnpredictables && IsUnpredictable)
+    Budget += TTI.getBranchMispredictPenalty();
 
   bool Changed = false;
   for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
@@ -3620,8 +3624,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
              [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
     return Changed;
 
-  LLVM_DEBUG(dbgs() << "FOUND IF CONDITION!  " << *IfCond
-                    << "  T: " << IfTrue->getName()
+  LLVM_DEBUG(dbgs() << "FOUND IF CONDITION!  " << *IfCond;
+             if (IsUnpredictable) dbgs() << " (unpredictable)";
+             dbgs() << "  T: " << IfTrue->getName()
                     << "  F: " << IfFalse->getName() << "\n");
 
   // If we can still promote the PHI nodes after this gauntlet of tests,
@@ -7814,7 +7819,8 @@ bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
     // eliminate it, do so now.
     if (auto *PN = dyn_cast<PHINode>(BB->begin()))
       if (PN->getNumIncomingValues() == 2)
-        if (FoldTwoEntryPHINode(PN, TTI, DTU, DL))
+        if (FoldTwoEntryPHINode(PN, TTI, DTU, DL,
+                                Options.SpeculateUnpredictables))
           return true;
   }
 

diff  --git a/llvm/test/Other/new-pm-print-pipeline.ll b/llvm/test/Other/new-pm-print-pipeline.ll
index a524c9991f1bf..f2e80814f347a 100644
--- a/llvm/test/Other/new-pm-print-pipeline.ll
+++ b/llvm/test/Other/new-pm-print-pipeline.ll
@@ -49,8 +49,8 @@
 ; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(print<stack-lifetime><may>,print<stack-lifetime><must>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-17
 ; CHECK-17: function(print<stack-lifetime><may>,print<stack-lifetime><must>)
 
-; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-18
-; CHECK-18: function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;no-switch-range-to-icmp;switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch>)
+; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-18
+; CHECK-18: function(simplifycfg<bonus-inst-threshold=5;forward-switch-cond;no-switch-range-to-icmp;switch-to-lookup;keep-loops;hoist-common-insts;sink-common-insts;speculate-blocks;simplify-cond-branch;speculate-unpredictables>,simplifycfg<bonus-inst-threshold=7;no-forward-switch-cond;no-switch-range-to-icmp;no-switch-to-lookup;no-keep-loops;no-hoist-common-insts;no-sink-common-insts;no-speculate-blocks;no-simplify-cond-branch;no-speculate-unpredictables>)
 
 ; RUN: opt -disable-output -disable-verify -print-pipeline-passes -passes='function(loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only>,loop-vectorize<interleave-forced-only;vectorize-forced-only>)' < %s | FileCheck %s --match-full-lines --check-prefixes=CHECK-19
 ; CHECK-19: function(loop-vectorize<no-interleave-forced-only;no-vectorize-forced-only;>,loop-vectorize<interleave-forced-only;vectorize-forced-only;>)

diff  --git a/llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-unpredictable.ll b/llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-unpredictable.ll
new file mode 100644
index 0000000000000..82566d47b0328
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/two-entry-phi-fold-unpredictable.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; Two-entry phi nodes with unpredictable conditions may get increased budget for folding.
+; RUN: opt < %s -S -passes=simplifycfg | FileCheck --check-prefix=CHECK-NOFOLD %s
+; RUN: opt < %s -S -passes='simplifycfg<speculate-unpredictables>' | FileCheck --check-prefix=CHECK-NOFOLD %s
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s -S -passes=simplifycfg | FileCheck --check-prefix=CHECK-NOFOLD %s
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s -S -passes='simplifycfg<speculate-unpredictables>' | FileCheck --check-prefix=CHECK-FOLD %s
+
+define { <2 x float>, <2 x float> } @foo(float %arg, <2 x float> %arg1, <2 x float> %arg2) #0 {
+; CHECK-NOFOLD-LABEL: define { <2 x float>, <2 x float> } @foo(
+; CHECK-NOFOLD-SAME: float [[ARG:%.*]], <2 x float> [[ARG1:%.*]], <2 x float> [[ARG2:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NOFOLD-NEXT:  [[BB:.*]]:
+; CHECK-NOFOLD-NEXT:    [[I:%.*]] = fcmp fast ogt float [[ARG]], 0x3F747AE140000000
+; CHECK-NOFOLD-NEXT:    br i1 [[I]], label %[[BB3:.*]], label %[[BB20:.*]], !unpredictable [[META0:![0-9]+]]
+; CHECK-NOFOLD:       [[BB3]]:
+; CHECK-NOFOLD-NEXT:    [[I4:%.*]] = extractelement <2 x float> [[ARG1]], i64 0
+; CHECK-NOFOLD-NEXT:    [[I5:%.*]] = fmul fast float [[I4]], [[I4]]
+; CHECK-NOFOLD-NEXT:    [[I6:%.*]] = extractelement <2 x float> [[ARG1]], i64 1
+; CHECK-NOFOLD-NEXT:    [[I7:%.*]] = fmul fast float [[I6]], [[I6]]
+; CHECK-NOFOLD-NEXT:    [[I8:%.*]] = fadd fast float [[I7]], [[I5]]
+; CHECK-NOFOLD-NEXT:    [[I9:%.*]] = extractelement <2 x float> [[ARG2]], i64 0
+; CHECK-NOFOLD-NEXT:    [[I10:%.*]] = fmul fast float [[I9]], [[I9]]
+; CHECK-NOFOLD-NEXT:    [[I11:%.*]] = fadd fast float [[I8]], [[I10]]
+; CHECK-NOFOLD-NEXT:    [[I12:%.*]] = tail call fast noundef float @llvm.sqrt.f32(float [[I11]])
+; CHECK-NOFOLD-NEXT:    [[I13:%.*]] = fdiv fast float 0x3FEFD70A40000000, [[I12]]
+; CHECK-NOFOLD-NEXT:    [[I14:%.*]] = fmul fast float [[I13]], [[I4]]
+; CHECK-NOFOLD-NEXT:    [[I15:%.*]] = insertelement <2 x float> poison, float [[I14]], i64 0
+; CHECK-NOFOLD-NEXT:    [[I16:%.*]] = fmul fast float [[I13]], [[I6]]
+; CHECK-NOFOLD-NEXT:    [[I17:%.*]] = insertelement <2 x float> [[I15]], float [[I16]], i64 1
+; CHECK-NOFOLD-NEXT:    [[I18:%.*]] = fmul fast float [[I13]], [[I9]]
+; CHECK-NOFOLD-NEXT:    [[I19:%.*]] = insertelement <2 x float> [[ARG2]], float [[I18]], i64 0
+; CHECK-NOFOLD-NEXT:    br label %[[BB20]]
+; CHECK-NOFOLD:       [[BB20]]:
+; CHECK-NOFOLD-NEXT:    [[I21:%.*]] = phi nsz <2 x float> [ [[I17]], %[[BB3]] ], [ zeroinitializer, %[[BB]] ]
+; CHECK-NOFOLD-NEXT:    [[I22:%.*]] = phi nsz <2 x float> [ [[I19]], %[[BB3]] ], [ zeroinitializer, %[[BB]] ]
+; CHECK-NOFOLD-NEXT:    [[I23:%.*]] = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> [[I21]], 0
+; CHECK-NOFOLD-NEXT:    [[I24:%.*]] = insertvalue { <2 x float>, <2 x float> } [[I23]], <2 x float> [[I22]], 1
+; CHECK-NOFOLD-NEXT:    ret { <2 x float>, <2 x float> } [[I24]]
+;
+; CHECK-FOLD-LABEL: define { <2 x float>, <2 x float> } @foo(
+; CHECK-FOLD-SAME: float [[ARG:%.*]], <2 x float> [[ARG1:%.*]], <2 x float> [[ARG2:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-FOLD-NEXT:  [[BB:.*:]]
+; CHECK-FOLD-NEXT:    [[I:%.*]] = fcmp fast ogt float [[ARG]], 0x3F747AE140000000
+; CHECK-FOLD-NEXT:    [[I4:%.*]] = extractelement <2 x float> [[ARG1]], i64 0
+; CHECK-FOLD-NEXT:    [[I5:%.*]] = fmul fast float [[I4]], [[I4]]
+; CHECK-FOLD-NEXT:    [[I6:%.*]] = extractelement <2 x float> [[ARG1]], i64 1
+; CHECK-FOLD-NEXT:    [[I7:%.*]] = fmul fast float [[I6]], [[I6]]
+; CHECK-FOLD-NEXT:    [[I8:%.*]] = fadd fast float [[I7]], [[I5]]
+; CHECK-FOLD-NEXT:    [[I9:%.*]] = extractelement <2 x float> [[ARG2]], i64 0
+; CHECK-FOLD-NEXT:    [[I10:%.*]] = fmul fast float [[I9]], [[I9]]
+; CHECK-FOLD-NEXT:    [[I11:%.*]] = fadd fast float [[I8]], [[I10]]
+; CHECK-FOLD-NEXT:    [[I12:%.*]] = tail call fast float @llvm.sqrt.f32(float [[I11]])
+; CHECK-FOLD-NEXT:    [[I13:%.*]] = fdiv fast float 0x3FEFD70A40000000, [[I12]]
+; CHECK-FOLD-NEXT:    [[I14:%.*]] = fmul fast float [[I13]], [[I4]]
+; CHECK-FOLD-NEXT:    [[I15:%.*]] = insertelement <2 x float> poison, float [[I14]], i64 0
+; CHECK-FOLD-NEXT:    [[I16:%.*]] = fmul fast float [[I13]], [[I6]]
+; CHECK-FOLD-NEXT:    [[I17:%.*]] = insertelement <2 x float> [[I15]], float [[I16]], i64 1
+; CHECK-FOLD-NEXT:    [[I18:%.*]] = fmul fast float [[I13]], [[I9]]
+; CHECK-FOLD-NEXT:    [[I19:%.*]] = insertelement <2 x float> [[ARG2]], float [[I18]], i64 0
+; CHECK-FOLD-NEXT:    [[I21:%.*]] = select nsz i1 [[I]], <2 x float> [[I17]], <2 x float> zeroinitializer, !unpredictable [[META0:![0-9]+]]
+; CHECK-FOLD-NEXT:    [[I22:%.*]] = select nsz i1 [[I]], <2 x float> [[I19]], <2 x float> zeroinitializer, !unpredictable [[META0]]
+; CHECK-FOLD-NEXT:    [[I23:%.*]] = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> [[I21]], 0
+; CHECK-FOLD-NEXT:    [[I24:%.*]] = insertvalue { <2 x float>, <2 x float> } [[I23]], <2 x float> [[I22]], 1
+; CHECK-FOLD-NEXT:    ret { <2 x float>, <2 x float> } [[I24]]
+;
+bb:
+  %i = fcmp fast ogt float %arg, 0x3F747AE140000000
+  br i1 %i, label %bb3, label %bb20, !unpredictable !0
+
+bb3:                                              ; preds = %bb
+  %i4 = extractelement <2 x float> %arg1, i64 0
+  %i5 = fmul fast float %i4, %i4
+  %i6 = extractelement <2 x float> %arg1, i64 1
+  %i7 = fmul fast float %i6, %i6
+  %i8 = fadd fast float %i7, %i5
+  %i9 = extractelement <2 x float> %arg2, i64 0
+  %i10 = fmul fast float %i9, %i9
+  %i11 = fadd fast float %i8, %i10
+  %i12 = tail call fast noundef float @llvm.sqrt.f32(float %i11)
+  %i13 = fdiv fast float 0x3FEFD70A40000000, %i12
+  %i14 = fmul fast float %i13, %i4
+  %i15 = insertelement <2 x float> poison, float %i14, i64 0
+  %i16 = fmul fast float %i13, %i6
+  %i17 = insertelement <2 x float> %i15, float %i16, i64 1
+  %i18 = fmul fast float %i13, %i9
+  %i19 = insertelement <2 x float> %arg2, float %i18, i64 0
+  br label %bb20
+
+bb20:                                             ; preds = %bb3, %bb
+  %i21 = phi nsz <2 x float> [ %i17, %bb3 ], [ zeroinitializer, %bb ]
+  %i22 = phi nsz <2 x float> [ %i19, %bb3 ], [ zeroinitializer, %bb ]
+  %i23 = insertvalue { <2 x float>, <2 x float> } poison, <2 x float> %i21, 0
+  %i24 = insertvalue { <2 x float>, <2 x float> } %i23, <2 x float> %i22, 1
+  ret { <2 x float>, <2 x float> } %i24
+}
+
+declare float @llvm.sqrt.f32(float)
+
+attributes #0 = { nounwind }
+
+!0 = !{}


        


More information about the llvm-commits mailing list