[llvm] r283252 - [Target] move reciprocal estimate settings from TargetOptions to TargetLowering

Tue Oct 4 13:46:44 PDT 2016

Author: spatel
Date: Tue Oct  4 15:46:43 2016
New Revision: 283252

URL: http://llvm.org/viewvc/llvm-project?rev=283252&view=rev
Log:
[Target] move reciprocal estimate settings from TargetOptions to TargetLowering

The motivation for the change is that we can't have pseudo-global settings for
codegen living in TargetOptions because that doesn't work with LTO.

Ideally, these reciprocal attributes will be moved to the instruction-level via
FMF, metadata, or something else. But making them function attributes is at least
an improvement over the current state.

The ingredients of this patch are:

    Remove the reciprocal estimate command-line debug option.
    Add TargetRecip to TargetLowering.
    Remove TargetRecip from TargetOptions.
    Clean up the TargetRecip implementation to work with this new scheme.
    Set the default reciprocal settings in TargetLoweringBase (everything is off).
    Update the PowerPC defaults, users, and tests.
    Update the x86 defaults, users, and tests.

Note that if this patch needs to be reverted, the related clang patch checked in
at r283251 should be reverted too.

Differential Revision: https://reviews.llvm.org/D24816


Modified:
    llvm/trunk/include/llvm/CodeGen/CommandFlags.h
    llvm/trunk/include/llvm/Target/TargetLowering.h
    llvm/trunk/include/llvm/Target/TargetOptions.h
    llvm/trunk/include/llvm/Target/TargetRecip.h
    llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp
    llvm/trunk/lib/Target/TargetRecip.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86TargetMachine.cpp
    llvm/trunk/test/CodeGen/PowerPC/recipest.ll
    llvm/trunk/test/CodeGen/X86/recip-fastmath.ll
    llvm/trunk/test/CodeGen/X86/sqrt-fastmath-mir.ll
    llvm/trunk/test/CodeGen/X86/sqrt-fastmath.ll

Modified: llvm/trunk/include/llvm/CodeGen/CommandFlags.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/CommandFlags.h?rev=283252&r1=283251&r2=283252&view=diff
==============================================================================

--- llvm/trunk/include/llvm/CodeGen/CommandFlags.h (original)
+++ llvm/trunk/include/llvm/CodeGen/CommandFlags.h Tue Oct  4 15:46:43 2016
@@ -27,7 +27,6 @@
 #include "llvm/Support/Host.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRecip.h"
 #include <string>
 using namespace llvm;
 
@@ -201,12 +200,6 @@ FuseFPOps("fp-contract",
                          "Only fuse FP ops when the result won't be affected."),
               clEnumValEnd));
 
-cl::list<std::string>
-ReciprocalOps("recip",
-  cl::CommaSeparated,
-  cl::desc("Choose reciprocal operation types and parameters."),
-  cl::value_desc("all,none,default,divf,!vec-sqrtd,vec-divd:0,sqrt:9..."));
-
 cl::opt<bool>
 DontPlaceZerosInBSS("nozero-initialized-in-bss",
               cl::desc("Don't place zero-initialized symbols into bss section"),
@@ -305,7 +298,6 @@ static inline TargetOptions InitTargetOp
   TargetOptions Options;
   Options.LessPreciseFPMADOption = EnableFPMAD;
   Options.AllowFPOpFusion = FuseFPOps;
-  Options.Reciprocals = TargetRecip(ReciprocalOps);
   Options.UnsafeFPMath = EnableUnsafeFPMath;
   Options.NoInfsFPMath = EnableNoInfsFPMath;
   Options.NoNaNsFPMath = EnableNoNaNsFPMath;

Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=283252&r1=283251&r2=283252&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/Target/TargetLowering.h Tue Oct  4 15:46:43 2016
@@ -61,6 +61,7 @@ namespace llvm {
   class MCSymbol;
   template<typename T> class SmallVectorImpl;
   class DataLayout;
+  struct TargetRecip;
   class TargetRegisterClass;
   class TargetLibraryInfo;
   class TargetLoweringObjectFile;
@@ -541,6 +542,12 @@ public:
     }
   }
 
+  /// Return the reciprocal estimate code generation preferences for this target
+  /// after potentially overriding settings using the function's attributes.
+  /// FIXME: Like all unsafe-math target settings, this should really be an
+  /// instruction-level attribute/metadata/FMF.
+  TargetRecip getTargetRecipForFunc(MachineFunction &MF) const;
+
   /// Vector types are broken down into some number of legal first class types.
   /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8
   /// promoted EVT::f64 values with the X86 FP stack.  Similarly, EVT::v2i64
@@ -2174,6 +2181,7 @@ protected:
   /// sequence of memory operands that is recognized by PrologEpilogInserter.
   MachineBasicBlock *emitPatchPoint(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const;
+  TargetRecip ReciprocalEstimates;
 };
 
 /// This class defines information used to lower LLVM code to legal SelectionDAG

Modified: llvm/trunk/include/llvm/Target/TargetOptions.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetOptions.h?rev=283252&r1=283251&r2=283252&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetOptions.h (original)
+++ llvm/trunk/include/llvm/Target/TargetOptions.h Tue Oct  4 15:46:43 2016
@@ -111,7 +111,7 @@ namespace llvm {
           DataSections(false), UniqueSectionNames(true), TrapUnreachable(false),
           EmulatedTLS(false), EnableIPRA(false),
           FloatABIType(FloatABI::Default),
-          AllowFPOpFusion(FPOpFusion::Standard), Reciprocals(TargetRecip()),
+          AllowFPOpFusion(FPOpFusion::Standard),
           JTType(JumpTable::Single), ThreadModel(ThreadModel::POSIX),
           EABIVersion(EABI::Default), DebuggerTuning(DebuggerKind::Default),
           FPDenormalMode(FPDenormal::IEEE),
@@ -252,9 +252,6 @@ namespace llvm {
     /// the value of this option.
     FPOpFusion::FPOpFusionMode AllowFPOpFusion;
 
-    /// This class encapsulates options for reciprocal-estimate code generation.
-    TargetRecip Reciprocals;
-
     /// JTType - This flag specifies the type of jump-instruction table to
     /// create for functions that have the jumptable attribute.
     JumpTable::JumpTableType JTType;
@@ -301,7 +298,6 @@ inline bool operator==(const TargetOptio
     ARE_EQUAL(EmulatedTLS) &&
     ARE_EQUAL(FloatABIType) &&
     ARE_EQUAL(AllowFPOpFusion) &&
-    ARE_EQUAL(Reciprocals) &&
     ARE_EQUAL(JTType) &&
     ARE_EQUAL(ThreadModel) &&
     ARE_EQUAL(EABIVersion) &&

Modified: llvm/trunk/include/llvm/Target/TargetRecip.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetRecip.h?rev=283252&r1=283251&r2=283252&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetRecip.h (original)
+++ llvm/trunk/include/llvm/Target/TargetRecip.h Tue Oct  4 15:46:43 2016
@@ -17,7 +17,6 @@
 #ifndef LLVM_TARGET_TARGETRECIP_H
 #define LLVM_TARGET_TARGETRECIP_H
 
-#include "llvm/ADT/StringRef.h"
 #include <cstdint>
 #include <map>
 #include <string>
@@ -25,22 +24,21 @@
 
 namespace llvm {
 
+class StringRef;
+
 struct TargetRecip {
 public:
   TargetRecip();
 
-  /// Initialize all or part of the operations from command-line options or
-  /// a front end.
-  TargetRecip(const std::vector<std::string> &Args);
-
-  /// Set whether a particular reciprocal operation is enabled and how many
-  /// refinement steps are needed when using it. Use "all" to set enablement
-  /// and refinement steps for all operations.
-  void setDefaults(StringRef Key, bool Enable, unsigned RefSteps);
-
-  /// Return true if the reciprocal operation has been enabled by default or
-  /// from the command-line. Return false if the operation has been disabled
-  /// by default or from the command-line.
+  /// Parse a comma-separated string of reciprocal settings to set values in
+  /// this struct.
+  void set(StringRef &Args);
+
+  /// Set enablement and refinement steps for a particular reciprocal operation.
+  /// Use "all" to give all operations the same values.
+  void set(StringRef Key, bool Enable, unsigned RefSteps);
+
+  /// Return true if the reciprocal operation has been enabled.
   bool isEnabled(StringRef Key) const;
 
   /// Return the number of iterations necessary to refine the
@@ -50,15 +48,14 @@ public:
   bool operator==(const TargetRecip &Other) const;
 
 private:
-  enum {
-    Uninitialized = -1
-  };
-
+  // TODO: We should be able to use special values (enums) to simplify this into
+  // just an int, but we have to be careful because the user is allowed to
+  // specify "default" as a setting and just change the refinement step count.
   struct RecipParams {
-    int8_t Enabled;
+    bool Enabled;
     int8_t RefinementSteps;
 
-    RecipParams() : Enabled(Uninitialized), RefinementSteps(Uninitialized) {}
+    RecipParams() : Enabled(false), RefinementSteps(0) {}
   };
 
   std::map<StringRef, RecipParams> RecipMap;

Modified: llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp?rev=283252&r1=283251&r2=283252&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp (original)
+++ llvm/trunk/lib/CodeGen/TargetLoweringBase.cpp Tue Oct  4 15:46:43 2016
@@ -838,6 +838,7 @@ TargetLoweringBase::TargetLoweringBase(c
   InitLibcallNames(LibcallRoutineNames, TM.getTargetTriple());
   InitCmpLibcallCCs(CmpLibcallCCs);
   InitLibcallCallingConvs(LibcallCallingConvs);
+  ReciprocalEstimates.set("all", false, 0);
 }
 
 void TargetLoweringBase::initActions() {
@@ -1485,6 +1486,22 @@ MVT::SimpleValueType TargetLoweringBase:
   return MVT::i32; // return the default value
 }
 
+TargetRecip
+TargetLoweringBase::getTargetRecipForFunc(MachineFunction &MF) const {
+  const Function *F = MF.getFunction();
+  StringRef RecipAttrName = "reciprocal-estimates";
+  if (!F->hasFnAttribute(RecipAttrName))
+    return ReciprocalEstimates;
+
+  // Make a copy of the target's default reciprocal codegen settings.
+  TargetRecip Recips = ReciprocalEstimates;
+
+  // Override any settings that are customized for this function.
+  StringRef RecipString = F->getFnAttribute(RecipAttrName).getValueAsString();
+  Recips.set(RecipString);
+  return Recips;
+}
+
 /// getVectorTypeBreakdown - Vector types are broken down into some number of
 /// legal first class types.  For example, MVT::v8f32 maps to 2 MVT::v4f32
 /// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=283252&r1=283251&r2=283252&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Tue Oct  4 15:46:43 2016
@@ -901,6 +901,23 @@ PPCTargetLowering::PPCTargetLowering(con
     setTargetDAGCombine(ISD::FSQRT);
   }
 
+  // For the estimates, convergence is quadratic, so we essentially double the
+  // number of digits correct after every iteration. For both FRE and FRSQRTE,
+  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
+  // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
+  unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3,
+           RefinementSteps64 = RefinementSteps + 1;
+
+  ReciprocalEstimates.set("sqrtf", true, RefinementSteps);
+  ReciprocalEstimates.set("vec-sqrtf", true, RefinementSteps);
+  ReciprocalEstimates.set("divf", true, RefinementSteps);
+  ReciprocalEstimates.set("vec-divf", true, RefinementSteps);
+
+  ReciprocalEstimates.set("sqrtd", true, RefinementSteps64);
+  ReciprocalEstimates.set("vec-sqrtd", true, RefinementSteps64);
+  ReciprocalEstimates.set("divd", true, RefinementSteps64);
+  ReciprocalEstimates.set("vec-divd", true, RefinementSteps64);
+
   // Darwin long double math library functions have $LDBL128 appended.
   if (Subtarget.isDarwin()) {
     setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
@@ -9646,7 +9663,7 @@ SDValue PPCTargetLowering::getRsqrtEstim
       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
-    TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+    TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
     std::string RecipOp = getRecipOp("sqrt", VT);
     if (!Recips.isEnabled(RecipOp))
       return SDValue();
@@ -9668,7 +9685,7 @@ SDValue PPCTargetLowering::getRecipEstim
       (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
       (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
       (VT == MVT::v4f64 && Subtarget.hasQPX())) {
-    TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+    TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
     std::string RecipOp = getRecipOp("div", VT);
     if (!Recips.isEnabled(RecipOp))
       return SDValue();

Modified: llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp?rev=283252&r1=283251&r2=283252&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCTargetMachine.cpp Tue Oct  4 15:46:43 2016
@@ -204,23 +204,6 @@ PPCTargetMachine::PPCTargetMachine(const
       TargetABI(computeTargetABI(TT, Options)),
       Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) {
 
-  // For the estimates, convergence is quadratic, so we essentially double the
-  // number of digits correct after every iteration. For both FRE and FRSQRTE,
-  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
-  // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
-  unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3,
-           RefinementSteps64 = RefinementSteps + 1;
-
-  this->Options.Reciprocals.setDefaults("sqrtf", true, RefinementSteps);
-  this->Options.Reciprocals.setDefaults("vec-sqrtf", true, RefinementSteps);
-  this->Options.Reciprocals.setDefaults("divf", true, RefinementSteps);
-  this->Options.Reciprocals.setDefaults("vec-divf", true, RefinementSteps);
-
-  this->Options.Reciprocals.setDefaults("sqrtd", true, RefinementSteps64);
-  this->Options.Reciprocals.setDefaults("vec-sqrtd", true, RefinementSteps64);
-  this->Options.Reciprocals.setDefaults("divd", true, RefinementSteps64);
-  this->Options.Reciprocals.setDefaults("vec-divd", true, RefinementSteps64);
-
   initAsmInfo();
 }
 

Modified: llvm/trunk/lib/Target/TargetRecip.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/TargetRecip.cpp?rev=283252&r1=283251&r2=283252&view=diff
==============================================================================
--- llvm/trunk/lib/Target/TargetRecip.cpp (original)
+++ llvm/trunk/lib/Target/TargetRecip.cpp Tue Oct  4 15:46:43 2016
@@ -16,7 +16,9 @@
 
 #include "llvm/Target/TargetRecip.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/ErrorHandling.h"
 
 using namespace llvm;
@@ -36,9 +38,7 @@ static const char *const RecipOps[] = {
   "vec-sqrtf",
 };
 
-// The uninitialized state is needed for the enabled settings and refinement
-// steps because custom settings may arrive via the command-line before target
-// defaults are set.
+/// All operations are disabled by default and refinement steps are set to zero.
 TargetRecip::TargetRecip() {
   unsigned NumStrings = llvm::array_lengthof(RecipOps);
   for (unsigned i = 0; i < NumStrings; ++i)
@@ -137,18 +137,8 @@ void TargetRecip::parseIndividualParams(
         assert(Iter == RecipMap.end() && "Float entry missing from map");
         report_fatal_error("Invalid option for -recip.");
       }
-      
-      // The option was specified without a float or double suffix.
-      if (RecipMap[Val.str() + 'd'].Enabled != Uninitialized) {
-        // Make sure that the double entry was not already specified.
-        // The float entry will be checked below.
-        report_fatal_error("Duplicate option for -recip.");
-      }
     }
     
-    if (Iter->second.Enabled != Uninitialized)
-      report_fatal_error("Duplicate option for -recip.");
-    
     // Mark the matched option as found. Do not allow duplicate specifiers.
     Iter->second.Enabled = !IsDisabled;
     if (!RefStepString.empty())
@@ -164,50 +154,45 @@ void TargetRecip::parseIndividualParams(
   }
 }
 
-TargetRecip::TargetRecip(const std::vector<std::string> &Args) :
-  TargetRecip() {
-  unsigned NumArgs = Args.size();
+void TargetRecip::set(StringRef &RecipString) {
+  SmallVector<StringRef, 4> RecipStringVector;
+  SplitString(RecipString, RecipStringVector, ",");
+  std::vector<std::string> RecipVector;
+  for (unsigned i = 0; i < RecipStringVector.size(); ++i)
+    RecipVector.push_back(RecipStringVector[i].str());
+
+  unsigned NumArgs = RecipVector.size();
 
   // Check if "all", "default", or "none" was specified.
-  if (NumArgs == 1 && parseGlobalParams(Args[0]))
+  if (NumArgs == 1 && parseGlobalParams(RecipVector[0]))
     return;
- 
-  parseIndividualParams(Args);
+
+  parseIndividualParams(RecipVector);
 }
 
 bool TargetRecip::isEnabled(StringRef Key) const {
   ConstRecipIter Iter = RecipMap.find(Key);
   assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
-  assert(Iter->second.Enabled != Uninitialized &&
-         "Enablement setting was not initialized");
   return Iter->second.Enabled;
 }
 
 unsigned TargetRecip::getRefinementSteps(StringRef Key) const {
   ConstRecipIter Iter = RecipMap.find(Key);
   assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
-  assert(Iter->second.RefinementSteps != Uninitialized &&
-         "Refinement step setting was not initialized");
   return Iter->second.RefinementSteps;
 }
 
-/// Custom settings (previously initialized values) override target defaults.
-void TargetRecip::setDefaults(StringRef Key, bool Enable,
-                              unsigned RefSteps) {
+void TargetRecip::set(StringRef Key, bool Enable, unsigned RefSteps) {
   if (Key == "all") {
     for (auto &KV : RecipMap) {
       RecipParams &RP = KV.second;
-      if (RP.Enabled == Uninitialized)
-        RP.Enabled = Enable;
-      if (RP.RefinementSteps == Uninitialized)
-        RP.RefinementSteps = RefSteps;
+      RP.Enabled = Enable;
+      RP.RefinementSteps = RefSteps;
     }
   } else {
     RecipParams &RP = RecipMap[Key];
-    if (RP.Enabled == Uninitialized)
-      RP.Enabled = Enable;
-    if (RP.RefinementSteps == Uninitialized)
-      RP.RefinementSteps = RefSteps;
+    RP.Enabled = Enable;
+    RP.RefinementSteps = RefSteps;
   }
 }
 

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=283252&r1=283251&r2=283252&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Oct  4 15:46:43 2016
@@ -53,6 +53,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRecip.h"
 #include "X86IntrinsicsInfo.h"
 #include <bitset>
 #include <numeric>
@@ -84,6 +85,15 @@ X86TargetLowering::X86TargetLowering(con
   // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
 
+  // By default (and when -ffast-math is on), enable estimate codegen with 1
+  // refinement step for floats (not doubles) except scalar division. Scalar
+  // division estimates are disabled because they break too much real-world
+  // code. These defaults are intended to match GCC behavior.
+  ReciprocalEstimates.set("sqrtf", true, 1);
+  ReciprocalEstimates.set("divf", false, 1);
+  ReciprocalEstimates.set("vec-sqrtf", true, 1);
+  ReciprocalEstimates.set("vec-divf", true, 1);
+
   // For 64-bit, since we have so many registers, use the ILP scheduler.
   // For 32-bit, use the register pressure specific scheduling.
   // For Atom, always use ILP scheduling.
@@ -15206,7 +15216,7 @@ SDValue X86TargetLowering::getRsqrtEstim
   else
     return SDValue();
 
-  TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+  TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
   if (!Recips.isEnabled(RecipOp))
     return SDValue();
 
@@ -15238,7 +15248,7 @@ SDValue X86TargetLowering::getRecipEstim
   else
     return SDValue();
 
-  TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
+  TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
   if (!Recips.isEnabled(RecipOp))
     return SDValue();
 

Modified: llvm/trunk/lib/Target/X86/X86TargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetMachine.cpp?rev=283252&r1=283251&r2=283252&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetMachine.cpp Tue Oct  4 15:46:43 2016
@@ -166,16 +166,6 @@ X86TargetMachine::X86TargetMachine(const
   if ((TT.isOSWindows() && TT.getArch() == Triple::x86_64) || TT.isPS4())
     this->Options.TrapUnreachable = true;
 
-  // By default (and when -ffast-math is on), enable estimate codegen for
-  // everything except scalar division. By default, use 1 refinement step for
-  // all operations. Defaults may be overridden by using command-line options.
-  // Scalar division estimates are disabled because they break too much
-  // real-world code. These defaults match GCC behavior.
-  this->Options.Reciprocals.setDefaults("sqrtf", true, 1);
-  this->Options.Reciprocals.setDefaults("divf", false, 1);
-  this->Options.Reciprocals.setDefaults("vec-sqrtf", true, 1);
-  this->Options.Reciprocals.setDefaults("vec-divf", true, 1);
-
   initAsmInfo();
 }
 

Modified: llvm/trunk/test/CodeGen/PowerPC/recipest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/recipest.ll?rev=283252&r1=283251&r2=283252&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/recipest.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/recipest.ll Tue Oct  4 15:46:43 2016
@@ -1,6 +1,6 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math -mattr=-vsx | FileCheck %s
-; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -enable-unsafe-fp-math -mattr=-vsx -recip=sqrtf:0,sqrtd:0 | FileCheck %s -check-prefix=CHECK-NONR
 ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=-vsx | FileCheck -check-prefix=CHECK-SAFE %s
+
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
@@ -25,19 +25,26 @@ define double @foo(double %a, double %b)
 ; CHECK-NEXT: fmul
 ; CHECK: blr
 
-; CHECK-NONR: @foo
-; CHECK-NONR: frsqrte
-; CHECK-NONR-NOT: fmadd
-; CHECK-NONR: fmul
-; CHECK-NONR-NOT: fmadd
-; CHECK-NONR: blr
-
 ; CHECK-SAFE: @foo
 ; CHECK-SAFE: fsqrt
 ; CHECK-SAFE: fdiv
 ; CHECK-SAFE: blr
 }
 
+define double @no_estimate_refinement_f64(double %a, double %b) #0 {
+  %x = call double @llvm.sqrt.f64(double %b)
+  %r = fdiv double %a, %x
+  ret double %r
+
+; CHECK-LABEL: @no_estimate_refinement_f64
+; CHECK: frsqrte
+; CHECK-NOT: fmadd
+; CHECK: fmul
+; CHECK-NOT: fmadd
+; CHECK: blr
+}
+
+
 define double @foof(double %a, float %b) nounwind {
   %x = call float @llvm.sqrt.f32(float %b)
   %y = fpext float %x to double
@@ -98,19 +105,26 @@ define float @goo(float %a, float %b) no
 ; CHECK-NEXT: fmuls
 ; CHECK-NEXT: blr
 
-; CHECK-NONR: @goo
-; CHECK-NONR: frsqrtes
-; CHECK-NONR-NOT: fmadds
-; CHECK-NONR: fmuls
-; CHECK-NONR-NOT: fmadds
-; CHECK-NONR: blr
-
 ; CHECK-SAFE: @goo
 ; CHECK-SAFE: fsqrts
 ; CHECK-SAFE: fdivs
 ; CHECK-SAFE: blr
 }
 
+
+define float @no_estimate_refinement_f32(float %a, float %b) #0 {
+  %x = call float @llvm.sqrt.f32(float %b)
+  %r = fdiv float %a, %x
+  ret float %r
+
+; CHECK-LABEL: @no_estimate_refinement_f32
+; CHECK: frsqrtes
+; CHECK-NOT: fmadds
+; CHECK: fmuls
+; CHECK-NOT: fmadds
+; CHECK: blr
+}
+
 ; Recognize that this is rsqrt(a) * rcp(b) * c, 
 ; not 1 / ( 1 / sqrt(a)) * rcp(b) * c.
 define float @rsqrt_fmul(float %a, float %b, float %c) {
@@ -252,3 +266,5 @@ define <4 x float> @hoo3(<4 x float> %a)
 ; CHECK-SAFE: blr
 }
 
+attributes #0 = { nounwind "reciprocal-estimates"="sqrtf:0,sqrtd:0" }
+

Modified: llvm/trunk/test/CodeGen/X86/recip-fastmath.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/recip-fastmath.ll?rev=283252&r1=283251&r2=283252&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/recip-fastmath.ll (original)
+++ llvm/trunk/test/CodeGen/X86/recip-fastmath.ll Tue Oct  4 15:46:43 2016
@@ -1,6 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 -recip=!divf,!vec-divf | FileCheck %s --check-prefix=NORECIP
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf,vec-divf | FileCheck %s --check-prefix=RECIP
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf:2,vec-divf:2 | FileCheck %s --check-prefix=REFINE
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx  | FileCheck %s --check-prefix=AVX
 
 ; If the target's divss/divps instructions are substantially
 ; slower than rcpss/rcpps with a Newton-Raphson refinement,
@@ -10,100 +9,142 @@
 ; for details about the accuracy, speed, and implementation
 ; differences of x86 reciprocal estimates.
 
-define float @reciprocal_estimate(float %x) #0 {
+define float @f32_no_estimate(float %x) #0 {
+; AVX-LABEL: f32_no_estimate:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+;
   %div = fdiv fast float 1.0, %x
   ret float %div
+}
+
+define float @f32_one_step(float %x) #1 {
+; AVX-LABEL: f32_one_step:
+; AVX:       # BB#0:
+; AVX-NEXT:    vrcpss %xmm0, %xmm0, %xmm1
+; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
+; AVX-NEXT:    vsubss %xmm0, %xmm2, %xmm0
+; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+;
+  %div = fdiv fast float 1.0, %x
+  ret float %div
+}
+
+define float @f32_two_step(float %x) #2 {
+; AVX-LABEL: f32_two_step:
+; AVX:       # BB#0:
+; AVX-NEXT:    vrcpss %xmm0, %xmm0, %xmm1
+; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm2
+; AVX-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; AVX-NEXT:    vsubss %xmm2, %xmm3, %xmm2
+; AVX-NEXT:    vmulss %xmm2, %xmm1, %xmm2
+; AVX-NEXT:    vaddss %xmm2, %xmm1, %xmm1
+; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vsubss %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vaddss %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+;
+  %div = fdiv fast float 1.0, %x
+  ret float %div
+}
+
+define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
+; AVX-LABEL: v4f32_no_estimate:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
+; AVX-NEXT:    vdivps %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+;
+  %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
+  ret <4 x float> %div
+}
 
-; NORECIP-LABEL: reciprocal_estimate:
-; NORECIP: movss
-; NORECIP-NEXT: divss
-; NORECIP-NEXT: movaps
-; NORECIP-NEXT: retq
-
-; RECIP-LABEL: reciprocal_estimate:
-; RECIP: vrcpss
-; RECIP: vmulss
-; RECIP: vsubss
-; RECIP: vmulss
-; RECIP: vaddss
-; RECIP-NEXT: retq
-
-; REFINE-LABEL: reciprocal_estimate:
-; REFINE: vrcpss
-; REFINE: vmulss
-; REFINE: vsubss
-; REFINE: vmulss
-; REFINE: vaddss
-; REFINE: vmulss
-; REFINE: vsubss
-; REFINE: vmulss
-; REFINE: vaddss
-; REFINE-NEXT: retq
+define <4 x float> @v4f32_one_step(<4 x float> %x) #1 {
+; AVX-LABEL: v4f32_one_step:
+; AVX:       # BB#0:
+; AVX-NEXT:    vrcpps %xmm0, %xmm1
+; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
+; AVX-NEXT:    vsubps %xmm0, %xmm2, %xmm0
+; AVX-NEXT:    vmulps %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vaddps %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+;
+  %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
+  ret <4 x float> %div
 }
 
-define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 {
+define <4 x float> @v4f32_two_step(<4 x float> %x) #2 {
+; AVX-LABEL: v4f32_two_step:
+; AVX:       # BB#0:
+; AVX-NEXT:    vrcpps %xmm0, %xmm1
+; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm2
+; AVX-NEXT:    vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
+; AVX-NEXT:    vsubps %xmm2, %xmm3, %xmm2
+; AVX-NEXT:    vmulps %xmm2, %xmm1, %xmm2
+; AVX-NEXT:    vaddps %xmm2, %xmm1, %xmm1
+; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vsubps %xmm0, %xmm3, %xmm0
+; AVX-NEXT:    vmulps %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    vaddps %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+;
   %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
   ret <4 x float> %div
+}
 
-; NORECIP-LABEL: reciprocal_estimate_v4f32:
-; NORECIP: movaps
-; NORECIP-NEXT: divps
-; NORECIP-NEXT: movaps
-; NORECIP-NEXT: retq
-
-; RECIP-LABEL: reciprocal_estimate_v4f32:
-; RECIP: vrcpps
-; RECIP: vmulps
-; RECIP: vsubps
-; RECIP: vmulps
-; RECIP: vaddps
-; RECIP-NEXT: retq
-
-; REFINE-LABEL: reciprocal_estimate_v4f32:
-; REFINE: vrcpps
-; REFINE: vmulps
-; REFINE: vsubps
-; REFINE: vmulps
-; REFINE: vaddps
-; REFINE: vmulps
-; REFINE: vsubps
-; REFINE: vmulps
-; REFINE: vaddps
-; REFINE-NEXT: retq
+define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
+; AVX-LABEL: v8f32_no_estimate:
+; AVX:       # BB#0:
+; AVX-NEXT:    vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
+; AVX-NEXT:    vdivps %ymm0, %ymm1, %ymm0
+; AVX-NEXT:    retq
+;
+  %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
+  ret <8 x float> %div
 }
 
-define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 {
+define <8 x float> @v8f32_one_step(<8 x float> %x) #1 {
+; AVX-LABEL: v8f32_one_step:
+; AVX:       # BB#0:
+; AVX-NEXT:    vrcpps %ymm0, %ymm1
+; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
+; AVX-NEXT:    vsubps %ymm0, %ymm2, %ymm0
+; AVX-NEXT:    vmulps %ymm0, %ymm1, %ymm0
+; AVX-NEXT:    vaddps %ymm0, %ymm1, %ymm0
+; AVX-NEXT:    retq
+;
   %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
   ret <8 x float> %div
+}
 
-; NORECIP-LABEL: reciprocal_estimate_v8f32:
-; NORECIP: movaps
-; NORECIP: movaps
-; NORECIP-NEXT: divps
-; NORECIP-NEXT: divps
-; NORECIP-NEXT: movaps
-; NORECIP-NEXT: movaps
-; NORECIP-NEXT: retq
-
-; RECIP-LABEL: reciprocal_estimate_v8f32:
-; RECIP: vrcpps
-; RECIP: vmulps
-; RECIP: vsubps
-; RECIP: vmulps
-; RECIP: vaddps
-; RECIP-NEXT: retq
-
-; REFINE-LABEL: reciprocal_estimate_v8f32:
-; REFINE: vrcpps
-; REFINE: vmulps
-; REFINE: vsubps
-; REFINE: vmulps
-; REFINE: vaddps
-; REFINE: vmulps
-; REFINE: vsubps
-; REFINE: vmulps
-; REFINE: vaddps
-; REFINE-NEXT: retq
+define <8 x float> @v8f32_two_step(<8 x float> %x) #2 {
+; AVX-LABEL: v8f32_two_step:
+; AVX:       # BB#0:
+; AVX-NEXT:    vrcpps %ymm0, %ymm1
+; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm2
+; AVX-NEXT:    vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
+; AVX-NEXT:    vsubps %ymm2, %ymm3, %ymm2
+; AVX-NEXT:    vmulps %ymm2, %ymm1, %ymm2
+; AVX-NEXT:    vaddps %ymm2, %ymm1, %ymm1
+; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
+; AVX-NEXT:    vsubps %ymm0, %ymm3, %ymm0
+; AVX-NEXT:    vmulps %ymm0, %ymm1, %ymm0
+; AVX-NEXT:    vaddps %ymm0, %ymm1, %ymm0
+; AVX-NEXT:    retq
+;
+  %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
+  ret <8 x float> %div
 }
 
-attributes #0 = { "unsafe-fp-math"="true" }
+attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="!divf,!vec-divf" }
+attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="divf,vec-divf" }
+attributes #2 = { "unsafe-fp-math"="true" "reciprocal-estimates"="divf:2,vec-divf:2" }
+

Modified: llvm/trunk/test/CodeGen/X86/sqrt-fastmath-mir.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sqrt-fastmath-mir.ll?rev=283252&r1=283251&r2=283252&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sqrt-fastmath-mir.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sqrt-fastmath-mir.ll Tue Oct  4 15:46:43 2016
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2,fma -recip=sqrt:2 -stop-after=expand-isel-pseudos 2>&1 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2,fma -stop-after=expand-isel-pseudos 2>&1 | FileCheck %s
 
 declare float @llvm.sqrt.f32(float) #0
 
@@ -48,5 +48,5 @@ define float @rfoo(float %f) #0 {
   ret float %div
 }
 
-attributes #0 = { "unsafe-fp-math"="true" }
+attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt:2" }
 attributes #1 = { nounwind readnone }

Modified: llvm/trunk/test/CodeGen/X86/sqrt-fastmath.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sqrt-fastmath.ll?rev=283252&r1=283251&r2=283252&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sqrt-fastmath.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sqrt-fastmath.ll Tue Oct  4 15:46:43 2016
@@ -1,141 +1,177 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 -recip=!sqrtf,!vec-sqrtf,!divf,!vec-divf | FileCheck %s --check-prefix=NORECIP
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=sqrtf,vec-sqrtf | FileCheck %s --check-prefix=ESTIMATE
-
-declare double @__sqrt_finite(double) #0
-declare float @__sqrtf_finite(float) #0
-declare x86_fp80 @__sqrtl_finite(x86_fp80) #0
-declare float @llvm.sqrt.f32(float) #0
-declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #0
-declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #0
-
-
-define double @fd(double %d) #0 {
-; NORECIP-LABEL: fd:
-; NORECIP:       # BB#0:
-; NORECIP-NEXT:    sqrtsd %xmm0, %xmm0
-; NORECIP-NEXT:    retq
-;
-; ESTIMATE-LABEL: fd:
-; ESTIMATE:       # BB#0:
-; ESTIMATE-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
-; ESTIMATE-NEXT:    retq
-  %call = tail call double @__sqrt_finite(double %d) #1
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s --check-prefix=AVX
+
+declare double @__sqrt_finite(double)
+declare float @__sqrtf_finite(float)
+declare x86_fp80 @__sqrtl_finite(x86_fp80)
+declare float @llvm.sqrt.f32(float)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
+declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)
+
+
+define double @finite_f64_no_estimate(double %d) #0 {
+; AVX-LABEL: finite_f64_no_estimate:
+; AVX:       # BB#0:
+; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+  %call = tail call double @__sqrt_finite(double %d) #2
   ret double %call
 }
 
+; No estimates for doubles.
 
-define float @ff(float %f) #0 {
-; NORECIP-LABEL: ff:
-; NORECIP:       # BB#0:
-; NORECIP-NEXT:    sqrtss %xmm0, %xmm0
-; NORECIP-NEXT:    retq
-;
-; ESTIMATE-LABEL: ff:
-; ESTIMATE:       # BB#0:
-; ESTIMATE-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm1
-; ESTIMATE-NEXT:    vmulss %xmm1, %xmm0, %xmm2
-; ESTIMATE-NEXT:    vmulss %xmm1, %xmm2, %xmm1
-; ESTIMATE-NEXT:    vaddss {{.*}}(%rip), %xmm1, %xmm1
-; ESTIMATE-NEXT:    vmulss {{.*}}(%rip), %xmm2, %xmm2
-; ESTIMATE-NEXT:    vmulss %xmm1, %xmm2, %xmm1
-; ESTIMATE-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; ESTIMATE-NEXT:    vcmpeqss %xmm2, %xmm0, %xmm0
-; ESTIMATE-NEXT:    vandnps %xmm1, %xmm0, %xmm0
-; ESTIMATE-NEXT:    retq
-  %call = tail call float @__sqrtf_finite(float %f) #1
+define double @finite_f64_estimate(double %d) #1 {
+; AVX-LABEL: finite_f64_estimate:
+; AVX:       # BB#0:
+; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+  %call = tail call double @__sqrt_finite(double %d) #2
+  ret double %call
+}
+
+define float @finite_f32_no_estimate(float %f) #0 {
+; AVX-LABEL: finite_f32_no_estimate:
+; AVX:       # BB#0:
+; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+  %call = tail call float @__sqrtf_finite(float %f) #2
   ret float %call
 }
 
+define float @finite_f32_estimate(float %f) #1 {
+; AVX-LABEL: finite_f32_estimate:
+; AVX:       # BB#0:
+; AVX-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm1
+; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm2
+; AVX-NEXT:    vmulss %xmm1, %xmm2, %xmm1
+; AVX-NEXT:    vaddss {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm2, %xmm2
+; AVX-NEXT:    vmulss %xmm1, %xmm2, %xmm1
+; AVX-NEXT:    vxorps %xmm2, %xmm2, %xmm2
+; AVX-NEXT:    vcmpeqss %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    vandnps %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+;
+  %call = tail call float @__sqrtf_finite(float %f) #2
+  ret float %call
+}
 
-define x86_fp80 @fld(x86_fp80 %ld) #0 {
-; NORECIP-LABEL: fld:
-; NORECIP:       # BB#0:
-; NORECIP-NEXT:    fldt {{[0-9]+}}(%rsp)
-; NORECIP-NEXT:    fsqrt
-; NORECIP-NEXT:    retq
-;
-; ESTIMATE-LABEL: fld:
-; ESTIMATE:       # BB#0:
-; ESTIMATE-NEXT:    fldt {{[0-9]+}}(%rsp)
-; ESTIMATE-NEXT:    fsqrt
-; ESTIMATE-NEXT:    retq
-  %call = tail call x86_fp80 @__sqrtl_finite(x86_fp80 %ld) #1
+define x86_fp80 @finite_f80_no_estimate(x86_fp80 %ld) #0 {
+; AVX-LABEL: finite_f80_no_estimate:
+; AVX:       # BB#0:
+; AVX-NEXT:    fldt {{[0-9]+}}(%rsp)
+; AVX-NEXT:    fsqrt
+; AVX-NEXT:    retq
+;
+  %call = tail call x86_fp80 @__sqrtl_finite(x86_fp80 %ld) #2
   ret x86_fp80 %call
 }
 
+; Don't die on the impossible.
 
+define x86_fp80 @finite_f80_estimate_but_no(x86_fp80 %ld) #1 {
+; AVX-LABEL: finite_f80_estimate_but_no:
+; AVX:       # BB#0:
+; AVX-NEXT:    fldt {{[0-9]+}}(%rsp)
+; AVX-NEXT:    fsqrt
+; AVX-NEXT:    retq
+;
+  %call = tail call x86_fp80 @__sqrtl_finite(x86_fp80 %ld) #2
+  ret x86_fp80 %call
+}
 
-define float @reciprocal_square_root(float %x) #0 {
-; NORECIP-LABEL: reciprocal_square_root:
-; NORECIP:       # BB#0:
-; NORECIP-NEXT:    sqrtss %xmm0, %xmm1
-; NORECIP-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; NORECIP-NEXT:    divss %xmm1, %xmm0
-; NORECIP-NEXT:    retq
-;
-; ESTIMATE-LABEL: reciprocal_square_root:
-; ESTIMATE:       # BB#0:
-; ESTIMATE-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm1
-; ESTIMATE-NEXT:    vmulss %xmm1, %xmm1, %xmm2
-; ESTIMATE-NEXT:    vmulss %xmm2, %xmm0, %xmm0
-; ESTIMATE-NEXT:    vaddss {{.*}}(%rip), %xmm0, %xmm0
-; ESTIMATE-NEXT:    vmulss {{.*}}(%rip), %xmm1, %xmm1
-; ESTIMATE-NEXT:    vmulss %xmm0, %xmm1, %xmm0
-; ESTIMATE-NEXT:    retq
+define float @f32_no_estimate(float %x) #0 {
+; AVX-LABEL: f32_no_estimate:
+; AVX:       # BB#0:
+; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT:    vdivss %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+;
   %sqrt = tail call float @llvm.sqrt.f32(float %x)
   %div = fdiv fast float 1.0, %sqrt
   ret float %div
 }
 
-define <4 x float> @reciprocal_square_root_v4f32(<4 x float> %x) #0 {
-; NORECIP-LABEL: reciprocal_square_root_v4f32:
-; NORECIP:       # BB#0:
-; NORECIP-NEXT:    sqrtps %xmm0, %xmm1
-; NORECIP-NEXT:    movaps {{.*#+}} xmm0 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; NORECIP-NEXT:    divps %xmm1, %xmm0
-; NORECIP-NEXT:    retq
-;
-; ESTIMATE-LABEL: reciprocal_square_root_v4f32:
-; ESTIMATE:       # BB#0:
-; ESTIMATE-NEXT:    vrsqrtps %xmm0, %xmm1
-; ESTIMATE-NEXT:    vmulps %xmm1, %xmm1, %xmm2
-; ESTIMATE-NEXT:    vmulps %xmm2, %xmm0, %xmm0
-; ESTIMATE-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
-; ESTIMATE-NEXT:    vmulps {{.*}}(%rip), %xmm1, %xmm1
-; ESTIMATE-NEXT:    vmulps %xmm0, %xmm1, %xmm0
-; ESTIMATE-NEXT:    retq
+define float @f32_estimate(float %x) #1 {
+; AVX-LABEL: f32_estimate:
+; AVX:       # BB#0:
+; AVX-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm1
+; AVX-NEXT:    vmulss %xmm1, %xmm1, %xmm2
+; AVX-NEXT:    vmulss %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    vaddss {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vmulss {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT:    vmulss %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+;
+  %sqrt = tail call float @llvm.sqrt.f32(float %x)
+  %div = fdiv fast float 1.0, %sqrt
+  ret float %div
+}
+
+define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 {
+; AVX-LABEL: v4f32_no_estimate:
+; AVX:       # BB#0:
+; AVX-NEXT:    vsqrtps %xmm0, %xmm0
+; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
+; AVX-NEXT:    vdivps %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+;
   %sqrt = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
   %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
   ret <4 x float> %div
 }
 
-define <8 x float> @reciprocal_square_root_v8f32(<8 x float> %x) #0 {
-; NORECIP-LABEL: reciprocal_square_root_v8f32:
-; NORECIP:       # BB#0:
-; NORECIP-NEXT:    sqrtps %xmm1, %xmm2
-; NORECIP-NEXT:    sqrtps %xmm0, %xmm3
-; NORECIP-NEXT:    movaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
-; NORECIP-NEXT:    movaps %xmm1, %xmm0
-; NORECIP-NEXT:    divps %xmm3, %xmm0
-; NORECIP-NEXT:    divps %xmm2, %xmm1
-; NORECIP-NEXT:    retq
-;
-; ESTIMATE-LABEL: reciprocal_square_root_v8f32:
-; ESTIMATE:       # BB#0:
-; ESTIMATE-NEXT:    vrsqrtps %ymm0, %ymm1
-; ESTIMATE-NEXT:    vmulps %ymm1, %ymm1, %ymm2
-; ESTIMATE-NEXT:    vmulps %ymm2, %ymm0, %ymm0
-; ESTIMATE-NEXT:    vaddps {{.*}}(%rip), %ymm0, %ymm0
-; ESTIMATE-NEXT:    vmulps {{.*}}(%rip), %ymm1, %ymm1
-; ESTIMATE-NEXT:    vmulps %ymm0, %ymm1, %ymm0
-; ESTIMATE-NEXT:    retq
+define <4 x float> @v4f32_estimate(<4 x float> %x) #1 {
+; AVX-LABEL: v4f32_estimate:
+; AVX:       # BB#0:
+; AVX-NEXT:    vrsqrtps %xmm0, %xmm1
+; AVX-NEXT:    vmulps %xmm1, %xmm1, %xmm2
+; AVX-NEXT:    vmulps %xmm2, %xmm0, %xmm0
+; AVX-NEXT:    vaddps {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vmulps {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT:    vmulps %xmm0, %xmm1, %xmm0
+; AVX-NEXT:    retq
+;
+  %sqrt = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
+  %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
+  ret <4 x float> %div
+}
+
+define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 {
+; AVX-LABEL: v8f32_no_estimate:
+; AVX:       # BB#0:
+; AVX-NEXT:    vsqrtps %ymm0, %ymm0
+; AVX-NEXT:    vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
+; AVX-NEXT:    vdivps %ymm0, %ymm1, %ymm0
+; AVX-NEXT:    retq
+;
+  %sqrt = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %x)
+  %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
+  ret <8 x float> %div
+}
+
+define <8 x float> @v8f32_estimate(<8 x float> %x) #1 {
+; AVX-LABEL: v8f32_estimate:
+; AVX:       # BB#0:
+; AVX-NEXT:    vrsqrtps %ymm0, %ymm1
+; AVX-NEXT:    vmulps %ymm1, %ymm1, %ymm2
+; AVX-NEXT:    vmulps %ymm2, %ymm0, %ymm0
+; AVX-NEXT:    vaddps {{.*}}(%rip), %ymm0, %ymm0
+; AVX-NEXT:    vmulps {{.*}}(%rip), %ymm1, %ymm1
+; AVX-NEXT:    vmulps %ymm0, %ymm1, %ymm0
+; AVX-NEXT:    retq
+;
   %sqrt = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %x)
   %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %sqrt
   ret <8 x float> %div
 }
 
 
-attributes #0 = { "unsafe-fp-math"="true" }
-attributes #1 = { nounwind readnone }
+attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="!sqrtf,!vec-sqrtf,!divf,!vec-divf" }
+attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" }
+attributes #2 = { nounwind readnone }