[llvm] r185956 - AArch64/PowerPC/SystemZ/X86: This patch fixes the interface, usage, and all

Tue Jul 9 11:16:56 PDT 2013

Author: stephenwlin
Date: Tue Jul  9 13:16:56 2013
New Revision: 185956

URL: http://llvm.org/viewvc/llvm-project?rev=185956&view=rev
Log:
AArch64/PowerPC/SystemZ/X86: This patch fixes the interface, usage, and all
in-tree implementations of TargetLoweringBase::isFMAFasterThanMulAndAdd in
order to resolve the following issues with fmuladd (i.e. optional FMA)
intrinsics:

1. On X86(-64) targets, ISD::FMA nodes are formed when lowering fmuladd
intrinsics even if the subtarget does not support FMA instructions, leading
to laughably bad code generation in some situations.

2. On AArch64 targets, ISD::FMA nodes are formed for operations on fp128,
resulting in a call to a software fp128 FMA implementation.

3. On PowerPC targets, FMAs are not generated from fmuladd intrinsics on types
like v2f32, v8f32, v4f64, etc., even though they promote, split, scalarize,
etc. to types that support hardware FMAs.

The function has also been slightly renamed for consistency and to force a
merge/build conflict for any out-of-tree target implementing it. To resolve,
see comments and fixed in-tree examples.

Added:
    llvm/trunk/test/CodeGen/PowerPC/vec_fmuladd.ll
    llvm/trunk/test/CodeGen/X86/extended-fma-contraction.ll
    llvm/trunk/test/CodeGen/X86/fma_patterns_wide.ll
Modified:
    llvm/trunk/include/llvm/Target/TargetLowering.h
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
    llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
    llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp
    llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h
    llvm/trunk/test/CodeGen/AArch64/fp-dp3.ll
    llvm/trunk/test/CodeGen/AArch64/illegal-float-ops.ll
    llvm/trunk/test/CodeGen/X86/wide-fma-contraction.ll

Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=185956&r1=185955&r2=185956&view=diff
==============================================================================

--- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/Target/TargetLowering.h Tue Jul  9 13:16:56 2013
@@ -1213,11 +1213,16 @@ public:
     return false;
   }
 
-  /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
-  /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
-  /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
-  /// is expanded to mul + add.
-  virtual bool isFMAFasterThanMulAndAdd(EVT) const {
+  /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
+  /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
+  /// expanded to FMAs when this method returns true, otherwise fmuladd is
+  /// expanded to fmul + fadd.
+  ///
+  /// NOTE: This may be called before legalization on types for which FMAs are
+  /// not legal, but should return true if those types will eventually legalize
+  /// to types that support FMAs. After legalization, it will only be called on
+  /// types that support FMAs (via Legal or Custom actions)
+  virtual bool isFMAFasterThanFMulAndFAdd(EVT) const {
     return false;
   }
 

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=185956&r1=185955&r2=185956&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Jul  9 13:16:56 2013
@@ -6084,8 +6084,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N
   // FADD -> FMA combines:
   if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
        DAG.getTarget().Options.UnsafeFPMath) &&
-      DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
-      TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
+      DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) &&
+      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
 
     // fold (fadd (fmul x, y), z) -> (fma x, y, z)
     if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
@@ -6161,8 +6161,8 @@ SDValue DAGCombiner::visitFSUB(SDNode *N
   // FSUB -> FMA combines:
   if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
        DAG.getTarget().Options.UnsafeFPMath) &&
-      DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
-      TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {
+      DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) &&
+      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {
 
     // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
     if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp?rev=185956&r1=185955&r2=185956&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp Tue Jul  9 13:16:56 2013
@@ -4922,7 +4922,7 @@ SelectionDAGBuilder::visitIntrinsicCall(
   case Intrinsic::fmuladd: {
     EVT VT = TLI->getValueType(I.getType());
     if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
-        TLI->isFMAFasterThanMulAndAdd(VT)) {
+        TLI->isFMAFasterThanFMulAndFAdd(VT)) {
       setValue(&I, DAG.getNode(ISD::FMA, sdl,
                                getValue(I.getArgOperand(0)).getValueType(),
                                getValue(I.getArgOperand(0)),

Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=185956&r1=185955&r2=185956&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Tue Jul  9 13:16:56 2013
@@ -2798,6 +2798,27 @@ AArch64TargetLowering::PerformDAGCombine
   return SDValue();
 }
 
+bool
+AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+  VT = VT.getScalarType();
+
+  if (!VT.isSimple())
+    return false;
+
+  switch (VT.getSimpleVT().SimpleTy) {
+  case MVT::f16:
+  case MVT::f32:
+  case MVT::f64:
+    return true;
+  case MVT::f128:
+    return false;
+  default:
+    break;
+  }
+
+  return false;
+}
+
 AArch64TargetLowering::ConstraintType
 AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
   if (Constraint.size() == 1) {

Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h?rev=185956&r1=185955&r2=185956&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h Tue Jul  9 13:16:56 2013
@@ -229,11 +229,11 @@ public:
 
   virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
 
-  /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
-  /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
-  /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
-  /// is expanded to mul + add.
-  virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; }
+  /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
+  /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
+  /// expanded to FMAs when this method returns true, otherwise fmuladd is
+  /// expanded to fmul + fadd.
+  virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const;
 
   ConstraintType getConstraintType(const std::string &Constraint) const;
 

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=185956&r1=185955&r2=185956&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Tue Jul  9 13:16:56 2013
@@ -7809,18 +7809,15 @@ bool PPCTargetLowering::allowsUnalignedM
   return true;
 }
 
-/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
-/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
-/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
-/// is expanded to mul + add.
-bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
+bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+  VT = VT.getScalarType();
+
   if (!VT.isSimple())
     return false;
 
   switch (VT.getSimpleVT().SimpleTy) {
   case MVT::f32:
   case MVT::f64:
-  case MVT::v4f32:
     return true;
   default:
     break;

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=185956&r1=185955&r2=185956&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Tue Jul  9 13:16:56 2013
@@ -459,11 +459,11 @@ namespace llvm {
     /// relative to software emulation.
     virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const;
 
-    /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
-    /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
-    /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
-    /// is expanded to mul + add.
-    virtual bool isFMAFasterThanMulAndAdd(EVT VT) const;
+    /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
+    /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
+    /// expanded to FMAs when this method returns true, otherwise fmuladd is
+    /// expanded to fmul + fadd.
+    virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const;
 
   private:
     SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;

Modified: llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp?rev=185956&r1=185955&r2=185956&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp Tue Jul  9 13:16:56 2013
@@ -255,6 +255,26 @@ SystemZTargetLowering::SystemZTargetLowe
   MaxStoresPerMemsetOptSize = 0;
 }
 
+bool
+SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+  VT = VT.getScalarType();
+
+  if (!VT.isSimple())
+    return false;
+
+  switch (VT.getSimpleVT().SimpleTy) {
+  case MVT::f32:
+  case MVT::f64:
+    return true;
+  case MVT::f128:
+    return false;
+  default:
+    break;
+  }
+
+  return false;
+}
+
 bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
   // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
   return Imm.isZero() || Imm.isNegZero();

Modified: llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h?rev=185956&r1=185955&r2=185956&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h Tue Jul  9 13:16:56 2013
@@ -129,9 +129,7 @@ public:
   virtual EVT getSetCCResultType(LLVMContext &, EVT) const {
     return MVT::i32;
   }
-  virtual bool isFMAFasterThanMulAndAdd(EVT) const LLVM_OVERRIDE {
-    return true;
-  }
+  virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const LLVM_OVERRIDE;
   virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
   virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const;
   virtual const char *getTargetNodeName(unsigned Opcode) const LLVM_OVERRIDE;

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=185956&r1=185955&r2=185956&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Jul  9 13:16:56 2013
@@ -12966,6 +12966,27 @@ bool X86TargetLowering::isZExtFree(SDVal
   return false;
 }
 
+bool
+X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+  if (!(Subtarget->hasFMA() || Subtarget->hasFMA4()))
+    return false;
+
+  VT = VT.getScalarType();
+
+  if (!VT.isSimple())
+    return false;
+
+  switch (VT.getSimpleVT().SimpleTy) {
+  case MVT::f32:
+  case MVT::f64:
+    return true;
+  default:
+    break;
+  }
+
+  return false;
+}
+
 bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const {
   // i16 instructions are longer (0x66 prefix) and potentially slower.
   return !(VT1 == MVT::i32 && VT2 == MVT::i16);

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=185956&r1=185955&r2=185956&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Tue Jul  9 13:16:56 2013
@@ -646,11 +646,11 @@ namespace llvm {
     virtual bool isZExtFree(EVT VT1, EVT VT2) const;
     virtual bool isZExtFree(SDValue Val, EVT VT2) const;
 
-    /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
-    /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
-    /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
-    /// is expanded to mul + add.
-    virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; }
+    /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
+    /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
+    /// expanded to FMAs when this method returns true, otherwise fmuladd is
+    /// expanded to fmul + fadd.
+    virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const;
 
     /// isNarrowingProfitable - Return true if it's profitable to narrow
     /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow

Modified: llvm/trunk/test/CodeGen/AArch64/fp-dp3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/fp-dp3.ll?rev=185956&r1=185955&r2=185956&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/fp-dp3.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/fp-dp3.ll Tue Jul  9 13:16:56 2013
@@ -1,102 +1,136 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -fp-contract=fast | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s -check-prefix=CHECK-NOFAST
 
 declare float @llvm.fma.f32(float, float, float)
 declare double @llvm.fma.f64(double, double, double)
 
 define float @test_fmadd(float %a, float %b, float %c) {
 ; CHECK: test_fmadd:
+; CHECK-NOFAST: test_fmadd:
   %val = call float @llvm.fma.f32(float %a, float %b, float %c)
 ; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
   ret float %val
 }
 
 define float @test_fmsub(float %a, float %b, float %c) {
 ; CHECK: test_fmsub:
+; CHECK-NOFAST: test_fmsub:
   %nega = fsub float -0.0, %a
   %val = call float @llvm.fma.f32(float %nega, float %b, float %c)
 ; CHECK: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
   ret float %val
 }
 
 define float @test_fnmadd(float %a, float %b, float %c) {
 ; CHECK: test_fnmadd:
+; CHECK-NOFAST: test_fnmadd:
   %negc = fsub float -0.0, %c
   %val = call float @llvm.fma.f32(float %a, float %b, float %negc)
 ; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
   ret float %val
 }
 
 define float @test_fnmsub(float %a, float %b, float %c) {
 ; CHECK: test_fnmsub:
+; CHECK-NOFAST: test_fnmsub:
   %nega = fsub float -0.0, %a
   %negc = fsub float -0.0, %c
   %val = call float @llvm.fma.f32(float %nega, float %b, float %negc)
 ; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
   ret float %val
 }
 
 define double @testd_fmadd(double %a, double %b, double %c) {
 ; CHECK: testd_fmadd:
+; CHECK-NOFAST: testd_fmadd:
   %val = call double @llvm.fma.f64(double %a, double %b, double %c)
 ; CHECK: fmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NOFAST: fmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
   ret double %val
 }
 
 define double @testd_fmsub(double %a, double %b, double %c) {
 ; CHECK: testd_fmsub:
+; CHECK-NOFAST: testd_fmsub:
   %nega = fsub double -0.0, %a
   %val = call double @llvm.fma.f64(double %nega, double %b, double %c)
 ; CHECK: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NOFAST: fmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
   ret double %val
 }
 
 define double @testd_fnmadd(double %a, double %b, double %c) {
 ; CHECK: testd_fnmadd:
+; CHECK-NOFAST: testd_fnmadd:
   %negc = fsub double -0.0, %c
   %val = call double @llvm.fma.f64(double %a, double %b, double %negc)
 ; CHECK: fnmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NOFAST: fnmadd {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
   ret double %val
 }
 
 define double @testd_fnmsub(double %a, double %b, double %c) {
 ; CHECK: testd_fnmsub:
+; CHECK-NOFAST: testd_fnmsub:
   %nega = fsub double -0.0, %a
   %negc = fsub double -0.0, %c
   %val = call double @llvm.fma.f64(double %nega, double %b, double %negc)
 ; CHECK: fnmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
+; CHECK-NOFAST: fnmsub {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}, {{d[0-9]+}}
   ret double %val
 }
 
 define float @test_fmadd_unfused(float %a, float %b, float %c) {
 ; CHECK: test_fmadd_unfused:
+; CHECK-NOFAST: test_fmadd_unfused:
   %prod = fmul float %b, %c
   %sum = fadd float %a, %prod
 ; CHECK: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST-NOT: fmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
   ret float %sum
 }
 
 define float @test_fmsub_unfused(float %a, float %b, float %c) {
 ; CHECK: test_fmsub_unfused:
+; CHECK-NOFAST: test_fmsub_unfused:
   %prod = fmul float %b, %c
   %diff = fsub float %a, %prod
 ; CHECK: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST-NOT: fmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
   ret float %diff
 }
 
 define float @test_fnmadd_unfused(float %a, float %b, float %c) {
 ; CHECK: test_fnmadd_unfused:
+; CHECK-NOFAST: test_fnmadd_unfused:
   %nega = fsub float -0.0, %a
   %prod = fmul float %b, %c
   %sum = fadd float %nega, %prod
 ; CHECK: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST-NOT: fnmadd {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
   ret float %sum
 }
 
 define float @test_fnmsub_unfused(float %a, float %b, float %c) {
 ; CHECK: test_fnmsub_unfused:
+; CHECK-NOFAST: test_fnmsub_unfused:
   %nega = fsub float -0.0, %a
   %prod = fmul float %b, %c
   %diff = fsub float %nega, %prod
 ; CHECK: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST-NOT: fnmsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fneg {{s[0-9]+}}, {{s[0-9]+}}
+; CHECK-NOFAST: fsub {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
   ret float %diff
 }

Modified: llvm/trunk/test/CodeGen/AArch64/illegal-float-ops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/illegal-float-ops.ll?rev=185956&r1=185955&r2=185956&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/illegal-float-ops.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/illegal-float-ops.ll Tue Jul  9 13:16:56 2013
@@ -219,3 +219,29 @@ define void @test_frem(float %float, dou
 
   ret void
 }
+
+declare fp128 @llvm.fma.f128(fp128, fp128, fp128)
+
+define void @test_fma(fp128 %fp128) {
+; CHECK: test_fma:
+
+  %fmafp128 = call fp128 @llvm.fma.f128(fp128 %fp128, fp128 %fp128, fp128 %fp128)
+  store fp128 %fmafp128, fp128* @varfp128
+; CHECK: bl fmal
+
+  ret void
+}
+
+declare fp128 @llvm.fmuladd.f128(fp128, fp128, fp128)
+
+define void @test_fmuladd(fp128 %fp128) {
+; CHECK: test_fmuladd:
+
+  %fmuladdfp128 = call fp128 @llvm.fmuladd.f128(fp128 %fp128, fp128 %fp128, fp128 %fp128)
+  store fp128 %fmuladdfp128, fp128* @varfp128
+; CHECK-NOT: bl fmal
+; CHECK: bl __multf3
+; CHECK: bl __addtf3
+
+  ret void
+}

Added: llvm/trunk/test/CodeGen/PowerPC/vec_fmuladd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_fmuladd.ll?rev=185956&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_fmuladd.ll (added)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_fmuladd.ll Tue Jul  9 13:16:56 2013
@@ -0,0 +1,56 @@
+; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+declare <2 x float> @llvm.fmuladd.v2f32(<2 x float> %val, <2 x float>, <2 x float>)
+declare <4 x float> @llvm.fmuladd.v4f32(<4 x float> %val, <4 x float>, <4 x float>)
+declare <8 x float> @llvm.fmuladd.v8f32(<8 x float> %val, <8 x float>, <8 x float>)
+declare <2 x double> @llvm.fmuladd.v2f64(<2 x double> %val, <2 x double>, <2 x double>)
+declare <4 x double> @llvm.fmuladd.v4f64(<4 x double> %val, <4 x double>, <4 x double>)
+
+define <2 x float> @v2f32_fmuladd(<2 x float> %x) nounwind readnone {
+entry:
+  %fmuladd = call <2 x float> @llvm.fmuladd.v2f32 (<2 x float> %x, <2 x float> %x, <2 x float> %x)
+  ret <2 x float> %fmuladd
+}
+; fmuladd (<2 x float>) is promoted to fmuladd (<4 x float>)
+; CHECK: v2f32_fmuladd:
+; CHECK: vmaddfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+define <4 x float> @v4f32_fmuladd(<4 x float> %x) nounwind readnone {
+entry:
+  %fmuladd = call <4 x float> @llvm.fmuladd.v4f32 (<4 x float> %x, <4 x float> %x, <4 x float> %x)
+  ret <4 x float> %fmuladd
+}
+; CHECK: v4f32_fmuladd:
+; CHECK: vmaddfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+define <8 x float> @v8f32_fmuladd(<8 x float> %x) nounwind readnone {
+entry:
+  %fmuladd = call <8 x float> @llvm.fmuladd.v8f32 (<8 x float> %x, <8 x float> %x, <8 x float> %x)
+  ret <8 x float> %fmuladd
+}
+; CHECK: v8f32_fmuladd:
+; CHECK: vmaddfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vmaddfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+define <2 x double> @v2f64_fmuladd(<2 x double> %x) nounwind readnone {
+entry:
+  %fmuladd = call <2 x double> @llvm.fmuladd.v2f64 (<2 x double> %x, <2 x double> %x, <2 x double> %x)
+  ret <2 x double> %fmuladd
+}
+; CHECK: v2f64_fmuladd:
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+
+define <4 x double> @v4f64_fmuladd(<4 x double> %x) nounwind readnone {
+entry:
+  %fmuladd = call <4 x double> @llvm.fmuladd.v4f64 (<4 x double> %x, <4 x double> %x, <4 x double> %x)
+  ret <4 x double> %fmuladd
+}
+; CHECK: v4f64_fmuladd:
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} 
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} 
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} 
+; CHECK: fmadd {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} 

Added: llvm/trunk/test/CodeGen/X86/extended-fma-contraction.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/extended-fma-contraction.ll?rev=185956&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/extended-fma-contraction.ll (added)
+++ llvm/trunk/test/CodeGen/X86/extended-fma-contraction.ll Tue Jul  9 13:16:56 2013
@@ -0,0 +1,22 @@
+; RUN: llc -march=x86 -mattr=+fma4 -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; RUN: llc -march=x86 -mtriple=x86_64-apple-darwin < %s | FileCheck %s --check-prefix=CHECK-NOFMA
+
+; CHECK: fmafunc
+define <3 x float> @fmafunc(<3 x float> %a, <3 x float> %b, <3 x float> %c) {
+
+; CHECK-NOT: vmulps
+; CHECK-NOT: vaddps
+; CHECK: vfmaddps
+; CHECK-NOT: vmulps
+; CHECK-NOT: vaddps
+
+; CHECK-NOFMA-NOT: calll
+; CHECK-NOFMA: vmulps
+; CHECK-NOFMA: vaddps
+; CHECK-NOFMA-NOT: calll
+
+  %ret = tail call <3 x float> @llvm.fmuladd.v3f32(<3 x float> %a, <3 x float> %b, <3 x float> %c)
+  ret <3 x float> %ret
+}
+
+declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>) nounwind readnone

Added: llvm/trunk/test/CodeGen/X86/fma_patterns_wide.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma_patterns_wide.ll?rev=185956&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma_patterns_wide.ll (added)
+++ llvm/trunk/test/CodeGen/X86/fma_patterns_wide.ll Tue Jul  9 13:16:56 2013
@@ -0,0 +1,84 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=avx2,+fma -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=-fma4 -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver1 -fp-contract=fast | FileCheck %s --check-prefix=CHECK_FMA4
+
+; CHECK: test_x86_fmadd_ps_y_wide
+; CHECK: vfmadd213ps
+; CHECK: vfmadd213ps
+; CHECK: ret
+; CHECK_FMA4: test_x86_fmadd_ps_y_wide
+; CHECK_FMA4: vfmaddps
+; CHECK_FMA4: vfmaddps
+; CHECK_FMA4: ret
+define <16 x float> @test_x86_fmadd_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  %x = fmul <16 x float> %a0, %a1
+  %res = fadd <16 x float> %x, %a2
+  ret <16 x float> %res
+}
+
+; CHECK: test_x86_fmsub_ps_y_wide
+; CHECK: vfmsub213ps
+; CHECK: vfmsub213ps
+; CHECK: ret
+; CHECK_FMA4: test_x86_fmsub_ps_y_wide
+; CHECK_FMA4: vfmsubps
+; CHECK_FMA4: vfmsubps
+; CHECK_FMA4: ret
+define <16 x float> @test_x86_fmsub_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  %x = fmul <16 x float> %a0, %a1
+  %res = fsub <16 x float> %x, %a2
+  ret <16 x float> %res
+}
+
+; CHECK: test_x86_fnmadd_ps_y_wide
+; CHECK: vfnmadd213ps
+; CHECK: vfnmadd213ps
+; CHECK: ret
+; CHECK_FMA4: test_x86_fnmadd_ps_y_wide
+; CHECK_FMA4: vfnmaddps
+; CHECK_FMA4: vfnmaddps
+; CHECK_FMA4: ret
+define <16 x float> @test_x86_fnmadd_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  %x = fmul <16 x float> %a0, %a1
+  %res = fsub <16 x float> %a2, %x
+  ret <16 x float> %res
+}
+
+; CHECK: test_x86_fnmsub_ps_y_wide
+; CHECK: vfnmsub213ps
+; CHECK: vfnmsub213ps
+; CHECK: ret
+define <16 x float> @test_x86_fnmsub_ps_y_wide(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
+  %x = fmul <16 x float> %a0, %a1
+  %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
+  %res = fsub <16 x float> %y, %a2
+  ret <16 x float> %res
+}
+
+; CHECK: test_x86_fmadd_pd_y_wide
+; CHECK: vfmadd213pd
+; CHECK: vfmadd213pd
+; CHECK: ret
+; CHECK_FMA4: test_x86_fmadd_pd_y_wide
+; CHECK_FMA4: vfmaddpd
+; CHECK_FMA4: vfmaddpd
+; CHECK_FMA4: ret
+define <8 x double> @test_x86_fmadd_pd_y_wide(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+  %x = fmul <8 x double> %a0, %a1
+  %res = fadd <8 x double> %x, %a2
+  ret <8 x double> %res
+}
+
+; CHECK: test_x86_fmsub_pd_y_wide
+; CHECK: vfmsub213pd
+; CHECK: vfmsub213pd
+; CHECK: ret
+; CHECK_FMA4: test_x86_fmsub_pd_y_wide
+; CHECK_FMA4: vfmsubpd
+; CHECK_FMA4: vfmsubpd
+; CHECK_FMA4: ret
+define <8 x double> @test_x86_fmsub_pd_y_wide(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
+  %x = fmul <8 x double> %a0, %a1
+  %res = fsub <8 x double> %x, %a2
+  ret <8 x double> %res
+}

Modified: llvm/trunk/test/CodeGen/X86/wide-fma-contraction.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/wide-fma-contraction.ll?rev=185956&r1=185955&r2=185956&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/wide-fma-contraction.ll (original)
+++ llvm/trunk/test/CodeGen/X86/wide-fma-contraction.ll Tue Jul  9 13:16:56 2013
@@ -1,7 +1,9 @@
 ; RUN: llc -march=x86 -mattr=+fma4 -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+; RUN: llc -march=x86 -mtriple=x86_64-apple-darwin < %s | FileCheck %s --check-prefix=CHECK-NOFMA
 
 ; CHECK: fmafunc
 define <16 x float> @fmafunc(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
+
 ; CHECK-NOT: vmulps
 ; CHECK-NOT: vaddps
 ; CHECK: vfmaddps
@@ -10,11 +12,17 @@ define <16 x float> @fmafunc(<16 x float
 ; CHECK: vfmaddps
 ; CHECK-NOT: vmulps
 ; CHECK-NOT: vaddps
+
+; CHECK-NOFMA-NOT: calll
+; CHECK-NOFMA: vmulps
+; CHECK-NOFMA: vaddps
+; CHECK-NOFMA-NOT: calll
+; CHECK-NOFMA: vmulps
+; CHECK-NOFMA: vaddps
+; CHECK-NOFMA-NOT: calll
+
   %ret = tail call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c)
   ret <16 x float> %ret
 }
 
 declare <16 x float> @llvm.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>) nounwind readnone
-
-
-