[llvm] r318984 - [X86] Add separate intrinsics for scalar FMA4 instructions.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sat Nov 25 10:32:43 PST 2017


Author: ctopper
Date: Sat Nov 25 10:32:43 2017
New Revision: 318984

URL: http://llvm.org/viewvc/llvm-project?rev=318984&view=rev
Log:
[X86] Add separate intrinsics for scalar FMA4 instructions.

Summary:
These instructions zero the non-scalar part of the lower 128-bits which makes them different than the FMA3 instructions which pass through the non-scalar part of the lower 128-bits.

I've only added fmadd because we should be able to derive all other variants using operand negation in the intrinsic header like we do for AVX512.

I think there are still some missed negate folding opportunities with the FMA4 instructions in light of this behavior difference that I hadn't noticed before.

I've split the tests so that we can use different intrinsics for scalar testing between the two. I just copied the tests split the RUN lines and changed out the scalar intrinsics.

fma4-fneg-combine.ll is a new test to make sure we negate the fma4 intrinsics correctly though there are a couple TODOs in it.

Reviewers: RKSimon, spatel

Reviewed By: RKSimon

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D39851

Added:
    llvm/trunk/test/CodeGen/X86/fma4-commute-x86.ll
      - copied, changed from r318983, llvm/trunk/test/CodeGen/X86/fma-commute-x86.ll
    llvm/trunk/test/CodeGen/X86/fma4-fneg-combine.ll
    llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86.ll
    llvm/trunk/test/CodeGen/X86/fma4-scalar-memfold.ll
Modified:
    llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h
    llvm/trunk/include/llvm/IR/IntrinsicsX86.td
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h
    llvm/trunk/lib/Target/X86/X86InstrFMA.td
    llvm/trunk/lib/Target/X86/X86InstrFormats.td
    llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
    llvm/trunk/lib/Target/X86/X86InstrInfo.td
    llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
    llvm/trunk/lib/Target/X86/X86Subtarget.h
    llvm/trunk/test/CodeGen/X86/fma-commute-x86.ll
    llvm/trunk/test/CodeGen/X86/fma-intrinsics-x86.ll
    llvm/trunk/test/CodeGen/X86/fma-scalar-memfold.ll
    llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll

Modified: llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h?rev=318984&r1=318983&r2=318984&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h (original)
+++ llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h Sat Nov 25 10:32:43 2017
@@ -838,7 +838,7 @@ namespace ISD {
   /// which do not reference a specific memory location should be less than
   /// this value. Those that do must not be less than this value, and can
   /// be used with SelectionDAG::getMemIntrinsicNode.
-  static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END+300;
+  static const int FIRST_TARGET_MEMORY_OPCODE = BUILTIN_OP_END+400;
 
   //===--------------------------------------------------------------------===//
   /// MemIndexedMode enum - This enum defines the load / store indexed

Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=318984&r1=318983&r2=318984&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Sat Nov 25 10:32:43 2017
@@ -2116,6 +2116,14 @@ let TargetPrefix = "x86" in {  // All in
               Intrinsic<[llvm_v2f64_ty],
                         [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
                         [IntrNoMem]>;
+  def int_x86_fma4_vfmadd_ss : GCCBuiltin<"__builtin_ia32_vfmaddss">,
+              Intrinsic<[llvm_v4f32_ty],
+                        [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],
+                        [IntrNoMem]>;
+  def int_x86_fma4_vfmadd_sd : GCCBuiltin<"__builtin_ia32_vfmaddsd">,
+              Intrinsic<[llvm_v2f64_ty],
+                        [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty],
+                        [IntrNoMem]>;
   def int_x86_fma_vfmadd_ps : GCCBuiltin<"__builtin_ia32_vfmaddps">,
               Intrinsic<[llvm_v4f32_ty],
                         [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty],

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=318984&r1=318983&r2=318984&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sat Nov 25 10:32:43 2017
@@ -25169,6 +25169,10 @@ const char *X86TargetLowering::getTarget
   case X86ISD::FNMADDS3_RND:       return "X86ISD::FNMADDS3_RND";
   case X86ISD::FMSUBS3_RND:        return "X86ISD::FMSUBS3_RND";
   case X86ISD::FNMSUBS3_RND:       return "X86ISD::FNMSUBS3_RND";
+  case X86ISD::FMADD4S:            return "X86ISD::FMADD4S";
+  case X86ISD::FNMADD4S:           return "X86ISD::FNMADD4S";
+  case X86ISD::FMSUB4S:            return "X86ISD::FMSUB4S";
+  case X86ISD::FNMSUB4S:           return "X86ISD::FNMSUB4S";
   case X86ISD::VPMADD52H:          return "X86ISD::VPMADD52H";
   case X86ISD::VPMADD52L:          return "X86ISD::VPMADD52L";
   case X86ISD::VRNDSCALE:          return "X86ISD::VRNDSCALE";
@@ -35724,6 +35728,13 @@ static SDValue combineFMA(SDNode *N, Sel
     case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS3_RND; break;
     case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS3_RND; break;
     }
+  } else if (N->getOpcode() == X86ISD::FMADD4S) {
+    switch (NewOpcode) {
+    case ISD::FMA:       NewOpcode = X86ISD::FMADD4S; break;
+    case X86ISD::FMSUB:  NewOpcode = X86ISD::FMSUB4S; break;
+    case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADD4S; break;
+    case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUB4S; break;
+    }
   } else {
     llvm_unreachable("Unexpected opcode!");
   }
@@ -37092,6 +37103,7 @@ SDValue X86TargetLowering::PerformDAGCom
   case X86ISD::FMADDS3_RND:
   case X86ISD::FMADDS1:
   case X86ISD::FMADDS3:
+  case X86ISD::FMADD4S:
   case ISD::FMA:            return combineFMA(N, DAG, Subtarget);
   case X86ISD::FMADDSUB_RND:
   case X86ISD::FMSUBADD_RND:

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=318984&r1=318983&r2=318984&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Sat Nov 25 10:32:43 2017
@@ -505,6 +505,9 @@ namespace llvm {
       FMADDSUB_RND,
       FMSUBADD_RND,
 
+      // FMA4 specific scalar intrinsics bits that zero the non-scalar bits.
+      FMADD4S, FNMADD4S, FMSUB4S, FNMSUB4S,
+
       // Scalar intrinsic FMA.
       FMADDS1, FMADDS3,
       FNMADDS1, FNMADDS3,

Modified: llvm/trunk/lib/Target/X86/X86InstrFMA.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFMA.td?rev=318984&r1=318983&r2=318984&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFMA.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFMA.td Sat Nov 25 10:32:43 2017
@@ -253,18 +253,18 @@ let Constraints = "$src1 = $dst", isComm
     hasSideEffects = 0 in
 multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
                         Operand memopr, RegisterClass RC> {
-  def r_Int : FMA3S<opc, MRMSrcReg, (outs RC:$dst),
-                    (ins RC:$src1, RC:$src2, RC:$src3),
-                    !strconcat(OpcodeStr,
-                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
-                    []>;
+  def r_Int : FMA3S_Int<opc, MRMSrcReg, (outs RC:$dst),
+                        (ins RC:$src1, RC:$src2, RC:$src3),
+                        !strconcat(OpcodeStr,
+                                   "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+                        []>;
 
   let mayLoad = 1 in
-  def m_Int : FMA3S<opc, MRMSrcMem, (outs RC:$dst),
-                    (ins RC:$src1, RC:$src2, memopr:$src3),
-                    !strconcat(OpcodeStr,
-                               "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
-                    []>;
+  def m_Int : FMA3S_Int<opc, MRMSrcMem, (outs RC:$dst),
+                        (ins RC:$src1, RC:$src2, memopr:$src3),
+                        !strconcat(OpcodeStr,
+                                   "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
+                        []>;
 }
 
 // The FMA 213 form is created for lowering of scalar FMA intrinscis
@@ -385,20 +385,20 @@ let isCodeGenOnly = 1, ForceDisassemble
 multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
                      ValueType VT, ComplexPattern mem_cpat, SDNode OpNode> {
 let isCodeGenOnly = 1 in {
-  def rr_Int : FMA4S<opc, MRMSrcRegOp4, (outs VR128:$dst),
+  def rr_Int : FMA4S_Int<opc, MRMSrcRegOp4, (outs VR128:$dst),
                (ins VR128:$src1, VR128:$src2, VR128:$src3),
                !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                [(set VR128:$dst,
                  (VT (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>, VEX_W,
                VEX_LIG;
-  def rm_Int : FMA4S<opc, MRMSrcMemOp4, (outs VR128:$dst),
+  def rm_Int : FMA4S_Int<opc, MRMSrcMemOp4, (outs VR128:$dst),
                (ins VR128:$src1, VR128:$src2, memop:$src3),
                !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
                [(set VR128:$dst, (VT (OpNode VR128:$src1, VR128:$src2,
                                   mem_cpat:$src3)))]>, VEX_W, VEX_LIG;
-  def mr_Int : FMA4S<opc, MRMSrcMem, (outs VR128:$dst),
+  def mr_Int : FMA4S_Int<opc, MRMSrcMem, (outs VR128:$dst),
                (ins VR128:$src1, memop:$src2, VR128:$src3),
                !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
@@ -406,7 +406,7 @@ let isCodeGenOnly = 1 in {
                  (VT (OpNode VR128:$src1, mem_cpat:$src2, VR128:$src3)))]>,
                VEX_LIG;
 let hasSideEffects = 0 in
-  def rr_Int_REV : FMA4S<opc, MRMSrcReg, (outs VR128:$dst),
+  def rr_Int_REV : FMA4S_Int<opc, MRMSrcReg, (outs VR128:$dst),
                (ins VR128:$src1, VR128:$src2, VR128:$src3),
                !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
@@ -476,18 +476,18 @@ let ExeDomain = SSEPackedSingle in {
   // Scalar Instructions
   defm VFMADDSS4  : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>,
                     fma4s_int<0x6A, "vfmaddss", ssmem, v4f32, sse_load_f32,
-                              X86Fmadds1>;
+                              X86Fmadd4s>;
   defm VFMSUBSS4  : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>,
                     fma4s_int<0x6E, "vfmsubss", ssmem, v4f32, sse_load_f32,
-                              X86Fmsubs1>;
+                              X86Fmsub4s>;
   defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32,
                           X86Fnmadd, loadf32>,
                     fma4s_int<0x7A, "vfnmaddss", ssmem, v4f32, sse_load_f32,
-                              X86Fnmadds1>;
+                              X86Fnmadd4s>;
   defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32,
                           X86Fnmsub, loadf32>,
                     fma4s_int<0x7E, "vfnmsubss", ssmem, v4f32, sse_load_f32,
-                              X86Fnmsubs1>;
+                              X86Fnmsub4s>;
   // Packed Instructions
   defm VFMADDPS4    : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32,
                             loadv4f32, loadv8f32>;
@@ -507,18 +507,18 @@ let ExeDomain = SSEPackedDouble in {
   // Scalar Instructions
   defm VFMADDSD4  : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>,
                     fma4s_int<0x6B, "vfmaddsd", sdmem, v2f64, sse_load_f64,
-                              X86Fmadds1>;
+                              X86Fmadd4s>;
   defm VFMSUBSD4  : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>,
                     fma4s_int<0x6F, "vfmsubsd", sdmem, v2f64, sse_load_f64,
-                              X86Fmsubs1>;
+                              X86Fmsub4s>;
   defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64,
                           X86Fnmadd, loadf64>,
                     fma4s_int<0x7B, "vfnmaddsd", sdmem, v2f64, sse_load_f64,
-                              X86Fnmadds1>;
+                              X86Fnmadd4s>;
   defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64,
                           X86Fnmsub, loadf64>,
                     fma4s_int<0x7F, "vfnmsubsd", sdmem, v2f64, sse_load_f64,
-                              X86Fnmsubs1>;
+                              X86Fnmsub4s>;
   // Packed Instructions
   defm VFMADDPD4    : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64,
                             loadv2f64, loadv4f64>;

Modified: llvm/trunk/lib/Target/X86/X86InstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFormats.td?rev=318984&r1=318983&r2=318984&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFormats.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFormats.td Sat Nov 25 10:32:43 2017
@@ -862,10 +862,14 @@ class PCLMULIi8<bits<8> o, Format F, dag
 class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
            list<dag>pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin>, T8PD,
-        VEX_4V, FMASC, Requires<[HasFMA, NoVLX]>;
+        VEX_4V, FMASC, Requires<[HasFMA, NoFMA4, NoVLX]>;
 class FMA3S<bits<8> o, Format F, dag outs, dag ins, string asm,
             list<dag>pattern, InstrItinClass itin = NoItinerary>
       : I<o, F, outs, ins, asm, pattern, itin>, T8PD,
+        VEX_4V, FMASC, Requires<[HasFMA, NoFMA4, NoAVX512]>;
+class FMA3S_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
+                list<dag>pattern, InstrItinClass itin = NoItinerary>
+      : I<o, F, outs, ins, asm, pattern, itin>, T8PD,
         VEX_4V, FMASC, Requires<[HasFMA, NoAVX512]>;
 
 // FMA4 Instruction Templates
@@ -877,6 +881,10 @@ class FMA4S<bits<8> o, Format F, dag out
             list<dag>pattern, InstrItinClass itin = NoItinerary>
       : Ii8Reg<o, F, outs, ins, asm, pattern, itin>, TAPD,
         VEX_4V, FMASC, Requires<[HasFMA4, NoAVX512]>;
+class FMA4S_Int<bits<8> o, Format F, dag outs, dag ins, string asm,
+                list<dag>pattern, InstrItinClass itin = NoItinerary>
+      : Ii8Reg<o, F, outs, ins, asm, pattern, itin>, TAPD,
+        VEX_4V, FMASC, Requires<[HasFMA4]>;
 
 // XOP 2, 3 and 4 Operand Instruction Template
 class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm,

Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=318984&r1=318983&r2=318984&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Sat Nov 25 10:32:43 2017
@@ -506,6 +506,12 @@ def X86FnmsubRnd    : SDNode<"X86ISD::FN
 def X86FmaddsubRnd  : SDNode<"X86ISD::FMADDSUB_RND",  SDTFmaRound, [SDNPCommutative]>;
 def X86FmsubaddRnd  : SDNode<"X86ISD::FMSUBADD_RND",  SDTFmaRound, [SDNPCommutative]>;
 
+// Scalar FMA4 intrinsics which zero the non-scalar bits.
+def X86Fmadd4s  : SDNode<"X86ISD::FMADD4S",  SDTFPTernaryOp, [SDNPCommutative]>;
+def X86Fnmadd4s : SDNode<"X86ISD::FNMADD4S", SDTFPTernaryOp, [SDNPCommutative]>;
+def X86Fmsub4s  : SDNode<"X86ISD::FMSUB4S",  SDTFPTernaryOp, [SDNPCommutative]>;
+def X86Fnmsub4s : SDNode<"X86ISD::FNMSUB4S", SDTFPTernaryOp, [SDNPCommutative]>;
+
 // Scalar FMA intrinsics with passthru bits in operand 1.
 def X86Fmadds1  : SDNode<"X86ISD::FMADDS1",  SDTFPTernaryOp>;
 def X86Fnmadds1 : SDNode<"X86ISD::FNMADDS1", SDTFPTernaryOp>;

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=318984&r1=318983&r2=318984&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Sat Nov 25 10:32:43 2017
@@ -850,6 +850,7 @@ def NoVLX_Or_NoVPCLMULQDQ :
 def HasVPCLMULQDQ : Predicate<"Subtarget->hasVPCLMULQDQ()">;
 def HasFMA       : Predicate<"Subtarget->hasFMA()">;
 def HasFMA4      : Predicate<"Subtarget->hasFMA4()">;
+def NoFMA4       : Predicate<"!Subtarget->hasFMA4()">;
 def HasXOP       : Predicate<"Subtarget->hasXOP()">;
 def HasTBM       : Predicate<"Subtarget->hasTBM()">;
 def NoTBM        : Predicate<"!Subtarget->hasTBM()">;

Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=318984&r1=318983&r2=318984&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Sat Nov 25 10:32:43 2017
@@ -1593,6 +1593,8 @@ static const IntrinsicData  IntrinsicsWi
   X86_INTRINSIC_DATA(fma_vfnmsub_ps_256,   INTR_TYPE_3OP, X86ISD::FNMSUB, 0),
   X86_INTRINSIC_DATA(fma_vfnmsub_sd,       INTR_TYPE_3OP, X86ISD::FNMSUBS1, 0),
   X86_INTRINSIC_DATA(fma_vfnmsub_ss,       INTR_TYPE_3OP, X86ISD::FNMSUBS1, 0),
+  X86_INTRINSIC_DATA(fma4_vfmadd_sd,       INTR_TYPE_3OP, X86ISD::FMADD4S, 0),
+  X86_INTRINSIC_DATA(fma4_vfmadd_ss,       INTR_TYPE_3OP, X86ISD::FMADD4S, 0),
   X86_INTRINSIC_DATA(sse_cmp_ps,        INTR_TYPE_3OP, X86ISD::CMPP, 0),
   X86_INTRINSIC_DATA(sse_comieq_ss,     COMI, X86ISD::COMI, ISD::SETEQ),
   X86_INTRINSIC_DATA(sse_comige_ss,     COMI, X86ISD::COMI, ISD::SETGE),

Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=318984&r1=318983&r2=318984&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h Sat Nov 25 10:32:43 2017
@@ -482,7 +482,7 @@ public:
   bool hasVPCLMULQDQ() const { return HasVPCLMULQDQ; }
   // Prefer FMA4 to FMA - its better for commutation/memory folding and
   // has equal or better performance on all supported targets.
-  bool hasFMA() const { return HasFMA && !HasFMA4; }
+  bool hasFMA() const { return HasFMA; }
   bool hasFMA4() const { return HasFMA4; }
   bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
   bool hasXOP() const { return HasXOP; }

Modified: llvm/trunk/test/CodeGen/X86/fma-commute-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma-commute-x86.ll?rev=318984&r1=318983&r2=318984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma-commute-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma-commute-x86.ll Sat Nov 25 10:32:43 2017
@@ -2,7 +2,6 @@
 ; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s --check-prefix=FMA
 ; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+fma | FileCheck %s --check-prefix=FMA
 ; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s --check-prefix=FMA
-; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 | FileCheck %s --check-prefix=FMA4
 
 attributes #0 = { nounwind }
 
@@ -14,13 +13,6 @@ define <4 x float> @test_x86_fmadd_baa_s
 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
 ; FMA-NEXT:    vfmadd213ss %xmm1, %xmm1, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmadd_baa_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vmovaps (%rdx), %xmm1
-; FMA4-NEXT:    vfmaddss %xmm0, %xmm0, %xmm1, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -31,12 +23,6 @@ define <4 x float> @test_x86_fmadd_aba_s
 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
 ; FMA-NEXT:    vfmadd132ss (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmadd_aba_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vfmaddss %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -47,12 +33,6 @@ define <4 x float> @test_x86_fmadd_bba_s
 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
 ; FMA-NEXT:    vfmadd213ss (%rcx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmadd_bba_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rdx), %xmm0
-; FMA4-NEXT:    vfmaddss (%rcx), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -64,12 +44,6 @@ define <4 x float> @test_x86_fmadd_baa_p
 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
 ; FMA-NEXT:    vfmadd132ps (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmadd_baa_ps:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vfmaddps %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -80,12 +54,6 @@ define <4 x float> @test_x86_fmadd_aba_p
 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
 ; FMA-NEXT:    vfmadd231ps (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmadd_aba_ps:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vfmaddps %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -96,12 +64,6 @@ define <4 x float> @test_x86_fmadd_bba_p
 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
 ; FMA-NEXT:    vfmadd213ps (%rcx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmadd_bba_ps:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rdx), %xmm0
-; FMA4-NEXT:    vfmaddps (%rcx), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -113,12 +75,6 @@ define <8 x float> @test_x86_fmadd_baa_p
 ; FMA-NEXT:    vmovaps (%rcx), %ymm0
 ; FMA-NEXT:    vfmadd132ps (%rdx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmadd_baa_ps_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %ymm0
-; FMA4-NEXT:    vfmaddps %ymm0, (%rdx), %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
   ret <8 x float> %res
 }
@@ -129,12 +85,6 @@ define <8 x float> @test_x86_fmadd_aba_p
 ; FMA-NEXT:    vmovaps (%rcx), %ymm0
 ; FMA-NEXT:    vfmadd231ps (%rdx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmadd_aba_ps_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %ymm0
-; FMA4-NEXT:    vfmaddps %ymm0, (%rdx), %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
   ret <8 x float> %res
 }
@@ -145,12 +95,6 @@ define <8 x float> @test_x86_fmadd_bba_p
 ; FMA-NEXT:    vmovaps (%rdx), %ymm0
 ; FMA-NEXT:    vfmadd213ps (%rcx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmadd_bba_ps_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rdx), %ymm0
-; FMA4-NEXT:    vfmaddps (%rcx), %ymm0, %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
   ret <8 x float> %res
 }
@@ -163,13 +107,6 @@ define <2 x double> @test_x86_fmadd_baa_
 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
 ; FMA-NEXT:    vfmadd213sd %xmm1, %xmm1, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmadd_baa_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vmovapd (%rdx), %xmm1
-; FMA4-NEXT:    vfmaddsd %xmm0, %xmm0, %xmm1, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -180,12 +117,6 @@ define <2 x double> @test_x86_fmadd_aba_
 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
 ; FMA-NEXT:    vfmadd132sd (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmadd_aba_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vfmaddsd %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -196,12 +127,6 @@ define <2 x double> @test_x86_fmadd_bba_
 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
 ; FMA-NEXT:    vfmadd213sd (%rcx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmadd_bba_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rdx), %xmm0
-; FMA4-NEXT:    vfmaddsd (%rcx), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -213,12 +138,6 @@ define <2 x double> @test_x86_fmadd_baa_
 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
 ; FMA-NEXT:    vfmadd132pd (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmadd_baa_pd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vfmaddpd %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -229,12 +148,6 @@ define <2 x double> @test_x86_fmadd_aba_
 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
 ; FMA-NEXT:    vfmadd231pd (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmadd_aba_pd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vfmaddpd %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -245,12 +158,6 @@ define <2 x double> @test_x86_fmadd_bba_
 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
 ; FMA-NEXT:    vfmadd213pd (%rcx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmadd_bba_pd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rdx), %xmm0
-; FMA4-NEXT:    vfmaddpd (%rcx), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -262,12 +169,6 @@ define <4 x double> @test_x86_fmadd_baa_
 ; FMA-NEXT:    vmovapd (%rcx), %ymm0
 ; FMA-NEXT:    vfmadd132pd (%rdx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmadd_baa_pd_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %ymm0
-; FMA4-NEXT:    vfmaddpd %ymm0, (%rdx), %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
   ret <4 x double> %res
 }
@@ -278,12 +179,6 @@ define <4 x double> @test_x86_fmadd_aba_
 ; FMA-NEXT:    vmovapd (%rcx), %ymm0
 ; FMA-NEXT:    vfmadd231pd (%rdx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmadd_aba_pd_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %ymm0
-; FMA4-NEXT:    vfmaddpd %ymm0, (%rdx), %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
   ret <4 x double> %res
 }
@@ -294,12 +189,6 @@ define <4 x double> @test_x86_fmadd_bba_
 ; FMA-NEXT:    vmovapd (%rdx), %ymm0
 ; FMA-NEXT:    vfmadd213pd (%rcx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmadd_bba_pd_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rdx), %ymm0
-; FMA4-NEXT:    vfmaddpd (%rcx), %ymm0, %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
   ret <4 x double> %res
 }
@@ -313,13 +202,6 @@ define <4 x float> @test_x86_fnmadd_baa_
 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
 ; FMA-NEXT:    vfnmadd213ss %xmm1, %xmm1, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_baa_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vmovaps (%rdx), %xmm1
-; FMA4-NEXT:    vfnmaddss %xmm0, %xmm0, %xmm1, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -330,12 +212,6 @@ define <4 x float> @test_x86_fnmadd_aba_
 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
 ; FMA-NEXT:    vfnmadd132ss (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_aba_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vfnmaddss %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -346,12 +222,6 @@ define <4 x float> @test_x86_fnmadd_bba_
 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
 ; FMA-NEXT:    vfnmadd213ss (%rcx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_bba_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rdx), %xmm0
-; FMA4-NEXT:    vfnmaddss (%rcx), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -363,12 +233,6 @@ define <4 x float> @test_x86_fnmadd_baa_
 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
 ; FMA-NEXT:    vfnmadd132ps (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_baa_ps:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vfnmaddps %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -379,12 +243,6 @@ define <4 x float> @test_x86_fnmadd_aba_
 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
 ; FMA-NEXT:    vfnmadd231ps (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_aba_ps:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vfnmaddps %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -395,12 +253,6 @@ define <4 x float> @test_x86_fnmadd_bba_
 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
 ; FMA-NEXT:    vfnmadd213ps (%rcx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_bba_ps:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rdx), %xmm0
-; FMA4-NEXT:    vfnmaddps (%rcx), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -412,12 +264,6 @@ define <8 x float> @test_x86_fnmadd_baa_
 ; FMA-NEXT:    vmovaps (%rcx), %ymm0
 ; FMA-NEXT:    vfnmadd132ps (%rdx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_baa_ps_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %ymm0
-; FMA4-NEXT:    vfnmaddps %ymm0, (%rdx), %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
   ret <8 x float> %res
 }
@@ -428,12 +274,6 @@ define <8 x float> @test_x86_fnmadd_aba_
 ; FMA-NEXT:    vmovaps (%rcx), %ymm0
 ; FMA-NEXT:    vfnmadd231ps (%rdx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_aba_ps_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %ymm0
-; FMA4-NEXT:    vfnmaddps %ymm0, (%rdx), %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
   ret <8 x float> %res
 }
@@ -444,12 +284,6 @@ define <8 x float> @test_x86_fnmadd_bba_
 ; FMA-NEXT:    vmovaps (%rdx), %ymm0
 ; FMA-NEXT:    vfnmadd213ps (%rcx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_bba_ps_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rdx), %ymm0
-; FMA4-NEXT:    vfnmaddps (%rcx), %ymm0, %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
   ret <8 x float> %res
 }
@@ -462,13 +296,6 @@ define <2 x double> @test_x86_fnmadd_baa
 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
 ; FMA-NEXT:    vfnmadd213sd %xmm1, %xmm1, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_baa_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vmovapd (%rdx), %xmm1
-; FMA4-NEXT:    vfnmaddsd %xmm0, %xmm0, %xmm1, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -479,12 +306,6 @@ define <2 x double> @test_x86_fnmadd_aba
 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
 ; FMA-NEXT:    vfnmadd132sd (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_aba_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vfnmaddsd %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -495,12 +316,6 @@ define <2 x double> @test_x86_fnmadd_bba
 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
 ; FMA-NEXT:    vfnmadd213sd (%rcx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_bba_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rdx), %xmm0
-; FMA4-NEXT:    vfnmaddsd (%rcx), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -512,12 +327,6 @@ define <2 x double> @test_x86_fnmadd_baa
 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
 ; FMA-NEXT:    vfnmadd132pd (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_baa_pd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vfnmaddpd %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -528,12 +337,6 @@ define <2 x double> @test_x86_fnmadd_aba
 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
 ; FMA-NEXT:    vfnmadd231pd (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_aba_pd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vfnmaddpd %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -544,12 +347,6 @@ define <2 x double> @test_x86_fnmadd_bba
 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
 ; FMA-NEXT:    vfnmadd213pd (%rcx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_bba_pd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rdx), %xmm0
-; FMA4-NEXT:    vfnmaddpd (%rcx), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -561,12 +358,6 @@ define <4 x double> @test_x86_fnmadd_baa
 ; FMA-NEXT:    vmovapd (%rcx), %ymm0
 ; FMA-NEXT:    vfnmadd132pd (%rdx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_baa_pd_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %ymm0
-; FMA4-NEXT:    vfnmaddpd %ymm0, (%rdx), %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
   ret <4 x double> %res
 }
@@ -577,12 +368,6 @@ define <4 x double> @test_x86_fnmadd_aba
 ; FMA-NEXT:    vmovapd (%rcx), %ymm0
 ; FMA-NEXT:    vfnmadd231pd (%rdx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_aba_pd_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %ymm0
-; FMA4-NEXT:    vfnmaddpd %ymm0, (%rdx), %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
   ret <4 x double> %res
 }
@@ -593,12 +378,6 @@ define <4 x double> @test_x86_fnmadd_bba
 ; FMA-NEXT:    vmovapd (%rdx), %ymm0
 ; FMA-NEXT:    vfnmadd213pd (%rcx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_bba_pd_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rdx), %ymm0
-; FMA4-NEXT:    vfnmaddpd (%rcx), %ymm0, %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
   ret <4 x double> %res
 }
@@ -611,13 +390,6 @@ define <4 x float> @test_x86_fmsub_baa_s
 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
 ; FMA-NEXT:    vfmsub213ss %xmm1, %xmm1, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_baa_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vmovaps (%rdx), %xmm1
-; FMA4-NEXT:    vfmsubss %xmm0, %xmm0, %xmm1, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -628,12 +400,6 @@ define <4 x float> @test_x86_fmsub_aba_s
 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
 ; FMA-NEXT:    vfmsub132ss (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_aba_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vfmsubss %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -644,12 +410,6 @@ define <4 x float> @test_x86_fmsub_bba_s
 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
 ; FMA-NEXT:    vfmsub213ss (%rcx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_bba_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rdx), %xmm0
-; FMA4-NEXT:    vfmsubss (%rcx), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -661,12 +421,6 @@ define <4 x float> @test_x86_fmsub_baa_p
 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
 ; FMA-NEXT:    vfmsub132ps (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_baa_ps:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vfmsubps %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -677,12 +431,6 @@ define <4 x float> @test_x86_fmsub_aba_p
 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
 ; FMA-NEXT:    vfmsub231ps (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_aba_ps:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vfmsubps %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -693,12 +441,6 @@ define <4 x float> @test_x86_fmsub_bba_p
 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
 ; FMA-NEXT:    vfmsub213ps (%rcx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_bba_ps:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rdx), %xmm0
-; FMA4-NEXT:    vfmsubps (%rcx), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -710,12 +452,6 @@ define <8 x float> @test_x86_fmsub_baa_p
 ; FMA-NEXT:    vmovaps (%rcx), %ymm0
 ; FMA-NEXT:    vfmsub132ps (%rdx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_baa_ps_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %ymm0
-; FMA4-NEXT:    vfmsubps %ymm0, (%rdx), %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
   ret <8 x float> %res
 }
@@ -726,12 +462,6 @@ define <8 x float> @test_x86_fmsub_aba_p
 ; FMA-NEXT:    vmovaps (%rcx), %ymm0
 ; FMA-NEXT:    vfmsub231ps (%rdx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_aba_ps_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %ymm0
-; FMA4-NEXT:    vfmsubps %ymm0, (%rdx), %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
   ret <8 x float> %res
 }
@@ -742,12 +472,6 @@ define <8 x float> @test_x86_fmsub_bba_p
 ; FMA-NEXT:    vmovaps (%rdx), %ymm0
 ; FMA-NEXT:    vfmsub213ps (%rcx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_bba_ps_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rdx), %ymm0
-; FMA4-NEXT:    vfmsubps (%rcx), %ymm0, %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
   ret <8 x float> %res
 }
@@ -760,13 +484,6 @@ define <2 x double> @test_x86_fmsub_baa_
 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
 ; FMA-NEXT:    vfmsub213sd %xmm1, %xmm1, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_baa_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vmovapd (%rdx), %xmm1
-; FMA4-NEXT:    vfmsubsd %xmm0, %xmm0, %xmm1, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -777,12 +494,6 @@ define <2 x double> @test_x86_fmsub_aba_
 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
 ; FMA-NEXT:    vfmsub132sd (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_aba_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vfmsubsd %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -793,12 +504,6 @@ define <2 x double> @test_x86_fmsub_bba_
 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
 ; FMA-NEXT:    vfmsub213sd (%rcx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_bba_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rdx), %xmm0
-; FMA4-NEXT:    vfmsubsd (%rcx), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -810,12 +515,6 @@ define <2 x double> @test_x86_fmsub_baa_
 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
 ; FMA-NEXT:    vfmsub132pd (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_baa_pd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vfmsubpd %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -826,12 +525,6 @@ define <2 x double> @test_x86_fmsub_aba_
 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
 ; FMA-NEXT:    vfmsub231pd (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_aba_pd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vfmsubpd %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -842,12 +535,6 @@ define <2 x double> @test_x86_fmsub_bba_
 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
 ; FMA-NEXT:    vfmsub213pd (%rcx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_bba_pd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rdx), %xmm0
-; FMA4-NEXT:    vfmsubpd (%rcx), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -859,12 +546,6 @@ define <4 x double> @test_x86_fmsub_baa_
 ; FMA-NEXT:    vmovapd (%rcx), %ymm0
 ; FMA-NEXT:    vfmsub132pd (%rdx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_baa_pd_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %ymm0
-; FMA4-NEXT:    vfmsubpd %ymm0, (%rdx), %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
   ret <4 x double> %res
 }
@@ -875,12 +556,6 @@ define <4 x double> @test_x86_fmsub_aba_
 ; FMA-NEXT:    vmovapd (%rcx), %ymm0
 ; FMA-NEXT:    vfmsub231pd (%rdx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_aba_pd_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %ymm0
-; FMA4-NEXT:    vfmsubpd %ymm0, (%rdx), %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
   ret <4 x double> %res
 }
@@ -891,12 +566,6 @@ define <4 x double> @test_x86_fmsub_bba_
 ; FMA-NEXT:    vmovapd (%rdx), %ymm0
 ; FMA-NEXT:    vfmsub213pd (%rcx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_bba_pd_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rdx), %ymm0
-; FMA4-NEXT:    vfmsubpd (%rcx), %ymm0, %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
   ret <4 x double> %res
 }
@@ -910,13 +579,6 @@ define <4 x float> @test_x86_fnmsub_baa_
 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
 ; FMA-NEXT:    vfnmsub213ss %xmm1, %xmm1, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_baa_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vmovaps (%rdx), %xmm1
-; FMA4-NEXT:    vfnmsubss %xmm0, %xmm0, %xmm1, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -927,12 +589,6 @@ define <4 x float> @test_x86_fnmsub_aba_
 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
 ; FMA-NEXT:    vfnmsub132ss (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_aba_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vfnmsubss %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -943,12 +599,6 @@ define <4 x float> @test_x86_fnmsub_bba_
 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
 ; FMA-NEXT:    vfnmsub213ss (%rcx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_bba_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rdx), %xmm0
-; FMA4-NEXT:    vfnmsubss (%rcx), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -960,12 +610,6 @@ define <4 x float> @test_x86_fnmsub_baa_
 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
 ; FMA-NEXT:    vfnmsub132ps (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_baa_ps:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vfnmsubps %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -976,12 +620,6 @@ define <4 x float> @test_x86_fnmsub_aba_
 ; FMA-NEXT:    vmovaps (%rcx), %xmm0
 ; FMA-NEXT:    vfnmsub231ps (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_aba_ps:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vfnmsubps %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -992,12 +630,6 @@ define <4 x float> @test_x86_fnmsub_bba_
 ; FMA-NEXT:    vmovaps (%rdx), %xmm0
 ; FMA-NEXT:    vfnmsub213ps (%rcx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_bba_ps:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rdx), %xmm0
-; FMA4-NEXT:    vfnmsubps (%rcx), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
@@ -1009,12 +641,6 @@ define <8 x float> @test_x86_fnmsub_baa_
 ; FMA-NEXT:    vmovaps (%rcx), %ymm0
 ; FMA-NEXT:    vfnmsub132ps (%rdx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_baa_ps_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %ymm0
-; FMA4-NEXT:    vfnmsubps %ymm0, (%rdx), %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %a, <8 x float> %a) nounwind
   ret <8 x float> %res
 }
@@ -1025,12 +651,6 @@ define <8 x float> @test_x86_fnmsub_aba_
 ; FMA-NEXT:    vmovaps (%rcx), %ymm0
 ; FMA-NEXT:    vfnmsub231ps (%rdx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_aba_ps_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %ymm0
-; FMA4-NEXT:    vfnmsubps %ymm0, (%rdx), %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %a) nounwind
   ret <8 x float> %res
 }
@@ -1041,12 +661,6 @@ define <8 x float> @test_x86_fnmsub_bba_
 ; FMA-NEXT:    vmovaps (%rdx), %ymm0
 ; FMA-NEXT:    vfnmsub213ps (%rcx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_bba_ps_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rdx), %ymm0
-; FMA4-NEXT:    vfnmsubps (%rcx), %ymm0, %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %b, <8 x float> %b, <8 x float> %a) nounwind
   ret <8 x float> %res
 }
@@ -1059,13 +673,6 @@ define <2 x double> @test_x86_fnmsub_baa
 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
 ; FMA-NEXT:    vfnmsub213sd %xmm1, %xmm1, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_baa_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vmovapd (%rdx), %xmm1
-; FMA4-NEXT:    vfnmsubsd %xmm0, %xmm0, %xmm1, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -1076,12 +683,6 @@ define <2 x double> @test_x86_fnmsub_aba
 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
 ; FMA-NEXT:    vfnmsub132sd (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_aba_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vfnmsubsd %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -1092,12 +693,6 @@ define <2 x double> @test_x86_fnmsub_bba
 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
 ; FMA-NEXT:    vfnmsub213sd (%rcx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_bba_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rdx), %xmm0
-; FMA4-NEXT:    vfnmsubsd (%rcx), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -1109,12 +704,6 @@ define <2 x double> @test_x86_fnmsub_baa
 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
 ; FMA-NEXT:    vfnmsub132pd (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_baa_pd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vfnmsubpd %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -1125,12 +714,6 @@ define <2 x double> @test_x86_fnmsub_aba
 ; FMA-NEXT:    vmovapd (%rcx), %xmm0
 ; FMA-NEXT:    vfnmsub231pd (%rdx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_aba_pd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vfnmsubpd %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -1141,12 +724,6 @@ define <2 x double> @test_x86_fnmsub_bba
 ; FMA-NEXT:    vmovapd (%rdx), %xmm0
 ; FMA-NEXT:    vfnmsub213pd (%rcx), %xmm0, %xmm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_bba_pd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rdx), %xmm0
-; FMA4-NEXT:    vfnmsubpd (%rcx), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    retq
   %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
@@ -1158,12 +735,6 @@ define <4 x double> @test_x86_fnmsub_baa
 ; FMA-NEXT:    vmovapd (%rcx), %ymm0
 ; FMA-NEXT:    vfnmsub132pd (%rdx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_baa_pd_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %ymm0
-; FMA4-NEXT:    vfnmsubpd %ymm0, (%rdx), %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %a, <4 x double> %a) nounwind
   ret <4 x double> %res
 }
@@ -1174,12 +745,6 @@ define <4 x double> @test_x86_fnmsub_aba
 ; FMA-NEXT:    vmovapd (%rcx), %ymm0
 ; FMA-NEXT:    vfnmsub231pd (%rdx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_aba_pd_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %ymm0
-; FMA4-NEXT:    vfnmsubpd %ymm0, (%rdx), %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %a) nounwind
   ret <4 x double> %res
 }
@@ -1190,12 +755,6 @@ define <4 x double> @test_x86_fnmsub_bba
 ; FMA-NEXT:    vmovapd (%rdx), %ymm0
 ; FMA-NEXT:    vfnmsub213pd (%rcx), %ymm0, %ymm0
 ; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_bba_pd_y:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rdx), %ymm0
-; FMA4-NEXT:    vfnmsubpd (%rcx), %ymm0, %ymm0, %ymm0
-; FMA4-NEXT:    retq
   %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %b, <4 x double> %b, <4 x double> %a) nounwind
   ret <4 x double> %res
 }

Modified: llvm/trunk/test/CodeGen/X86/fma-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma-intrinsics-x86.ll?rev=318984&r1=318983&r2=318984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma-intrinsics-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma-intrinsics-x86.ll Sat Nov 25 10:32:43 2017
@@ -2,7 +2,6 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AVX512VL
 ; RUN: llc < %s -mtriple=x86_64-pc-windows -mattr=+fma,-fma4 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA-WIN
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma4,-fma -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FMA4
 
 ; VFMADD
 define <4 x float> @test_x86_fma_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
@@ -22,11 +21,6 @@ define <4 x float> @test_x86_fma_vfmadd_
 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
 ; CHECK-FMA-WIN-NEXT:    vfmadd213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa9,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_ss:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6a,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
   ret <4 x float> %res
 }
@@ -50,11 +44,6 @@ define <4 x float> @test_x86_fma_vfmadd_
 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfmadd213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa9,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_bac_ss:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmaddss %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6a,0xc2,0x00]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
   ret <4 x float> %res
 }
@@ -77,11 +66,6 @@ define <2 x double> @test_x86_fma_vfmadd
 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
 ; CHECK-FMA-WIN-NEXT:    vfmadd213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa9,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_sd:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
   ret <2 x double> %res
 }
@@ -105,11 +89,6 @@ define <2 x double> @test_x86_fma_vfmadd
 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfmadd213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa9,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_bac_sd:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmaddsd %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6b,0xc2,0x00]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
   ret <2 x double> %res
 }
@@ -132,11 +111,6 @@ define <4 x float> @test_x86_fma_vfmadd_
 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa8,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_ps:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
   ret <4 x float> %res
 }
@@ -159,11 +133,6 @@ define <2 x double> @test_x86_fma_vfmadd
 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa8,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_pd:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
   ret <2 x double> %res
 }
@@ -186,11 +155,6 @@ define <8 x float> @test_x86_fma_vfmadd_
 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa8,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_ps_256:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
   ret <8 x float> %res
 }
@@ -213,11 +177,6 @@ define <4 x double> @test_x86_fma_vfmadd
 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa8,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmadd_pd_256:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
   ret <4 x double> %res
 }
@@ -241,11 +200,6 @@ define <4 x float> @test_x86_fma_vfmsub_
 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
 ; CHECK-FMA-WIN-NEXT:    vfmsub213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xab,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_ss:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmsubss %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6e,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
   ret <4 x float> %res
 }
@@ -269,11 +223,6 @@ define <4 x float> @test_x86_fma_vfmsub_
 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfmsub213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xab,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_bac_ss:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmsubss %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6e,0xc2,0x00]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
   ret <4 x float> %res
 }
@@ -296,11 +245,6 @@ define <2 x double> @test_x86_fma_vfmsub
 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
 ; CHECK-FMA-WIN-NEXT:    vfmsub213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xab,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_sd:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6f,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
   ret <2 x double> %res
 }
@@ -324,11 +268,6 @@ define <2 x double> @test_x86_fma_vfmsub
 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfmsub213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xab,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_bac_sd:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmsubsd %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6f,0xc2,0x00]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
   ret <2 x double> %res
 }
@@ -351,11 +290,6 @@ define <4 x float> @test_x86_fma_vfmsub_
 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xaa,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_ps:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
   ret <4 x float> %res
 }
@@ -378,11 +312,6 @@ define <2 x double> @test_x86_fma_vfmsub
 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xaa,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_pd:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
   ret <2 x double> %res
 }
@@ -405,11 +334,6 @@ define <8 x float> @test_x86_fma_vfmsub_
 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xaa,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_ps_256:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
   ret <8 x float> %res
 }
@@ -432,11 +356,6 @@ define <4 x double> @test_x86_fma_vfmsub
 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xaa,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmsub_pd_256:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
   ret <4 x double> %res
 }
@@ -460,11 +379,6 @@ define <4 x float> @test_x86_fma_vfnmadd
 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
 ; CHECK-FMA-WIN-NEXT:    vfnmadd213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xad,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_ss:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7a,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
   ret <4 x float> %res
 }
@@ -488,11 +402,6 @@ define <4 x float> @test_x86_fma_vfnmadd
 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfnmadd213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xad,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_bac_ss:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfnmaddss %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x7a,0xc2,0x00]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
   ret <4 x float> %res
 }
@@ -515,11 +424,6 @@ define <2 x double> @test_x86_fma_vfnmad
 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
 ; CHECK-FMA-WIN-NEXT:    vfnmadd213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xad,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_sd:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7b,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
   ret <2 x double> %res
 }
@@ -543,11 +447,6 @@ define <2 x double> @test_x86_fma_vfnmad
 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfnmadd213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xad,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_bac_sd:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfnmaddsd %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x7b,0xc2,0x00]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
   ret <2 x double> %res
 }
@@ -570,11 +469,6 @@ define <4 x float> @test_x86_fma_vfnmadd
 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfnmadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xac,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_ps:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
   ret <4 x float> %res
 }
@@ -597,11 +491,6 @@ define <2 x double> @test_x86_fma_vfnmad
 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfnmadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xac,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_pd:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
   ret <2 x double> %res
 }
@@ -624,11 +513,6 @@ define <8 x float> @test_x86_fma_vfnmadd
 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfnmadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xac,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_ps_256:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
   ret <8 x float> %res
 }
@@ -651,11 +535,6 @@ define <4 x double> @test_x86_fma_vfnmad
 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfnmadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xac,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmadd_pd_256:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
   ret <4 x double> %res
 }
@@ -679,11 +558,6 @@ define <4 x float> @test_x86_fma_vfnmsub
 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rcx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x01]
 ; CHECK-FMA-WIN-NEXT:    vfnmsub213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xaf,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_ss:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7e,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
   ret <4 x float> %res
 }
@@ -707,11 +581,6 @@ define <4 x float> @test_x86_fma_vfnmsub
 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfnmsub213ss (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xaf,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_bac_ss:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfnmsubss %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x7e,0xc2,0x00]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
   ret <4 x float> %res
 }
@@ -734,11 +603,6 @@ define <2 x double> @test_x86_fma_vfnmsu
 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rcx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x01]
 ; CHECK-FMA-WIN-NEXT:    vfnmsub213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xaf,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_sd:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7f,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
   ret <2 x double> %res
 }
@@ -762,11 +626,6 @@ define <2 x double> @test_x86_fma_vfnmsu
 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfnmsub213sd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xaf,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_bac_sd:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfnmsubsd %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x7f,0xc2,0x00]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
   ret <2 x double> %res
 }
@@ -789,11 +648,6 @@ define <4 x float> @test_x86_fma_vfnmsub
 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfnmsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xae,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_ps:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
   ret <4 x float> %res
 }
@@ -816,11 +670,6 @@ define <2 x double> @test_x86_fma_vfnmsu
 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfnmsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xae,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_pd:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
   ret <2 x double> %res
 }
@@ -843,11 +692,6 @@ define <8 x float> @test_x86_fma_vfnmsub
 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfnmsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xae,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_ps_256:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
   ret <8 x float> %res
 }
@@ -870,11 +714,6 @@ define <4 x double> @test_x86_fma_vfnmsu
 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfnmsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xae,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfnmsub_pd_256:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
   ret <4 x double> %res
 }
@@ -898,11 +737,6 @@ define <4 x float> @test_x86_fma_vfmadds
 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfmaddsub213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa6,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmaddsub_ps:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
   ret <4 x float> %res
 }
@@ -925,11 +759,6 @@ define <2 x double> @test_x86_fma_vfmadd
 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfmaddsub213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa6,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmaddsub_pd:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
   ret <2 x double> %res
 }
@@ -952,11 +781,6 @@ define <8 x float> @test_x86_fma_vfmadds
 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfmaddsub213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa6,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmaddsub_ps_256:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
   ret <8 x float> %res
 }
@@ -979,11 +803,6 @@ define <4 x double> @test_x86_fma_vfmadd
 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfmaddsub213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa6,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmaddsub_pd_256:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
   ret <4 x double> %res
 }
@@ -1007,11 +826,6 @@ define <4 x float> @test_x86_fma_vfmsuba
 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %xmm0 # encoding: [0xc5,0xf8,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfmsubadd213ps (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0x71,0xa7,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmsubadd_ps:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
   ret <4 x float> %res
 }
@@ -1034,11 +848,6 @@ define <2 x double> @test_x86_fma_vfmsub
 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %xmm0 # encoding: [0xc5,0xf9,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfmsubadd213pd (%r8), %xmm1, %xmm0 # encoding: [0xc4,0xc2,0xf1,0xa7,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmsubadd_pd:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
   ret <2 x double> %res
 }
@@ -1061,11 +870,6 @@ define <8 x float> @test_x86_fma_vfmsuba
 ; CHECK-FMA-WIN-NEXT:    vmovaps (%rdx), %ymm0 # encoding: [0xc5,0xfc,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfmsubadd213ps (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0x75,0xa7,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmsubadd_ps_256:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
   ret <8 x float> %res
 }
@@ -1088,11 +892,6 @@ define <4 x double> @test_x86_fma_vfmsub
 ; CHECK-FMA-WIN-NEXT:    vmovapd (%rdx), %ymm0 # encoding: [0xc5,0xfd,0x28,0x02]
 ; CHECK-FMA-WIN-NEXT:    vfmsubadd213pd (%r8), %ymm1, %ymm0 # encoding: [0xc4,0xc2,0xf5,0xa7,0x00]
 ; CHECK-FMA-WIN-NEXT:    retq # encoding: [0xc3]
-;
-; CHECK-FMA4-LABEL: test_x86_fma_vfmsubadd_pd_256:
-; CHECK-FMA4:       # BB#0:
-; CHECK-FMA4-NEXT:    vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
-; CHECK-FMA4-NEXT:    retq # encoding: [0xc3]
   %res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
   ret <4 x double> %res
 }

Modified: llvm/trunk/test/CodeGen/X86/fma-scalar-memfold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma-scalar-memfold.ll?rev=318984&r1=318983&r2=318984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma-scalar-memfold.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma-scalar-memfold.ll Sat Nov 25 10:32:43 2017
@@ -1,7 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mcpu=core-avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
 ; RUN: llc < %s -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
-; RUN: llc < %s -mattr=fma4 | FileCheck %s --check-prefix=FMA4
 
 target triple = "x86_64-unknown-unknown"
 
@@ -22,13 +21,6 @@ define void @fmadd_aab_ss(float* %a, flo
 ; CHECK-NEXT:    vfmadd213ss (%rsi), %xmm0, %xmm0
 ; CHECK-NEXT:    vmovss %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
-;
-; FMA4-LABEL: fmadd_aab_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; FMA4-NEXT:    vfmaddss (%rsi), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    vmovss %xmm0, (%rdi)
-; FMA4-NEXT:    retq
   %a.val = load float, float* %a
   %av0 = insertelement <4 x float> undef, float %a.val, i32 0
   %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
@@ -55,13 +47,6 @@ define void @fmadd_aba_ss(float* %a, flo
 ; CHECK-NEXT:    vfmadd132ss (%rsi), %xmm0, %xmm0
 ; CHECK-NEXT:    vmovss %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
-;
-; FMA4-LABEL: fmadd_aba_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; FMA4-NEXT:    vfmaddss %xmm0, (%rsi), %xmm0, %xmm0
-; FMA4-NEXT:    vmovss %xmm0, (%rdi)
-; FMA4-NEXT:    retq
   %a.val = load float, float* %a
   %av0 = insertelement <4 x float> undef, float %a.val, i32 0
   %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
@@ -88,13 +73,6 @@ define void @fmsub_aab_ss(float* %a, flo
 ; CHECK-NEXT:    vfmsub213ss (%rsi), %xmm0, %xmm0
 ; CHECK-NEXT:    vmovss %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
-;
-; FMA4-LABEL: fmsub_aab_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; FMA4-NEXT:    vfmsubss (%rsi), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    vmovss %xmm0, (%rdi)
-; FMA4-NEXT:    retq
   %a.val = load float, float* %a
   %av0 = insertelement <4 x float> undef, float %a.val, i32 0
   %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
@@ -121,13 +99,6 @@ define void @fmsub_aba_ss(float* %a, flo
 ; CHECK-NEXT:    vfmsub132ss (%rsi), %xmm0, %xmm0
 ; CHECK-NEXT:    vmovss %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
-;
-; FMA4-LABEL: fmsub_aba_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; FMA4-NEXT:    vfmsubss %xmm0, (%rsi), %xmm0, %xmm0
-; FMA4-NEXT:    vmovss %xmm0, (%rdi)
-; FMA4-NEXT:    retq
   %a.val = load float, float* %a
   %av0 = insertelement <4 x float> undef, float %a.val, i32 0
   %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
@@ -154,13 +125,6 @@ define void @fnmadd_aab_ss(float* %a, fl
 ; CHECK-NEXT:    vfnmadd213ss (%rsi), %xmm0, %xmm0
 ; CHECK-NEXT:    vmovss %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
-;
-; FMA4-LABEL: fnmadd_aab_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; FMA4-NEXT:    vfnmaddss (%rsi), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    vmovss %xmm0, (%rdi)
-; FMA4-NEXT:    retq
   %a.val = load float, float* %a
   %av0 = insertelement <4 x float> undef, float %a.val, i32 0
   %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
@@ -187,13 +151,6 @@ define void @fnmadd_aba_ss(float* %a, fl
 ; CHECK-NEXT:    vfnmadd132ss (%rsi), %xmm0, %xmm0
 ; CHECK-NEXT:    vmovss %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
-;
-; FMA4-LABEL: fnmadd_aba_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; FMA4-NEXT:    vfnmaddss %xmm0, (%rsi), %xmm0, %xmm0
-; FMA4-NEXT:    vmovss %xmm0, (%rdi)
-; FMA4-NEXT:    retq
   %a.val = load float, float* %a
   %av0 = insertelement <4 x float> undef, float %a.val, i32 0
   %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
@@ -220,13 +177,6 @@ define void @fnmsub_aab_ss(float* %a, fl
 ; CHECK-NEXT:    vfnmsub213ss (%rsi), %xmm0, %xmm0
 ; CHECK-NEXT:    vmovss %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
-;
-; FMA4-LABEL: fnmsub_aab_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; FMA4-NEXT:    vfnmsubss (%rsi), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    vmovss %xmm0, (%rdi)
-; FMA4-NEXT:    retq
   %a.val = load float, float* %a
   %av0 = insertelement <4 x float> undef, float %a.val, i32 0
   %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
@@ -253,13 +203,6 @@ define void @fnmsub_aba_ss(float* %a, fl
 ; CHECK-NEXT:    vfnmsub132ss (%rsi), %xmm0, %xmm0
 ; CHECK-NEXT:    vmovss %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
-;
-; FMA4-LABEL: fnmsub_aba_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; FMA4-NEXT:    vfnmsubss %xmm0, (%rsi), %xmm0, %xmm0
-; FMA4-NEXT:    vmovss %xmm0, (%rdi)
-; FMA4-NEXT:    retq
   %a.val = load float, float* %a
   %av0 = insertelement <4 x float> undef, float %a.val, i32 0
   %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
@@ -286,13 +229,6 @@ define void @fmadd_aab_sd(double* %a, do
 ; CHECK-NEXT:    vfmadd213sd (%rsi), %xmm0, %xmm0
 ; CHECK-NEXT:    vmovlpd %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
-;
-; FMA4-LABEL: fmadd_aab_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; FMA4-NEXT:    vfmaddsd (%rsi), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    vmovlpd %xmm0, (%rdi)
-; FMA4-NEXT:    retq
   %a.val = load double, double* %a
   %av0 = insertelement <2 x double> undef, double %a.val, i32 0
   %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
@@ -315,13 +251,6 @@ define void @fmadd_aba_sd(double* %a, do
 ; CHECK-NEXT:    vfmadd132sd (%rsi), %xmm0, %xmm0
 ; CHECK-NEXT:    vmovlpd %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
-;
-; FMA4-LABEL: fmadd_aba_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; FMA4-NEXT:    vfmaddsd %xmm0, (%rsi), %xmm0, %xmm0
-; FMA4-NEXT:    vmovlpd %xmm0, (%rdi)
-; FMA4-NEXT:    retq
   %a.val = load double, double* %a
   %av0 = insertelement <2 x double> undef, double %a.val, i32 0
   %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
@@ -344,13 +273,6 @@ define void @fmsub_aab_sd(double* %a, do
 ; CHECK-NEXT:    vfmsub213sd (%rsi), %xmm0, %xmm0
 ; CHECK-NEXT:    vmovlpd %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
-;
-; FMA4-LABEL: fmsub_aab_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; FMA4-NEXT:    vfmsubsd (%rsi), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    vmovlpd %xmm0, (%rdi)
-; FMA4-NEXT:    retq
   %a.val = load double, double* %a
   %av0 = insertelement <2 x double> undef, double %a.val, i32 0
   %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
@@ -373,13 +295,6 @@ define void @fmsub_aba_sd(double* %a, do
 ; CHECK-NEXT:    vfmsub132sd (%rsi), %xmm0, %xmm0
 ; CHECK-NEXT:    vmovlpd %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
-;
-; FMA4-LABEL: fmsub_aba_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; FMA4-NEXT:    vfmsubsd %xmm0, (%rsi), %xmm0, %xmm0
-; FMA4-NEXT:    vmovlpd %xmm0, (%rdi)
-; FMA4-NEXT:    retq
   %a.val = load double, double* %a
   %av0 = insertelement <2 x double> undef, double %a.val, i32 0
   %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
@@ -402,13 +317,6 @@ define void @fnmadd_aab_sd(double* %a, d
 ; CHECK-NEXT:    vfnmadd213sd (%rsi), %xmm0, %xmm0
 ; CHECK-NEXT:    vmovlpd %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
-;
-; FMA4-LABEL: fnmadd_aab_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; FMA4-NEXT:    vfnmaddsd (%rsi), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    vmovlpd %xmm0, (%rdi)
-; FMA4-NEXT:    retq
   %a.val = load double, double* %a
   %av0 = insertelement <2 x double> undef, double %a.val, i32 0
   %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
@@ -431,13 +339,6 @@ define void @fnmadd_aba_sd(double* %a, d
 ; CHECK-NEXT:    vfnmadd132sd (%rsi), %xmm0, %xmm0
 ; CHECK-NEXT:    vmovlpd %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
-;
-; FMA4-LABEL: fnmadd_aba_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; FMA4-NEXT:    vfnmaddsd %xmm0, (%rsi), %xmm0, %xmm0
-; FMA4-NEXT:    vmovlpd %xmm0, (%rdi)
-; FMA4-NEXT:    retq
   %a.val = load double, double* %a
   %av0 = insertelement <2 x double> undef, double %a.val, i32 0
   %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
@@ -460,13 +361,6 @@ define void @fnmsub_aab_sd(double* %a, d
 ; CHECK-NEXT:    vfnmsub213sd (%rsi), %xmm0, %xmm0
 ; CHECK-NEXT:    vmovlpd %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
-;
-; FMA4-LABEL: fnmsub_aab_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; FMA4-NEXT:    vfnmsubsd (%rsi), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    vmovlpd %xmm0, (%rdi)
-; FMA4-NEXT:    retq
   %a.val = load double, double* %a
   %av0 = insertelement <2 x double> undef, double %a.val, i32 0
   %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
@@ -489,13 +383,6 @@ define void @fnmsub_aba_sd(double* %a, d
 ; CHECK-NEXT:    vfnmsub132sd (%rsi), %xmm0, %xmm0
 ; CHECK-NEXT:    vmovlpd %xmm0, (%rdi)
 ; CHECK-NEXT:    retq
-;
-; FMA4-LABEL: fnmsub_aba_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; FMA4-NEXT:    vfnmsubsd %xmm0, (%rsi), %xmm0, %xmm0
-; FMA4-NEXT:    vmovlpd %xmm0, (%rdi)
-; FMA4-NEXT:    retq
   %a.val = load double, double* %a
   %av0 = insertelement <2 x double> undef, double %a.val, i32 0
   %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1

Copied: llvm/trunk/test/CodeGen/X86/fma4-commute-x86.ll (from r318983, llvm/trunk/test/CodeGen/X86/fma-commute-x86.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma4-commute-x86.ll?p2=llvm/trunk/test/CodeGen/X86/fma4-commute-x86.ll&p1=llvm/trunk/test/CodeGen/X86/fma-commute-x86.ll&r1=318983&r2=318984&rev=318984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma-commute-x86.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma4-commute-x86.ll Sat Nov 25 10:32:43 2017
@@ -1,70 +1,41 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-pc-win32 -mcpu=core-avx2 | FileCheck %s --check-prefix=FMA
-; RUN: llc < %s -mtriple=x86_64-pc-win32 -mattr=+fma | FileCheck %s --check-prefix=FMA
-; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 -mattr=-fma4 | FileCheck %s --check-prefix=FMA
-; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 | FileCheck %s --check-prefix=FMA4
+; RUN: llc < %s -mcpu=bdver2 -mtriple=x86_64-pc-win32 | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA
 
 attributes #0 = { nounwind }
 
-declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+declare <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
 define <4 x float> @test_x86_fmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fmadd_baa_ss:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %xmm1
-; FMA-NEXT:    vmovaps (%rdx), %xmm0
-; FMA-NEXT:    vfmadd213ss %xmm1, %xmm1, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmadd_baa_ss:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vmovaps (%rdx), %xmm1
-; FMA4-NEXT:    vfmaddss %xmm0, %xmm0, %xmm1, %xmm0
+; FMA4-NEXT:    vfmaddss %xmm0, (%rdx), %xmm0, %xmm0
 ; FMA4-NEXT:    retq
-  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
+  %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
 
 define <4 x float> @test_x86_fmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fmadd_aba_ss:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %xmm0
-; FMA-NEXT:    vfmadd132ss (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmadd_aba_ss:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
 ; FMA4-NEXT:    vfmaddss %xmm0, (%rdx), %xmm0, %xmm0
 ; FMA4-NEXT:    retq
-  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
+  %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
 
 define <4 x float> @test_x86_fmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fmadd_bba_ss:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rdx), %xmm0
-; FMA-NEXT:    vfmadd213ss (%rcx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmadd_bba_ss:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rdx), %xmm0
 ; FMA4-NEXT:    vfmaddss (%rcx), %xmm0, %xmm0, %xmm0
 ; FMA4-NEXT:    retq
-  %res = call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
+  %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
   ret <4 x float> %res
 }
 
 declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
 define <4 x float> @test_x86_fmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fmadd_baa_ps:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %xmm0
-; FMA-NEXT:    vfmadd132ps (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmadd_baa_ps:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
@@ -75,12 +46,6 @@ define <4 x float> @test_x86_fmadd_baa_p
 }
 
 define <4 x float> @test_x86_fmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fmadd_aba_ps:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %xmm0
-; FMA-NEXT:    vfmadd231ps (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmadd_aba_ps:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
@@ -91,12 +56,6 @@ define <4 x float> @test_x86_fmadd_aba_p
 }
 
 define <4 x float> @test_x86_fmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fmadd_bba_ps:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rdx), %xmm0
-; FMA-NEXT:    vfmadd213ps (%rcx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmadd_bba_ps:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rdx), %xmm0
@@ -108,12 +67,6 @@ define <4 x float> @test_x86_fmadd_bba_p
 
 declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
 define <8 x float> @test_x86_fmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
-; FMA-LABEL: test_x86_fmadd_baa_ps_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %ymm0
-; FMA-NEXT:    vfmadd132ps (%rdx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmadd_baa_ps_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
@@ -124,12 +77,6 @@ define <8 x float> @test_x86_fmadd_baa_p
 }
 
 define <8 x float> @test_x86_fmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
-; FMA-LABEL: test_x86_fmadd_aba_ps_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %ymm0
-; FMA-NEXT:    vfmadd231ps (%rdx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmadd_aba_ps_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
@@ -140,12 +87,6 @@ define <8 x float> @test_x86_fmadd_aba_p
 }
 
 define <8 x float> @test_x86_fmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
-; FMA-LABEL: test_x86_fmadd_bba_ps_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rdx), %ymm0
-; FMA-NEXT:    vfmadd213ps (%rcx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmadd_bba_ps_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rdx), %ymm0
@@ -155,65 +96,39 @@ define <8 x float> @test_x86_fmadd_bba_p
   ret <8 x float> %res
 }
 
-declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
+declare <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
 define <2 x double> @test_x86_fmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fmadd_baa_sd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %xmm1
-; FMA-NEXT:    vmovapd (%rdx), %xmm0
-; FMA-NEXT:    vfmadd213sd %xmm1, %xmm1, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmadd_baa_sd:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vmovapd (%rdx), %xmm1
-; FMA4-NEXT:    vfmaddsd %xmm0, %xmm0, %xmm1, %xmm0
+; FMA4-NEXT:    vfmaddsd %xmm0, (%rdx), %xmm0, %xmm0
 ; FMA4-NEXT:    retq
-  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
+  %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
 
 define <2 x double> @test_x86_fmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fmadd_aba_sd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %xmm0
-; FMA-NEXT:    vfmadd132sd (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmadd_aba_sd:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
 ; FMA4-NEXT:    vfmaddsd %xmm0, (%rdx), %xmm0, %xmm0
 ; FMA4-NEXT:    retq
-  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
+  %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
 
 define <2 x double> @test_x86_fmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fmadd_bba_sd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rdx), %xmm0
-; FMA-NEXT:    vfmadd213sd (%rcx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmadd_bba_sd:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rdx), %xmm0
 ; FMA4-NEXT:    vfmaddsd (%rcx), %xmm0, %xmm0, %xmm0
 ; FMA4-NEXT:    retq
-  %res = call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
+  %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
   ret <2 x double> %res
 }
 
 declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
 define <2 x double> @test_x86_fmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fmadd_baa_pd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %xmm0
-; FMA-NEXT:    vfmadd132pd (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmadd_baa_pd:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
@@ -224,12 +139,6 @@ define <2 x double> @test_x86_fmadd_baa_
 }
 
 define <2 x double> @test_x86_fmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fmadd_aba_pd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %xmm0
-; FMA-NEXT:    vfmadd231pd (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmadd_aba_pd:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
@@ -240,12 +149,6 @@ define <2 x double> @test_x86_fmadd_aba_
 }
 
 define <2 x double> @test_x86_fmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fmadd_bba_pd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rdx), %xmm0
-; FMA-NEXT:    vfmadd213pd (%rcx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmadd_bba_pd:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rdx), %xmm0
@@ -257,12 +160,6 @@ define <2 x double> @test_x86_fmadd_bba_
 
 declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
 define <4 x double> @test_x86_fmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
-; FMA-LABEL: test_x86_fmadd_baa_pd_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %ymm0
-; FMA-NEXT:    vfmadd132pd (%rdx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmadd_baa_pd_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
@@ -273,12 +170,6 @@ define <4 x double> @test_x86_fmadd_baa_
 }
 
 define <4 x double> @test_x86_fmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
-; FMA-LABEL: test_x86_fmadd_aba_pd_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %ymm0
-; FMA-NEXT:    vfmadd231pd (%rdx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmadd_aba_pd_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
@@ -289,12 +180,6 @@ define <4 x double> @test_x86_fmadd_aba_
 }
 
 define <4 x double> @test_x86_fmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
-; FMA-LABEL: test_x86_fmadd_bba_pd_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rdx), %ymm0
-; FMA-NEXT:    vfmadd213pd (%rcx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmadd_bba_pd_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rdx), %ymm0
@@ -304,66 +189,8 @@ define <4 x double> @test_x86_fmadd_bba_
   ret <4 x double> %res
 }
 
-
-declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
-define <4 x float> @test_x86_fnmadd_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fnmadd_baa_ss:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %xmm1
-; FMA-NEXT:    vmovaps (%rdx), %xmm0
-; FMA-NEXT:    vfnmadd213ss %xmm1, %xmm1, %xmm0
-; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_baa_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vmovaps (%rdx), %xmm1
-; FMA4-NEXT:    vfnmaddss %xmm0, %xmm0, %xmm1, %xmm0
-; FMA4-NEXT:    retq
-  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
-  ret <4 x float> %res
-}
-
-define <4 x float> @test_x86_fnmadd_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fnmadd_aba_ss:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %xmm0
-; FMA-NEXT:    vfnmadd132ss (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_aba_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vfnmaddss %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
-  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
-  ret <4 x float> %res
-}
-
-define <4 x float> @test_x86_fnmadd_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fnmadd_bba_ss:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rdx), %xmm0
-; FMA-NEXT:    vfnmadd213ss (%rcx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_bba_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rdx), %xmm0
-; FMA4-NEXT:    vfnmaddss (%rcx), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    retq
-  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
-  ret <4 x float> %res
-}
-
 declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
 define <4 x float> @test_x86_fnmadd_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fnmadd_baa_ps:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %xmm0
-; FMA-NEXT:    vfnmadd132ps (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmadd_baa_ps:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
@@ -374,12 +201,6 @@ define <4 x float> @test_x86_fnmadd_baa_
 }
 
 define <4 x float> @test_x86_fnmadd_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fnmadd_aba_ps:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %xmm0
-; FMA-NEXT:    vfnmadd231ps (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmadd_aba_ps:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
@@ -390,12 +211,6 @@ define <4 x float> @test_x86_fnmadd_aba_
 }
 
 define <4 x float> @test_x86_fnmadd_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fnmadd_bba_ps:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rdx), %xmm0
-; FMA-NEXT:    vfnmadd213ps (%rcx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmadd_bba_ps:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rdx), %xmm0
@@ -407,12 +222,6 @@ define <4 x float> @test_x86_fnmadd_bba_
 
 declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
 define <8 x float> @test_x86_fnmadd_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
-; FMA-LABEL: test_x86_fnmadd_baa_ps_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %ymm0
-; FMA-NEXT:    vfnmadd132ps (%rdx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmadd_baa_ps_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
@@ -423,12 +232,6 @@ define <8 x float> @test_x86_fnmadd_baa_
 }
 
 define <8 x float> @test_x86_fnmadd_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
-; FMA-LABEL: test_x86_fnmadd_aba_ps_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %ymm0
-; FMA-NEXT:    vfnmadd231ps (%rdx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmadd_aba_ps_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
@@ -439,12 +242,6 @@ define <8 x float> @test_x86_fnmadd_aba_
 }
 
 define <8 x float> @test_x86_fnmadd_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
-; FMA-LABEL: test_x86_fnmadd_bba_ps_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rdx), %ymm0
-; FMA-NEXT:    vfnmadd213ps (%rcx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmadd_bba_ps_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rdx), %ymm0
@@ -454,65 +251,8 @@ define <8 x float> @test_x86_fnmadd_bba_
   ret <8 x float> %res
 }
 
-declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
-define <2 x double> @test_x86_fnmadd_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fnmadd_baa_sd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %xmm1
-; FMA-NEXT:    vmovapd (%rdx), %xmm0
-; FMA-NEXT:    vfnmadd213sd %xmm1, %xmm1, %xmm0
-; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_baa_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vmovapd (%rdx), %xmm1
-; FMA4-NEXT:    vfnmaddsd %xmm0, %xmm0, %xmm1, %xmm0
-; FMA4-NEXT:    retq
-  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
-  ret <2 x double> %res
-}
-
-define <2 x double> @test_x86_fnmadd_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fnmadd_aba_sd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %xmm0
-; FMA-NEXT:    vfnmadd132sd (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_aba_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vfnmaddsd %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
-  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
-  ret <2 x double> %res
-}
-
-define <2 x double> @test_x86_fnmadd_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fnmadd_bba_sd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rdx), %xmm0
-; FMA-NEXT:    vfnmadd213sd (%rcx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmadd_bba_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rdx), %xmm0
-; FMA4-NEXT:    vfnmaddsd (%rcx), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    retq
-  %res = call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
-  ret <2 x double> %res
-}
-
 declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
 define <2 x double> @test_x86_fnmadd_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fnmadd_baa_pd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %xmm0
-; FMA-NEXT:    vfnmadd132pd (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmadd_baa_pd:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
@@ -523,12 +263,6 @@ define <2 x double> @test_x86_fnmadd_baa
 }
 
 define <2 x double> @test_x86_fnmadd_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fnmadd_aba_pd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %xmm0
-; FMA-NEXT:    vfnmadd231pd (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmadd_aba_pd:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
@@ -539,12 +273,6 @@ define <2 x double> @test_x86_fnmadd_aba
 }
 
 define <2 x double> @test_x86_fnmadd_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fnmadd_bba_pd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rdx), %xmm0
-; FMA-NEXT:    vfnmadd213pd (%rcx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmadd_bba_pd:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rdx), %xmm0
@@ -556,12 +284,6 @@ define <2 x double> @test_x86_fnmadd_bba
 
 declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
 define <4 x double> @test_x86_fnmadd_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
-; FMA-LABEL: test_x86_fnmadd_baa_pd_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %ymm0
-; FMA-NEXT:    vfnmadd132pd (%rdx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmadd_baa_pd_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
@@ -572,12 +294,6 @@ define <4 x double> @test_x86_fnmadd_baa
 }
 
 define <4 x double> @test_x86_fnmadd_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
-; FMA-LABEL: test_x86_fnmadd_aba_pd_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %ymm0
-; FMA-NEXT:    vfnmadd231pd (%rdx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmadd_aba_pd_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
@@ -588,12 +304,6 @@ define <4 x double> @test_x86_fnmadd_aba
 }
 
 define <4 x double> @test_x86_fnmadd_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
-; FMA-LABEL: test_x86_fnmadd_bba_pd_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rdx), %ymm0
-; FMA-NEXT:    vfnmadd213pd (%rcx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmadd_bba_pd_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rdx), %ymm0
@@ -603,65 +313,8 @@ define <4 x double> @test_x86_fnmadd_bba
   ret <4 x double> %res
 }
 
-declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
-define <4 x float> @test_x86_fmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fmsub_baa_ss:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %xmm1
-; FMA-NEXT:    vmovaps (%rdx), %xmm0
-; FMA-NEXT:    vfmsub213ss %xmm1, %xmm1, %xmm0
-; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_baa_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vmovaps (%rdx), %xmm1
-; FMA4-NEXT:    vfmsubss %xmm0, %xmm0, %xmm1, %xmm0
-; FMA4-NEXT:    retq
-  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
-  ret <4 x float> %res
-}
-
-define <4 x float> @test_x86_fmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fmsub_aba_ss:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %xmm0
-; FMA-NEXT:    vfmsub132ss (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_aba_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vfmsubss %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
-  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
-  ret <4 x float> %res
-}
-
-define <4 x float> @test_x86_fmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fmsub_bba_ss:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rdx), %xmm0
-; FMA-NEXT:    vfmsub213ss (%rcx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_bba_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rdx), %xmm0
-; FMA4-NEXT:    vfmsubss (%rcx), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    retq
-  %res = call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
-  ret <4 x float> %res
-}
-
 declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
 define <4 x float> @test_x86_fmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fmsub_baa_ps:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %xmm0
-; FMA-NEXT:    vfmsub132ps (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmsub_baa_ps:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
@@ -672,12 +325,6 @@ define <4 x float> @test_x86_fmsub_baa_p
 }
 
 define <4 x float> @test_x86_fmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fmsub_aba_ps:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %xmm0
-; FMA-NEXT:    vfmsub231ps (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmsub_aba_ps:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
@@ -688,12 +335,6 @@ define <4 x float> @test_x86_fmsub_aba_p
 }
 
 define <4 x float> @test_x86_fmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fmsub_bba_ps:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rdx), %xmm0
-; FMA-NEXT:    vfmsub213ps (%rcx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmsub_bba_ps:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rdx), %xmm0
@@ -705,12 +346,6 @@ define <4 x float> @test_x86_fmsub_bba_p
 
 declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
 define <8 x float> @test_x86_fmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
-; FMA-LABEL: test_x86_fmsub_baa_ps_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %ymm0
-; FMA-NEXT:    vfmsub132ps (%rdx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmsub_baa_ps_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
@@ -721,12 +356,6 @@ define <8 x float> @test_x86_fmsub_baa_p
 }
 
 define <8 x float> @test_x86_fmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
-; FMA-LABEL: test_x86_fmsub_aba_ps_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %ymm0
-; FMA-NEXT:    vfmsub231ps (%rdx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmsub_aba_ps_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
@@ -737,12 +366,6 @@ define <8 x float> @test_x86_fmsub_aba_p
 }
 
 define <8 x float> @test_x86_fmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
-; FMA-LABEL: test_x86_fmsub_bba_ps_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rdx), %ymm0
-; FMA-NEXT:    vfmsub213ps (%rcx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmsub_bba_ps_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rdx), %ymm0
@@ -752,65 +375,8 @@ define <8 x float> @test_x86_fmsub_bba_p
   ret <8 x float> %res
 }
 
-declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
-define <2 x double> @test_x86_fmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fmsub_baa_sd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %xmm1
-; FMA-NEXT:    vmovapd (%rdx), %xmm0
-; FMA-NEXT:    vfmsub213sd %xmm1, %xmm1, %xmm0
-; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_baa_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vmovapd (%rdx), %xmm1
-; FMA4-NEXT:    vfmsubsd %xmm0, %xmm0, %xmm1, %xmm0
-; FMA4-NEXT:    retq
-  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
-  ret <2 x double> %res
-}
-
-define <2 x double> @test_x86_fmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fmsub_aba_sd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %xmm0
-; FMA-NEXT:    vfmsub132sd (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_aba_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vfmsubsd %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
-  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
-  ret <2 x double> %res
-}
-
-define <2 x double> @test_x86_fmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fmsub_bba_sd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rdx), %xmm0
-; FMA-NEXT:    vfmsub213sd (%rcx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fmsub_bba_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rdx), %xmm0
-; FMA4-NEXT:    vfmsubsd (%rcx), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    retq
-  %res = call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
-  ret <2 x double> %res
-}
-
 declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
 define <2 x double> @test_x86_fmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fmsub_baa_pd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %xmm0
-; FMA-NEXT:    vfmsub132pd (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmsub_baa_pd:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
@@ -821,12 +387,6 @@ define <2 x double> @test_x86_fmsub_baa_
 }
 
 define <2 x double> @test_x86_fmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fmsub_aba_pd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %xmm0
-; FMA-NEXT:    vfmsub231pd (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmsub_aba_pd:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
@@ -837,12 +397,6 @@ define <2 x double> @test_x86_fmsub_aba_
 }
 
 define <2 x double> @test_x86_fmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fmsub_bba_pd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rdx), %xmm0
-; FMA-NEXT:    vfmsub213pd (%rcx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmsub_bba_pd:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rdx), %xmm0
@@ -854,12 +408,6 @@ define <2 x double> @test_x86_fmsub_bba_
 
 declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
 define <4 x double> @test_x86_fmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
-; FMA-LABEL: test_x86_fmsub_baa_pd_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %ymm0
-; FMA-NEXT:    vfmsub132pd (%rdx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmsub_baa_pd_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
@@ -870,12 +418,6 @@ define <4 x double> @test_x86_fmsub_baa_
 }
 
 define <4 x double> @test_x86_fmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
-; FMA-LABEL: test_x86_fmsub_aba_pd_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %ymm0
-; FMA-NEXT:    vfmsub231pd (%rdx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmsub_aba_pd_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
@@ -886,12 +428,6 @@ define <4 x double> @test_x86_fmsub_aba_
 }
 
 define <4 x double> @test_x86_fmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
-; FMA-LABEL: test_x86_fmsub_bba_pd_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rdx), %ymm0
-; FMA-NEXT:    vfmsub213pd (%rcx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fmsub_bba_pd_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rdx), %ymm0
@@ -901,66 +437,8 @@ define <4 x double> @test_x86_fmsub_bba_
   ret <4 x double> %res
 }
 
-
-declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
-define <4 x float> @test_x86_fnmsub_baa_ss(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fnmsub_baa_ss:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %xmm1
-; FMA-NEXT:    vmovaps (%rdx), %xmm0
-; FMA-NEXT:    vfnmsub213ss %xmm1, %xmm1, %xmm0
-; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_baa_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vmovaps (%rdx), %xmm1
-; FMA4-NEXT:    vfnmsubss %xmm0, %xmm0, %xmm1, %xmm0
-; FMA4-NEXT:    retq
-  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %a, <4 x float> %a) nounwind
-  ret <4 x float> %res
-}
-
-define <4 x float> @test_x86_fnmsub_aba_ss(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fnmsub_aba_ss:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %xmm0
-; FMA-NEXT:    vfnmsub132ss (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_aba_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rcx), %xmm0
-; FMA4-NEXT:    vfnmsubss %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
-  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %a) nounwind
-  ret <4 x float> %res
-}
-
-define <4 x float> @test_x86_fnmsub_bba_ss(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fnmsub_bba_ss:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rdx), %xmm0
-; FMA-NEXT:    vfnmsub213ss (%rcx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_bba_ss:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovaps (%rdx), %xmm0
-; FMA4-NEXT:    vfnmsubss (%rcx), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    retq
-  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %b, <4 x float> %b, <4 x float> %a) nounwind
-  ret <4 x float> %res
-}
-
 declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
 define <4 x float> @test_x86_fnmsub_baa_ps(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fnmsub_baa_ps:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %xmm0
-; FMA-NEXT:    vfnmsub132ps (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmsub_baa_ps:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
@@ -971,12 +449,6 @@ define <4 x float> @test_x86_fnmsub_baa_
 }
 
 define <4 x float> @test_x86_fnmsub_aba_ps(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fnmsub_aba_ps:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %xmm0
-; FMA-NEXT:    vfnmsub231ps (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmsub_aba_ps:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rcx), %xmm0
@@ -987,12 +459,6 @@ define <4 x float> @test_x86_fnmsub_aba_
 }
 
 define <4 x float> @test_x86_fnmsub_bba_ps(<4 x float> %a, <4 x float> %b) #0 {
-; FMA-LABEL: test_x86_fnmsub_bba_ps:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rdx), %xmm0
-; FMA-NEXT:    vfnmsub213ps (%rcx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmsub_bba_ps:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rdx), %xmm0
@@ -1004,12 +470,6 @@ define <4 x float> @test_x86_fnmsub_bba_
 
 declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone
 define <8 x float> @test_x86_fnmsub_baa_ps_y(<8 x float> %a, <8 x float> %b) #0 {
-; FMA-LABEL: test_x86_fnmsub_baa_ps_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %ymm0
-; FMA-NEXT:    vfnmsub132ps (%rdx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmsub_baa_ps_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
@@ -1020,12 +480,6 @@ define <8 x float> @test_x86_fnmsub_baa_
 }
 
 define <8 x float> @test_x86_fnmsub_aba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
-; FMA-LABEL: test_x86_fnmsub_aba_ps_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rcx), %ymm0
-; FMA-NEXT:    vfnmsub231ps (%rdx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmsub_aba_ps_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rcx), %ymm0
@@ -1036,12 +490,6 @@ define <8 x float> @test_x86_fnmsub_aba_
 }
 
 define <8 x float> @test_x86_fnmsub_bba_ps_y(<8 x float> %a, <8 x float> %b) #0 {
-; FMA-LABEL: test_x86_fnmsub_bba_ps_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovaps (%rdx), %ymm0
-; FMA-NEXT:    vfnmsub213ps (%rcx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmsub_bba_ps_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovaps (%rdx), %ymm0
@@ -1051,65 +499,8 @@ define <8 x float> @test_x86_fnmsub_bba_
   ret <8 x float> %res
 }
 
-declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
-define <2 x double> @test_x86_fnmsub_baa_sd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fnmsub_baa_sd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %xmm1
-; FMA-NEXT:    vmovapd (%rdx), %xmm0
-; FMA-NEXT:    vfnmsub213sd %xmm1, %xmm1, %xmm0
-; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_baa_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vmovapd (%rdx), %xmm1
-; FMA4-NEXT:    vfnmsubsd %xmm0, %xmm0, %xmm1, %xmm0
-; FMA4-NEXT:    retq
-  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %a, <2 x double> %a) nounwind
-  ret <2 x double> %res
-}
-
-define <2 x double> @test_x86_fnmsub_aba_sd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fnmsub_aba_sd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %xmm0
-; FMA-NEXT:    vfnmsub132sd (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_aba_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rcx), %xmm0
-; FMA4-NEXT:    vfnmsubsd %xmm0, (%rdx), %xmm0, %xmm0
-; FMA4-NEXT:    retq
-  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %a) nounwind
-  ret <2 x double> %res
-}
-
-define <2 x double> @test_x86_fnmsub_bba_sd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fnmsub_bba_sd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rdx), %xmm0
-; FMA-NEXT:    vfnmsub213sd (%rcx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
-; FMA4-LABEL: test_x86_fnmsub_bba_sd:
-; FMA4:       # BB#0:
-; FMA4-NEXT:    vmovapd (%rdx), %xmm0
-; FMA4-NEXT:    vfnmsubsd (%rcx), %xmm0, %xmm0, %xmm0
-; FMA4-NEXT:    retq
-  %res = call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %b, <2 x double> %b, <2 x double> %a) nounwind
-  ret <2 x double> %res
-}
-
 declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone
 define <2 x double> @test_x86_fnmsub_baa_pd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fnmsub_baa_pd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %xmm0
-; FMA-NEXT:    vfnmsub132pd (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmsub_baa_pd:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
@@ -1120,12 +511,6 @@ define <2 x double> @test_x86_fnmsub_baa
 }
 
 define <2 x double> @test_x86_fnmsub_aba_pd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fnmsub_aba_pd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %xmm0
-; FMA-NEXT:    vfnmsub231pd (%rdx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmsub_aba_pd:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rcx), %xmm0
@@ -1136,12 +521,6 @@ define <2 x double> @test_x86_fnmsub_aba
 }
 
 define <2 x double> @test_x86_fnmsub_bba_pd(<2 x double> %a, <2 x double> %b) #0 {
-; FMA-LABEL: test_x86_fnmsub_bba_pd:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rdx), %xmm0
-; FMA-NEXT:    vfnmsub213pd (%rcx), %xmm0, %xmm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmsub_bba_pd:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rdx), %xmm0
@@ -1153,12 +532,6 @@ define <2 x double> @test_x86_fnmsub_bba
 
 declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone
 define <4 x double> @test_x86_fnmsub_baa_pd_y(<4 x double> %a, <4 x double> %b) #0 {
-; FMA-LABEL: test_x86_fnmsub_baa_pd_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %ymm0
-; FMA-NEXT:    vfnmsub132pd (%rdx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmsub_baa_pd_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
@@ -1169,12 +542,6 @@ define <4 x double> @test_x86_fnmsub_baa
 }
 
 define <4 x double> @test_x86_fnmsub_aba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
-; FMA-LABEL: test_x86_fnmsub_aba_pd_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rcx), %ymm0
-; FMA-NEXT:    vfnmsub231pd (%rdx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmsub_aba_pd_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rcx), %ymm0
@@ -1185,12 +552,6 @@ define <4 x double> @test_x86_fnmsub_aba
 }
 
 define <4 x double> @test_x86_fnmsub_bba_pd_y(<4 x double> %a, <4 x double> %b) #0 {
-; FMA-LABEL: test_x86_fnmsub_bba_pd_y:
-; FMA:       # BB#0:
-; FMA-NEXT:    vmovapd (%rdx), %ymm0
-; FMA-NEXT:    vfnmsub213pd (%rcx), %ymm0, %ymm0
-; FMA-NEXT:    retq
-;
 ; FMA4-LABEL: test_x86_fnmsub_bba_pd_y:
 ; FMA4:       # BB#0:
 ; FMA4-NEXT:    vmovapd (%rdx), %ymm0

Added: llvm/trunk/test/CodeGen/X86/fma4-fneg-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma4-fneg-combine.ll?rev=318984&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma4-fneg-combine.ll (added)
+++ llvm/trunk/test/CodeGen/X86/fma4-fneg-combine.ll Sat Nov 25 10:32:43 2017
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma4,-fma  | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+fma4,+fma  | FileCheck %s
+
+declare <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
+declare <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
+
+; TODO this can be negated
+define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; CHECK-LABEL: test1:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vxorps {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
+  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %res
+  ret <4 x float> %sub.i
+}
+
+define <4 x float> @test2(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; CHECK-LABEL: test2:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
+  %res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %sub.i)
+  ret <4 x float> %res
+}
+
+define <4 x float> @test3(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; CHECK-LABEL: test3:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %b
+  %res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a, <4 x float> %sub.i, <4 x float> %c)
+  ret <4 x float> %res
+}
+
+define <4 x float> @test4(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; CHECK-LABEL: test4:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfnmaddss %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
+  %res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %sub.i, <4 x float> %b, <4 x float> %c)
+  ret <4 x float> %res
+}
+
+define <4 x float> @test5(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; CHECK-LABEL: test5:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfnmsubss %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
+  %sub.i.2 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
+  %res = tail call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %sub.i, <4 x float> %b, <4 x float> %sub.i.2)
+  ret <4 x float> %res
+}
+
+define <2 x double> @test6(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; CHECK-LABEL: test6:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
+  %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %res
+  ret <2 x double> %sub.i
+}
+
+define <2 x double> @test7(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; CHECK-LABEL: test7:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
+  %res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %sub.i)
+  ret <2 x double> %res
+}
+
+define <2 x double> @test8(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; CHECK-LABEL: test8:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %b
+  %res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a, <2 x double> %sub.i, <2 x double> %c)
+  ret <2 x double> %res
+}
+
+define <2 x double> @test9(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; CHECK-LABEL: test9:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfnmaddsd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
+  %res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %sub.i, <2 x double> %b, <2 x double> %c)
+  ret <2 x double> %res
+}
+
+define <2 x double> @test10(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; CHECK-LABEL: test10:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfnmsubsd %xmm2, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT:    retq
+  %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
+  %sub.i.2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
+  %res = tail call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %sub.i, <2 x double> %b, <2 x double> %sub.i.2)
+  ret <2 x double> %res
+}

Added: llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86.ll?rev=318984&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86.ll (added)
+++ llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86.ll Sat Nov 25 10:32:43 2017
@@ -0,0 +1,289 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma4,-fma -show-mc-encoding | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma4,+fma -show-mc-encoding | FileCheck %s --check-prefix=CHECK
+
+; VFMADD
+define <4 x float> @test_x86_fma4_vfmadd_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma4_vfmadd_ss:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmaddss %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6a,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+  ret <4 x float> %res
+}
+
+define <4 x float> @test_x86_fma4_vfmadd_bac_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma4_vfmadd_bac_ss:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmaddss %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6a,0xc2,0x00]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %a1, <4 x float> %a0, <4 x float> %a2)
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
+
+define <2 x double> @test_x86_fma4_vfmadd_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma4_vfmadd_sd:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmaddsd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6b,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+  ret <2 x double> %res
+}
+
+define <2 x double> @test_x86_fma4_vfmadd_bac_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma4_vfmadd_bac_sd:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmaddsd %xmm2, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0xf1,0x6b,0xc2,0x00]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %a1, <2 x double> %a0, <2 x double> %a2)
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
+
+define <4 x float> @test_x86_fma_vfmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmadd_ps:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x68,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)
+
+define <2 x double> @test_x86_fma_vfmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmadd_pd:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x69,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>)
+
+define <8 x float> @test_x86_fma_vfmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmadd_ps_256:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x68,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
+
+define <4 x double> @test_x86_fma_vfmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmadd_pd_256:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x69,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
+
+; VFMSUB
+define <4 x float> @test_x86_fma_vfmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmsub_ps:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6c,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>)
+
+define <2 x double> @test_x86_fma_vfmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmsub_pd:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x6d,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>)
+
+define <8 x float> @test_x86_fma_vfmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmsub_ps_256:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6c,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
+
+define <4 x double> @test_x86_fma_vfmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmsub_pd_256:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x6d,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
+
+; VFNMADD
+define <4 x float> @test_x86_fma_vfnmadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfnmadd_ps:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfnmaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x78,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>)
+
+define <2 x double> @test_x86_fma_vfnmadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfnmadd_pd:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfnmaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x79,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>)
+
+define <8 x float> @test_x86_fma_vfnmadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfnmadd_ps_256:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfnmaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x78,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
+
+define <4 x double> @test_x86_fma_vfnmadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfnmadd_pd_256:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfnmaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x79,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
+
+; VFNMSUB
+define <4 x float> @test_x86_fma_vfnmsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfnmsub_ps:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfnmsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7c,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>)
+
+define <2 x double> @test_x86_fma_vfnmsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfnmsub_pd:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfnmsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x7d,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>)
+
+define <8 x float> @test_x86_fma_vfnmsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfnmsub_ps_256:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfnmsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7c,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
+
+define <4 x double> @test_x86_fma_vfnmsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfnmsub_pd_256:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfnmsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x7d,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
+
+; VFMADDSUB
+define <4 x float> @test_x86_fma_vfmaddsub_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmaddsub_ps:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmaddsubps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5c,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>)
+
+define <2 x double> @test_x86_fma_vfmaddsub_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmaddsub_pd:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmaddsubpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5d,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>)
+
+define <8 x float> @test_x86_fma_vfmaddsub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmaddsub_ps_256:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmaddsubps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5c,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
+
+define <4 x double> @test_x86_fma_vfmaddsub_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmaddsub_pd_256:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmaddsubpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5d,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
+
+; VFMSUBADD
+define <4 x float> @test_x86_fma_vfmsubadd_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmsubadd_ps:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmsubaddps %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5e,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2)
+  ret <4 x float> %res
+}
+declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>)
+
+define <2 x double> @test_x86_fma_vfmsubadd_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmsubadd_pd:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmsubaddpd %xmm2, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0xf9,0x5f,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2)
+  ret <2 x double> %res
+}
+declare <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>)
+
+define <8 x float> @test_x86_fma_vfmsubadd_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmsubadd_ps_256:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmsubaddps %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5e,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2)
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
+
+define <4 x double> @test_x86_fma_vfmsubadd_pd_256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 {
+; CHECK-LABEL: test_x86_fma_vfmsubadd_pd_256:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vfmsubaddpd %ymm2, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0xfd,0x5f,0xc2,0x10]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+  %res = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2)
+  ret <4 x double> %res
+}
+declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
+
+attributes #0 = { nounwind }

Modified: llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll?rev=318984&r1=318983&r2=318984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma4-intrinsics-x86_64-folded-load.ll Sat Nov 25 10:32:43 2017
@@ -3,50 +3,50 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -mattr=+avx,-fma | FileCheck %s
 
 ; VFMADD
-define < 4 x float > @test_x86_fma_vfmadd_ss_load(< 4 x float > %a0, < 4 x float > %a1, float* %a2) {
-; CHECK-LABEL: test_x86_fma_vfmadd_ss_load:
+define < 4 x float > @test_x86_fma4_vfmadd_ss_load(< 4 x float > %a0, < 4 x float > %a1, float* %a2) {
+; CHECK-LABEL: test_x86_fma4_vfmadd_ss_load:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vfmaddss (%rdi), %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %x = load float , float *%a2
   %y = insertelement <4 x float> undef, float %x, i32 0
-  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %y)
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %a1, < 4 x float > %y)
   ret < 4 x float > %res
 }
-define < 4 x float > @test_x86_fma_vfmadd_ss_load2(< 4 x float > %a0, float* %a1, < 4 x float > %a2) {
-; CHECK-LABEL: test_x86_fma_vfmadd_ss_load2:
+define < 4 x float > @test_x86_fma4_vfmadd_ss_load2(< 4 x float > %a0, float* %a1, < 4 x float > %a2) {
+; CHECK-LABEL: test_x86_fma4_vfmadd_ss_load2:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vfmaddss %xmm1, (%rdi), %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %x = load float , float *%a1
   %y = insertelement <4 x float> undef, float %x, i32 0
-  %res = call < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float > %a0, < 4 x float > %y, < 4 x float > %a2)
+  %res = call < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float > %a0, < 4 x float > %y, < 4 x float > %a2)
   ret < 4 x float > %res
 }
 
-declare < 4 x float > @llvm.x86.fma.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
+declare < 4 x float > @llvm.x86.fma4.vfmadd.ss(< 4 x float >, < 4 x float >, < 4 x float >) nounwind readnone
 
-define < 2 x double > @test_x86_fma_vfmadd_sd_load(< 2 x double > %a0, < 2 x double > %a1, double* %a2) {
-; CHECK-LABEL: test_x86_fma_vfmadd_sd_load:
+define < 2 x double > @test_x86_fma4_vfmadd_sd_load(< 2 x double > %a0, < 2 x double > %a1, double* %a2) {
+; CHECK-LABEL: test_x86_fma4_vfmadd_sd_load:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vfmaddsd (%rdi), %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %x = load double , double *%a2
   %y = insertelement <2 x double> undef, double %x, i32 0
-  %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %y)
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %a1, < 2 x double > %y)
   ret < 2 x double > %res
 }
-define < 2 x double > @test_x86_fma_vfmadd_sd_load2(< 2 x double > %a0, double* %a1, < 2 x double > %a2) {
-; CHECK-LABEL: test_x86_fma_vfmadd_sd_load2:
+define < 2 x double > @test_x86_fma4_vfmadd_sd_load2(< 2 x double > %a0, double* %a1, < 2 x double > %a2) {
+; CHECK-LABEL: test_x86_fma4_vfmadd_sd_load2:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    vfmaddsd %xmm1, (%rdi), %xmm0, %xmm0
 ; CHECK-NEXT:    retq
   %x = load double , double *%a1
   %y = insertelement <2 x double> undef, double %x, i32 0
-  %res = call < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double > %a0, < 2 x double > %y, < 2 x double > %a2)
+  %res = call < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double > %a0, < 2 x double > %y, < 2 x double > %a2)
   ret < 2 x double > %res
 }
-declare < 2 x double > @llvm.x86.fma.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
+declare < 2 x double > @llvm.x86.fma4.vfmadd.sd(< 2 x double >, < 2 x double >, < 2 x double >) nounwind readnone
 define < 4 x float > @test_x86_fma_vfmadd_ps_load(< 4 x float > %a0, < 4 x float > %a1, < 4 x float >* %a2) {
 ; CHECK-LABEL: test_x86_fma_vfmadd_ps_load:
 ; CHECK:       # BB#0:

Added: llvm/trunk/test/CodeGen/X86/fma4-scalar-memfold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma4-scalar-memfold.ll?rev=318984&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma4-scalar-memfold.ll (added)
+++ llvm/trunk/test/CodeGen/X86/fma4-scalar-memfold.ll Sat Nov 25 10:32:43 2017
@@ -0,0 +1,104 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mattr=fma4 | FileCheck %s
+
+target triple = "x86_64-unknown-unknown"
+
+declare <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
+declare <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
+
+define void @fmadd_aab_ss(float* %a, float* %b) {
+; CHECK-LABEL: fmadd_aab_ss:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    vfmaddss (%rsi), %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovss %xmm0, (%rdi)
+; CHECK-NEXT:    retq
+  %a.val = load float, float* %a
+  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
+  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
+  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
+  %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
+
+  %b.val = load float, float* %b
+  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
+  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
+  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
+  %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
+
+  %vr = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %av, <4 x float> %av, <4 x float> %bv)
+
+  %sr = extractelement <4 x float> %vr, i32 0
+  store float %sr, float* %a
+  ret void
+}
+
+define void @fmadd_aba_ss(float* %a, float* %b) {
+; CHECK-LABEL: fmadd_aba_ss:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    vfmaddss %xmm0, (%rsi), %xmm0, %xmm0
+; CHECK-NEXT:    vmovss %xmm0, (%rdi)
+; CHECK-NEXT:    retq
+  %a.val = load float, float* %a
+  %av0 = insertelement <4 x float> undef, float %a.val, i32 0
+  %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
+  %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
+  %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
+
+  %b.val = load float, float* %b
+  %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
+  %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
+  %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
+  %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
+
+  %vr = call <4 x float> @llvm.x86.fma4.vfmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av)
+
+  %sr = extractelement <4 x float> %vr, i32 0
+  store float %sr, float* %a
+  ret void
+}
+
+define void @fmadd_aab_sd(double* %a, double* %b) {
+; CHECK-LABEL: fmadd_aab_sd:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    vfmaddsd (%rsi), %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovlpd %xmm0, (%rdi)
+; CHECK-NEXT:    retq
+  %a.val = load double, double* %a
+  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
+  %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
+
+  %b.val = load double, double* %b
+  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
+  %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
+
+  %vr = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %av, <2 x double> %av, <2 x double> %bv)
+
+  %sr = extractelement <2 x double> %vr, i32 0
+  store double %sr, double* %a
+  ret void
+}
+
+define void @fmadd_aba_sd(double* %a, double* %b) {
+; CHECK-LABEL: fmadd_aba_sd:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    vfmaddsd %xmm0, (%rsi), %xmm0, %xmm0
+; CHECK-NEXT:    vmovlpd %xmm0, (%rdi)
+; CHECK-NEXT:    retq
+  %a.val = load double, double* %a
+  %av0 = insertelement <2 x double> undef, double %a.val, i32 0
+  %av  = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
+
+  %b.val = load double, double* %b
+  %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
+  %bv  = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
+
+  %vr = call <2 x double> @llvm.x86.fma4.vfmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av)
+
+  %sr = extractelement <2 x double> %vr, i32 0
+  store double %sr, double* %a
+  ret void
+}
+




More information about the llvm-commits mailing list