[llvm] r276521 - [AVX512] Implement commuting support for EVEX encoded FMA3 instructions.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sat Jul 23 00:16:56 PDT 2016


Author: ctopper
Date: Sat Jul 23 02:16:56 2016
New Revision: 276521

URL: http://llvm.org/viewvc/llvm-project?rev=276521&view=rev
Log:
[AVX512] Implement commuting support for EVEX encoded FMA3 instructions.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/test/CodeGen/X86/avx512-fma.ll
    llvm/trunk/test/CodeGen/X86/fma_patterns.ll
    llvm/trunk/test/CodeGen/X86/fma_patterns_wide.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=276521&r1=276520&r2=276521&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Sat Jul 23 02:16:56 2016
@@ -3157,145 +3157,107 @@ X86InstrInfo::convertToThreeAddress(Mach
 static bool isFMA3(unsigned Opcode, bool &IsIntrinsic) {
   IsIntrinsic = false;
 
+#define FMA3_CASE(Name, Modifier) \
+case X86::Name##r##Modifier: case X86::Name##m##Modifier:
+
+#define FMA3_SCALAR_PAIR(Name, Form, Modifier) \
+  FMA3_CASE(Name##SD##Form, Modifier) \
+  FMA3_CASE(Name##SS##Form, Modifier)
+
+#define FMA3_PACKED_PAIR(Name, Form, Modifier) \
+  FMA3_CASE(Name##PD##Form, Modifier) \
+  FMA3_CASE(Name##PS##Form, Modifier)
+
+#define FMA3_PACKED_SET(Form, Modifier) \
+  FMA3_PACKED_PAIR(VFMADD,    Form, Modifier) \
+  FMA3_PACKED_PAIR(VFMSUB,    Form, Modifier) \
+  FMA3_PACKED_PAIR(VFNMADD,   Form, Modifier) \
+  FMA3_PACKED_PAIR(VFNMSUB,   Form, Modifier) \
+  FMA3_PACKED_PAIR(VFMADDSUB, Form, Modifier) \
+  FMA3_PACKED_PAIR(VFMSUBADD, Form, Modifier)
+
+#define FMA3_CASES(Form) \
+  FMA3_SCALAR_PAIR(VFMADD,  Form, ) \
+  FMA3_SCALAR_PAIR(VFMSUB,  Form, ) \
+  FMA3_SCALAR_PAIR(VFNMADD, Form, ) \
+  FMA3_SCALAR_PAIR(VFNMSUB, Form, ) \
+  FMA3_PACKED_SET(Form, ) \
+  FMA3_PACKED_SET(Form, Y) \
+
+#define FMA3_SCALAR_PAIR_AVX512(Name, Modifier) \
+  FMA3_CASE(Name##SD, Modifier) \
+  FMA3_CASE(Name##SS, Modifier)
+
+#define FMA3_PACKED_PAIR_AVX512(Name, Size) \
+  FMA3_CASE(Name##PD##Size, ) \
+  FMA3_CASE(Name##PS##Size, )
+
+#define FMA3_PACKED_SET_AVX512(Form, Size) \
+  FMA3_PACKED_PAIR_AVX512(VFMADD##Form, Size) \
+  FMA3_PACKED_PAIR_AVX512(VFMSUB##Form, Size) \
+  FMA3_PACKED_PAIR_AVX512(VFNMADD##Form, Size) \
+  FMA3_PACKED_PAIR_AVX512(VFNMSUB##Form, Size) \
+  FMA3_PACKED_PAIR_AVX512(VFMADDSUB##Form, Size) \
+  FMA3_PACKED_PAIR_AVX512(VFMSUBADD##Form, Size)
+
+#define FMA3_CASES_AVX512(Form) \
+  FMA3_SCALAR_PAIR_AVX512(VFMADD##Form, ) \
+  FMA3_SCALAR_PAIR_AVX512(VFMSUB##Form, ) \
+  FMA3_SCALAR_PAIR_AVX512(VFNMADD##Form, ) \
+  FMA3_SCALAR_PAIR_AVX512(VFNMSUB##Form, ) \
+  FMA3_PACKED_SET_AVX512(Form, Z128) \
+  FMA3_PACKED_SET_AVX512(Form, Z256) \
+  FMA3_PACKED_SET_AVX512(Form, Z)
+
+#define FMA3_CASES_SCALAR_INT(Form) \
+  FMA3_SCALAR_PAIR(VFMADD,  Form, _Int) \
+  FMA3_SCALAR_PAIR(VFMSUB,  Form, _Int) \
+  FMA3_SCALAR_PAIR(VFNMADD, Form, _Int) \
+  FMA3_SCALAR_PAIR(VFNMSUB, Form, _Int)
+
+#define FMA3_CASES_SCALAR_INT_AVX512(Form) \
+  FMA3_SCALAR_PAIR_AVX512(VFMADD##Form, _Int) \
+  FMA3_SCALAR_PAIR_AVX512(VFMSUB##Form, _Int) \
+  FMA3_SCALAR_PAIR_AVX512(VFNMADD##Form, _Int) \
+  FMA3_SCALAR_PAIR_AVX512(VFNMSUB##Form, _Int)
+
   switch (Opcode) {
-  case X86::VFMADDSDr132r:      case X86::VFMADDSDr132m:
-  case X86::VFMADDSSr132r:      case X86::VFMADDSSr132m:
-  case X86::VFMSUBSDr132r:      case X86::VFMSUBSDr132m:
-  case X86::VFMSUBSSr132r:      case X86::VFMSUBSSr132m:
-  case X86::VFNMADDSDr132r:     case X86::VFNMADDSDr132m:
-  case X86::VFNMADDSSr132r:     case X86::VFNMADDSSr132m:
-  case X86::VFNMSUBSDr132r:     case X86::VFNMSUBSDr132m:
-  case X86::VFNMSUBSSr132r:     case X86::VFNMSUBSSr132m:
-
-  case X86::VFMADDSDr213r:      case X86::VFMADDSDr213m:
-  case X86::VFMADDSSr213r:      case X86::VFMADDSSr213m:
-  case X86::VFMSUBSDr213r:      case X86::VFMSUBSDr213m:
-  case X86::VFMSUBSSr213r:      case X86::VFMSUBSSr213m:
-  case X86::VFNMADDSDr213r:     case X86::VFNMADDSDr213m:
-  case X86::VFNMADDSSr213r:     case X86::VFNMADDSSr213m:
-  case X86::VFNMSUBSDr213r:     case X86::VFNMSUBSDr213m:
-  case X86::VFNMSUBSSr213r:     case X86::VFNMSUBSSr213m:
-
-  case X86::VFMADDSDr231r:      case X86::VFMADDSDr231m:
-  case X86::VFMADDSSr231r:      case X86::VFMADDSSr231m:
-  case X86::VFMSUBSDr231r:      case X86::VFMSUBSDr231m:
-  case X86::VFMSUBSSr231r:      case X86::VFMSUBSSr231m:
-  case X86::VFNMADDSDr231r:     case X86::VFNMADDSDr231m:
-  case X86::VFNMADDSSr231r:     case X86::VFNMADDSSr231m:
-  case X86::VFNMSUBSDr231r:     case X86::VFNMSUBSDr231m:
-  case X86::VFNMSUBSSr231r:     case X86::VFNMSUBSSr231m:
-
-  case X86::VFMADDSUBPDr132r:   case X86::VFMADDSUBPDr132m:
-  case X86::VFMADDSUBPSr132r:   case X86::VFMADDSUBPSr132m:
-  case X86::VFMSUBADDPDr132r:   case X86::VFMSUBADDPDr132m:
-  case X86::VFMSUBADDPSr132r:   case X86::VFMSUBADDPSr132m:
-  case X86::VFMADDSUBPDr132rY:  case X86::VFMADDSUBPDr132mY:
-  case X86::VFMADDSUBPSr132rY:  case X86::VFMADDSUBPSr132mY:
-  case X86::VFMSUBADDPDr132rY:  case X86::VFMSUBADDPDr132mY:
-  case X86::VFMSUBADDPSr132rY:  case X86::VFMSUBADDPSr132mY:
-
-  case X86::VFMADDPDr132r:      case X86::VFMADDPDr132m:
-  case X86::VFMADDPSr132r:      case X86::VFMADDPSr132m:
-  case X86::VFMSUBPDr132r:      case X86::VFMSUBPDr132m:
-  case X86::VFMSUBPSr132r:      case X86::VFMSUBPSr132m:
-  case X86::VFNMADDPDr132r:     case X86::VFNMADDPDr132m:
-  case X86::VFNMADDPSr132r:     case X86::VFNMADDPSr132m:
-  case X86::VFNMSUBPDr132r:     case X86::VFNMSUBPDr132m:
-  case X86::VFNMSUBPSr132r:     case X86::VFNMSUBPSr132m:
-  case X86::VFMADDPDr132rY:     case X86::VFMADDPDr132mY:
-  case X86::VFMADDPSr132rY:     case X86::VFMADDPSr132mY:
-  case X86::VFMSUBPDr132rY:     case X86::VFMSUBPDr132mY:
-  case X86::VFMSUBPSr132rY:     case X86::VFMSUBPSr132mY:
-  case X86::VFNMADDPDr132rY:    case X86::VFNMADDPDr132mY:
-  case X86::VFNMADDPSr132rY:    case X86::VFNMADDPSr132mY:
-  case X86::VFNMSUBPDr132rY:    case X86::VFNMSUBPDr132mY:
-  case X86::VFNMSUBPSr132rY:    case X86::VFNMSUBPSr132mY:
-
-  case X86::VFMADDSUBPDr213r:   case X86::VFMADDSUBPDr213m:
-  case X86::VFMADDSUBPSr213r:   case X86::VFMADDSUBPSr213m:
-  case X86::VFMSUBADDPDr213r:   case X86::VFMSUBADDPDr213m:
-  case X86::VFMSUBADDPSr213r:   case X86::VFMSUBADDPSr213m:
-  case X86::VFMADDSUBPDr213rY:  case X86::VFMADDSUBPDr213mY:
-  case X86::VFMADDSUBPSr213rY:  case X86::VFMADDSUBPSr213mY:
-  case X86::VFMSUBADDPDr213rY:  case X86::VFMSUBADDPDr213mY:
-  case X86::VFMSUBADDPSr213rY:  case X86::VFMSUBADDPSr213mY:
-
-  case X86::VFMADDPDr213r:      case X86::VFMADDPDr213m:
-  case X86::VFMADDPSr213r:      case X86::VFMADDPSr213m:
-  case X86::VFMSUBPDr213r:      case X86::VFMSUBPDr213m:
-  case X86::VFMSUBPSr213r:      case X86::VFMSUBPSr213m:
-  case X86::VFNMADDPDr213r:     case X86::VFNMADDPDr213m:
-  case X86::VFNMADDPSr213r:     case X86::VFNMADDPSr213m:
-  case X86::VFNMSUBPDr213r:     case X86::VFNMSUBPDr213m:
-  case X86::VFNMSUBPSr213r:     case X86::VFNMSUBPSr213m:
-  case X86::VFMADDPDr213rY:     case X86::VFMADDPDr213mY:
-  case X86::VFMADDPSr213rY:     case X86::VFMADDPSr213mY:
-  case X86::VFMSUBPDr213rY:     case X86::VFMSUBPDr213mY:
-  case X86::VFMSUBPSr213rY:     case X86::VFMSUBPSr213mY:
-  case X86::VFNMADDPDr213rY:    case X86::VFNMADDPDr213mY:
-  case X86::VFNMADDPSr213rY:    case X86::VFNMADDPSr213mY:
-  case X86::VFNMSUBPDr213rY:    case X86::VFNMSUBPDr213mY:
-  case X86::VFNMSUBPSr213rY:    case X86::VFNMSUBPSr213mY:
-
-  case X86::VFMADDSUBPDr231r:   case X86::VFMADDSUBPDr231m:
-  case X86::VFMADDSUBPSr231r:   case X86::VFMADDSUBPSr231m:
-  case X86::VFMSUBADDPDr231r:   case X86::VFMSUBADDPDr231m:
-  case X86::VFMSUBADDPSr231r:   case X86::VFMSUBADDPSr231m:
-  case X86::VFMADDSUBPDr231rY:  case X86::VFMADDSUBPDr231mY:
-  case X86::VFMADDSUBPSr231rY:  case X86::VFMADDSUBPSr231mY:
-  case X86::VFMSUBADDPDr231rY:  case X86::VFMSUBADDPDr231mY:
-  case X86::VFMSUBADDPSr231rY:  case X86::VFMSUBADDPSr231mY:
-
-  case X86::VFMADDPDr231r:      case X86::VFMADDPDr231m:
-  case X86::VFMADDPSr231r:      case X86::VFMADDPSr231m:
-  case X86::VFMSUBPDr231r:      case X86::VFMSUBPDr231m:
-  case X86::VFMSUBPSr231r:      case X86::VFMSUBPSr231m:
-  case X86::VFNMADDPDr231r:     case X86::VFNMADDPDr231m:
-  case X86::VFNMADDPSr231r:     case X86::VFNMADDPSr231m:
-  case X86::VFNMSUBPDr231r:     case X86::VFNMSUBPDr231m:
-  case X86::VFNMSUBPSr231r:     case X86::VFNMSUBPSr231m:
-  case X86::VFMADDPDr231rY:     case X86::VFMADDPDr231mY:
-  case X86::VFMADDPSr231rY:     case X86::VFMADDPSr231mY:
-  case X86::VFMSUBPDr231rY:     case X86::VFMSUBPDr231mY:
-  case X86::VFMSUBPSr231rY:     case X86::VFMSUBPSr231mY:
-  case X86::VFNMADDPDr231rY:    case X86::VFNMADDPDr231mY:
-  case X86::VFNMADDPSr231rY:    case X86::VFNMADDPSr231mY:
-  case X86::VFNMSUBPDr231rY:    case X86::VFNMSUBPDr231mY:
-  case X86::VFNMSUBPSr231rY:    case X86::VFNMSUBPSr231mY:
+  FMA3_CASES(r132)
+  FMA3_CASES(r213)
+  FMA3_CASES(r231)
+
+  // AVX-512 instructions
+  FMA3_CASES_AVX512(132)
+  FMA3_CASES_AVX512(213)
+  FMA3_CASES_AVX512(231)
     return true;
 
-  case X86::VFMADDSDr132r_Int:  case X86::VFMADDSDr132m_Int:
-  case X86::VFMADDSSr132r_Int:  case X86::VFMADDSSr132m_Int:
-  case X86::VFMSUBSDr132r_Int:  case X86::VFMSUBSDr132m_Int:
-  case X86::VFMSUBSSr132r_Int:  case X86::VFMSUBSSr132m_Int:
-  case X86::VFNMADDSDr132r_Int: case X86::VFNMADDSDr132m_Int:
-  case X86::VFNMADDSSr132r_Int: case X86::VFNMADDSSr132m_Int:
-  case X86::VFNMSUBSDr132r_Int: case X86::VFNMSUBSDr132m_Int:
-  case X86::VFNMSUBSSr132r_Int: case X86::VFNMSUBSSr132m_Int:
-
-  case X86::VFMADDSDr213r_Int:  case X86::VFMADDSDr213m_Int:
-  case X86::VFMADDSSr213r_Int:  case X86::VFMADDSSr213m_Int:
-  case X86::VFMSUBSDr213r_Int:  case X86::VFMSUBSDr213m_Int:
-  case X86::VFMSUBSSr213r_Int:  case X86::VFMSUBSSr213m_Int:
-  case X86::VFNMADDSDr213r_Int: case X86::VFNMADDSDr213m_Int:
-  case X86::VFNMADDSSr213r_Int: case X86::VFNMADDSSr213m_Int:
-  case X86::VFNMSUBSDr213r_Int: case X86::VFNMSUBSDr213m_Int:
-  case X86::VFNMSUBSSr213r_Int: case X86::VFNMSUBSSr213m_Int:
-
-  case X86::VFMADDSDr231r_Int:  case X86::VFMADDSDr231m_Int:
-  case X86::VFMADDSSr231r_Int:  case X86::VFMADDSSr231m_Int:
-  case X86::VFMSUBSDr231r_Int:  case X86::VFMSUBSDr231m_Int:
-  case X86::VFMSUBSSr231r_Int:  case X86::VFMSUBSSr231m_Int:
-  case X86::VFNMADDSDr231r_Int: case X86::VFNMADDSDr231m_Int:
-  case X86::VFNMADDSSr231r_Int: case X86::VFNMADDSSr231m_Int:
-  case X86::VFNMSUBSDr231r_Int: case X86::VFNMSUBSDr231m_Int:
-  case X86::VFNMSUBSSr231r_Int: case X86::VFNMSUBSSr231m_Int:
+  FMA3_CASES_SCALAR_INT(r132)
+  FMA3_CASES_SCALAR_INT(r213)
+  FMA3_CASES_SCALAR_INT(r231)
+
+  // AVX-512 instructions
+  FMA3_CASES_SCALAR_INT_AVX512(132)
+  FMA3_CASES_SCALAR_INT_AVX512(213)
+  FMA3_CASES_SCALAR_INT_AVX512(231)
     IsIntrinsic = true;
     return true;
   default:
     return false;
   }
   llvm_unreachable("Opcode not handled by the switch");
+
+#undef FMA3_CASE
+#undef FMA3_SCALAR_PAIR
+#undef FMA3_PACKED_PAIR
+#undef FMA3_PACKED_SET
+#undef FMA3_CASES
+#undef FMA3_SCALAR_PAIR_AVX512
+#undef FMA3_PACKED_PAIR_AVX512
+#undef FMA3_PACKED_SET_AVX512
+#undef FMA3_CASES_AVX512
+#undef FMA3_CASES_SCALAR_INT
+#undef FMA3_CASES_SCALAR_INT_AVX512
 }
 
 /// Returns an adjusted FMA opcode that must be used in FMA instruction that
@@ -3312,104 +3274,110 @@ static unsigned getFMA3OpcodeToCommuteOp
                                                bool IsIntrinOpcode,
                                                unsigned SrcOpIdx1,
                                                unsigned SrcOpIdx2) {
+#define FMA3_ENTRY(Name, Suffix) \
+  { X86::Name##132##Suffix, X86::Name##213##Suffix, X86::Name##231##Suffix },
+
+#define FMA3_SCALAR_PAIR(Name, Suffix) \
+  FMA3_ENTRY(Name##SSr, Suffix) \
+  FMA3_ENTRY(Name##SDr, Suffix)
+
+#define FMA3_PACKED_PAIR(Name, Suffix) \
+  FMA3_ENTRY(Name##PSr, Suffix) \
+  FMA3_ENTRY(Name##PDr, Suffix)
+
+#define FMA3_PACKED_SIZES(Name, Suffix) \
+  FMA3_PACKED_PAIR(Name, Suffix) \
+  FMA3_PACKED_PAIR(Name, Suffix##Y)
+
+#define FMA3_TABLE_ALL(Name) \
+  FMA3_SCALAR_PAIR(Name, r) \
+  FMA3_PACKED_SIZES(Name, r) \
+  FMA3_SCALAR_PAIR(Name, m) \
+  FMA3_PACKED_SIZES(Name, m)
+
+#define FMA3_TABLE_PACKED(Name) \
+  FMA3_PACKED_SIZES(Name, r) \
+  FMA3_PACKED_SIZES(Name, m)
+
+#define FMA3_TABLE_SCALAR_INT(Name) \
+  FMA3_SCALAR_PAIR(Name, r_Int) \
+  FMA3_SCALAR_PAIR(Name, m_Int)
+
+#define FMA3_SCALAR_PAIR_AVX512(Name, Suffix) \
+  FMA3_ENTRY(Name, SS##Suffix) \
+  FMA3_ENTRY(Name, SD##Suffix)
+
+#define FMA3_PACKED_PAIR_AVX512(Name, Suffix) \
+  FMA3_ENTRY(Name, PS##Suffix) \
+  FMA3_ENTRY(Name, PD##Suffix)
+
+#define FMA3_PACKED_SIZES_AVX512(Name, Suffix) \
+  FMA3_PACKED_PAIR_AVX512(Name, Z128##Suffix) \
+  FMA3_PACKED_PAIR_AVX512(Name, Z256##Suffix) \
+  FMA3_PACKED_PAIR_AVX512(Name, Z##Suffix)
+
+#define FMA3_TABLE_ALL_AVX512(Name) \
+  FMA3_SCALAR_PAIR_AVX512(Name, r) \
+  FMA3_PACKED_SIZES_AVX512(Name, r) \
+  FMA3_SCALAR_PAIR_AVX512(Name, m) \
+  FMA3_PACKED_SIZES_AVX512(Name, m)
+
+#define FMA3_TABLE_PACKED_AVX512(Name) \
+  FMA3_PACKED_SIZES_AVX512(Name, r) \
+  FMA3_PACKED_SIZES_AVX512(Name, m)
+
+#define FMA3_TABLE_SCALAR_INT_AVX512(Name) \
+  FMA3_SCALAR_PAIR_AVX512(Name, r_Int) \
+  FMA3_SCALAR_PAIR_AVX512(Name, m_Int)
+
   // Define the array that holds FMA opcodes in groups
   // of 3 opcodes(132, 213, 231) in each group.
   static const uint16_t RegularOpcodeGroups[][3] = {
-    { X86::VFMADDSSr132r,   X86::VFMADDSSr213r,   X86::VFMADDSSr231r  },
-    { X86::VFMADDSDr132r,   X86::VFMADDSDr213r,   X86::VFMADDSDr231r  },
-    { X86::VFMADDPSr132r,   X86::VFMADDPSr213r,   X86::VFMADDPSr231r  },
-    { X86::VFMADDPDr132r,   X86::VFMADDPDr213r,   X86::VFMADDPDr231r  },
-    { X86::VFMADDPSr132rY,  X86::VFMADDPSr213rY,  X86::VFMADDPSr231rY },
-    { X86::VFMADDPDr132rY,  X86::VFMADDPDr213rY,  X86::VFMADDPDr231rY },
-    { X86::VFMADDSSr132m,   X86::VFMADDSSr213m,   X86::VFMADDSSr231m  },
-    { X86::VFMADDSDr132m,   X86::VFMADDSDr213m,   X86::VFMADDSDr231m  },
-    { X86::VFMADDPSr132m,   X86::VFMADDPSr213m,   X86::VFMADDPSr231m  },
-    { X86::VFMADDPDr132m,   X86::VFMADDPDr213m,   X86::VFMADDPDr231m  },
-    { X86::VFMADDPSr132mY,  X86::VFMADDPSr213mY,  X86::VFMADDPSr231mY },
-    { X86::VFMADDPDr132mY,  X86::VFMADDPDr213mY,  X86::VFMADDPDr231mY },
-
-    { X86::VFMSUBSSr132r,   X86::VFMSUBSSr213r,   X86::VFMSUBSSr231r  },
-    { X86::VFMSUBSDr132r,   X86::VFMSUBSDr213r,   X86::VFMSUBSDr231r  },
-    { X86::VFMSUBPSr132r,   X86::VFMSUBPSr213r,   X86::VFMSUBPSr231r  },
-    { X86::VFMSUBPDr132r,   X86::VFMSUBPDr213r,   X86::VFMSUBPDr231r  },
-    { X86::VFMSUBPSr132rY,  X86::VFMSUBPSr213rY,  X86::VFMSUBPSr231rY },
-    { X86::VFMSUBPDr132rY,  X86::VFMSUBPDr213rY,  X86::VFMSUBPDr231rY },
-    { X86::VFMSUBSSr132m,   X86::VFMSUBSSr213m,   X86::VFMSUBSSr231m  },
-    { X86::VFMSUBSDr132m,   X86::VFMSUBSDr213m,   X86::VFMSUBSDr231m  },
-    { X86::VFMSUBPSr132m,   X86::VFMSUBPSr213m,   X86::VFMSUBPSr231m  },
-    { X86::VFMSUBPDr132m,   X86::VFMSUBPDr213m,   X86::VFMSUBPDr231m  },
-    { X86::VFMSUBPSr132mY,  X86::VFMSUBPSr213mY,  X86::VFMSUBPSr231mY },
-    { X86::VFMSUBPDr132mY,  X86::VFMSUBPDr213mY,  X86::VFMSUBPDr231mY },
-
-    { X86::VFNMADDSSr132r,  X86::VFNMADDSSr213r,  X86::VFNMADDSSr231r  },
-    { X86::VFNMADDSDr132r,  X86::VFNMADDSDr213r,  X86::VFNMADDSDr231r  },
-    { X86::VFNMADDPSr132r,  X86::VFNMADDPSr213r,  X86::VFNMADDPSr231r  },
-    { X86::VFNMADDPDr132r,  X86::VFNMADDPDr213r,  X86::VFNMADDPDr231r  },
-    { X86::VFNMADDPSr132rY, X86::VFNMADDPSr213rY, X86::VFNMADDPSr231rY },
-    { X86::VFNMADDPDr132rY, X86::VFNMADDPDr213rY, X86::VFNMADDPDr231rY },
-    { X86::VFNMADDSSr132m,  X86::VFNMADDSSr213m,  X86::VFNMADDSSr231m  },
-    { X86::VFNMADDSDr132m,  X86::VFNMADDSDr213m,  X86::VFNMADDSDr231m  },
-    { X86::VFNMADDPSr132m,  X86::VFNMADDPSr213m,  X86::VFNMADDPSr231m  },
-    { X86::VFNMADDPDr132m,  X86::VFNMADDPDr213m,  X86::VFNMADDPDr231m  },
-    { X86::VFNMADDPSr132mY, X86::VFNMADDPSr213mY, X86::VFNMADDPSr231mY },
-    { X86::VFNMADDPDr132mY, X86::VFNMADDPDr213mY, X86::VFNMADDPDr231mY },
-
-    { X86::VFNMSUBSSr132r,  X86::VFNMSUBSSr213r,  X86::VFNMSUBSSr231r  },
-    { X86::VFNMSUBSDr132r,  X86::VFNMSUBSDr213r,  X86::VFNMSUBSDr231r  },
-    { X86::VFNMSUBPSr132r,  X86::VFNMSUBPSr213r,  X86::VFNMSUBPSr231r  },
-    { X86::VFNMSUBPDr132r,  X86::VFNMSUBPDr213r,  X86::VFNMSUBPDr231r  },
-    { X86::VFNMSUBPSr132rY, X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr231rY },
-    { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr231rY },
-    { X86::VFNMSUBSSr132m,  X86::VFNMSUBSSr213m,  X86::VFNMSUBSSr231m  },
-    { X86::VFNMSUBSDr132m,  X86::VFNMSUBSDr213m,  X86::VFNMSUBSDr231m  },
-    { X86::VFNMSUBPSr132m,  X86::VFNMSUBPSr213m,  X86::VFNMSUBPSr231m  },
-    { X86::VFNMSUBPDr132m,  X86::VFNMSUBPDr213m,  X86::VFNMSUBPDr231m  },
-    { X86::VFNMSUBPSr132mY, X86::VFNMSUBPSr213mY, X86::VFNMSUBPSr231mY },
-    { X86::VFNMSUBPDr132mY, X86::VFNMSUBPDr213mY, X86::VFNMSUBPDr231mY },
-
-    { X86::VFMADDSUBPSr132r,  X86::VFMADDSUBPSr213r,  X86::VFMADDSUBPSr231r  },
-    { X86::VFMADDSUBPDr132r,  X86::VFMADDSUBPDr213r,  X86::VFMADDSUBPDr231r  },
-    { X86::VFMADDSUBPSr132rY, X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr231rY },
-    { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr231rY },
-    { X86::VFMADDSUBPSr132m,  X86::VFMADDSUBPSr213m,  X86::VFMADDSUBPSr231m  },
-    { X86::VFMADDSUBPDr132m,  X86::VFMADDSUBPDr213m,  X86::VFMADDSUBPDr231m  },
-    { X86::VFMADDSUBPSr132mY, X86::VFMADDSUBPSr213mY, X86::VFMADDSUBPSr231mY },
-    { X86::VFMADDSUBPDr132mY, X86::VFMADDSUBPDr213mY, X86::VFMADDSUBPDr231mY },
-
-    { X86::VFMSUBADDPSr132r,  X86::VFMSUBADDPSr213r,  X86::VFMSUBADDPSr231r  },
-    { X86::VFMSUBADDPDr132r,  X86::VFMSUBADDPDr213r,  X86::VFMSUBADDPDr231r  },
-    { X86::VFMSUBADDPSr132rY, X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr231rY },
-    { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr231rY },
-    { X86::VFMSUBADDPSr132m,  X86::VFMSUBADDPSr213m,  X86::VFMSUBADDPSr231m  },
-    { X86::VFMSUBADDPDr132m,  X86::VFMSUBADDPDr213m,  X86::VFMSUBADDPDr231m  },
-    { X86::VFMSUBADDPSr132mY, X86::VFMSUBADDPSr213mY, X86::VFMSUBADDPSr231mY },
-    { X86::VFMSUBADDPDr132mY, X86::VFMSUBADDPDr213mY, X86::VFMSUBADDPDr231mY }
+    FMA3_TABLE_ALL(VFMADD)
+    FMA3_TABLE_ALL(VFMSUB)
+    FMA3_TABLE_ALL(VFNMADD)
+    FMA3_TABLE_ALL(VFNMSUB)
+    FMA3_TABLE_PACKED(VFMADDSUB)
+    FMA3_TABLE_PACKED(VFMSUBADD)
+
+    // AVX-512 instructions
+    FMA3_TABLE_ALL_AVX512(VFMADD)
+    FMA3_TABLE_ALL_AVX512(VFMSUB)
+    FMA3_TABLE_ALL_AVX512(VFNMADD)
+    FMA3_TABLE_ALL_AVX512(VFNMSUB)
+    FMA3_TABLE_PACKED_AVX512(VFMADDSUB)
+    FMA3_TABLE_PACKED_AVX512(VFMSUBADD)
   };
 
   // Define the array that holds FMA*_Int opcodes in groups
   // of 3 opcodes(132, 213, 231) in each group.
   static const uint16_t IntrinOpcodeGroups[][3] = {
-    { X86::VFMADDSSr132r_Int,  X86::VFMADDSSr213r_Int,  X86::VFMADDSSr231r_Int },
-    { X86::VFMADDSDr132r_Int,  X86::VFMADDSDr213r_Int,  X86::VFMADDSDr231r_Int },
-    { X86::VFMADDSSr132m_Int,  X86::VFMADDSSr213m_Int,  X86::VFMADDSSr231m_Int },
-    { X86::VFMADDSDr132m_Int,  X86::VFMADDSDr213m_Int,  X86::VFMADDSDr231m_Int },
-
-    { X86::VFMSUBSSr132r_Int,  X86::VFMSUBSSr213r_Int,  X86::VFMSUBSSr231r_Int },
-    { X86::VFMSUBSDr132r_Int,  X86::VFMSUBSDr213r_Int,  X86::VFMSUBSDr231r_Int },
-    { X86::VFMSUBSSr132m_Int,  X86::VFMSUBSSr213m_Int,  X86::VFMSUBSSr231m_Int },
-    { X86::VFMSUBSDr132m_Int,  X86::VFMSUBSDr213m_Int,  X86::VFMSUBSDr231m_Int },
-
-    { X86::VFNMADDSSr132r_Int, X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr231r_Int },
-    { X86::VFNMADDSDr132r_Int, X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr231r_Int },
-    { X86::VFNMADDSSr132m_Int, X86::VFNMADDSSr213m_Int, X86::VFNMADDSSr231m_Int },
-    { X86::VFNMADDSDr132m_Int, X86::VFNMADDSDr213m_Int, X86::VFNMADDSDr231m_Int },
-
-    { X86::VFNMSUBSSr132r_Int, X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr231r_Int },
-    { X86::VFNMSUBSDr132r_Int, X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr231r_Int },
-    { X86::VFNMSUBSSr132m_Int, X86::VFNMSUBSSr213m_Int, X86::VFNMSUBSSr231m_Int },
-    { X86::VFNMSUBSDr132m_Int, X86::VFNMSUBSDr213m_Int, X86::VFNMSUBSDr231m_Int },
+    FMA3_TABLE_SCALAR_INT(VFMADD)
+    FMA3_TABLE_SCALAR_INT(VFMSUB)
+    FMA3_TABLE_SCALAR_INT(VFNMADD)
+    FMA3_TABLE_SCALAR_INT(VFNMSUB)
+
+    // AVX-512 instructions
+    FMA3_TABLE_SCALAR_INT_AVX512(VFMADD)
+    FMA3_TABLE_SCALAR_INT_AVX512(VFMSUB)
+    FMA3_TABLE_SCALAR_INT_AVX512(VFNMADD)
+    FMA3_TABLE_SCALAR_INT_AVX512(VFNMSUB)
   };
 
+#undef FMA3_ENTRY
+#undef FMA3_SCALAR_PAIR
+#undef FMA3_PACKED_PAIR
+#undef FMA3_PACKED_SIZES
+#undef FMA3_TABLE_ALL
+#undef FMA3_TABLE_PACKED
+#undef FMA3_TABLE_SCALAR_INT
+#undef FMA3_SCALAR_PAIR_AVX512
+#undef FMA3_PACKED_PAIR_AVX512
+#undef FMA3_PACKED_SIZES_AVX512
+#undef FMA3_TABLE_ALL_AVX512
+#undef FMA3_TABLE_PACKED_AVX512
+#undef FMA3_TABLE_SCALAR_INT_AVX512
+
   const unsigned Form132Index = 0;
   const unsigned Form213Index = 1;
   const unsigned Form231Index = 2;

Modified: llvm/trunk/test/CodeGen/X86/avx512-fma.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-fma.ll?rev=276521&r1=276520&r2=276521&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-fma.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-fma.ll Sat Jul 23 02:16:56 2016
@@ -67,34 +67,20 @@ define <8 x double> @test_x86_fmsub_pd_z
 }
 
 define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
-; KNL-LABEL: test_x86_fmsub_213:
-; KNL:       ## BB#0:
-; KNL-NEXT:    vfmsub213sd %xmm2, %xmm0, %xmm1
-; KNL-NEXT:    vmovaps %zmm1, %zmm0
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: test_x86_fmsub_213:
-; SKX:       ## BB#0:
-; SKX-NEXT:    vfmsub213sd %xmm2, %xmm0, %xmm1
-; SKX-NEXT:    vmovaps %xmm1, %xmm0
-; SKX-NEXT:    retq
+; ALL-LABEL: test_x86_fmsub_213:
+; ALL:       ## BB#0:
+; ALL-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0
+; ALL-NEXT:    retq
   %x = fmul double %a0, %a1
   %res = fsub double %x, %a2
   ret double %res
 }
 
 define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) {
-; KNL-LABEL: test_x86_fmsub_213_m:
-; KNL:       ## BB#0:
-; KNL-NEXT:    vfmsub213sd (%rdi), %xmm0, %xmm1
-; KNL-NEXT:    vmovaps %zmm1, %zmm0
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: test_x86_fmsub_213_m:
-; SKX:       ## BB#0:
-; SKX-NEXT:    vfmsub213sd (%rdi), %xmm0, %xmm1
-; SKX-NEXT:    vmovaps %xmm1, %xmm0
-; SKX-NEXT:    retq
+; ALL-LABEL: test_x86_fmsub_213_m:
+; ALL:       ## BB#0:
+; ALL-NEXT:    vfmsub213sd (%rdi), %xmm1, %xmm0
+; ALL-NEXT:    retq
   %a2 = load double , double *%a2_ptr
   %x = fmul double %a0, %a1
   %res = fsub double %x, %a2
@@ -102,17 +88,10 @@ define double @test_x86_fmsub_213_m(doub
 }
 
 define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) {
-; KNL-LABEL: test_x86_fmsub_231_m:
-; KNL:       ## BB#0:
-; KNL-NEXT:    vfmsub231sd (%rdi), %xmm0, %xmm1
-; KNL-NEXT:    vmovaps %zmm1, %zmm0
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: test_x86_fmsub_231_m:
-; SKX:       ## BB#0:
-; SKX-NEXT:    vfmsub231sd (%rdi), %xmm0, %xmm1
-; SKX-NEXT:    vmovaps %xmm1, %xmm0
-; SKX-NEXT:    retq
+; ALL-LABEL: test_x86_fmsub_231_m:
+; ALL:       ## BB#0:
+; ALL-NEXT:    vfmsub132sd (%rdi), %xmm1, %xmm0
+; ALL-NEXT:    retq
   %a2 = load double , double *%a2_ptr
   %x = fmul double %a0, %a2
   %res = fsub double %x, %a1

Modified: llvm/trunk/test/CodeGen/X86/fma_patterns.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma_patterns.ll?rev=276521&r1=276520&r2=276521&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma_patterns.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma_patterns.ll Sat Jul 23 02:16:56 2016
@@ -1,5 +1,5 @@
-; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=FMA4
@@ -22,8 +22,7 @@ define float @test_f32_fmadd(float %a0,
 ;
 ; AVX512-LABEL: test_f32_fmadd:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vfmadd213ss %xmm2, %xmm0, %xmm1
-; AVX512-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
   %x = fmul float %a0, %a1
   %res = fadd float %x, %a2
@@ -83,8 +82,7 @@ define double @test_f64_fmadd(double %a0
 ;
 ; AVX512-LABEL: test_f64_fmadd:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vfmadd213sd %xmm2, %xmm0, %xmm1
-; AVX512-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
   %x = fmul double %a0, %a1
   %res = fadd double %x, %a2
@@ -148,8 +146,7 @@ define float @test_f32_fmsub(float %a0,
 ;
 ; AVX512-LABEL: test_f32_fmsub:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vfmsub213ss %xmm2, %xmm0, %xmm1
-; AVX512-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512-NEXT:    vfmsub213ss %xmm2, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
   %x = fmul float %a0, %a1
   %res = fsub float %x, %a2
@@ -209,8 +206,7 @@ define double @test_f64_fmsub(double %a0
 ;
 ; AVX512-LABEL: test_f64_fmsub:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vfmsub213sd %xmm2, %xmm0, %xmm1
-; AVX512-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512-NEXT:    vfmsub213sd %xmm2, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
   %x = fmul double %a0, %a1
   %res = fsub double %x, %a2
@@ -274,8 +270,7 @@ define float @test_f32_fnmadd(float %a0,
 ;
 ; AVX512-LABEL: test_f32_fnmadd:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vfnmadd213ss %xmm2, %xmm0, %xmm1
-; AVX512-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512-NEXT:    vfnmadd213ss %xmm2, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
   %x = fmul float %a0, %a1
   %res = fsub float %a2, %x
@@ -335,8 +330,7 @@ define double @test_f64_fnmadd(double %a
 ;
 ; AVX512-LABEL: test_f64_fnmadd:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vfnmadd213sd %xmm2, %xmm0, %xmm1
-; AVX512-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512-NEXT:    vfnmadd213sd %xmm2, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
   %x = fmul double %a0, %a1
   %res = fsub double %a2, %x
@@ -400,8 +394,7 @@ define float @test_f32_fnmsub(float %a0,
 ;
 ; AVX512-LABEL: test_f32_fnmsub:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vfnmsub213ss %xmm2, %xmm0, %xmm1
-; AVX512-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512-NEXT:    vfnmsub213ss %xmm2, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
   %x = fmul float %a0, %a1
   %y = fsub float -0.000000e+00, %x
@@ -464,8 +457,7 @@ define double @test_f64_fnmsub(double %a
 ;
 ; AVX512-LABEL: test_f64_fnmsub:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vfnmsub213sd %xmm2, %xmm0, %xmm1
-; AVX512-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
   %x = fmul double %a0, %a1
   %y = fsub double -0.000000e+00, %x
@@ -533,8 +525,7 @@ define <4 x float> @test_4f32_fmadd_load
 ; AVX512-LABEL: test_4f32_fmadd_load:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vmovaps (%rdi), %xmm2
-; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm0, %xmm2
-; AVX512-NEXT:    vmovaps %xmm2, %xmm0
+; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %x = load <4 x float>, <4 x float>* %a0
   %y = fmul <4 x float> %x, %a1
@@ -556,8 +547,7 @@ define <2 x double> @test_2f64_fmsub_loa
 ; AVX512-LABEL: test_2f64_fmsub_load:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vmovapd (%rdi), %xmm2
-; AVX512-NEXT:    vfmsub213pd %xmm1, %xmm0, %xmm2
-; AVX512-NEXT:    vmovaps %xmm2, %xmm0
+; AVX512-NEXT:    vfmsub213pd %xmm1, %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %x = load <2 x double>, <2 x double>* %a0
   %y = fmul <2 x double> %x, %a1
@@ -829,8 +819,7 @@ define float @test_f32_interp(float %x,
 ; AVX512-LABEL: test_f32_interp:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vfnmadd213ss %xmm1, %xmm2, %xmm1
-; AVX512-NEXT:    vfmadd213ss %xmm1, %xmm0, %xmm2
-; AVX512-NEXT:    vmovaps %xmm2, %xmm0
+; AVX512-NEXT:    vfmadd213ss %xmm1, %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %t1 = fsub float 1.0, %t
   %tx = fmul float %x, %t
@@ -854,9 +843,8 @@ define <4 x float> @test_v4f32_interp(<4
 ;
 ; AVX512-LABEL: test_v4f32_interp:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vmovaps %xmm2, %xmm3
-; AVX512-NEXT:    vfnmadd213ps %xmm1, %xmm1, %xmm3
-; AVX512-NEXT:    vfmadd213ps %xmm3, %xmm2, %xmm0
+; AVX512-NEXT:    vfnmadd213ps %xmm1, %xmm2, %xmm1
+; AVX512-NEXT:    vfmadd213ps %xmm1, %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %t1 = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t
   %tx = fmul <4 x float> %x, %t
@@ -880,9 +868,8 @@ define <8 x float> @test_v8f32_interp(<8
 ;
 ; AVX512-LABEL: test_v8f32_interp:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vmovaps %ymm2, %ymm3
-; AVX512-NEXT:    vfnmadd213ps %ymm1, %ymm1, %ymm3
-; AVX512-NEXT:    vfmadd213ps %ymm3, %ymm2, %ymm0
+; AVX512-NEXT:    vfnmadd213ps %ymm1, %ymm2, %ymm1
+; AVX512-NEXT:    vfmadd213ps %ymm1, %ymm2, %ymm0
 ; AVX512-NEXT:    retq
   %t1 = fsub <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
   %tx = fmul <8 x float> %x, %t
@@ -907,8 +894,7 @@ define double @test_f64_interp(double %x
 ; AVX512-LABEL: test_f64_interp:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vfnmadd213sd %xmm1, %xmm2, %xmm1
-; AVX512-NEXT:    vfmadd213sd %xmm1, %xmm0, %xmm2
-; AVX512-NEXT:    vmovaps %xmm2, %xmm0
+; AVX512-NEXT:    vfmadd213sd %xmm1, %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %t1 = fsub double 1.0, %t
   %tx = fmul double %x, %t
@@ -932,9 +918,8 @@ define <2 x double> @test_v2f64_interp(<
 ;
 ; AVX512-LABEL: test_v2f64_interp:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vmovaps %xmm2, %xmm3
-; AVX512-NEXT:    vfnmadd213pd %xmm1, %xmm1, %xmm3
-; AVX512-NEXT:    vfmadd213pd %xmm3, %xmm2, %xmm0
+; AVX512-NEXT:    vfnmadd213pd %xmm1, %xmm2, %xmm1
+; AVX512-NEXT:    vfmadd213pd %xmm1, %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %t1 = fsub <2 x double> <double 1.0, double 1.0>, %t
   %tx = fmul <2 x double> %x, %t
@@ -958,9 +943,8 @@ define <4 x double> @test_v4f64_interp(<
 ;
 ; AVX512-LABEL: test_v4f64_interp:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vmovaps %ymm2, %ymm3
-; AVX512-NEXT:    vfnmadd213pd %ymm1, %ymm1, %ymm3
-; AVX512-NEXT:    vfmadd213pd %ymm3, %ymm2, %ymm0
+; AVX512-NEXT:    vfnmadd213pd %ymm1, %ymm2, %ymm1
+; AVX512-NEXT:    vfmadd213pd %ymm1, %ymm2, %ymm0
 ; AVX512-NEXT:    retq
   %t1 = fsub <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t
   %tx = fmul <4 x double> %x, %t
@@ -1101,8 +1085,7 @@ define <4 x float> @test_v4f32_fma_fmul_
 ;
 ; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vfmadd231ps {{.*}}(%rip), %xmm0, %xmm1
-; AVX512-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512-NEXT:    vfmadd132ps {{.*}}(%rip), %xmm1, %xmm0
 ; AVX512-NEXT:    retq
   %m0 = fmul <4 x float> %x,  <float 1.0, float 2.0, float 3.0, float 4.0>
   %m1 = fmul <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0>
@@ -1128,8 +1111,7 @@ define double @test_f64_fneg_fmul(double
 ; AVX512-LABEL: test_f64_fneg_fmul:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vxorps %xmm2, %xmm2, %xmm2
-; AVX512-NEXT:    vfnmsub213sd %xmm2, %xmm0, %xmm1
-; AVX512-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512-NEXT:    vfnmsub213sd %xmm2, %xmm1, %xmm0
 ; AVX512-NEXT:    retq
   %m = fmul nsz double %x, %y
   %n = fsub double -0.0, %m

Modified: llvm/trunk/test/CodeGen/X86/fma_patterns_wide.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma_patterns_wide.ll?rev=276521&r1=276520&r2=276521&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma_patterns_wide.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma_patterns_wide.ll Sat Jul 23 02:16:56 2016
@@ -218,8 +218,7 @@ define <16 x float> @test_16f32_fmadd_lo
 ; AVX512-LABEL: test_16f32_fmadd_load:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vmovaps (%rdi), %zmm2
-; AVX512-NEXT:    vfmadd213ps %zmm1, %zmm0, %zmm2
-; AVX512-NEXT:    vmovaps %zmm2, %zmm0
+; AVX512-NEXT:    vfmadd213ps %zmm1, %zmm2, %zmm0
 ; AVX512-NEXT:    retq
   %x = load <16 x float>, <16 x float>* %a0
   %y = fmul <16 x float> %x, %a1
@@ -243,8 +242,7 @@ define <8 x double> @test_8f64_fmsub_loa
 ; AVX512-LABEL: test_8f64_fmsub_load:
 ; AVX512:       # BB#0:
 ; AVX512-NEXT:    vmovapd (%rdi), %zmm2
-; AVX512-NEXT:    vfmsub213pd %zmm1, %zmm0, %zmm2
-; AVX512-NEXT:    vmovapd %zmm2, %zmm0
+; AVX512-NEXT:    vfmsub213pd %zmm1, %zmm2, %zmm0
 ; AVX512-NEXT:    retq
   %x = load <8 x double>, <8 x double>* %a0
   %y = fmul <8 x double> %x, %a1
@@ -543,9 +541,8 @@ define <16 x float> @test_v16f32_interp(
 ;
 ; AVX512-LABEL: test_v16f32_interp:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vmovaps %zmm2, %zmm3
-; AVX512-NEXT:    vfnmadd213ps %zmm1, %zmm1, %zmm3
-; AVX512-NEXT:    vfmadd213ps %zmm3, %zmm2, %zmm0
+; AVX512-NEXT:    vfnmadd213ps %zmm1, %zmm2, %zmm1
+; AVX512-NEXT:    vfmadd213ps %zmm1, %zmm2, %zmm0
 ; AVX512-NEXT:    retq
   %t1 = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
   %tx = fmul <16 x float> %x, %t
@@ -573,9 +570,8 @@ define <8 x double> @test_v8f64_interp(<
 ;
 ; AVX512-LABEL: test_v8f64_interp:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vmovapd %zmm2, %zmm3
-; AVX512-NEXT:    vfnmadd213pd %zmm1, %zmm1, %zmm3
-; AVX512-NEXT:    vfmadd213pd %zmm3, %zmm2, %zmm0
+; AVX512-NEXT:    vfnmadd213pd %zmm1, %zmm2, %zmm1
+; AVX512-NEXT:    vfmadd213pd %zmm1, %zmm2, %zmm0
 ; AVX512-NEXT:    retq
   %t1 = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %t
   %tx = fmul <8 x double> %x, %t
@@ -728,8 +724,7 @@ define <16 x float> @test_v16f32_fma_fmu
 ;
 ; AVX512-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
 ; AVX512:       # BB#0:
-; AVX512-NEXT:    vfmadd231ps {{.*}}(%rip), %zmm0, %zmm1
-; AVX512-NEXT:    vmovaps %zmm1, %zmm0
+; AVX512-NEXT:    vfmadd132ps {{.*}}(%rip), %zmm1, %zmm0
 ; AVX512-NEXT:    retq
   %m0 = fmul <16 x float> %x,  <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>
   %m1 = fmul <16 x float> %m0, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>




More information about the llvm-commits mailing list