LGTM. Thanks, Elena. I'm very excited to see this landing.

-Jim

On Jul 24, 2013, at 4:02 AM, Elena Demikhovsky <elena.demikhovsky@intel.com> wrote:

Author: delena
Date: Wed Jul 24 06:02:47 2013
New Revision: 187030

URL: http://llvm.org/viewvc/llvm-project?rev=187030&view=rev
Log:
I'm starting to commit the KNL backend. I'll push the patches one by one. This patch includes support for the extended register set: XMM16-31, YMM16-31, and ZMM0-31.
You can see the full ISA here: http://software.intel.com/en-us/intel-isa-extensions

Modified:
   llvm/trunk/lib/Target/X86/X86.td
   llvm/trunk/lib/Target/X86/X86CallingConv.td
   llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
   llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp
   llvm/trunk/lib/Target/X86/X86RegisterInfo.h
   llvm/trunk/lib/Target/X86/X86RegisterInfo.td
   llvm/trunk/lib/Target/X86/X86Subtarget.h
   llvm/trunk/lib/Target/X86/X86VZeroUpper.cpp

Modified: llvm/trunk/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=187030&r1=187029&r2=187030&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86.td (original)
+++ llvm/trunk/lib/Target/X86/X86.td Wed Jul 24 06:02:47 2013
@@ -86,6 +86,16 @@ def FeatureAVX     : SubtargetFeature<"a
def FeatureAVX2    : SubtargetFeature<"avx2", "X86SSELevel", "AVX2",
                                      "Enable AVX2 instructions",
                                      [FeatureAVX]>;
+def FeatureAVX512   : SubtargetFeature<"avx-512", "X86SSELevel", "AVX512",
+                                      "Enable AVX-512 instructions",
+                                      [FeatureAVX2]>;
+def FeatureERI      : SubtargetFeature<"avx-512-eri", "HasERI", "true",
+                      "Enable AVX-512 Exponential and Reciprocal Instructions">;
+def FeatureCDI      : SubtargetFeature<"avx-512-cdi", "HasCDI", "true",
+                      "Enable AVX-512 Conflict Detection Instructions">;
+def FeaturePFI      : SubtargetFeature<"avx-512-pfi", "HasPFI", "true",
+                      "Enable AVX-512 PreFetch Instructions">;
+
def FeaturePCLMUL  : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
                        "Enable packed carry-less multiplication instructions",
                              [FeatureSSE2]>;
@@ -227,6 +237,15 @@ def : ProcessorModel<"core-avx2", Haswel
                      FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM,
                      FeatureHLE]>;

+// KNL
+// FIXME: define KNL model
+def : ProcessorModel<"knl", HaswellModel,
+                     [FeatureAVX512, FeatureERI, FeatureCDI, FeaturePFI,
+                      FeatureCMPXCHG16B, FeatureFastUAMem, FeaturePOPCNT,
+                      FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,
+                      FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,
+                      FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE]>;
+
def : Proc<"k6",              [FeatureMMX]>;
def : Proc<"k6-2",            [Feature3DNow]>;
def : Proc<"k6-3",            [Feature3DNow]>;
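
For readers skimming the feature definitions: FeatureAVX512 raises the ordered X86SSELevel (so it implies AVX2 and AVX), while FeatureERI/CDI/PFI set independent booleans. A standalone C++ sketch of that relationship follows; the enum and accessor names mirror this patch, but the surrounding scaffolding (struct, main) is illustrative only and is not LLVM code.

// Standalone sketch, not LLVM code: the ordered SSE-level enum makes
// hasAVX512() imply the earlier vector ISAs; ERI/CDI/PFI stay independent.
#include <cassert>

enum X86SSEEnum {
  NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512
};

struct SubtargetSketch {
  X86SSEEnum X86SSELevel;
  bool HasERI, HasCDI, HasPFI;

  bool hasAVX() const { return X86SSELevel >= AVX; }
  bool hasAVX2() const { return X86SSELevel >= AVX2; }
  bool hasAVX512() const { return X86SSELevel >= AVX512; }
};

int main() {
  // Roughly what the "knl" ProcessorModel above configures.
  SubtargetSketch KNL = {AVX512, true, true, true};
  assert(KNL.hasAVX512() && KNL.hasAVX2() && KNL.hasAVX()); // level is cumulative
  return 0;
}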

Modified: llvm/trunk/lib/Target/X86/X86CallingConv.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86CallingConv.td?rev=187030&r1=187029&r2=187030&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86CallingConv.td (original)
+++ llvm/trunk/lib/Target/X86/X86CallingConv.td Wed Jul 24 06:02:47 2013
@@ -49,6 +49,12 @@ def RetCC_X86Common : CallingConv<[
  CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
            CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,

+  // 512-bit vectors are returned in ZMM0 and ZMM1, when they fit. ZMM2 and ZMM3
+  // can only be used by ABI non-compliant code. This vector type is only
+  // supported while using the AVX-512 target feature.
+  CCIfType<[v16i32, v8i64, v16f32, v8f64],
+            CCAssignToReg<[ZMM0,ZMM1,ZMM2,ZMM3]>>,
+
  // MMX vector types are always returned in MM0. If the target doesn't have
  // MM0, it doesn't support these vector types.
  CCIfType<[x86mmx], CCAssignToReg<[MM0]>>,
@@ -99,6 +105,10 @@ def RetCC_Intel_OCL_BI : CallingConv<[
  CCIfType<[v8f32, v4f64, v8i32, v4i64],
            CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,

+  // 512-bit FP vectors
+  CCIfType<[v16f32, v8f64, v16i32, v8i64],
+            CCAssignToReg<[ZMM0,ZMM1,ZMM2,ZMM3]>>,
+
  // i32, i64 in the standard way
  CCDelegateTo<RetCC_X86Common>
]>;
@@ -213,10 +223,15 @@ def CC_X86_64_C : CallingConv<[
  // fixed arguments to vararg functions are supposed to be passed in
  // registers.  Actually modeling that would be a lot of work, though.
  CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
-                          CCIfSubtarget<"hasAVX()",
+                          CCIfSubtarget<"hasFp256()",
                          CCAssignToReg<[YMM0, YMM1, YMM2, YMM3,
                                         YMM4, YMM5, YMM6, YMM7]>>>>,

+  // The first 8 512-bit vector arguments are passed in ZMM registers.
+  CCIfNotVarArg<CCIfType<[v16i32, v8i64, v16f32, v8f64],
+            CCIfSubtarget<"hasAVX512()",
+            CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7]>>>>,
+
  // Integer/FP values get stored in stack slots that are 8 bytes in size and
  // 8-byte aligned if there are no more registers to hold them.
  CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
@@ -230,7 +245,11 @@ def CC_X86_64_C : CallingConv<[

  // 256-bit vectors get 32-byte stack slots that are 32-byte aligned.
  CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
-           CCAssignToStack<32, 32>>
+           CCAssignToStack<32, 32>>,
+
+  // 512-bit vectors get 64-byte stack slots that are 64-byte aligned.
+  CCIfType<[v16i32, v8i64, v16f32, v8f64],
+           CCAssignToStack<64, 64>>
]>;

// Calling convention used on Win64
@@ -251,6 +270,9 @@ def CC_X86_Win64_C : CallingConv<[
  // 256 bit vectors are passed by pointer
  CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64], CCPassIndirect<i64>>,

+  // 512 bit vectors are passed by pointer
+  CCIfType<[v16i32, v16f32, v8f64, v8i64], CCPassIndirect<i64>>,
+
  // The first 4 MMX vector arguments are passed in GPRs.
  CCIfType<[x86mmx], CCBitConvertToType<i64>>,

@@ -345,7 +367,7 @@ def CC_X86_32_Common : CallingConv<[

  // The first 4 AVX 256-bit vector arguments are passed in YMM registers.
  CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
-                CCIfSubtarget<"hasAVX()",
+                CCIfSubtarget<"hasFp256()",
                CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>>>,

  // Other SSE vectors get 16-byte stack slots that are 16-byte aligned.
@@ -469,6 +491,10 @@ def CC_Intel_OCL_BI : CallingConv<[
  CCIfType<[v8f32, v4f64, v8i32, v4i64],
           CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>,

+  // The 512-bit vector arguments are passed in ZMM registers.
+  CCIfType<[v16f32, v8f64, v16i32, v8i64],
+           CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3]>>,
+
  CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
  CCIfSubtarget<"is64Bit()",       CCDelegateTo<CC_X86_64_C>>,
  CCDelegateTo<CC_X86_32_C>
@@ -535,6 +561,10 @@ def CSR_Win64_Intel_OCL_BI_AVX : CalleeS
                                                 R13, R14, R15,
                                                 (sequence "YMM%u", 6, 15))>;

+def CSR_Win64_Intel_OCL_BI_AVX512 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI,
+                                                     R12, R13, R14, R15,
+                                                     (sequence "ZMM%u", 6, 21),
+                                                     K4, K5, K6, K7)>;
//Standard C + XMM 8-15
def CSR_64_Intel_OCL_BI       : CalleeSavedRegs<(add CSR_64,
                                                 (sequence "XMM%u", 8, 15))>;
@@ -542,3 +572,7 @@ def CSR_64_Intel_OCL_BI       : CalleeSa
//Standard C + YMM 8-15
def CSR_64_Intel_OCL_BI_AVX    : CalleeSavedRegs<(add CSR_64,
                                                  (sequence "YMM%u", 8, 15))>;
+
+def CSR_64_Intel_OCL_BI_AVX512    : CalleeSavedRegs<(add CSR_64,
+                                                  (sequence "ZMM%u", 16, 31),
+                                                  K4, K5, K6, K7)>;
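
For a rough picture of what the new calling-convention entries mean for user code: the function below is illustrative only, not part of the patch, and assumes a compiler that provides the AVX-512 intrinsics in <immintrin.h>. It takes and returns 512-bit v16f32 values, which the added CCIfType rules place in ZMM registers (ZMM0-ZMM7 for the first eight arguments, ZMM0/ZMM1 for return values) once the AVX-512 feature is available.

#include <immintrin.h>

// a*x + y over one 512-bit lane: the __m512 parameters and the return value
// are exactly the v16f32 case handled by the new ZMM assignment rules.
__m512 axpy512(__m512 x, __m512 y, float a) {
  return _mm512_add_ps(_mm512_mul_ps(_mm512_set1_ps(a), x), y);
}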

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=187030&r1=187029&r2=187030&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Jul 24 06:02:47 2013
@@ -18595,6 +18595,11 @@ X86TargetLowering::getRegForInlineAsmCon
      case MVT::v8f32:
      case MVT::v4f64:
        return std::make_pair(0U, &X86::VR256RegClass);
+      case MVT::v8f64:
+      case MVT::v16f32:
+      case MVT::v16i32:
+      case MVT::v8i64:
+        return std::make_pair(0U, &X86::VR512RegClass);
      }
      break;
    }
@@ -18705,7 +18710,13 @@ X86TargetLowering::getRegForInlineAsmCon
    }
  } else if (Res.second == &X86::FR32RegClass ||
             Res.second == &X86::FR64RegClass ||
-             Res.second == &X86::VR128RegClass) {
+             Res.second == &X86::VR128RegClass ||
+             Res.second == &X86::VR256RegClass ||
+             Res.second == &X86::FR32XRegClass ||
+             Res.second == &X86::FR64XRegClass ||
+             Res.second == &X86::VR128XRegClass ||
+             Res.second == &X86::VR256XRegClass ||
+             Res.second == &X86::VR512RegClass) {
    // Handle references to XMM physical registers that got mapped into the
    // wrong class.  This can happen with constraints like {xmm0} where the
    // target independent register mapper will just pick the first match it can
@@ -18719,6 +18730,8 @@ X86TargetLowering::getRegForInlineAsmCon
      Res.second = &X86::VR128RegClass;
    else if (X86::VR256RegClass.hasType(VT))
      Res.second = &X86::VR256RegClass;
+    else if (X86::VR512RegClass.hasType(VT))
+      Res.second = &X86::VR512RegClass;
  }

  return Res;

Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp?rev=187030&r1=187029&r2=187030&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86RegisterInfo.cpp Wed Jul 24 06:02:47 2013
@@ -241,6 +241,11 @@ X86RegisterInfo::getCalleeSavedRegs(cons

  case CallingConv::Intel_OCL_BI: {
    bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+    bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512();
+    if (HasAVX512 && IsWin64)
+      return CSR_Win64_Intel_OCL_BI_AVX512_SaveList;
+    if (HasAVX512 && Is64Bit)
+      return CSR_64_Intel_OCL_BI_AVX512_SaveList;
    if (HasAVX && IsWin64)
      return CSR_Win64_Intel_OCL_BI_AVX_SaveList;
    if (HasAVX && Is64Bit)
@@ -275,8 +280,13 @@ X86RegisterInfo::getCalleeSavedRegs(cons
const uint32_t*
X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
  bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
+  bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512();

  if (CC == CallingConv::Intel_OCL_BI) {
+    if (IsWin64 && HasAVX512)
+      return CSR_Win64_Intel_OCL_BI_AVX512_RegMask;
+    if (Is64Bit && HasAVX512)
+      return CSR_64_Intel_OCL_BI_AVX512_RegMask;
    if (IsWin64 && HasAVX)
      return CSR_Win64_Intel_OCL_BI_AVX_RegMask;
    if (Is64Bit && HasAVX)
@@ -380,6 +390,12 @@ BitVector X86RegisterInfo::getReservedRe
        Reserved.set(*AI);
    }
  }
+  if (!Is64Bit || !TM.getSubtarget<X86Subtarget>().hasAVX512()) {
+    for (unsigned n = 16; n != 32; ++n) {
+      for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid(); ++AI)
+        Reserved.set(*AI);
+    }
+  }

  return Reserved;
}
@@ -690,4 +706,16 @@ unsigned getX86SubSuperRegister(unsigned
    }
  }
}
+
+unsigned get512BitSuperRegister(unsigned Reg) {
+  if (Reg >= X86::XMM0 && Reg <= X86::XMM31)
+    return X86::ZMM0 + (Reg - X86::XMM0);
+  if (Reg >= X86::YMM0 && Reg <= X86::YMM31)
+    return X86::ZMM0 + (Reg - X86::YMM0);
+  if (Reg >= X86::ZMM0 && Reg <= X86::ZMM31)
+    return Reg;
+  llvm_unreachable("Unexpected SIMD register");
+  return 0;
+}
+
}
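
To make the new helper's contract concrete, here is a small standalone sketch (not LLVM code; the register numbers below are invented, and only the contiguous per-bank layout the helper relies on is modeled) of the mapping get512BitSuperRegister implements: XMMi, YMMi and ZMMi all resolve to ZMMi.

#include <cassert>

namespace sketch {

// Stand-ins for the llvm::X86 register enumerators; the real values differ.
enum Reg { XMM0 = 0, YMM0 = 32, ZMM0 = 64, ZMM31 = 95 };

unsigned get512BitSuperRegister(unsigned Reg) {
  if (Reg < YMM0)                      // XMM0..XMM31
    return ZMM0 + (Reg - XMM0);
  if (Reg < ZMM0)                      // YMM0..YMM31
    return ZMM0 + (Reg - YMM0);
  assert(Reg <= ZMM31 && "Unexpected SIMD register");
  return Reg;                          // already a 512-bit register
}

} // namespace sketch

int main() {
  assert(sketch::get512BitSuperRegister(sketch::XMM0 + 5)  == sketch::ZMM0 + 5);
  assert(sketch::get512BitSuperRegister(sketch::YMM0 + 20) == sketch::ZMM0 + 20);
  assert(sketch::get512BitSuperRegister(sketch::ZMM0 + 31) == sketch::ZMM0 + 31);
  return 0;
}

The getReservedRegs() change above leans on the same aliasing structure: when the target is 32-bit or lacks AVX-512, XMM16-31 and, via the alias iterator, their YMM/ZMM super-registers are marked reserved.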

Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.h?rev=187030&r1=187029&r2=187030&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86RegisterInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86RegisterInfo.h Wed Jul 24 06:02:47 2013
@@ -137,6 +137,9 @@ public:
// e.g. getX86SubSuperRegister(X86::EAX, MVT::i16) return X86:AX
unsigned getX86SubSuperRegister(unsigned, MVT::SimpleValueType, bool High=false);

+//get512BitRegister - X86 utility - returns 512-bit super register
+unsigned get512BitSuperRegister(unsigned Reg);
+
} // End llvm namespace

#endif

Modified: llvm/trunk/lib/Target/X86/X86RegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86RegisterInfo.td?rev=187030&r1=187029&r2=187030&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86RegisterInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86RegisterInfo.td Wed Jul 24 06:02:47 2013
@@ -26,6 +26,7 @@ let Namespace = "X86" in {
  def sub_16bit   : SubRegIndex<16>;
  def sub_32bit   : SubRegIndex<32>;
  def sub_xmm     : SubRegIndex<128>;
+  def sub_ymm     : SubRegIndex<256>;
}

//===----------------------------------------------------------------------===//
@@ -186,27 +187,52 @@ def XMM12: X86Reg<"xmm12", 12>, DwarfReg
def XMM13: X86Reg<"xmm13", 13>, DwarfRegNum<[30, -2, -2]>;
def XMM14: X86Reg<"xmm14", 14>, DwarfRegNum<[31, -2, -2]>;
def XMM15: X86Reg<"xmm15", 15>, DwarfRegNum<[32, -2, -2]>;
+
+def XMM16:  X86Reg<"xmm16", 16>, DwarfRegNum<[60, -2, -2]>;
+def XMM17:  X86Reg<"xmm17", 17>, DwarfRegNum<[61, -2, -2]>;
+def XMM18:  X86Reg<"xmm18", 18>, DwarfRegNum<[62, -2, -2]>;
+def XMM19:  X86Reg<"xmm19", 19>, DwarfRegNum<[63, -2, -2]>;
+def XMM20:  X86Reg<"xmm20", 20>, DwarfRegNum<[64, -2, -2]>;
+def XMM21:  X86Reg<"xmm21", 21>, DwarfRegNum<[65, -2, -2]>;
+def XMM22:  X86Reg<"xmm22", 22>, DwarfRegNum<[66, -2, -2]>;
+def XMM23:  X86Reg<"xmm23", 23>, DwarfRegNum<[67, -2, -2]>;
+def XMM24:  X86Reg<"xmm24", 24>, DwarfRegNum<[68, -2, -2]>;
+def XMM25:  X86Reg<"xmm25", 25>, DwarfRegNum<[69, -2, -2]>;
+def XMM26:  X86Reg<"xmm26", 26>, DwarfRegNum<[70, -2, -2]>;
+def XMM27:  X86Reg<"xmm27", 27>, DwarfRegNum<[71, -2, -2]>;
+def XMM28:  X86Reg<"xmm28", 28>, DwarfRegNum<[72, -2, -2]>;
+def XMM29:  X86Reg<"xmm29", 29>, DwarfRegNum<[73, -2, -2]>;
+def XMM30:  X86Reg<"xmm30", 30>, DwarfRegNum<[74, -2, -2]>;
+def XMM31:  X86Reg<"xmm31", 31>, DwarfRegNum<[75, -2, -2]>;
+
} // CostPerUse

-// YMM Registers, used by AVX instructions
+// YMM0-15 registers, used by AVX instructions and
+// YMM16-31 registers, used by AVX-512 instructions.
let SubRegIndices = [sub_xmm] in {
-def YMM0:  X86Reg<"ymm0",   0, [XMM0]>,  DwarfRegAlias<XMM0>;
-def YMM1:  X86Reg<"ymm1",   1, [XMM1]>,  DwarfRegAlias<XMM1>;
-def YMM2:  X86Reg<"ymm2",   2, [XMM2]>,  DwarfRegAlias<XMM2>;
-def YMM3:  X86Reg<"ymm3",   3, [XMM3]>,  DwarfRegAlias<XMM3>;
-def YMM4:  X86Reg<"ymm4",   4, [XMM4]>,  DwarfRegAlias<XMM4>;
-def YMM5:  X86Reg<"ymm5",   5, [XMM5]>,  DwarfRegAlias<XMM5>;
-def YMM6:  X86Reg<"ymm6",   6, [XMM6]>,  DwarfRegAlias<XMM6>;
-def YMM7:  X86Reg<"ymm7",   7, [XMM7]>,  DwarfRegAlias<XMM7>;
-def YMM8:  X86Reg<"ymm8",   8, [XMM8]>,  DwarfRegAlias<XMM8>;
-def YMM9:  X86Reg<"ymm9",   9, [XMM9]>,  DwarfRegAlias<XMM9>;
-def YMM10: X86Reg<"ymm10", 10, [XMM10]>, DwarfRegAlias<XMM10>;
-def YMM11: X86Reg<"ymm11", 11, [XMM11]>, DwarfRegAlias<XMM11>;
-def YMM12: X86Reg<"ymm12", 12, [XMM12]>, DwarfRegAlias<XMM12>;
-def YMM13: X86Reg<"ymm13", 13, [XMM13]>, DwarfRegAlias<XMM13>;
-def YMM14: X86Reg<"ymm14", 14, [XMM14]>, DwarfRegAlias<XMM14>;
-def YMM15: X86Reg<"ymm15", 15, [XMM15]>, DwarfRegAlias<XMM15>;
-}
+  foreach  Index = 0-31 in {
+    def YMM#Index : X86Reg<"ymm"#Index, Index, [!cast<X86Reg>("XMM"#Index)]>,
+                    DwarfRegAlias<!cast<X86Reg>("XMM"#Index)>;
+  }
+}
+
+// ZMM Registers, used by AVX-512 instructions.
+let SubRegIndices = [sub_ymm] in {
+  foreach  Index = 0-31 in {
+    def ZMM#Index : X86Reg<"zmm"#Index, Index, [!cast<X86Reg>("YMM"#Index)]>,
+                    DwarfRegAlias<!cast<X86Reg>("XMM"#Index)>;
+  }
+}
+
+  // Mask Registers, used by AVX-512 instructions.
+  def K0 : X86Reg<"k0", 0>, DwarfRegNum<[118, -2, -2]>;
+  def K1 : X86Reg<"k1", 1>, DwarfRegNum<[119, -2, -2]>;
+  def K2 : X86Reg<"k2", 2>, DwarfRegNum<[120, -2, -2]>;
+  def K3 : X86Reg<"k3", 3>, DwarfRegNum<[121, -2, -2]>;
+  def K4 : X86Reg<"k4", 4>, DwarfRegNum<[122, -2, -2]>;
+  def K5 : X86Reg<"k5", 5>, DwarfRegNum<[123, -2, -2]>;
+  def K6 : X86Reg<"k6", 6>, DwarfRegNum<[124, -2, -2]>;
+  def K7 : X86Reg<"k7", 7>, DwarfRegNum<[125, -2, -2]>;

class STRegister<string n, bits<16> Enc, list<Register> A> : X86Reg<n, Enc> {
  let Aliases = A;
@@ -421,3 +447,25 @@ def FPCCR : RegisterClass<"X86", [i16],
  let CopyCost = -1;  // Don't allow copying of status registers.
  let isAllocatable = 0;
}
+
+// AVX-512 vector/mask registers.
+def VR512 : RegisterClass<"X86", [v16f32, v8f64, v16i32, v8i64], 512,
+    (sequence "ZMM%u", 0, 31)>;
+
+// Scalar AVX-512 floating point registers.
+def FR32X : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 31)>;
+
+def FR64X : RegisterClass<"X86", [f64], 64, (add FR32X)>;
+
+// Extended VR128 and VR256 for AVX-512 instructions
+def VR128X : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+                          128, (add FR32X)>;
+def VR256X : RegisterClass<"X86", [v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+                          256, (sequence "YMM%u", 0, 31)>;
+
+def VK8     : RegisterClass<"X86", [v8i1],   8, (sequence "K%u", 0, 7)>;
+def VK16    : RegisterClass<"X86", [v16i1], 16, (add VK8)>;
+
+def VK8WM   : RegisterClass<"X86", [v8i1],   8, (sub VK8, K0)>;
+def VK16WM  : RegisterClass<"X86", [v16i1], 16, (add VK8WM)>;
+
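
The K0-K7 definitions and the VK8/VK16 classes model the AVX-512 mask registers; the VK8WM/VK16WM variants drop K0 because the instruction encoding treats mask register 0 as "no write mask", so K0 cannot be used for masking. As a hedged illustration of how these registers surface in user code (not part of the patch; it assumes a compiler that provides the AVX-512 intrinsics in <immintrin.h>):

#include <immintrin.h>

// Masked add: lanes whose mask bit is clear keep the corresponding value from
// 'src'. The 16-bit mask ends up in one of k1..k7 (a write-mask register in
// the terms of this patch), never in k0.
__m512 masked_add(__m512 src, __mmask16 m, __m512 a, __m512 b) {
  return _mm512_mask_add_ps(src, m, a, b);
}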

Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=187030&r1=187029&r2=187030&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h Wed Jul 24 06:02:47 2013
@@ -42,7 +42,7 @@ enum Style {
class X86Subtarget : public X86GenSubtargetInfo {
protected:
  enum X86SSEEnum {
-    NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2
+    NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512
  };

  enum X863DNowEnum {
@@ -169,6 +169,15 @@ protected:
  ///             address generation (AG) time.
  bool LEAUsesAG;

+  /// Processor has AVX-512 PreFetch Instructions
+  bool HasPFI;
+
+  /// Processor has AVX-512 Exponential and Reciprocal Instructions
+  bool HasERI;
+
+  /// Processor has AVX-512 Conflict Detection Instructions
+  bool HasCDI;
+
  /// stackAlignment - The minimum alignment known to hold of the stack frame on
  /// entry to the function and which must be maintained by every function.
  unsigned stackAlignment;
@@ -249,6 +258,7 @@ public:
  bool hasSSE42() const { return X86SSELevel >= SSE42; }
  bool hasAVX() const { return X86SSELevel >= AVX; }
  bool hasAVX2() const { return X86SSELevel >= AVX2; }
+  bool hasAVX512() const { return X86SSELevel >= AVX512; }
  bool hasFp256() const { return hasAVX(); }
  bool hasInt256() const { return hasAVX2(); }
  bool hasSSE4A() const { return HasSSE4A; }
@@ -282,6 +292,9 @@ public:
  bool padShortFunctions() const { return PadShortFunctions; }
  bool callRegIndirect() const { return CallRegIndirect; }
  bool LEAusesAG() const { return LEAUsesAG; }
+  bool hasCDI() const { return HasCDI; }
+  bool hasPFI() const { return HasPFI; }
+  bool hasERI() const { return HasERI; }

  bool isAtom() const { return X86ProcFamily == IntelAtom; }


Modified: llvm/trunk/lib/Target/X86/X86VZeroUpper.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86VZeroUpper.cpp?rev=187030&r1=187029&r2=187030&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86VZeroUpper.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86VZeroUpper.cpp Wed Jul 24 06:02:47 2013
@@ -105,23 +105,28 @@ FunctionPass *llvm::createX86IssueVZeroU
}

static bool isYmmReg(unsigned Reg) {
-  if (Reg >= X86::YMM0 && Reg <= X86::YMM15)
-    return true;
+  return (Reg >= X86::YMM0 && Reg <= X86::YMM31);
+}

-  return false;
+static bool isZmmReg(unsigned Reg) {
+  return (Reg >= X86::ZMM0 && Reg <= X86::ZMM31);
}

static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) {
  for (MachineRegisterInfo::livein_iterator I = MRI.livein_begin(),
       E = MRI.livein_end(); I != E; ++I)
-    if (isYmmReg(I->first))
+    if (isYmmReg(I->first) || isZmmReg(I->first))
      return true;

  return false;
}

static bool clobbersAllYmmRegs(const MachineOperand &MO) {
-  for (unsigned reg = X86::YMM0; reg < X86::YMM15; ++reg) {
+  for (unsigned reg = X86::YMM0; reg < X86::YMM31; ++reg) {
+    if (!MO.clobbersPhysReg(reg))
+      return false;
+  }
+  for (unsigned reg = X86::ZMM0; reg < X86::ZMM31; ++reg) {
    if (!MO.clobbersPhysReg(reg))
      return false;
  }


_______________________________________________
llvm-commits mailing list
llvm-commits@cs.uiuc.edu
http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits