<div dir="ltr"><br><br><div class="gmail_quote"><div dir="ltr">On Fri, Oct 9, 2015 at 10:41 AM Chandler Carruth <<a href="mailto:chandlerc@gmail.com">chandlerc@gmail.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr"><div class="gmail_quote"><div dir="ltr">On Thu, Oct 8, 2015 at 1:11 PM Eric Christopher via llvm-commits <<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: echristo<br>

Date: Thu Oct  8 15:10:06 2015<br>

New Revision: 249731<br>

<br>

URL: <a href="http://llvm.org/viewvc/llvm-project?rev=249731&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project?rev=249731&view=rev</a><br>

Log:<br>

Move the MMX subtarget feature out of the SSE set of features and into<br>

its own variable.<br>

<br>

This is needed so that we can explicitly turn off MMX without turning<br>

off SSE and also so that we can diagnose feature set incompatibilities<br>

that involve MMX without SSE.<br>

<br>

Rationale:<br>

<br>

// sse3<br>

__m128d test_mm_addsub_pd(__m128d A, __m128d B) {<br>

  return _mm_addsub_pd(A, B);<br>

}<br>

<br>

// mmx<br>

void shift(__m64 a, __m64 b, int c) {<br>

  _mm_slli_pi16(a, c);<br>

  _mm_slli_pi32(a, c);<br>

  _mm_slli_si64(a, c);<br>

  _mm_srli_pi16(a, c);<br>

  _mm_srli_pi32(a, c);<br>

  _mm_srli_si64(a, c);<br>

  _mm_srai_pi16(a, c);<br>

  _mm_srai_pi32(a, c);<br>

}<br>

<br>

clang -msse3 -mno-mmx file.c -c<br>

<br>

For this code we should be able to explicitly turn off MMX<br>

without affecting the compilation of the SSE3 function and then<br>

diagnose and error on compiling the MMX function.<br>

<br>

This matches the existing gcc behavior and follows the spirit of<br>

the SSE/MMX separation in llvm where we can (and do) turn off<br>

MMX code generation except in the presence of intrinsics.<br>

<br>

Updated a couple of tests, but primarily tested with a couple of tests<br>

for turning on only mmx and only sse.<br>

<br>

This is paired with a patch to clang to take advantage of this behavior.<br>

<br>

Added:<br>

    llvm/trunk/test/CodeGen/X86/mmx-only.ll<br>

    llvm/trunk/test/CodeGen/X86/sse-only.ll<br>

Modified:<br>

    llvm/trunk/lib/Target/X86/X86.td<br>

    llvm/trunk/lib/Target/X86/X86Subtarget.cpp<br>

    llvm/trunk/lib/Target/X86/X86Subtarget.h<br>

    llvm/trunk/test/CodeGen/X86/mmx-intrinsics.ll<br>

    llvm/trunk/test/CodeGen/X86/mult-alt-x86.ll<br>

<br>

Modified: llvm/trunk/lib/Target/X86/X86.td<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=249731&r1=249730&r2=249731&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=249731&r1=249730&r2=249731&view=diff</a><br>

==============================================================================<br>

--- llvm/trunk/lib/Target/X86/X86.td (original)<br>

+++ llvm/trunk/lib/Target/X86/X86.td Thu Oct  8 15:10:06 2015<br>

@@ -37,14 +37,17 @@ def FeatureCMOV    : SubtargetFeature<"c<br>

 def FeaturePOPCNT   : SubtargetFeature<"popcnt", "HasPOPCNT", "true",<br>

                                        "Support POPCNT instruction">;<br>

<br>

-<br>

-def FeatureMMX     : SubtargetFeature<"mmx","X86SSELevel", "MMX",<br>

+// The MMX subtarget feature is separate from the rest of the SSE features<br>

+// because it's important (for odd compatibility reasons) to be able to<br>

+// turn it off explicitly while allowing SSE+ to be on.<br>

+def FeatureMMX     : SubtargetFeature<"mmx","HasMMX", "true",<br>

                                       "Enable MMX instructions">;<br>

+<br>

 def FeatureSSE1    : SubtargetFeature<"sse", "X86SSELevel", "SSE1",<br>

                                       "Enable SSE instructions",<br>

                                       // SSE codegen depends on cmovs, and all<br>

                                       // SSE1+ processors support them.<br>

-                                      [FeatureMMX, FeatureCMOV]>;<br>

+                                      [FeatureCMOV]>;<br>

 def FeatureSSE2    : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",<br>

                                       "Enable SSE2 instructions",<br>

                                       [FeatureSSE1]>;<br>

@@ -219,184 +222,241 @@ def : Proc<"pentium-mmx",     [FeatureSl<br>

 def : Proc<"i686",            [FeatureSlowUAMem16]>;<br>

 def : Proc<"pentiumpro",      [FeatureSlowUAMem16, FeatureCMOV]>;<br>

 def : Proc<"pentium2",        [FeatureSlowUAMem16, FeatureMMX, FeatureCMOV]>;<br>

-def : Proc<"pentium3",        [FeatureSlowUAMem16, FeatureSSE1]>;<br>

-def : Proc<"pentium3m",       [FeatureSlowUAMem16, FeatureSSE1,<br>

+def : Proc<"pentium3",        [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1]>;<br>

+def : Proc<"pentium3m",       [FeatureSlowUAMem16, FeatureMMX, FeatureSSE1,<br>

                                FeatureSlowBTMem]>;<br>

-def : Proc<"pentium-m",       [FeatureSlowUAMem16, FeatureSSE2,<br>

+def : Proc<"pentium-m",       [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2,<br>

                                FeatureSlowBTMem]>;<br>

-def : Proc<"pentium4",        [FeatureSlowUAMem16, FeatureSSE2]>;<br>

-def : Proc<"pentium4m",       [FeatureSlowUAMem16, FeatureSSE2,<br>

+def : Proc<"pentium4",        [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2]>;<br>

+def : Proc<"pentium4m",       [FeatureSlowUAMem16, FeatureMMX, FeatureSSE2,<br>

                                FeatureSlowBTMem]>;<br>

<br>

 // Intel Core Duo.<br>

-def : ProcessorModel<"yonah", SandyBridgeModel,<br>

-                     [FeatureSlowUAMem16, FeatureSSE3, FeatureSlowBTMem]>;<br>

+def : ProcessorModel<<br>

+          "yonah", SandyBridgeModel,<br>

+          [ FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureSlowBTMem ]>;<br>

<br>

 // NetBurst.<br>

-def : Proc<"prescott", [FeatureSlowUAMem16, FeatureSSE3, FeatureSlowBTMem]>;<br>

-def : Proc<"nocona",   [FeatureSlowUAMem16, FeatureSSE3, FeatureCMPXCHG16B,<br>

-                        FeatureSlowBTMem]>;<br>

+def : Proc<"prescott",<br>

+           [ FeatureSlowUAMem16, FeatureMMX, FeatureSSE3, FeatureSlowBTMem ]>;<br>

+def : Proc<"nocona", [<br>

+  FeatureSlowUAMem16,<br>

+  FeatureMMX,<br>

+  FeatureSSE3,<br>

+  FeatureCMPXCHG16B,<br>

+  FeatureSlowBTMem<br>

+]>;<br></blockquote><div><br></div></div></div><div dir="ltr"><div class="gmail_quote"><div>Ow. You reformatted every list in the same commit. =/ I would have much preferred leaving them alone, or doing that separately. This diff is moderately unreadable now.</div></div></div><div dir="ltr"><div class="gmail_quote"><div> </div></div></div></blockquote><div><br></div><div>Urgh. Yes. Sorry. I really should have done it separately.</div><div><br></div><div>-eric</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr"><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">

<br>

 // Intel Core 2 Solo/Duo.<br>

-def : ProcessorModel<"core2", SandyBridgeModel,<br>

-                     [FeatureSlowUAMem16, FeatureSSSE3, FeatureCMPXCHG16B,<br>

-                      FeatureSlowBTMem]>;<br>

-def : ProcessorModel<"penryn", SandyBridgeModel,<br>

-                     [FeatureSlowUAMem16, FeatureSSE41, FeatureCMPXCHG16B,<br>

-                      FeatureSlowBTMem]>;<br>

+def : ProcessorModel<"core2", SandyBridgeModel, [<br>

+  FeatureSlowUAMem16,<br>

+  FeatureMMX,<br>

+  FeatureSSSE3,<br>

+  FeatureCMPXCHG16B,<br>

+  FeatureSlowBTMem<br>

+]>;<br>

+def : ProcessorModel<"penryn", SandyBridgeModel, [<br>

+  FeatureSlowUAMem16,<br>

+  FeatureMMX,<br>

+  FeatureSSE41,<br>

+  FeatureCMPXCHG16B,<br>

+  FeatureSlowBTMem<br>

+]>;<br>

<br>

 // Atom CPUs.<br>

 class BonnellProc&lt;string Name&gt; : ProcessorModel&lt;Name, AtomModel, [<br>

-                                   ProcIntelAtom,<br>

-                                   FeatureSlowUAMem16,<br>

-                                   FeatureSSSE3,<br>

-                                   FeatureCMPXCHG16B,<br>

-                                   FeatureMOVBE,<br>

-                                   FeatureSlowBTMem,<br>

-                                   FeatureLeaForSP,<br>

-                                   FeatureSlowDivide32,<br>

-                                   FeatureSlowDivide64,<br>

-                                   FeatureCallRegIndirect,<br>

-                                   FeatureLEAUsesAG,<br>

-                                   FeaturePadShortFunctions<br>

-                                 ]>;<br>

+  ProcIntelAtom,<br>

+  FeatureSlowUAMem16,<br>

+  FeatureMMX,<br>

+  FeatureSSSE3,<br>

+  FeatureCMPXCHG16B,<br>

+  FeatureMOVBE,<br>

+  FeatureSlowBTMem,<br>

+  FeatureLeaForSP,<br>

+  FeatureSlowDivide32,<br>

+  FeatureSlowDivide64,<br>

+  FeatureCallRegIndirect,<br>

+  FeatureLEAUsesAG,<br>

+  FeaturePadShortFunctions<br>

+]>;<br>

 def : BonnellProc<"bonnell">;<br>

 def : BonnellProc<"atom">; // Pin the generic name to the baseline.<br>

<br>

 class SilvermontProc&lt;string Name&gt; : ProcessorModel&lt;Name, SLMModel, [<br>

-                                      ProcIntelSLM,<br>

-                                      FeatureSSE42,<br>

-                                      FeatureCMPXCHG16B,<br>

-                                      FeatureMOVBE,<br>

-                                      FeaturePOPCNT,<br>

-                                      FeaturePCLMUL,<br>

-                                      FeatureAES,<br>

-                                      FeatureSlowDivide64,<br>

-                                      FeatureCallRegIndirect,<br>

-                                      FeaturePRFCHW,<br>

-                                      FeatureSlowLEA,<br>

-                                      FeatureSlowIncDec,<br>

-                                      FeatureSlowBTMem<br>

-                                    ]>;<br>

+  ProcIntelSLM,<br>

+  FeatureMMX,<br>

+  FeatureSSE42,<br>

+  FeatureCMPXCHG16B,<br>

+  FeatureMOVBE,<br>

+  FeaturePOPCNT,<br>

+  FeaturePCLMUL,<br>

+  FeatureAES,<br>

+  FeatureSlowDivide64,<br>

+  FeatureCallRegIndirect,<br>

+  FeaturePRFCHW,<br>

+  FeatureSlowLEA,<br>

+  FeatureSlowIncDec,<br>

+  FeatureSlowBTMem<br>

+]>;<br>

 def : SilvermontProc<"silvermont">;<br>

 def : SilvermontProc<"slm">; // Legacy alias.<br>

<br>

 // "Arrandale" along with corei3 and corei5<br>

 class NehalemProc&lt;string Name&gt; : ProcessorModel&lt;Name, SandyBridgeModel, [<br>

-                                   FeatureSSE42,<br>

-                                   FeatureCMPXCHG16B,<br>

-                                   FeatureSlowBTMem,<br>

-                                   FeaturePOPCNT<br>

-                                 ]>;<br>

+  FeatureMMX,<br>

+  FeatureSSE42,<br>

+  FeatureCMPXCHG16B,<br>

+  FeatureSlowBTMem,<br>

+  FeaturePOPCNT<br>

+]>;<br>

 def : NehalemProc<"nehalem">;<br>

 def : NehalemProc<"corei7">;<br>

<br>

 // Westmere is a similar machine to nehalem with some additional features.<br>

 // Westmere is the corei3/i5/i7 path from nehalem to sandybridge<br>

 class WestmereProc&lt;string Name&gt; : ProcessorModel&lt;Name, SandyBridgeModel, [<br>

-                                    FeatureSSE42,<br>

-                                    FeatureCMPXCHG16B,<br>

-                                    FeatureSlowBTMem,<br>

-                                    FeaturePOPCNT,<br>

-                                    FeatureAES,<br>

-                                    FeaturePCLMUL<br>

-                                  ]>;<br>

+  FeatureMMX,<br>

+  FeatureSSE42,<br>

+  FeatureCMPXCHG16B,<br>

+  FeatureSlowBTMem,<br>

+  FeaturePOPCNT,<br>

+  FeatureAES,<br>

+  FeaturePCLMUL<br>

+]>;<br>

 def : WestmereProc<"westmere">;<br>

<br>

 // SSE is not listed here since llvm treats AVX as a reimplementation of SSE,<br>

 // rather than a superset.<br>

 class SandyBridgeProc&lt;string Name&gt; : ProcessorModel&lt;Name, SandyBridgeModel, [<br>

-                                       FeatureAVX,<br>

-                                       FeatureCMPXCHG16B,<br>

-                                       FeatureSlowBTMem,<br>

-                                       FeatureSlowUAMem32,<br>

-                                       FeaturePOPCNT,<br>

-                                       FeatureAES,<br>

-                                       FeaturePCLMUL<br>

-                                     ]>;<br>

+  FeatureMMX,<br>

+  FeatureAVX,<br>

+  FeatureCMPXCHG16B,<br>

+  FeatureSlowBTMem,<br>

+  FeatureSlowUAMem32,<br>

+  FeaturePOPCNT,<br>

+  FeatureAES,<br>

+  FeaturePCLMUL<br>

+]>;<br>

 def : SandyBridgeProc<"sandybridge">;<br>

 def : SandyBridgeProc<"corei7-avx">; // Legacy alias.<br>

<br>

 class IvyBridgeProc&lt;string Name&gt; : ProcessorModel&lt;Name, SandyBridgeModel, [<br>

-                                     FeatureAVX,<br>

-                                     FeatureCMPXCHG16B,<br>

-                                     FeatureSlowBTMem,<br>

-                                     FeatureSlowUAMem32,<br>

-                                     FeaturePOPCNT,<br>

-                                     FeatureAES,<br>

-                                     FeaturePCLMUL,<br>

-                                     FeatureRDRAND,<br>

-                                     FeatureF16C,<br>

-                                     FeatureFSGSBase<br>

-                                   ]>;<br>

+  FeatureMMX,<br>

+  FeatureAVX,<br>

+  FeatureCMPXCHG16B,<br>

+  FeatureSlowBTMem,<br>

+  FeatureSlowUAMem32,<br>

+  FeaturePOPCNT,<br>

+  FeatureAES,<br>

+  FeaturePCLMUL,<br>

+  FeatureRDRAND,<br>

+  FeatureF16C,<br>

+  FeatureFSGSBase<br>

+]>;<br>

 def : IvyBridgeProc<"ivybridge">;<br>

 def : IvyBridgeProc<"core-avx-i">; // Legacy alias.<br>

<br>

 class HaswellProc&lt;string Name&gt; : ProcessorModel&lt;Name, HaswellModel, [<br>

-                                   FeatureAVX2,<br>

-                                   FeatureCMPXCHG16B,<br>

-                                   FeatureSlowBTMem,<br>

-                                   FeaturePOPCNT,<br>

-                                   FeatureAES,<br>

-                                   FeaturePCLMUL,<br>

-                                   FeatureRDRAND,<br>

-                                   FeatureF16C,<br>

-                                   FeatureFSGSBase,<br>

-                                   FeatureMOVBE,<br>

-                                   FeatureLZCNT,<br>

-                                   FeatureBMI,<br>

-                                   FeatureBMI2,<br>

-                                   FeatureFMA,<br>

-                                   FeatureRTM,<br>

-                                   FeatureHLE,<br>

-                                   FeatureSlowIncDec<br>

-                                 ]>;<br>

+  FeatureMMX,<br>

+  FeatureAVX2,<br>

+  FeatureCMPXCHG16B,<br>

+  FeatureSlowBTMem,<br>

+  FeaturePOPCNT,<br>

+  FeatureAES,<br>

+  FeaturePCLMUL,<br>

+  FeatureRDRAND,<br>

+  FeatureF16C,<br>

+  FeatureFSGSBase,<br>

+  FeatureMOVBE,<br>

+  FeatureLZCNT,<br>

+  FeatureBMI,<br>

+  FeatureBMI2,<br>

+  FeatureFMA,<br>

+  FeatureRTM,<br>

+  FeatureHLE,<br>

+  FeatureSlowIncDec<br>

+]>;<br>

 def : HaswellProc<"haswell">;<br>

 def : HaswellProc<"core-avx2">; // Legacy alias.<br>

<br>

 class BroadwellProc&lt;string Name&gt; : ProcessorModel&lt;Name, HaswellModel, [<br>

-                                     FeatureAVX2,<br>

-                                     FeatureCMPXCHG16B,<br>

-                                     FeatureSlowBTMem,<br>

-                                     FeaturePOPCNT,<br>

-                                     FeatureAES,<br>

-                                     FeaturePCLMUL,<br>

-                                     FeatureRDRAND,<br>

-                                     FeatureF16C,<br>

-                                     FeatureFSGSBase,<br>

-                                     FeatureMOVBE,<br>

-                                     FeatureLZCNT,<br>

-                                     FeatureBMI,<br>

-                                     FeatureBMI2,<br>

-                                     FeatureFMA,<br>

-                                     FeatureRTM,<br>

-                                     FeatureHLE,<br>

-                                     FeatureADX,<br>

-                                     FeatureRDSEED,<br>

-                                     FeatureSlowIncDec<br>

-                                   ]>;<br>

+  FeatureMMX,<br>

+  FeatureAVX2,<br>

+  FeatureCMPXCHG16B,<br>

+  FeatureSlowBTMem,<br>

+  FeaturePOPCNT,<br>

+  FeatureAES,<br>

+  FeaturePCLMUL,<br>

+  FeatureRDRAND,<br>

+  FeatureF16C,<br>

+  FeatureFSGSBase,<br>

+  FeatureMOVBE,<br>

+  FeatureLZCNT,<br>

+  FeatureBMI,<br>

+  FeatureBMI2,<br>

+  FeatureFMA,<br>

+  FeatureRTM,<br>

+  FeatureHLE,<br>

+  FeatureADX,<br>

+  FeatureRDSEED,<br>

+  FeatureSlowIncDec<br>

+]>;<br>

 def : BroadwellProc<"broadwell">;<br>

<br>

 // FIXME: define KNL model<br>

-class KnightsLandingProc&lt;string Name&gt; : ProcessorModel&lt;Name, HaswellModel,<br>

-                     [FeatureAVX512, FeatureERI, FeatureCDI, FeaturePFI,<br>

-                      FeatureCMPXCHG16B, FeaturePOPCNT,<br>

-                      FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C,<br>

-                      FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI,<br>

-                      FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE,<br>

-                      FeatureSlowIncDec, FeatureMPX]>;<br>

+class KnightsLandingProc&lt;string Name&gt; : ProcessorModel&lt;Name, HaswellModel, [<br>

+  FeatureMMX,<br>

+  FeatureAVX512,<br>

+  FeatureERI,<br>

+  FeatureCDI,<br>

+  FeaturePFI,<br>

+  FeatureCMPXCHG16B,<br>

+  FeaturePOPCNT,<br>

+  FeatureAES,<br>

+  FeaturePCLMUL,<br>

+  FeatureRDRAND,<br>

+  FeatureF16C,<br>

+  FeatureFSGSBase,<br>

+  FeatureMOVBE,<br>

+  FeatureLZCNT,<br>

+  FeatureBMI,<br>

+  FeatureBMI2,<br>

+  FeatureFMA,<br>

+  FeatureRTM,<br>

+  FeatureHLE,<br>

+  FeatureSlowIncDec,<br>

+  FeatureMPX<br>

+]>;<br>

 def : KnightsLandingProc<"knl">;<br>

<br>

 // FIXME: define SKX model<br>

-class SkylakeProc&lt;string Name&gt; : ProcessorModel&lt;Name, HaswellModel,<br>

-                     [FeatureAVX512, FeatureCDI,<br>

-                      FeatureDQI, FeatureBWI, FeatureVLX,<br>

-                      FeatureCMPXCHG16B, FeatureSlowBTMem,<br>

-                      FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND,<br>

-                      FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT,<br>

-                      FeatureBMI, FeatureBMI2, FeatureFMA, FeatureRTM,<br>

-                      FeatureHLE, FeatureADX, FeatureRDSEED, FeatureSlowIncDec,<br>

-                      FeatureMPX]>;<br>

+class SkylakeProc&lt;string Name&gt; : ProcessorModel&lt;Name, HaswellModel, [<br>

+  FeatureMMX,<br>

+  FeatureAVX512,<br>

+  FeatureCDI,<br>

+  FeatureDQI,<br>

+  FeatureBWI,<br>

+  FeatureVLX,<br>

+  FeatureCMPXCHG16B,<br>

+  FeatureSlowBTMem,<br>

+  FeaturePOPCNT,<br>

+  FeatureAES,<br>

+  FeaturePCLMUL,<br>

+  FeatureRDRAND,<br>

+  FeatureF16C,<br>

+  FeatureFSGSBase,<br>

+  FeatureMOVBE,<br>

+  FeatureLZCNT,<br>

+  FeatureBMI,<br>

+  FeatureBMI2,<br>

+  FeatureFMA,<br>

+  FeatureRTM,<br>

+  FeatureHLE,<br>

+  FeatureADX,<br>

+  FeatureRDSEED,<br>

+  FeatureSlowIncDec,<br>

+  FeatureMPX<br>

+]>;<br>

 def : SkylakeProc<"skylake">;<br>

 def : SkylakeProc<"skx">; // Legacy alias.<br>

<br>

@@ -447,52 +507,117 @@ def : Proc<"barcelona",       [FeatureSS<br>

                                FeatureSlowSHLD]>;<br>

<br>

 // Bobcat<br>

-def : Proc<"btver1",          [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,<br>

-                               FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT,<br>

-                               FeatureSlowSHLD]>;<br>

+def : Proc<"btver1", [<br>

+  FeatureMMX,<br>

+  FeatureSSSE3,<br>

+  FeatureSSE4A,<br>

+  FeatureCMPXCHG16B,<br>

+  FeaturePRFCHW,<br>

+  FeatureLZCNT,<br>

+  FeaturePOPCNT,<br>

+  FeatureSlowSHLD<br>

+]>;<br>

<br>

 // Jaguar<br>

-def : ProcessorModel<"btver2", BtVer2Model,<br>

-                     [FeatureAVX, FeatureSSE4A, FeatureCMPXCHG16B,<br>

-                      FeaturePRFCHW, FeatureAES, FeaturePCLMUL,<br>

-                      FeatureBMI, FeatureF16C, FeatureMOVBE,<br>

-                      FeatureLZCNT, FeaturePOPCNT,<br>

-                      FeatureSlowSHLD]>;<br>

+def : ProcessorModel<"btver2", BtVer2Model, [<br>

+  FeatureMMX,<br>

+  FeatureAVX,<br>

+  FeatureSSE4A,<br>

+  FeatureCMPXCHG16B,<br>

+  FeaturePRFCHW,<br>

+  FeatureAES,<br>

+  FeaturePCLMUL,<br>

+  FeatureBMI,<br>

+  FeatureF16C,<br>

+  FeatureMOVBE,<br>

+  FeatureLZCNT,<br>

+  FeaturePOPCNT,<br>

+  FeatureSlowSHLD<br>

+]>;<br>

<br>

 // Bulldozer<br>

-def : Proc<"bdver1",          [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,<br>

-                               FeatureAES, FeaturePRFCHW, FeaturePCLMUL,<br>

-                               FeatureAVX, FeatureSSE4A, FeatureLZCNT,<br>

-                               FeaturePOPCNT, FeatureSlowSHLD]>;<br>

+def : Proc<"bdver1", [<br>

+  FeatureXOP,<br>

+  FeatureFMA4,<br>

+  FeatureCMPXCHG16B,<br>

+  FeatureAES,<br>

+  FeaturePRFCHW,<br>

+  FeaturePCLMUL,<br>

+  FeatureMMX,<br>

+  FeatureAVX,<br>

+  FeatureSSE4A,<br>

+  FeatureLZCNT,<br>

+  FeaturePOPCNT,<br>

+  FeatureSlowSHLD<br>

+]>;<br>

 // Piledriver<br>

-def : Proc<"bdver2",          [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,<br>

-                               FeatureAES, FeaturePRFCHW, FeaturePCLMUL,<br>

-                               FeatureAVX, FeatureSSE4A, FeatureF16C,<br>

-                               FeatureLZCNT, FeaturePOPCNT, FeatureBMI,<br>

-                               FeatureTBM, FeatureFMA, FeatureSlowSHLD]>;<br>

+def : Proc<"bdver2", [<br>

+  FeatureXOP,<br>

+  FeatureFMA4,<br>

+  FeatureCMPXCHG16B,<br>

+  FeatureAES,<br>

+  FeaturePRFCHW,<br>

+  FeaturePCLMUL,<br>

+  FeatureMMX,<br>

+  FeatureAVX,<br>

+  FeatureSSE4A,<br>

+  FeatureF16C,<br>

+  FeatureLZCNT,<br>

+  FeaturePOPCNT,<br>

+  FeatureBMI,<br>

+  FeatureTBM,<br>

+  FeatureFMA,<br>

+  FeatureSlowSHLD<br>

+]>;<br>

<br>

 // Steamroller<br>

-def : Proc<"bdver3",          [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,<br>

-                               FeatureAES, FeaturePRFCHW, FeaturePCLMUL,<br>

-                               FeatureAVX, FeatureSSE4A, FeatureF16C,<br>

-                               FeatureLZCNT, FeaturePOPCNT, FeatureBMI,<br>

-                               FeatureTBM, FeatureFMA, FeatureSlowSHLD,<br>

-                               FeatureFSGSBase]>;<br>

+def : Proc<"bdver3", [<br>

+  FeatureXOP,<br>

+  FeatureFMA4,<br>

+  FeatureCMPXCHG16B,<br>

+  FeatureAES,<br>

+  FeaturePRFCHW,<br>

+  FeaturePCLMUL,<br>

+  FeatureMMX,<br>

+  FeatureAVX,<br>

+  FeatureSSE4A,<br>

+  FeatureF16C,<br>

+  FeatureLZCNT,<br>

+  FeaturePOPCNT,<br>

+  FeatureBMI,<br>

+  FeatureTBM,<br>

+  FeatureFMA,<br>

+  FeatureSlowSHLD,<br>

+  FeatureFSGSBase<br>

+]>;<br>

<br>

 // Excavator<br>

-def : Proc<"bdver4",          [FeatureAVX2, FeatureXOP, FeatureFMA4,<br>

-                               FeatureCMPXCHG16B, FeatureAES, FeaturePRFCHW,<br>

-                               FeaturePCLMUL, FeatureF16C, FeatureLZCNT,<br>

-                               FeaturePOPCNT, FeatureBMI, FeatureBMI2,<br>

-                               FeatureTBM, FeatureFMA, FeatureSSE4A,<br>

-                               FeatureFSGSBase]>;<br>

+def : Proc<"bdver4", [<br>

+  FeatureMMX,<br>

+  FeatureAVX2,<br>

+  FeatureXOP,<br>

+  FeatureFMA4,<br>

+  FeatureCMPXCHG16B,<br>

+  FeatureAES,<br>

+  FeaturePRFCHW,<br>

+  FeaturePCLMUL,<br>

+  FeatureF16C,<br>

+  FeatureLZCNT,<br>

+  FeaturePOPCNT,<br>

+  FeatureBMI,<br>

+  FeatureBMI2,<br>

+  FeatureTBM,<br>

+  FeatureFMA,<br>

+  FeatureSSE4A,<br>

+  FeatureFSGSBase<br>

+]>;<br>

<br>

 def : Proc<"geode",           [FeatureSlowUAMem16, Feature3DNowA]>;<br>

<br>

 def : Proc<"winchip-c6",      [FeatureSlowUAMem16, FeatureMMX]>;<br>

 def : Proc<"winchip2",        [FeatureSlowUAMem16, Feature3DNow]>;<br>

 def : Proc<"c3",              [FeatureSlowUAMem16, Feature3DNow]>;<br>

-def : Proc<"c3-2",            [FeatureSlowUAMem16, FeatureSSE1]>;<br>

+def : Proc<"c3-2", [ FeatureSlowUAMem16, FeatureMMX, FeatureSSE1 ]>;<br>

<br>

 // We also provide a generic 64-bit specific x86 processor model which tries to<br>

 // be good for modern chips without enabling instruction set encodings past the<br>

@@ -504,8 +629,9 @@ def : Proc<"c3-2",            [FeatureSl<br>

 // covers a huge swath of x86 processors. If there are specific scheduling<br>

 // knobs which need to be tuned differently for AMD chips, we might consider<br>

 // forming a common base for them.<br>

-def : ProcessorModel<"x86-64", SandyBridgeModel,<br>

-                     [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>;<br>

+def : ProcessorModel<<br>

+          "x86-64", SandyBridgeModel,<br>

+          [ FeatureMMX, FeatureSSE2, Feature64Bit, FeatureSlowBTMem ]>;<br>

<br>

 //===----------------------------------------------------------------------===//<br>

 // Register File Description<br>

<br>

Modified: llvm/trunk/lib/Target/X86/X86Subtarget.cpp<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=249731&r1=249730&r2=249731&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=249731&r1=249730&r2=249731&view=diff</a><br>

==============================================================================<br>

--- llvm/trunk/lib/Target/X86/X86Subtarget.cpp (original)<br>

+++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp Thu Oct  8 15:10:06 2015<br>

@@ -228,9 +228,10 @@ void X86Subtarget::initSubtargetFeatures<br>

 }<br>

<br>

 void X86Subtarget::initializeEnvironment() {<br>

-  X86SSELevel = NoMMXSSE;<br>

+  X86SSELevel = NoSSE;<br>

   X863DNowLevel = NoThreeDNow;<br>

   HasCMov = false;<br>

+  HasMMX = false;<br>

   HasX86_64 = false;<br>

   HasPOPCNT = false;<br>

   HasSSE4A = false;<br>

<br>

Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=249731&r1=249730&r2=249731&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=249731&r1=249730&r2=249731&view=diff</a><br>

==============================================================================<br>

--- llvm/trunk/lib/Target/X86/X86Subtarget.h (original)<br>

+++ llvm/trunk/lib/Target/X86/X86Subtarget.h Thu Oct  8 15:10:06 2015<br>

@@ -47,7 +47,7 @@ class X86Subtarget final : public X86Gen<br>

<br>

 protected:<br>

   enum X86SSEEnum {<br>

-    NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F<br>

+    NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F<br>

   };<br>

<br>

   enum X863DNowEnum {<br>

@@ -64,7 +64,7 @@ protected:<br>

   /// Which PIC style to use<br>

   PICStyles::Style PICStyle;<br>

<br>

-  /// MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.<br>

+  /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported.<br>

   X86SSEEnum X86SSELevel;<br>

<br>

   /// 3DNow, 3DNow Athlon, or none supported.<br>

@@ -74,6 +74,9 @@ protected:<br>

   /// (generally pentium pro+).<br>

   bool HasCMov;<br>

<br>

+  /// True if this processor supports MMX instructions.<br>

+  bool HasMMX;<br>

+<br>

   /// True if the processor supports X86-64 instructions.<br>

   bool HasX86_64;<br>

<br>

@@ -319,7 +322,7 @@ public:<br>

   void setPICStyle(PICStyles::Style Style)  { PICStyle = Style; }<br>

<br>

   bool hasCMov() const { return HasCMov; }<br>

-  bool hasMMX() const { return X86SSELevel >= MMX; }<br>

+  bool hasMMX() const { return HasMMX; }<br>

   bool hasSSE1() const { return X86SSELevel >= SSE1; }<br>

   bool hasSSE2() const { return X86SSELevel >= SSE2; }<br>

   bool hasSSE3() const { return X86SSELevel >= SSE3; }<br>

<br>

Modified: llvm/trunk/test/CodeGen/X86/mmx-intrinsics.ll<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-intrinsics.ll?rev=249731&r1=249730&r2=249731&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-intrinsics.ll?rev=249731&r1=249730&r2=249731&view=diff</a><br>

==============================================================================<br>

--- llvm/trunk/test/CodeGen/X86/mmx-intrinsics.ll (original)<br>

+++ llvm/trunk/test/CodeGen/X86/mmx-intrinsics.ll Thu Oct  8 15:10:06 2015<br>

@@ -1,7 +1,7 @@<br>

 ; RUN: llc < %s -march=x86 -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X86<br>

-; RUN: llc < %s -march=x86 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=X86<br>

+; RUN: llc < %s -march=x86 -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X86<br>

 ; RUN: llc < %s -march=x86-64 -mattr=+mmx,+ssse3,-avx | FileCheck %s --check-prefix=ALL --check-prefix=X64<br>

-; RUN: llc < %s -march=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=X64<br>

+; RUN: llc < %s -march=x86-64 -mattr=+mmx,+avx | FileCheck %s --check-prefix=ALL --check-prefix=X64<br>

<br>

 declare x86_mmx @llvm.x86.ssse3.phadd.w(x86_mmx, x86_mmx) nounwind readnone<br>

<br>

<br>

Added: llvm/trunk/test/CodeGen/X86/mmx-only.ll<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-only.ll?rev=249731&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mmx-only.ll?rev=249731&view=auto</a><br>

==============================================================================<br>

--- llvm/trunk/test/CodeGen/X86/mmx-only.ll (added)<br>

+++ llvm/trunk/test/CodeGen/X86/mmx-only.ll Thu Oct  8 15:10:06 2015<br>

@@ -0,0 +1,21 @@<br>

+; RUN: llc < %s -march=x86 -mattr=+mmx | FileCheck %s<br>

+; RUN: llc < %s -march=x86 -mattr=+mmx,-sse | FileCheck %s<br>

+<br>

+; Test that turning off sse doesn't turn off mmx.<br>

+<br>

+declare x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx, x86_mmx) nounwind readnone<br>

+<br>

+define i64 @test88(<1 x i64> %a, <1 x i64> %b) nounwind readnone {<br>

+; CHECK-LABEL: @test88<br>

+; CHECK: pcmpgtd<br>

+entry:<br>

+  %0 = bitcast <1 x i64> %b to <2 x i32><br>

+  %1 = bitcast <1 x i64> %a to <2 x i32><br>

+  %mmx_var.i = bitcast <2 x i32> %1 to x86_mmx<br>

+  %mmx_var1.i = bitcast <2 x i32> %0 to x86_mmx<br>

+  %2 = tail call x86_mmx @llvm.x86.mmx.pcmpgt.d(x86_mmx %mmx_var.i, x86_mmx %mmx_var1.i) nounwind<br>

+  %3 = bitcast x86_mmx %2 to <2 x i32><br>

+  %4 = bitcast <2 x i32> %3 to <1 x i64><br>

+  %5 = extractelement <1 x i64> %4, i32 0<br>

+  ret i64 %5<br>

+}<br>

<br>

Modified: llvm/trunk/test/CodeGen/X86/mult-alt-x86.ll<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mult-alt-x86.ll?rev=249731&r1=249730&r2=249731&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mult-alt-x86.ll?rev=249731&r1=249730&r2=249731&view=diff</a><br>

==============================================================================<br>

--- llvm/trunk/test/CodeGen/X86/mult-alt-x86.ll (original)<br>

+++ llvm/trunk/test/CodeGen/X86/mult-alt-x86.ll Thu Oct  8 15:10:06 2015<br>

@@ -1,4 +1,4 @@<br>

-; RUN: llc < %s -march=x86 -mattr=+sse2 -no-integrated-as<br>

+; RUN: llc < %s -march=x86 -mattr=+mmx,+sse2 -no-integrated-as<br>

 ; ModuleID = 'mult-alt-x86.c'<br>

 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"<br>

 target triple = "i686-pc-win32"<br>

<br>

Added: llvm/trunk/test/CodeGen/X86/sse-only.ll<br>

URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-only.ll?rev=249731&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-only.ll?rev=249731&view=auto</a><br>

==============================================================================<br>

--- llvm/trunk/test/CodeGen/X86/sse-only.ll (added)<br>

+++ llvm/trunk/test/CodeGen/X86/sse-only.ll Thu Oct  8 15:10:06 2015<br>

@@ -0,0 +1,19 @@<br>

+; RUN: llc < %s -march=x86 -mattr=+sse2,-mmx | FileCheck %s<br>

+<br>

+; Test that turning off mmx doesn't turn off sse<br>

+<br>

+define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  {<br>

+; CHECK-LABEL: test1:<br>

+; CHECK:       ## BB#0:<br>

+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax<br>

+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx<br>

+; CHECK-NEXT:    movapd (%ecx), %xmm0<br>

+; CHECK-NEXT:    movlpd {{[0-9]+}}(%esp), %xmm0<br>

+; CHECK-NEXT:    movapd %xmm0, (%eax)<br>

+; CHECK-NEXT:    retl<br>

+       %tmp3 = load <2 x double>, <2 x double>* %A, align 16<br>

+       %tmp7 = insertelement <2 x double> undef, double %B, i32 0<br>

+       %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 ><br>

+       store <2 x double> %tmp9, <2 x double>* %r, align 16<br>

+       ret void<br>

+}<br>

<br>

<br>

_______________________________________________<br>

llvm-commits mailing list<br>

<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a><br>

<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits</a><br>

</blockquote></div></div></blockquote></div></div>