[llvm-commits] [llvm] r91910 - in /llvm/trunk: lib/Target/X86/X86.td lib/Target/X86/X86InstrInfo.cpp lib/Target/X86/X86InstrInfo.td lib/Target/X86/X86InstrSSE.td lib/Target/X86/X86Subtarget.cpp lib/Target/X86/X86Subtarget.h test/CodeGen/X86/break-sse-dep.ll

Evan Cheng evan.cheng at apple.com
Tue Dec 22 09:47:23 PST 2009


Author: evancheng
Date: Tue Dec 22 11:47:23 2009
New Revision: 91910

URL: http://llvm.org/viewvc/llvm-project?rev=91910&view=rev
Log:
Remove the target attribute break-sse-dep. Instead, do not fold loads into SSE partial-update instructions unless optimizing for size.

Modified:
    llvm/trunk/lib/Target/X86/X86.td
    llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
    llvm/trunk/lib/Target/X86/X86InstrInfo.td
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/lib/Target/X86/X86Subtarget.cpp
    llvm/trunk/lib/Target/X86/X86Subtarget.h
    llvm/trunk/test/CodeGen/X86/break-sse-dep.ll

Modified: llvm/trunk/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=91910&r1=91909&r2=91910&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86.td (original)
+++ llvm/trunk/lib/Target/X86/X86.td Tue Dec 22 11:47:23 2009
@@ -57,8 +57,6 @@
                                       "Support 64-bit instructions">;
 def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
                                        "Bit testing of memory is slow">;
-def FeatureBreakSSEDep : SubtargetFeature<"break-sse-dep", "BreakSSEDep","true",
-                       "Should break SSE partial update dep with load / xorps">;
 def FeatureSSE4A   : SubtargetFeature<"sse4a", "HasSSE4A", "true",
                                       "Support SSE 4a instructions">;
 
@@ -88,27 +86,17 @@
 def : Proc<"pentium3",        [FeatureSSE1]>;
 def : Proc<"pentium-m",       [FeatureSSE2, FeatureSlowBTMem]>;
 def : Proc<"pentium4",        [FeatureSSE2]>;
-def : Proc<"x86-64",          [FeatureSSE2,   Feature64Bit, FeatureSlowBTMem,
-                               FeatureBreakSSEDep]>;
-def : Proc<"yonah",           [FeatureSSE3, FeatureSlowBTMem,
-                               FeatureBreakSSEDep]>;
-def : Proc<"prescott",        [FeatureSSE3, FeatureSlowBTMem,
-                               FeatureBreakSSEDep]>;
-def : Proc<"nocona",          [FeatureSSE3,   Feature64Bit, FeatureSlowBTMem,
-                               FeatureBreakSSEDep]>;
-def : Proc<"core2",           [FeatureSSSE3,  Feature64Bit, FeatureSlowBTMem,
-                               FeatureBreakSSEDep]>;
-def : Proc<"penryn",          [FeatureSSE41,  Feature64Bit, FeatureSlowBTMem,
-                               FeatureBreakSSEDep]>;
-def : Proc<"atom",            [FeatureSSE3,   Feature64Bit, FeatureSlowBTMem,
-                               FeatureBreakSSEDep]>;
-def : Proc<"corei7",          [FeatureSSE42,  Feature64Bit, FeatureSlowBTMem,
-                               FeatureBreakSSEDep]>;
-def : Proc<"nehalem",         [FeatureSSE42,  Feature64Bit, FeatureSlowBTMem,
-                               FeatureBreakSSEDep]>;
+def : Proc<"x86-64",          [FeatureSSE2,   Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"yonah",           [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"prescott",        [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"nocona",          [FeatureSSE3,   Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"core2",           [FeatureSSSE3,  Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"penryn",          [FeatureSSE41,  Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"atom",            [FeatureSSE3,   Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"corei7",          [FeatureSSE42,  Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"nehalem",         [FeatureSSE42,  Feature64Bit, FeatureSlowBTMem]>;
 // Sandy Bridge does not have FMA
-def : Proc<"sandybridge",     [FeatureSSE42,  FeatureAVX,   Feature64Bit,
-                               FeatureBreakSSEDep]>;
+def : Proc<"sandybridge",     [FeatureSSE42,  FeatureAVX,   Feature64Bit]>;
 
 def : Proc<"k6",              [FeatureMMX]>;
 def : Proc<"k6-2",            [FeatureMMX,    Feature3DNow]>;

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=91910&r1=91909&r2=91910&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Tue Dec 22 11:47:23 2009
@@ -2370,7 +2370,7 @@
   // Check switch flag 
   if (NoFusing) return NULL;
 
-  if (TM.getSubtarget<X86Subtarget>().shouldBreakSSEDep())
+  if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
     switch (MI->getOpcode()) {
     case X86::CVTSD2SSrr:
     case X86::Int_CVTSD2SSrr:
@@ -2422,7 +2422,7 @@
   // Check switch flag 
   if (NoFusing) return NULL;
 
-  if (TM.getSubtarget<X86Subtarget>().shouldBreakSSEDep())
+  if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
     switch (MI->getOpcode()) {
     case X86::CVTSD2SSrr:
     case X86::Int_CVTSD2SSrr:

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=91910&r1=91909&r2=91910&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Tue Dec 22 11:47:23 2009
@@ -298,11 +298,10 @@
 def NearData     : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
                              "TM.getCodeModel() == CodeModel::Kernel">;
 def IsStatic     : Predicate<"TM.getRelocationModel() == Reloc::Static">;
+def OptForSize   : Predicate<"OptForSize">;
 def OptForSpeed  : Predicate<"!OptForSize">;
 def FastBTMem    : Predicate<"!Subtarget->isBTMemSlow()">;
 def CallImmAddr  : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
-def SSEBreakDep  : Predicate<"Subtarget->shouldBreakSSEDep() && !OptForSize">;
-def NoSSEBreakDep: Predicate<"!Subtarget->shouldBreakSSEDep() || OptForSize">;
 
 //===----------------------------------------------------------------------===//
 // X86 Instruction Format Definitions.

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=91910&r1=91909&r2=91910&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Dec 22 11:47:23 2009
@@ -827,7 +827,7 @@
   def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
                 !strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
                 [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
-            Requires<[HasSSE1, NoSSEBreakDep]>;
+            Requires<[HasSSE1, OptForSize]>;
 
   // Vector operation, reg.
   def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -1120,7 +1120,7 @@
 def CVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
                       "cvtsd2ss\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD,
-                      Requires<[HasSSE2, NoSSEBreakDep]>;
+                  Requires<[HasSSE2, OptForSize]>;
 def CVTSI2SDrr  : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src),
                       "cvtsi2sd\t{$src, $dst|$dst, $src}",
                       [(set FR64:$dst, (sint_to_fp GR32:$src))]>;
@@ -1157,10 +1157,10 @@
 def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
                    "cvtss2sd\t{$src, $dst|$dst, $src}",
                    [(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
-                 Requires<[HasSSE2, NoSSEBreakDep]>;
+                 Requires<[HasSSE2, OptForSize]>;
 
 def : Pat<(extloadf32 addr:$src),
-          (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[SSEBreakDep]>;
+          (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>;
 
 // Match intrinsics which expect XMM operand(s).
 def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
@@ -3232,7 +3232,7 @@
                     [(set VR128:$dst,
                           (V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
                     TA, OpSize,
-                Requires<[HasSSE41, NoSSEBreakDep]>;
+                Requires<[HasSSE41]>;
 
   // Vector intrinsic operation, reg
   def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,

Modified: llvm/trunk/lib/Target/X86/X86Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=91910&r1=91909&r2=91910&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp Tue Dec 22 11:47:23 2009
@@ -266,7 +266,6 @@
     unsigned Model  = 0;
     DetectFamilyModel(EAX, Family, Model);
     IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13);
-    BreakSSEDep = IsIntel;
 
     GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
     HasX86_64 = (EDX >> 29) & 0x1;
@@ -287,7 +286,6 @@
   , HasFMA3(false)
   , HasFMA4(false)
   , IsBTMemSlow(false)
-  , BreakSSEDep(false)
   , DarwinVers(0)
   , stackAlignment(8)
   // FIXME: this is a known good value for Yonah. How about others?

Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=91910&r1=91909&r2=91910&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h Tue Dec 22 11:47:23 2009
@@ -78,14 +78,6 @@
   /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
   bool IsBTMemSlow;
 
-  /// BreakSSEDep - True if codegen should unfold load or insert xorps / pxor
-  /// to break register dependency for a partial register update SSE
-  /// instruction. This is needed for instructions such as CVTSS2SD which
-  /// only update the lower part of the register, and the result of the updated
-  /// part does not depend on the contents of the destination before the
-  /// instruction, and the non-updated portion of the register is not used.
-  bool BreakSSEDep;
-  
   /// DarwinVers - Nonzero if this is a darwin platform: the numeric
   /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
   unsigned char DarwinVers; // Is any darwin-x86 platform.
@@ -150,7 +142,6 @@
   bool hasFMA3() const { return HasFMA3; }
   bool hasFMA4() const { return HasFMA4; }
   bool isBTMemSlow() const { return IsBTMemSlow; }
-  bool shouldBreakSSEDep() const { return BreakSSEDep; }
 
   bool isTargetDarwin() const { return TargetType == isDarwin; }
   bool isTargetELF() const { return TargetType == isELF; }

Modified: llvm/trunk/test/CodeGen/X86/break-sse-dep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/break-sse-dep.ll?rev=91910&r1=91909&r2=91910&view=diff

==============================================================================
--- llvm/trunk/test/CodeGen/X86/break-sse-dep.ll (original)
+++ llvm/trunk/test/CodeGen/X86/break-sse-dep.ll Tue Dec 22 11:47:23 2009
@@ -1,27 +1,20 @@
-; RUN: llc < %s -march=x86-64 -mattr=+sse2,+break-sse-dep | FileCheck %s --check-prefix=YES
-; RUN: llc < %s -march=x86-64 -mattr=+sse2,-break-sse-dep | FileCheck %s --check-prefix=NO
+; RUN: llc < %s -march=x86-64 -mattr=+sse2 | FileCheck %s
 
 define double @t1(float* nocapture %x) nounwind readonly ssp {
 entry:
-; YES: t1:
-; YES: movss (%rdi), %xmm0
-; YES; cvtss2sd %xmm0, %xmm0
+; CHECK: t1:
+; CHECK: movss (%rdi), %xmm0
+; CHECK: cvtss2sd %xmm0, %xmm0
 
-; NO: t1:
-; NO; cvtss2sd (%rdi), %xmm0
   %0 = load float* %x, align 4
   %1 = fpext float %0 to double
   ret double %1
 }
 
-define float @t2(double* nocapture %x) nounwind readonly ssp {
+define float @t2(double* nocapture %x) nounwind readonly ssp optsize {
 entry:
-; YES: t2:
-; YES: movsd (%rdi), %xmm0
-; YES; cvtsd2ss %xmm0, %xmm0
-
-; NO: t2:
-; NO; cvtsd2ss (%rdi), %xmm0
+; CHECK: t2:
+; CHECK: cvtsd2ss (%rdi), %xmm0
   %0 = load double* %x, align 8
   %1 = fptrunc double %0 to float
   ret float %1





More information about the llvm-commits mailing list