[llvm-commits] [llvm] r61557 - in /llvm/trunk: lib/Target/X86/X86.td lib/Target/X86/X86InstrInfo.td lib/Target/X86/X86Subtarget.cpp lib/Target/X86/X86Subtarget.h test/CodeGen/X86/bt.ll

Evan Cheng evan.cheng at apple.com
Thu Jan 1 21:35:46 PST 2009


Author: evancheng
Date: Thu Jan  1 23:35:45 2009
New Revision: 61557

URL: http://llvm.org/viewvc/llvm-project?rev=61557&view=rev
Log:
Do not select (isel) load-folding bt instructions for Pentium M, Core, Core 2, and AMD processors. On these processors, a bt with a memory operand is significantly slower than a load followed by a bt of a register.

Modified:
    llvm/trunk/lib/Target/X86/X86.td
    llvm/trunk/lib/Target/X86/X86InstrInfo.td
    llvm/trunk/lib/Target/X86/X86Subtarget.cpp
    llvm/trunk/lib/Target/X86/X86Subtarget.h
    llvm/trunk/test/CodeGen/X86/bt.ll

Modified: llvm/trunk/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=61557&r1=61556&r2=61557&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86.td (original)
+++ llvm/trunk/lib/Target/X86/X86.td Thu Jan  1 23:35:45 2009
@@ -48,6 +48,8 @@
 def Feature64Bit   : SubtargetFeature<"64bit", "HasX86_64", "true",
                                       "Support 64-bit instructions",
                                       [FeatureSSE2]>;
+def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
+                                       "Bit testing of memory is slow">;
 
 //===----------------------------------------------------------------------===//
 // X86 processors supported.
@@ -66,27 +68,27 @@
 def : Proc<"pentiumpro",      []>;
 def : Proc<"pentium2",        [FeatureMMX]>;
 def : Proc<"pentium3",        [FeatureSSE1]>;
-def : Proc<"pentium-m",       [FeatureSSE2]>;
+def : Proc<"pentium-m",       [FeatureSSE2, FeatureSlowBTMem]>;
 def : Proc<"pentium4",        [FeatureSSE2]>;
-def : Proc<"x86-64",          [Feature64Bit]>;
-def : Proc<"yonah",           [FeatureSSE3]>;
-def : Proc<"prescott",        [FeatureSSE3]>;
-def : Proc<"nocona",          [FeatureSSE3,   Feature64Bit]>;
-def : Proc<"core2",           [FeatureSSSE3,  Feature64Bit]>;
-def : Proc<"penryn",          [FeatureSSE41,  Feature64Bit]>;
+def : Proc<"x86-64",          [Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"yonah",           [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"prescott",        [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"nocona",          [FeatureSSE3,   Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"core2",           [FeatureSSSE3,  Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"penryn",          [FeatureSSE41,  Feature64Bit, FeatureSlowBTMem]>;
 
 def : Proc<"k6",              [FeatureMMX]>;
 def : Proc<"k6-2",            [FeatureMMX,    Feature3DNow]>;
 def : Proc<"k6-3",            [FeatureMMX,    Feature3DNow]>;
-def : Proc<"athlon",          [FeatureMMX,    Feature3DNowA]>;
-def : Proc<"athlon-tbird",    [FeatureMMX,    Feature3DNowA]>;
-def : Proc<"athlon-4",        [FeatureSSE1,   Feature3DNowA]>;
-def : Proc<"athlon-xp",       [FeatureSSE1,   Feature3DNowA]>;
-def : Proc<"athlon-mp",       [FeatureSSE1,   Feature3DNowA]>;
-def : Proc<"k8",              [Feature3DNowA, Feature64Bit]>;
-def : Proc<"opteron",         [Feature3DNowA, Feature64Bit]>;
-def : Proc<"athlon64",        [Feature3DNowA, Feature64Bit]>;
-def : Proc<"athlon-fx",       [Feature3DNowA, Feature64Bit]>;
+def : Proc<"athlon",          [FeatureMMX,    Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-tbird",    [FeatureMMX,    Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-4",        [FeatureSSE1,   Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-xp",       [FeatureSSE1,   Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-mp",       [FeatureSSE1,   Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"k8",              [Feature3DNowA, Feature64Bit,  FeatureSlowBTMem]>;
+def : Proc<"opteron",         [Feature3DNowA, Feature64Bit,  FeatureSlowBTMem]>;
+def : Proc<"athlon64",        [Feature3DNowA, Feature64Bit,  FeatureSlowBTMem]>;
+def : Proc<"athlon-fx",       [Feature3DNowA, Feature64Bit,  FeatureSlowBTMem]>;
 
 def : Proc<"winchip-c6",      [FeatureMMX]>;
 def : Proc<"winchip2",        [FeatureMMX, Feature3DNow]>;

Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=61557&r1=61556&r2=61557&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Thu Jan  1 23:35:45 2009
@@ -222,6 +222,7 @@
 def NotSmallCode : Predicate<"TM.getCodeModel() != CodeModel::Small">;
 def IsStatic     : Predicate<"TM.getRelocationModel() == Reloc::Static">;
 def OptForSpeed  : Predicate<"!OptForSize">;
+def FastBTMem    : Predicate<"!Subtarget->isBTMemSlow()">;
 
 //===----------------------------------------------------------------------===//
 // X86 Instruction Format Definitions.
@@ -2666,11 +2667,11 @@
 def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
                "bt{w}\t{$src2, $src1|$src1, $src2}",
                [(X86bt (loadi16 addr:$src1), GR16:$src2),
-                (implicit EFLAGS)]>, OpSize, TB;
+                (implicit EFLAGS)]>, OpSize, TB, Requires<[FastBTMem]>;
 def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
                "bt{l}\t{$src2, $src1|$src1, $src2}",
                [(X86bt (loadi32 addr:$src1), GR32:$src2),
-                (implicit EFLAGS)]>, TB;
+                (implicit EFLAGS)]>, TB, Requires<[FastBTMem]>;
 } // Defs = [EFLAGS]
 
 // Sign/Zero extenders

Modified: llvm/trunk/lib/Target/X86/X86Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=61557&r1=61556&r2=61557&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp Thu Jan  1 23:35:45 2009
@@ -149,6 +149,18 @@
   return true;
 }
 
+static void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) {
+  Family = (EAX >> 8) & 0xf; // Bits 8 - 11
+  Model  = (EAX >> 4) & 0xf; // Bits 4 - 7
+  if (Family == 6 || Family == 0xf) {
+    if (Family == 0xf)
+      // Examine extended family ID if family ID is F.
+      Family += (EAX >> 20) & 0xff;    // Bits 20 - 27
+    // Examine extended model ID if family ID is 6 or F.
+    Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
+  }
+}
+
 void X86Subtarget::AutoDetectSubtargetFeatures() {
   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
   union {
@@ -169,8 +181,15 @@
   if ((ECX >> 19) & 0x1) X86SSELevel = SSE41;
   if ((ECX >> 20) & 0x1) X86SSELevel = SSE42;
 
-  if (memcmp(text.c, "GenuineIntel", 12) == 0 ||
-      memcmp(text.c, "AuthenticAMD", 12) == 0) {
+  bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
+  bool IsAMD   = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
+  if (IsIntel || IsAMD) {
+    // Determine if bit test memory instructions are slow.
+    unsigned Family = 0;
+    unsigned Model  = 0;
+    DetectFamilyModel(EAX, Family, Model);
+    IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13);
+
     X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
     HasX86_64 = (EDX >> 29) & 0x1;
   }
@@ -180,15 +199,9 @@
   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
   if (X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
     return "generic";
-  unsigned Family = (EAX >> 8) & 0xf; // Bits 8 - 11
-  unsigned Model  = (EAX >> 4) & 0xf; // Bits 4 - 7
-  if (Family == 6 || Family == 0xf) {
-    if (Family == 0xf)
-      // Examine extended family ID if family ID is F.
-      Family += (EAX >> 20) & 0xff;    // Bits 20 - 27
-    // Examine extended model ID if family ID is 6 or F.
-    Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
-  }
+  unsigned Family = 0;
+  unsigned Model  = 0;
+  DetectFamilyModel(EAX, Family, Model);
 
   X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
   bool Em64T = (EDX >> 29) & 0x1;
@@ -285,6 +298,7 @@
   , X86SSELevel(NoMMXSSE)
   , X863DNowLevel(NoThreeDNow)
   , HasX86_64(false)
+  , IsBTMemSlow(false)
   , DarwinVers(0)
   , IsLinux(false)
   , stackAlignment(8)

Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=61557&r1=61556&r2=61557&view=diff

==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h Thu Jan  1 23:35:45 2009
@@ -64,6 +64,9 @@
   /// HasX86_64 - True if the processor supports X86-64 instructions.
   ///
   bool HasX86_64;
+
+  /// IsBTMemSlow - True if BT (bit test) instructions with a memory operand are slow.
+  bool IsBTMemSlow;
   
   /// DarwinVers - Nonzero if this is a darwin platform: the numeric
   /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
@@ -127,6 +130,8 @@
   bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
   bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
 
+  bool isBTMemSlow() const { return IsBTMemSlow; }
+
   unsigned getAsmFlavor() const {
     return AsmFlavor != Unset ? unsigned(AsmFlavor) : 0;
   }

Modified: llvm/trunk/test/CodeGen/X86/bt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bt.ll?rev=61557&r1=61556&r2=61557&view=diff

==============================================================================
--- llvm/trunk/test/CodeGen/X86/bt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bt.ll Thu Jan  1 23:35:45 2009
@@ -1,4 +1,6 @@
 ; RUN: llvm-as < %s | llc | grep btl
+; RUN: llvm-as < %s | llc -mcpu=pentium4 | grep btl | grep esp
+; RUN: llvm-as < %s | llc -mcpu=penryn   | grep btl | not grep esp
 ; PR3253
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin8"





More information about the llvm-commits mailing list