[llvm] r245733 - remove 'FeatureSlowUAMem' from AMD CPUs based on 10H micro-arch or later

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 21 13:39:17 PDT 2015


Author: spatel
Date: Fri Aug 21 15:39:17 2015
New Revision: 245733

URL: http://llvm.org/viewvc/llvm-project?rev=245733&view=rev
Log:
Remove 'FeatureSlowUAMem' from AMD CPUs based on the 10H micro-architecture or later

See the discussion in D12154 (http://reviews.llvm.org/D12154), the AMD Software
Optimization Guides for families 10H/12H/15H/16H, and Agner Fog's experimental data.

Modified:
    llvm/trunk/lib/Target/X86/X86.td
    llvm/trunk/test/CodeGen/X86/slow-unaligned-mem.ll

Modified: llvm/trunk/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=245733&r1=245732&r2=245733&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86.td (original)
+++ llvm/trunk/lib/Target/X86/X86.td Fri Aug 21 15:39:17 2015
@@ -433,21 +433,19 @@ def : Proc<"opteron-sse3",    [FeatureSl
 def : Proc<"athlon64-sse3",   [FeatureSlowUAMem, FeatureSSE3,   Feature3DNowA,
                                FeatureCMPXCHG16B, FeatureSlowBTMem,
                                FeatureSlowSHLD]>;
-def : Proc<"amdfam10",        [FeatureSlowUAMem, FeatureSSE4A,
+def : Proc<"amdfam10",        [FeatureSSE4A,
                                Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
                                FeaturePOPCNT, FeatureSlowBTMem,
                                FeatureSlowSHLD]>;
-def : Proc<"barcelona",       [FeatureSlowUAMem, FeatureSSE4A,
+def : Proc<"barcelona",       [FeatureSSE4A,
                                Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
                                FeaturePOPCNT, FeatureSlowBTMem,
                                FeatureSlowSHLD]>;
 
-// FIXME: We should remove 'FeatureSlowUAMem' from AMD chips under here.
-
 // Bobcat
 def : Proc<"btver1",          [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,
                                FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT,
-                               FeatureSlowSHLD, FeatureSlowUAMem]>;
+                               FeatureSlowSHLD]>;
 
 // Jaguar
 def : ProcessorModel<"btver2", BtVer2Model,
@@ -461,15 +459,13 @@ def : ProcessorModel<"btver2", BtVer2Mod
 def : Proc<"bdver1",          [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
                                FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
                                FeatureAVX, FeatureSSE4A, FeatureLZCNT,
-                               FeaturePOPCNT, FeatureSlowSHLD,
-                               FeatureSlowUAMem]>;
+                               FeaturePOPCNT, FeatureSlowSHLD]>;
 // Piledriver
 def : Proc<"bdver2",          [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
                                FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
                                FeatureAVX, FeatureSSE4A, FeatureF16C,
                                FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
-                               FeatureTBM, FeatureFMA, FeatureSlowSHLD,
-                               FeatureSlowUAMem]>;
+                               FeatureTBM, FeatureFMA, FeatureSlowSHLD]>;
 
 // Steamroller
 def : Proc<"bdver3",          [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
@@ -477,7 +473,7 @@ def : Proc<"bdver3",          [FeatureXO
                                FeatureAVX, FeatureSSE4A, FeatureF16C,
                                FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
                                FeatureTBM, FeatureFMA, FeatureSlowSHLD,
-                               FeatureFSGSBase, FeatureSlowUAMem]>;
+                               FeatureFSGSBase]>;
 
 // Excavator
 def : Proc<"bdver4",          [FeatureAVX2, FeatureXOP, FeatureFMA4,
@@ -485,7 +481,7 @@ def : Proc<"bdver4",          [FeatureAV
                                FeaturePCLMUL, FeatureF16C, FeatureLZCNT,
                                FeaturePOPCNT, FeatureBMI, FeatureBMI2,
                                FeatureTBM, FeatureFMA, FeatureSSE4A,
-                               FeatureFSGSBase, FeatureSlowUAMem]>;
+                               FeatureFSGSBase]>;
 
 def : Proc<"geode",           [FeatureSlowUAMem, Feature3DNowA]>;
 

Modified: llvm/trunk/test/CodeGen/X86/slow-unaligned-mem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/slow-unaligned-mem.ll?rev=245733&r1=245732&r2=245733&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/slow-unaligned-mem.ll (original)
+++ llvm/trunk/test/CodeGen/X86/slow-unaligned-mem.ll Fri Aug 21 15:39:17 2015
@@ -39,14 +39,14 @@
 ; AMD chips with fast unaligned memory accesses
 ; FIXME: These are wrong except for btver2.
 
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=amdfam10      2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=barcelona     2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver1        2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=amdfam10      2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=barcelona     2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver1        2>&1 | FileCheck %s --check-prefix=FAST
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver2        2>&1 | FileCheck %s --check-prefix=FAST
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver1        2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2        2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3        2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4        2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver1        2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2        2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3        2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4        2>&1 | FileCheck %s --check-prefix=FAST
 
 ; Other chips with slow unaligned memory accesses
 




More information about the llvm-commits mailing list