[llvm] r245733 - remove 'FeatureSlowUAMem' from AMD CPUs based on 10H micro-arch or later
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 21 13:39:17 PDT 2015
Author: spatel
Date: Fri Aug 21 15:39:17 2015
New Revision: 245733
URL: http://llvm.org/viewvc/llvm-project?rev=245733&view=rev
Log:
remove 'FeatureSlowUAMem' from AMD CPUs based on 10H micro-arch or later
See discussion in D12154 ( http://reviews.llvm.org/D12154 ), AMD Software
Optimization Guides for 10H/12H/15H/16H, and Agner Fog's experimental data.
Modified:
llvm/trunk/lib/Target/X86/X86.td
llvm/trunk/test/CodeGen/X86/slow-unaligned-mem.ll
Modified: llvm/trunk/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=245733&r1=245732&r2=245733&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86.td (original)
+++ llvm/trunk/lib/Target/X86/X86.td Fri Aug 21 15:39:17 2015
@@ -433,21 +433,19 @@ def : Proc<"opteron-sse3", [FeatureSl
def : Proc<"athlon64-sse3", [FeatureSlowUAMem, FeatureSSE3, Feature3DNowA,
FeatureCMPXCHG16B, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"amdfam10", [FeatureSlowUAMem, FeatureSSE4A,
+def : Proc<"amdfam10", [FeatureSSE4A,
Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
FeaturePOPCNT, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"barcelona", [FeatureSlowUAMem, FeatureSSE4A,
+def : Proc<"barcelona", [FeatureSSE4A,
Feature3DNowA, FeatureCMPXCHG16B, FeatureLZCNT,
FeaturePOPCNT, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-// FIXME: We should remove 'FeatureSlowUAMem' from AMD chips under here.
-
// Bobcat
def : Proc<"btver1", [FeatureSSSE3, FeatureSSE4A, FeatureCMPXCHG16B,
FeaturePRFCHW, FeatureLZCNT, FeaturePOPCNT,
- FeatureSlowSHLD, FeatureSlowUAMem]>;
+ FeatureSlowSHLD]>;
// Jaguar
def : ProcessorModel<"btver2", BtVer2Model,
@@ -461,15 +459,13 @@ def : ProcessorModel<"btver2", BtVer2Mod
def : Proc<"bdver1", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
FeatureAVX, FeatureSSE4A, FeatureLZCNT,
- FeaturePOPCNT, FeatureSlowSHLD,
- FeatureSlowUAMem]>;
+ FeaturePOPCNT, FeatureSlowSHLD]>;
// Piledriver
def : Proc<"bdver2", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
FeatureAES, FeaturePRFCHW, FeaturePCLMUL,
FeatureAVX, FeatureSSE4A, FeatureF16C,
FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
- FeatureTBM, FeatureFMA, FeatureSlowSHLD,
- FeatureSlowUAMem]>;
+ FeatureTBM, FeatureFMA, FeatureSlowSHLD]>;
// Steamroller
def : Proc<"bdver3", [FeatureXOP, FeatureFMA4, FeatureCMPXCHG16B,
@@ -477,7 +473,7 @@ def : Proc<"bdver3", [FeatureXO
FeatureAVX, FeatureSSE4A, FeatureF16C,
FeatureLZCNT, FeaturePOPCNT, FeatureBMI,
FeatureTBM, FeatureFMA, FeatureSlowSHLD,
- FeatureFSGSBase, FeatureSlowUAMem]>;
+ FeatureFSGSBase]>;
// Excavator
def : Proc<"bdver4", [FeatureAVX2, FeatureXOP, FeatureFMA4,
@@ -485,7 +481,7 @@ def : Proc<"bdver4", [FeatureAV
FeaturePCLMUL, FeatureF16C, FeatureLZCNT,
FeaturePOPCNT, FeatureBMI, FeatureBMI2,
FeatureTBM, FeatureFMA, FeatureSSE4A,
- FeatureFSGSBase, FeatureSlowUAMem]>;
+ FeatureFSGSBase]>;
def : Proc<"geode", [FeatureSlowUAMem, Feature3DNowA]>;
Modified: llvm/trunk/test/CodeGen/X86/slow-unaligned-mem.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/slow-unaligned-mem.ll?rev=245733&r1=245732&r2=245733&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/slow-unaligned-mem.ll (original)
+++ llvm/trunk/test/CodeGen/X86/slow-unaligned-mem.ll Fri Aug 21 15:39:17 2015
@@ -39,14 +39,14 @@
; AMD chips with fast unaligned memory accesses
; FIXME: These are wrong except for btver2.
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=amdfam10 2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=barcelona 2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=amdfam10 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=barcelona 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver1 2>&1 | FileCheck %s --check-prefix=FAST
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=btver2 2>&1 | FileCheck %s --check-prefix=FAST
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver1 2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2 2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=SLOW
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver1 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver2 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver3 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=bdver4 2>&1 | FileCheck %s --check-prefix=FAST
; Other chips with slow unaligned memory accesses
More information about the llvm-commits
mailing list