[llvm] ad171d2 - [X86] Change the tuning settings for pentium4 to be more modern since it's the default 32-bit cpu in clang
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 16 12:51:36 PDT 2020
Author: Craig Topper
Date: 2020-07-16T12:51:25-07:00
New Revision: ad171d24b92d8cf1a9e64567af74cea353b5bf5b
URL: https://github.com/llvm/llvm-project/commit/ad171d24b92d8cf1a9e64567af74cea353b5bf5b
DIFF: https://github.com/llvm/llvm-project/commit/ad171d24b92d8cf1a9e64567af74cea353b5bf5b.diff
LOG: [X86] Change the tuning settings for pentium4 to be more modern since it's the default 32-bit cpu in clang
Alternative to D83897. I believe the big change here is that I removed the slow unaligned 16-byte memory access tuning (FeatureSlowUAMem16).
The downside is that it may adversely affect tuning if someone explicitly targets -march=pentium4 and expects pentium4-tuned code. Of course, pentium4 is so old that our default behavior with the previous settings may not have been the best either.
Reviewed By: echristo, RKSimon
Differential Revision: https://reviews.llvm.org/D83913
Added:
Modified:
llvm/lib/Target/X86/X86.td
llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll
llvm/test/CodeGen/X86/slow-unaligned-mem.ll
llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll
llvm/test/DebugInfo/COFF/types-array.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index dc1ff72add49..10d3007e5839 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1072,10 +1072,20 @@ def : ProcessorModel<"pentium-m", GenericPostRAModel,
FeatureCMOV, FeatureInsertVZEROUPPER]>;
foreach P = ["pentium4", "pentium4m"] in {
+// def : ProcessorModel<P, GenericPostRAModel,
+// [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+// FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
+// FeatureCMOV, FeatureInsertVZEROUPPER]>;
+
+ // Since 'pentium4' is the default 32-bit CPU on Linux and Windows,
+ // give it more modern tunings.
+ // FIXME: This wouldn't be needed if we supported mtune.
def : ProcessorModel<P, GenericPostRAModel,
- [FeatureX87, FeatureSlowUAMem16, FeatureCMPXCHG8B,
+ [FeatureX87, FeatureCMPXCHG8B,
FeatureMMX, FeatureSSE2, FeatureFXSR, FeatureNOPL,
- FeatureCMOV, FeatureInsertVZEROUPPER]>;
+ FeatureCMOV, FeatureInsertVZEROUPPER,
+ FeatureSlow3OpsLEA, FeatureSlowDivide64,
+ FeatureSlowIncDec, FeatureMacroFusion]>;
}
// Intel Quark.
diff --git a/llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll b/llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll
index 25e3691913c8..380c18fbf5c5 100644
--- a/llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll
+++ b/llvm/test/CodeGen/X86/cfguard-x86-vectorcall.ll
@@ -16,19 +16,19 @@ entry:
; X32-LABEL: func_cf_vector_x86
; X32: movl 12(%ebp), %eax
; X32: movl 8(%ebp), %ecx
- ; X32: movsd 24(%eax), %xmm4 # xmm4 = mem[0],zero
- ; X32: movsd %xmm4, 24(%esp)
- ; X32: movsd 16(%eax), %xmm5 # xmm5 = mem[0],zero
- ; X32: movsd %xmm5, 16(%esp)
- ; X32: movsd (%eax), %xmm6 # xmm6 = mem[0],zero
- ; X32: movsd 8(%eax), %xmm7 # xmm7 = mem[0],zero
- ; X32: movsd %xmm7, 8(%esp)
- ; X32: movsd %xmm6, (%esp)
+ ; X32: movups (%eax), %xmm0
+ ; X32: movups 16(%eax), %xmm1
+ ; X32: movaps %xmm0, (%esp)
+ ; X32: movaps %xmm1, 16(%esp)
+ ; X32: movsd (%esp), %xmm4
+ ; X32: movsd 8(%esp), %xmm5
+ ; X32: movsd 16(%esp), %xmm6
+ ; X32: movsd 24(%esp), %xmm7
; X32: calll *___guard_check_icall_fptr
- ; X32: movaps %xmm6, %xmm0
- ; X32: movaps %xmm7, %xmm1
- ; X32: movaps %xmm5, %xmm2
- ; X32: movaps %xmm4, %xmm3
+ ; X32: movaps %xmm4, %xmm0
+ ; X32: movaps %xmm5, %xmm1
+ ; X32: movaps %xmm6, %xmm2
+ ; X32: movaps %xmm7, %xmm3
; X32: calll *%ecx
}
attributes #0 = { "target-cpu"="pentium4" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" }
diff --git a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
index f2c7c2fa4a56..295fdfb5a261 100644
--- a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
+++ b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
@@ -3,8 +3,6 @@
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3 2>&1 | FileCheck %s --check-prefix=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium3m 2>&1 | FileCheck %s --check-prefix=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium-m 2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4 2>&1 | FileCheck %s --check-prefix=SLOW
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4m 2>&1 | FileCheck %s --check-prefix=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=yonah 2>&1 | FileCheck %s --check-prefix=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=prescott 2>&1 | FileCheck %s --check-prefix=SLOW
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefix=SLOW
@@ -14,6 +12,10 @@
; Intel chips with fast unaligned memory accesses
+; Marked fast because this is the default 32-bit mode CPU in clang.
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4 2>&1 | FileCheck %s --check-prefix=FAST
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=pentium4m 2>&1 | FileCheck %s --check-prefix=FAST
+
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont 2>&1 | FileCheck %s --check-prefix=FAST
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nehalem 2>&1 | FileCheck %s --check-prefix=FAST
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=westmere 2>&1 | FileCheck %s --check-prefix=FAST
diff --git a/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll b/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll
index d42dcf0dd174..23a1f56fdf40 100644
--- a/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll
+++ b/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll
@@ -40,7 +40,7 @@
; OBJ: SubSectionType: FrameData (0xF5)
; OBJ: FrameData {
; OBJ: RvaStart: 0x0
-; OBJ: CodeSize: 0x34
+; OBJ: CodeSize: 0x36
; OBJ: PrologSize: 0x9
; OBJ: FrameFunc [
; OBJ-NEXT: $T0 .raSearch =
@@ -50,7 +50,7 @@
; OBJ: }
; OBJ: FrameData {
; OBJ: RvaStart: 0x7
-; OBJ: CodeSize: 0x2D
+; OBJ: CodeSize: 0x2F
; OBJ: PrologSize: 0x2
; OBJ: FrameFunc [
; OBJ-NEXT: $T0 .raSearch =
@@ -61,7 +61,7 @@
; OBJ: }
; OBJ: FrameData {
; OBJ: RvaStart: 0x8
-; OBJ: CodeSize: 0x2C
+; OBJ: CodeSize: 0x2E
; OBJ: PrologSize: 0x1
; OBJ: FrameFunc [
; OBJ-NEXT: $T0 .raSearch =
@@ -73,7 +73,7 @@
; OBJ: }
; OBJ: FrameData {
; OBJ: RvaStart: 0x9
-; OBJ: CodeSize: 0x2B
+; OBJ: CodeSize: 0x2D
; OBJ: PrologSize: 0x0
; OBJ: FrameFunc [
; OBJ-NEXT: $T0 .raSearch =
diff --git a/llvm/test/DebugInfo/COFF/types-array.ll b/llvm/test/DebugInfo/COFF/types-array.ll
index 2962f970aca1..19ddcf9ffe2c 100644
--- a/llvm/test/DebugInfo/COFF/types-array.ll
+++ b/llvm/test/DebugInfo/COFF/types-array.ll
@@ -51,7 +51,7 @@
; CHECK: PtrParent: 0x0
; CHECK: PtrEnd: 0x0
; CHECK: PtrNext: 0x0
-; CHECK: CodeSize: 0x39
+; CHECK: CodeSize: 0x2A
; CHECK: DbgStart: 0x0
; CHECK: DbgEnd: 0x0
; CHECK: FunctionType: f (0x1002)
@@ -73,7 +73,7 @@
; CHECK: LocalVariableAddrRange {
; CHECK: OffsetStart: .text+0x6
; CHECK: ISectStart: 0x0
-; CHECK: Range: 0x33
+; CHECK: Range: 0x24
; CHECK: }
; CHECK: }
; CHECK: ProcEnd {
More information about the llvm-commits mailing list