[llvm] r311318 - [x86] Teach the "generic" x86 CPU to avoid patterns that are slow on
Chandler Carruth via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 21 01:45:22 PDT 2017
Author: chandlerc
Date: Mon Aug 21 01:45:22 2017
New Revision: 311318
URL: http://llvm.org/viewvc/llvm-project?rev=311318&view=rev
Log:
[x86] Teach the "generic" x86 CPU to avoid patterns that are slow on
widely used processors.
This occurred to me when I saw that we were generating 'inc' and 'dec'
when for Haswell and newer we shouldn't. However, there were a few "X is
slow" things that we should probably just set.
I've avoided any of the "X is fast" features because most of those would
be pretty serious regressions on processors where X isn't actually fast.
The slow things are likely to be negligible costs on processors where
these aren't slow and a significant win when they are slow.
In retrospect this seems somewhat obvious. Not sure why we didn't do
this a long time ago.
Differential Revision: https://reviews.llvm.org/D36947
Modified:
llvm/trunk/lib/Target/X86/X86.td
llvm/trunk/test/CodeGen/X86/avx-schedule.ll
llvm/trunk/test/CodeGen/X86/lea32-schedule.ll
llvm/trunk/test/CodeGen/X86/lea64-schedule.ll
llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll
llvm/trunk/test/DebugInfo/COFF/register-variables.ll
Modified: llvm/trunk/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=311318&r1=311317&r2=311318&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86.td (original)
+++ llvm/trunk/lib/Target/X86/X86.td Mon Aug 21 01:45:22 2017
@@ -866,9 +866,17 @@ def : Proc<"c3-2", [FeatureX8
// covers a huge swath of x86 processors. If there are specific scheduling
// knobs which need to be tuned differently for AMD chips, we might consider
// forming a common base for them.
-def : ProcessorModel<"x86-64", SandyBridgeModel,
- [FeatureX87, FeatureMMX, FeatureSSE2, FeatureFXSR,
- Feature64Bit, FeatureSlowBTMem ]>;
+def : ProcessorModel<"x86-64", SandyBridgeModel, [
+ FeatureX87,
+ FeatureMMX,
+ FeatureSSE2,
+ FeatureFXSR,
+ Feature64Bit,
+ FeatureSlow3OpsLEA,
+ FeatureSlowBTMem,
+ FeatureSlowIncDec,
+ FeatureSlowUAMem32
+]>;
//===----------------------------------------------------------------------===//
// Register File Description
Modified: llvm/trunk/test/CodeGen/X86/avx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=311318&r1=311317&r2=311318&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll Mon Aug 21 01:45:22 2017
@@ -752,7 +752,9 @@ define <8 x float> @test_cvtdq2ps(<8 x i
; GENERIC-LABEL: test_cvtdq2ps:
; GENERIC: # BB#0:
; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [10:1.00]
+; GENERIC-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50]
+; GENERIC-NEXT: vinsertf128 $1, 16(%rdi), %ymm1, %ymm1 # sched: [7:0.50]
+; GENERIC-NEXT: vcvtdq2ps %ymm1, %ymm1 # sched: [3:1.00]
; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -1954,9 +1956,11 @@ define <8 x float> @test_movsldup(<8 x f
define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) {
; GENERIC-LABEL: test_movupd:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50]
+; GENERIC-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
+; GENERIC-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vmovupd %ymm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: vmovupd %xmm0, (%rsi) # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movupd:
@@ -1997,9 +2001,11 @@ define <4 x double> @test_movupd(<4 x do
define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) {
; GENERIC-LABEL: test_movups:
; GENERIC: # BB#0:
-; GENERIC-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50]
+; GENERIC-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50]
+; GENERIC-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:0.50]
; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00]
-; GENERIC-NEXT: vmovups %ymm0, (%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00]
+; GENERIC-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movups:
Modified: llvm/trunk/test/CodeGen/X86/lea32-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lea32-schedule.ll?rev=311318&r1=311317&r2=311318&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lea32-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lea32-schedule.ll Mon Aug 21 01:45:22 2017
@@ -179,7 +179,8 @@ define i32 @test_lea_add_offset(i32, i32
; GENERIC: # BB#0:
; GENERIC-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; GENERIC-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; GENERIC-NEXT: leal 16(%rdi,%rsi), %eax # sched: [1:0.50]
+; GENERIC-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
+; GENERIC-NEXT: addl $16, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_add_offset:
@@ -241,7 +242,9 @@ define i32 @test_lea_add_offset_big(i32,
; GENERIC: # BB#0:
; GENERIC-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; GENERIC-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; GENERIC-NEXT: leal -4096(%rdi,%rsi), %eax # sched: [1:0.50]
+; GENERIC-NEXT: leal (%rdi,%rsi), %eax # sched: [1:0.50]
+; GENERIC-NEXT: addl $-4096, %eax # imm = 0xF000
+; GENERIC-NEXT: # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_add_offset_big:
@@ -356,7 +359,8 @@ define i32 @test_lea_mul_offset(i32) {
; GENERIC-LABEL: test_lea_mul_offset:
; GENERIC: # BB#0:
; GENERIC-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; GENERIC-NEXT: leal -32(%rdi,%rdi,2), %eax # sched: [1:0.50]
+; GENERIC-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50]
+; GENERIC-NEXT: addl $-32, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_mul_offset:
@@ -411,7 +415,9 @@ define i32 @test_lea_mul_offset_big(i32)
; GENERIC-LABEL: test_lea_mul_offset_big:
; GENERIC: # BB#0:
; GENERIC-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; GENERIC-NEXT: leal 10000(%rdi,%rdi,8), %eax # sched: [1:0.50]
+; GENERIC-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
+; GENERIC-NEXT: addl $10000, %eax # imm = 0x2710
+; GENERIC-NEXT: # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_mul_offset_big:
@@ -529,7 +535,8 @@ define i32 @test_lea_add_scale_offset(i3
; GENERIC: # BB#0:
; GENERIC-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; GENERIC-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; GENERIC-NEXT: leal 96(%rdi,%rsi,4), %eax # sched: [1:0.50]
+; GENERIC-NEXT: leal (%rdi,%rsi,4), %eax # sched: [1:0.50]
+; GENERIC-NEXT: addl $96, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_add_scale_offset:
@@ -592,7 +599,9 @@ define i32 @test_lea_add_scale_offset_bi
; GENERIC: # BB#0:
; GENERIC-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; GENERIC-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
-; GENERIC-NEXT: leal -1200(%rdi,%rsi,8), %eax # sched: [1:0.50]
+; GENERIC-NEXT: leal (%rdi,%rsi,8), %eax # sched: [1:0.50]
+; GENERIC-NEXT: addl $-1200, %eax # imm = 0xFB50
+; GENERIC-NEXT: # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_add_scale_offset_big:
Modified: llvm/trunk/test/CodeGen/X86/lea64-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lea64-schedule.ll?rev=311318&r1=311317&r2=311318&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lea64-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lea64-schedule.ll Mon Aug 21 01:45:22 2017
@@ -149,7 +149,8 @@ define i64 @test_lea_add(i64, i64) {
define i64 @test_lea_add_offset(i64, i64) {
; GENERIC-LABEL: test_lea_add_offset:
; GENERIC: # BB#0:
-; GENERIC-NEXT: leaq 16(%rdi,%rsi), %rax # sched: [1:0.50]
+; GENERIC-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
+; GENERIC-NEXT: addq $16, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_add_offset:
@@ -197,7 +198,9 @@ define i64 @test_lea_add_offset(i64, i64
define i64 @test_lea_add_offset_big(i64, i64) {
; GENERIC-LABEL: test_lea_add_offset_big:
; GENERIC: # BB#0:
-; GENERIC-NEXT: leaq -4096(%rdi,%rsi), %rax # sched: [1:0.50]
+; GENERIC-NEXT: leaq (%rdi,%rsi), %rax # sched: [1:0.50]
+; GENERIC-NEXT: addq $-4096, %rax # imm = 0xF000
+; GENERIC-NEXT: # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_add_offset_big:
@@ -292,7 +295,8 @@ define i64 @test_lea_mul(i64) {
define i64 @test_lea_mul_offset(i64) {
; GENERIC-LABEL: test_lea_mul_offset:
; GENERIC: # BB#0:
-; GENERIC-NEXT: leaq -32(%rdi,%rdi,2), %rax # sched: [1:0.50]
+; GENERIC-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50]
+; GENERIC-NEXT: addq $-32, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_mul_offset:
@@ -340,7 +344,9 @@ define i64 @test_lea_mul_offset(i64) {
define i64 @test_lea_mul_offset_big(i64) {
; GENERIC-LABEL: test_lea_mul_offset_big:
; GENERIC: # BB#0:
-; GENERIC-NEXT: leaq 10000(%rdi,%rdi,8), %rax # sched: [1:0.50]
+; GENERIC-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
+; GENERIC-NEXT: addq $10000, %rax # imm = 0x2710
+; GENERIC-NEXT: # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_mul_offset_big:
@@ -436,7 +442,8 @@ define i64 @test_lea_add_scale(i64, i64)
define i64 @test_lea_add_scale_offset(i64, i64) {
; GENERIC-LABEL: test_lea_add_scale_offset:
; GENERIC: # BB#0:
-; GENERIC-NEXT: leaq 96(%rdi,%rsi,4), %rax # sched: [1:0.50]
+; GENERIC-NEXT: leaq (%rdi,%rsi,4), %rax # sched: [1:0.50]
+; GENERIC-NEXT: addq $96, %rax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_add_scale_offset:
@@ -485,7 +492,9 @@ define i64 @test_lea_add_scale_offset(i6
define i64 @test_lea_add_scale_offset_big(i64, i64) {
; GENERIC-LABEL: test_lea_add_scale_offset_big:
; GENERIC: # BB#0:
-; GENERIC-NEXT: leaq -1200(%rdi,%rsi,8), %rax # sched: [1:0.50]
+; GENERIC-NEXT: leaq (%rdi,%rsi,8), %rax # sched: [1:0.50]
+; GENERIC-NEXT: addq $-1200, %rax # imm = 0xFB50
+; GENERIC-NEXT: # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_lea_add_scale_offset_big:
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll?rev=311318&r1=311317&r2=311318&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll Mon Aug 21 01:45:22 2017
@@ -299,7 +299,8 @@ define <4 x i32> @test_v16i32_0_1_2_12 (
define <8 x float> @shuffle_v16f32_extract_256(float* %RET, float* %a) {
; ALL-LABEL: shuffle_v16f32_extract_256:
; ALL: # BB#0:
-; ALL-NEXT: vmovups 32(%rsi), %ymm0
+; ALL-NEXT: vmovups 32(%rsi), %xmm0
+; ALL-NEXT: vinsertf128 $1, 48(%rsi), %ymm0, %ymm0
; ALL-NEXT: retq
%ptr_a = bitcast float* %a to <16 x float>*
%v_a = load <16 x float>, <16 x float>* %ptr_a, align 4
Modified: llvm/trunk/test/DebugInfo/COFF/register-variables.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/DebugInfo/COFF/register-variables.ll?rev=311318&r1=311317&r2=311318&view=diff
==============================================================================
--- llvm/trunk/test/DebugInfo/COFF/register-variables.ll (original)
+++ llvm/trunk/test/DebugInfo/COFF/register-variables.ll Mon Aug 21 01:45:22 2017
@@ -42,7 +42,7 @@
; ASM-DAG: #DEBUG_VALUE: inlineinc:a <- %EAX
; ASM-DAG: #DEBUG_VALUE: a <- %EAX
; ASM-DAG: #DEBUG_VALUE: f:p <- %ESI
-; ASM: incl %eax
+; ASM: addl $1, %eax
; ASM: [[after_inc_eax:\.Ltmp.*]]:
; ASM: #DEBUG_VALUE: inlineinc:b <- %EAX
; ASM: #DEBUG_VALUE: b <- %EAX
@@ -104,7 +104,7 @@
; OBJ: LocalVariableAddrRange {
; OBJ: OffsetStart: .text+0x7
; OBJ: ISectStart: 0x0
-; OBJ: Range: 0x18
+; OBJ: Range: 0x19
; OBJ: }
; OBJ: }
; OBJ: LocalSym {
@@ -118,7 +118,7 @@
; OBJ: LocalVariableAddrRange {
; OBJ: OffsetStart: .text+0xC
; OBJ: ISectStart: 0x0
-; OBJ: Range: 0x6
+; OBJ: Range: 0x7
; OBJ: }
; OBJ: }
; OBJ: LocalSym {
@@ -144,7 +144,7 @@
; OBJ: DefRangeRegisterSym {
; OBJ: Register: 17
; OBJ: MayHaveNoName: 0
-; OBJ: OffsetStart: .text+0x12
+; OBJ: OffsetStart: .text+0x13
; OBJ: ISectStart: 0x0
; OBJ: Range: 0x6
; OBJ: }
@@ -166,7 +166,7 @@
; OBJ: LocalVariableAddrRange {
; OBJ: OffsetStart: .text+0xC
; OBJ: ISectStart: 0x0
-; OBJ: Range: 0x6
+; OBJ: Range: 0x7
; OBJ: }
; OBJ: }
; OBJ: LocalSym {
@@ -178,7 +178,7 @@
; OBJ: DefRangeRegisterSym {
; OBJ: Register: 17
; OBJ: LocalVariableAddrRange {
-; OBJ: OffsetStart: .text+0x12
+; OBJ: OffsetStart: .text+0x13
; OBJ: ISectStart: 0x0
; OBJ: Range: 0x6
; OBJ: }
More information about the llvm-commits
mailing list