[llvm] r319829 - [X86][AVX512] Tag GATHER/SCATTER instruction scheduler classes
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 5 12:47:11 PST 2017
Author: rksimon
Date: Tue Dec 5 12:47:11 2017
New Revision: 319829
URL: http://llvm.org/viewvc/llvm-project?rev=319829&view=rev
Log:
[X86][AVX512] Tag GATHER/SCATTER instruction scheduler classes
NOTE: At the moment these use the WriteLoad/WriteStore classes, which severely underestimates the costs. This needs to be reviewed.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=319829&r1=319828&r2=319829&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue Dec 5 12:47:11 2017
@@ -8482,6 +8482,7 @@ defm : AVX512_pmovx_patterns<"VPMOVZX",
//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations
+// FIXME: Improve scheduling of gather/scatter instructions.
multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86MemOperand memop, PatFrag GatherNode,
RegisterClass MaskRC = _.KRCWM> {
@@ -8494,7 +8495,7 @@ multiclass avx512_gather<bits<8> opc, st
[(set _.RC:$dst, MaskRC:$mask_wb,
(GatherNode (_.VT _.RC:$src1), MaskRC:$mask,
vectoraddr:$src2))]>, EVEX, EVEX_K,
- EVEX_CD8<_.EltSize, CD8VT1>;
+ EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
}
multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
@@ -8552,7 +8553,8 @@ let mayStore = 1, Constraints = "$mask =
"\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
[(set _.KRCWM:$mask_wb, (ScatterNode (_.VT _.RC:$src),
_.KRCWM:$mask, vectoraddr:$dst))]>,
- EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
+ EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
+ Sched<[WriteStore]>;
}
multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
@@ -8603,7 +8605,7 @@ multiclass avx512_gather_scatter_prefetc
let Predicates = [HasPFI], hasSideEffects = 1 in
def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
!strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"),
- []>, EVEX, EVEX_K;
+ [], IIC_SSE_PREFETCH>, EVEX, EVEX_K, Sched<[WriteLoad]>;
}
defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=319829&r1=319828&r2=319829&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Dec 5 12:47:11 2017
@@ -8442,10 +8442,10 @@ let Predicates = [HasAVX2, NoVLX] in {
(VPSRAVDYrm VR256:$src1, addr:$src2)>;
}
-
-
//===----------------------------------------------------------------------===//
// VGATHER - GATHER Operations
+
+// FIXME: Improve scheduling of gather instructions.
multiclass avx2_gather<bits<8> opc, string OpcodeStr, ValueType VTx,
ValueType VTy, PatFrag GatherNode128,
PatFrag GatherNode256, RegisterClass RC256,
@@ -8457,14 +8457,16 @@ multiclass avx2_gather<bits<8> opc, stri
"\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
[(set (VTx VR128:$dst), (MTx VR128:$mask_wb),
(GatherNode128 VR128:$src1, VR128:$mask,
- vectoraddr:$src2))]>, VEX;
+ vectoraddr:$src2))]>,
+ VEX, Sched<[WriteLoad]>;
def Yrm : AVX28I<opc, MRMSrcMem4VOp3, (outs RC256:$dst, RC256:$mask_wb),
(ins RC256:$src1, memop256:$src2, RC256:$mask),
!strconcat(OpcodeStr,
"\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
[(set (VTy RC256:$dst), (MTy RC256:$mask_wb),
(GatherNode256 RC256:$src1, RC256:$mask,
- vectoraddr:$src2))]>, VEX, VEX_L;
+ vectoraddr:$src2))]>,
+ VEX, VEX_L, Sched<[WriteLoad]>;
}
let Predicates = [UseAVX2] in {
Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=319829&r1=319828&r2=319829&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Tue Dec 5 12:47:11 2017
@@ -236,7 +236,7 @@ define <4 x i32> @test_extracti128(<8 x
define <2 x double> @test_gatherdpd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3) {
; GENERIC-LABEL: test_gatherdpd:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0
+; GENERIC-NEXT: vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_gatherdpd:
@@ -271,7 +271,7 @@ declare <2 x double> @llvm.x86.avx2.gath
define <4 x double> @test_gatherdpd_ymm(<4 x double> %a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3) {
; GENERIC-LABEL: test_gatherdpd_ymm:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0
+; GENERIC-NEXT: vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_gatherdpd_ymm:
@@ -306,7 +306,7 @@ declare <4 x double> @llvm.x86.avx2.gath
define <4 x float> @test_gatherdps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3) {
; GENERIC-LABEL: test_gatherdps:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0
+; GENERIC-NEXT: vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_gatherdps:
@@ -341,7 +341,7 @@ declare <4 x float> @llvm.x86.avx2.gathe
define <8 x float> @test_gatherdps_ymm(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3) {
; GENERIC-LABEL: test_gatherdps_ymm:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0
+; GENERIC-NEXT: vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_gatherdps_ymm:
@@ -376,7 +376,7 @@ declare <8 x float> @llvm.x86.avx2.gathe
define <2 x double> @test_gatherqpd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 x double> %a3) {
; GENERIC-LABEL: test_gatherqpd:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0
+; GENERIC-NEXT: vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_gatherqpd:
@@ -411,7 +411,7 @@ declare <2 x double> @llvm.x86.avx2.gath
define <4 x double> @test_gatherqpd_ymm(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3) {
; GENERIC-LABEL: test_gatherqpd_ymm:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0
+; GENERIC-NEXT: vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_gatherqpd_ymm:
@@ -446,7 +446,7 @@ declare <4 x double> @llvm.x86.avx2.gath
define <4 x float> @test_gatherqps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3) {
; GENERIC-LABEL: test_gatherqps:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0
+; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_gatherqps:
@@ -481,7 +481,7 @@ declare <4 x float> @llvm.x86.avx2.gathe
define <4 x float> @test_gatherqps_ymm(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3) {
; GENERIC-LABEL: test_gatherqps_ymm:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0
+; GENERIC-NEXT: vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [4:0.50]
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2750,7 +2750,7 @@ define <4 x i64> @test_permq(<4 x i64> %
define <4 x i32> @test_pgatherdd(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3) {
; GENERIC-LABEL: test_pgatherdd:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0
+; GENERIC-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pgatherdd:
@@ -2760,7 +2760,7 @@ define <4 x i32> @test_pgatherdd(<4 x i3
;
; BROADWELL-LABEL: test_pgatherdd:
; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0
+; BROADWELL-NEXT: vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pgatherdd:
@@ -2785,7 +2785,7 @@ declare <4 x i32> @llvm.x86.avx2.gather.
define <8 x i32> @test_pgatherdd_ymm(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3) {
; GENERIC-LABEL: test_pgatherdd_ymm:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0
+; GENERIC-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pgatherdd_ymm:
@@ -2795,7 +2795,7 @@ define <8 x i32> @test_pgatherdd_ymm(<8
;
; BROADWELL-LABEL: test_pgatherdd_ymm:
; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0
+; BROADWELL-NEXT: vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pgatherdd_ymm:
@@ -2820,7 +2820,7 @@ declare <8 x i32> @llvm.x86.avx2.gather.
define <2 x i64> @test_pgatherdq(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3) {
; GENERIC-LABEL: test_pgatherdq:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0
+; GENERIC-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pgatherdq:
@@ -2830,7 +2830,7 @@ define <2 x i64> @test_pgatherdq(<2 x i6
;
; BROADWELL-LABEL: test_pgatherdq:
; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0
+; BROADWELL-NEXT: vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pgatherdq:
@@ -2855,7 +2855,7 @@ declare <2 x i64> @llvm.x86.avx2.gather.
define <4 x i64> @test_pgatherdq_ymm(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3) {
; GENERIC-LABEL: test_pgatherdq_ymm:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0
+; GENERIC-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pgatherdq_ymm:
@@ -2865,7 +2865,7 @@ define <4 x i64> @test_pgatherdq_ymm(<4
;
; BROADWELL-LABEL: test_pgatherdq_ymm:
; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0
+; BROADWELL-NEXT: vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [5:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pgatherdq_ymm:
@@ -2890,7 +2890,7 @@ declare <4 x i64> @llvm.x86.avx2.gather.
define <4 x i32> @test_pgatherqd(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3) {
; GENERIC-LABEL: test_pgatherqd:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0
+; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pgatherqd:
@@ -2900,7 +2900,7 @@ define <4 x i32> @test_pgatherqd(<4 x i3
;
; BROADWELL-LABEL: test_pgatherqd:
; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0
+; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pgatherqd:
@@ -2925,7 +2925,7 @@ declare <4 x i32> @llvm.x86.avx2.gather.
define <4 x i32> @test_pgatherqd_ymm(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3) {
; GENERIC-LABEL: test_pgatherqd_ymm:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0
+; GENERIC-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [4:0.50]
; GENERIC-NEXT: vzeroupper
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2937,7 +2937,7 @@ define <4 x i32> @test_pgatherqd_ymm(<4
;
; BROADWELL-LABEL: test_pgatherqd_ymm:
; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0
+; BROADWELL-NEXT: vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [5:0.50]
; BROADWELL-NEXT: vzeroupper # sched: [4:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
@@ -2966,7 +2966,7 @@ declare <4 x i32> @llvm.x86.avx2.gather.
define <2 x i64> @test_pgatherqq(<2 x i64> %a0, i8 *%a1, <2 x i64> %a2, <2 x i64> %a3) {
; GENERIC-LABEL: test_pgatherqq:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0
+; GENERIC-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pgatherqq:
@@ -2976,7 +2976,7 @@ define <2 x i64> @test_pgatherqq(<2 x i6
;
; BROADWELL-LABEL: test_pgatherqq:
; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0
+; BROADWELL-NEXT: vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pgatherqq:
@@ -3001,7 +3001,7 @@ declare <2 x i64> @llvm.x86.avx2.gather.
define <4 x i64> @test_pgatherqq_ymm(<4 x i64> %a0, i8 *%a1, <4 x i64> %a2, <4 x i64> %a3) {
; GENERIC-LABEL: test_pgatherqq_ymm:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0
+; GENERIC-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [4:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_pgatherqq_ymm:
@@ -3011,7 +3011,7 @@ define <4 x i64> @test_pgatherqq_ymm(<4
;
; BROADWELL-LABEL: test_pgatherqq_ymm:
; BROADWELL: # %bb.0:
-; BROADWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0
+; BROADWELL-NEXT: vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_pgatherqq_ymm:
More information about the llvm-commits
mailing list