[llvm] r319829 - [X86][AVX512] Tag GATHER/SCATTER instruction scheduler classes

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 5 12:47:11 PST 2017


Author: rksimon
Date: Tue Dec  5 12:47:11 2017
New Revision: 319829

URL: http://llvm.org/viewvc/llvm-project?rev=319829&view=rev
Log:
[X86][AVX512] Tag GATHER/SCATTER instruction scheduler classes

NOTE: At the moment these use the WriteLoad/WriteStore classes, which severely underestimates the costs. This needs to be reviewed.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/test/CodeGen/X86/avx2-schedule.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=319829&r1=319828&r2=319829&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Tue Dec  5 12:47:11 2017
@@ -8482,6 +8482,7 @@ defm : AVX512_pmovx_patterns<"VPMOVZX",
 //===----------------------------------------------------------------------===//
 // GATHER - SCATTER Operations
 
+// FIXME: Improve scheduling of gather/scatter instructions.
 multiclass avx512_gather<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          X86MemOperand memop, PatFrag GatherNode,
                          RegisterClass MaskRC = _.KRCWM> {
@@ -8494,7 +8495,7 @@ multiclass avx512_gather<bits<8> opc, st
             [(set _.RC:$dst, MaskRC:$mask_wb,
               (GatherNode  (_.VT _.RC:$src1), MaskRC:$mask,
                      vectoraddr:$src2))]>, EVEX, EVEX_K,
-             EVEX_CD8<_.EltSize, CD8VT1>;
+             EVEX_CD8<_.EltSize, CD8VT1>, Sched<[WriteLoad]>;
 }
 
 multiclass avx512_gather_q_pd<bits<8> dopc, bits<8> qopc,
@@ -8552,7 +8553,8 @@ let mayStore = 1, Constraints = "$mask =
             "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}"),
             [(set _.KRCWM:$mask_wb, (ScatterNode (_.VT _.RC:$src),
                                      _.KRCWM:$mask,  vectoraddr:$dst))]>,
-            EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
+            EVEX, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>,
+            Sched<[WriteStore]>;
 }
 
 multiclass avx512_scatter_q_pd<bits<8> dopc, bits<8> qopc,
@@ -8603,7 +8605,7 @@ multiclass avx512_gather_scatter_prefetc
   let Predicates = [HasPFI], hasSideEffects = 1 in
   def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
             !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"),
-            []>, EVEX, EVEX_K;
+            [], IIC_SSE_PREFETCH>, EVEX, EVEX_K, Sched<[WriteLoad]>;
 }
 
 defm VGATHERPF0DPS: avx512_gather_scatter_prefetch<0xC6, MRM1m, "vgatherpf0dps",

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=319829&r1=319828&r2=319829&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Dec  5 12:47:11 2017
@@ -8442,10 +8442,10 @@ let Predicates = [HasAVX2, NoVLX] in {
             (VPSRAVDYrm VR256:$src1, addr:$src2)>;
 }
 
-
-
 //===----------------------------------------------------------------------===//
 // VGATHER - GATHER Operations
+
+// FIXME: Improve scheduling of gather instructions.
 multiclass avx2_gather<bits<8> opc, string OpcodeStr, ValueType VTx,
                        ValueType VTy, PatFrag GatherNode128, 
                        PatFrag GatherNode256, RegisterClass RC256,
@@ -8457,14 +8457,16 @@ multiclass avx2_gather<bits<8> opc, stri
               "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
             [(set (VTx VR128:$dst), (MTx VR128:$mask_wb),
                   (GatherNode128 VR128:$src1, VR128:$mask,
-                                vectoraddr:$src2))]>, VEX;
+                                vectoraddr:$src2))]>,
+            VEX, Sched<[WriteLoad]>;
   def Yrm : AVX28I<opc, MRMSrcMem4VOp3, (outs RC256:$dst, RC256:$mask_wb),
             (ins RC256:$src1, memop256:$src2, RC256:$mask),
             !strconcat(OpcodeStr,
               "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
             [(set (VTy RC256:$dst), (MTy RC256:$mask_wb),
                   (GatherNode256 RC256:$src1, RC256:$mask,
-                                vectoraddr:$src2))]>, VEX, VEX_L;
+                                vectoraddr:$src2))]>,
+            VEX, VEX_L, Sched<[WriteLoad]>;
 }
 
 let Predicates = [UseAVX2] in {

Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=319829&r1=319828&r2=319829&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Tue Dec  5 12:47:11 2017
@@ -236,7 +236,7 @@ define <4 x i32> @test_extracti128(<8 x
 define <2 x double> @test_gatherdpd(<2 x double> %a0, i8* %a1, <4 x i32> %a2, <2 x double> %a3) {
 ; GENERIC-LABEL: test_gatherdpd:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0
+; GENERIC-NEXT:    vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_gatherdpd:
@@ -271,7 +271,7 @@ declare <2 x double> @llvm.x86.avx2.gath
 define <4 x double> @test_gatherdpd_ymm(<4 x double> %a0, i8* %a1, <4 x i32> %a2, <4 x double> %a3) {
 ; GENERIC-LABEL: test_gatherdpd_ymm:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0
+; GENERIC-NEXT:    vgatherdpd %ymm2, (%rdi,%xmm1,8), %ymm0 # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_gatherdpd_ymm:
@@ -306,7 +306,7 @@ declare <4 x double> @llvm.x86.avx2.gath
 define <4 x float> @test_gatherdps(<4 x float> %a0, i8* %a1, <4 x i32> %a2, <4 x float> %a3) {
 ; GENERIC-LABEL: test_gatherdps:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0
+; GENERIC-NEXT:    vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_gatherdps:
@@ -341,7 +341,7 @@ declare <4 x float> @llvm.x86.avx2.gathe
 define <8 x float> @test_gatherdps_ymm(<8 x float> %a0, i8* %a1, <8 x i32> %a2, <8 x float> %a3) {
 ; GENERIC-LABEL: test_gatherdps_ymm:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0
+; GENERIC-NEXT:    vgatherdps %ymm2, (%rdi,%ymm1,4), %ymm0 # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_gatherdps_ymm:
@@ -376,7 +376,7 @@ declare <8 x float> @llvm.x86.avx2.gathe
 define <2 x double> @test_gatherqpd(<2 x double> %a0, i8* %a1, <2 x i64> %a2, <2 x double> %a3) {
 ; GENERIC-LABEL: test_gatherqpd:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0
+; GENERIC-NEXT:    vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_gatherqpd:
@@ -411,7 +411,7 @@ declare <2 x double> @llvm.x86.avx2.gath
 define <4 x double> @test_gatherqpd_ymm(<4 x double> %a0, i8* %a1, <4 x i64> %a2, <4 x double> %a3) {
 ; GENERIC-LABEL: test_gatherqpd_ymm:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0
+; GENERIC-NEXT:    vgatherqpd %ymm2, (%rdi,%ymm1,8), %ymm0 # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_gatherqpd_ymm:
@@ -446,7 +446,7 @@ declare <4 x double> @llvm.x86.avx2.gath
 define <4 x float> @test_gatherqps(<4 x float> %a0, i8* %a1, <2 x i64> %a2, <4 x float> %a3) {
 ; GENERIC-LABEL: test_gatherqps:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0
+; GENERIC-NEXT:    vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_gatherqps:
@@ -481,7 +481,7 @@ declare <4 x float> @llvm.x86.avx2.gathe
 define <4 x float> @test_gatherqps_ymm(<4 x float> %a0, i8* %a1, <4 x i64> %a2, <4 x float> %a3) {
 ; GENERIC-LABEL: test_gatherqps_ymm:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0
+; GENERIC-NEXT:    vgatherqps %xmm2, (%rdi,%ymm1,4), %xmm0 # sched: [4:0.50]
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2750,7 +2750,7 @@ define <4 x i64> @test_permq(<4 x i64> %
 define <4 x i32> @test_pgatherdd(<4 x i32> %a0, i8* %a1, <4 x i32> %a2, <4 x i32> %a3) {
 ; GENERIC-LABEL: test_pgatherdd:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0
+; GENERIC-NEXT:    vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pgatherdd:
@@ -2760,7 +2760,7 @@ define <4 x i32> @test_pgatherdd(<4 x i3
 ;
 ; BROADWELL-LABEL: test_pgatherdd:
 ; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0
+; BROADWELL-NEXT:    vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pgatherdd:
@@ -2785,7 +2785,7 @@ declare <4 x i32> @llvm.x86.avx2.gather.
 define <8 x i32> @test_pgatherdd_ymm(<8 x i32> %a0, i8* %a1, <8 x i32> %a2, <8 x i32> %a3) {
 ; GENERIC-LABEL: test_pgatherdd_ymm:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0
+; GENERIC-NEXT:    vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pgatherdd_ymm:
@@ -2795,7 +2795,7 @@ define <8 x i32> @test_pgatherdd_ymm(<8
 ;
 ; BROADWELL-LABEL: test_pgatherdd_ymm:
 ; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0
+; BROADWELL-NEXT:    vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pgatherdd_ymm:
@@ -2820,7 +2820,7 @@ declare <8 x i32> @llvm.x86.avx2.gather.
 define <2 x i64> @test_pgatherdq(<2 x i64> %a0, i8* %a1, <4 x i32> %a2, <2 x i64> %a3) {
 ; GENERIC-LABEL: test_pgatherdq:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0
+; GENERIC-NEXT:    vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pgatherdq:
@@ -2830,7 +2830,7 @@ define <2 x i64> @test_pgatherdq(<2 x i6
 ;
 ; BROADWELL-LABEL: test_pgatherdq:
 ; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0
+; BROADWELL-NEXT:    vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pgatherdq:
@@ -2855,7 +2855,7 @@ declare <2 x i64> @llvm.x86.avx2.gather.
 define <4 x i64> @test_pgatherdq_ymm(<4 x i64> %a0, i8* %a1, <4 x i32> %a2, <4 x i64> %a3) {
 ; GENERIC-LABEL: test_pgatherdq_ymm:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0
+; GENERIC-NEXT:    vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pgatherdq_ymm:
@@ -2865,7 +2865,7 @@ define <4 x i64> @test_pgatherdq_ymm(<4
 ;
 ; BROADWELL-LABEL: test_pgatherdq_ymm:
 ; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0
+; BROADWELL-NEXT:    vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 # sched: [5:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pgatherdq_ymm:
@@ -2890,7 +2890,7 @@ declare <4 x i64> @llvm.x86.avx2.gather.
 define <4 x i32> @test_pgatherqd(<4 x i32> %a0, i8* %a1, <2 x i64> %a2, <4 x i32> %a3) {
 ; GENERIC-LABEL: test_pgatherqd:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0
+; GENERIC-NEXT:    vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pgatherqd:
@@ -2900,7 +2900,7 @@ define <4 x i32> @test_pgatherqd(<4 x i3
 ;
 ; BROADWELL-LABEL: test_pgatherqd:
 ; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0
+; BROADWELL-NEXT:    vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pgatherqd:
@@ -2925,7 +2925,7 @@ declare <4 x i32> @llvm.x86.avx2.gather.
 define <4 x i32> @test_pgatherqd_ymm(<4 x i32> %a0, i8* %a1, <4 x i64> %a2, <4 x i32> %a3) {
 ; GENERIC-LABEL: test_pgatherqd_ymm:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0
+; GENERIC-NEXT:    vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [4:0.50]
 ; GENERIC-NEXT:    vzeroupper
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
@@ -2937,7 +2937,7 @@ define <4 x i32> @test_pgatherqd_ymm(<4
 ;
 ; BROADWELL-LABEL: test_pgatherqd_ymm:
 ; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0
+; BROADWELL-NEXT:    vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 # sched: [5:0.50]
 ; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
@@ -2966,7 +2966,7 @@ declare <4 x i32> @llvm.x86.avx2.gather.
 define <2 x i64> @test_pgatherqq(<2 x i64> %a0, i8 *%a1, <2 x i64> %a2, <2 x i64> %a3) {
 ; GENERIC-LABEL: test_pgatherqq:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0
+; GENERIC-NEXT:    vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pgatherqq:
@@ -2976,7 +2976,7 @@ define <2 x i64> @test_pgatherqq(<2 x i6
 ;
 ; BROADWELL-LABEL: test_pgatherqq:
 ; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0
+; BROADWELL-NEXT:    vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 # sched: [5:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pgatherqq:
@@ -3001,7 +3001,7 @@ declare <2 x i64> @llvm.x86.avx2.gather.
 define <4 x i64> @test_pgatherqq_ymm(<4 x i64> %a0, i8 *%a1, <4 x i64> %a2, <4 x i64> %a3) {
 ; GENERIC-LABEL: test_pgatherqq_ymm:
 ; GENERIC:       # %bb.0:
-; GENERIC-NEXT:    vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0
+; GENERIC-NEXT:    vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [4:0.50]
 ; GENERIC-NEXT:    retq # sched: [1:1.00]
 ;
 ; HASWELL-LABEL: test_pgatherqq_ymm:
@@ -3011,7 +3011,7 @@ define <4 x i64> @test_pgatherqq_ymm(<4
 ;
 ; BROADWELL-LABEL: test_pgatherqq_ymm:
 ; BROADWELL:       # %bb.0:
-; BROADWELL-NEXT:    vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0
+; BROADWELL-NEXT:    vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 # sched: [5:0.50]
 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
 ;
 ; SKYLAKE-LABEL: test_pgatherqq_ymm:




More information about the llvm-commits mailing list