[llvm] r327524 - [X86][AVX] Use WriteFShuffleLd for broadcast reg-mem instructions
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 14 08:47:09 PDT 2018
Author: rksimon
Date: Wed Mar 14 08:47:08 2018
New Revision: 327524
URL: http://llvm.org/viewvc/llvm-project?rev=327524&view=rev
Log:
[X86][AVX] Use WriteFShuffleLd for broadcast reg-mem instructions
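Broadcast-from-memory instructions shouldn't be treated as pure loads (WriteLoad). Schedule them as folded-load shuffles instead: WriteFShuffleLd for the floating-point form (VBROADCASTSSrm) and WriteShuffleLd for the integer forms (VBROADCASTI128 and the avx2_broadcast rm/Yrm variants).
Found while investigating D44428.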
Modified:
llvm/trunk/lib/Target/X86/X86InstrSSE.td
llvm/trunk/test/CodeGen/X86/avx-schedule.ll
llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=327524&r1=327523&r2=327524&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Mar 14 08:47:08 2018
@@ -7482,7 +7482,8 @@ class avx2_broadcast_rr<bits<8> opc, str
let ExeDomain = SSEPackedSingle, Predicates = [HasAVX, NoVLX] in {
def VBROADCASTSSrm : avx_broadcast_rm<0x18, "vbroadcastss", VR128,
- f32mem, v4f32, loadf32, WriteLoad>;
+ f32mem, v4f32, loadf32,
+ WriteFShuffleLd>;
def VBROADCASTSSYrm : avx_broadcast_rm<0x18, "vbroadcastss", VR256,
f32mem, v8f32, loadf32,
WriteFShuffleLd>, VEX_L;
@@ -7518,7 +7519,7 @@ let mayLoad = 1, hasSideEffects = 0, Pre
def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
(ins i128mem:$src),
"vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
- Sched<[WriteLoad]>, VEX, VEX_L;
+ Sched<[WriteShuffleLd]>, VEX, VEX_L;
let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX],
ExeDomain = SSEPackedSingle in
@@ -7974,7 +7975,7 @@ multiclass avx2_broadcast<bits<8> opc, s
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR128:$dst,
(OpVT128 (X86VBroadcast (ld_frag addr:$src))))]>,
- Sched<[WriteLoad]>, VEX;
+ Sched<[WriteShuffleLd]>, VEX;
def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
@@ -7984,7 +7985,7 @@ multiclass avx2_broadcast<bits<8> opc, s
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set VR256:$dst,
(OpVT256 (X86VBroadcast (ld_frag addr:$src))))]>,
- Sched<[WriteLoad]>, VEX, VEX_L;
+ Sched<[WriteShuffleLd]>, VEX, VEX_L;
// Provide aliases for broadcast from the same register class that
// automatically does the extract.
Modified: llvm/trunk/test/CodeGen/X86/avx-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=327524&r1=327523&r2=327524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll Wed Mar 14 08:47:08 2018
@@ -859,7 +859,7 @@ define <4 x float> @test_broadcastss(flo
;
; BTVER2-LABEL: test_broadcastss:
; BTVER2: # %bb.0:
-; BTVER2-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [5:1.00]
+; BTVER2-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:1.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_broadcastss:
Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=327524&r1=327523&r2=327524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Wed Mar 14 08:47:08 2018
@@ -9,7 +9,7 @@
define <8 x i32> @test_broadcasti128(<8 x i32> %a0, <4 x i32> *%a1) {
; GENERIC-LABEL: test_broadcasti128:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [4:0.50]
+; GENERIC-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [5:1.00]
; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -1762,7 +1762,7 @@ define <16 x i8> @test_pbroadcastb(<16 x
; GENERIC-LABEL: test_pbroadcastb:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [4:0.50]
+; GENERIC-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [5:1.00]
; GENERIC-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -1811,7 +1811,7 @@ define <32 x i8> @test_pbroadcastb_ymm(<
; GENERIC-LABEL: test_pbroadcastb_ymm:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [4:0.50]
+; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [5:1.00]
; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -1860,7 +1860,7 @@ define <4 x i32> @test_pbroadcastd(<4 x
; GENERIC-LABEL: test_pbroadcastd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [4:0.50]
+; GENERIC-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [5:1.00]
; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -1908,7 +1908,7 @@ define <8 x i32> @test_pbroadcastd_ymm(<
; GENERIC-LABEL: test_pbroadcastd_ymm:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [4:0.50]
+; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [5:1.00]
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -1956,7 +1956,7 @@ define <2 x i64> @test_pbroadcastq(<2 x
; GENERIC-LABEL: test_pbroadcastq:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [4:0.50]
+; GENERIC-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [5:1.00]
; GENERIC-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2004,7 +2004,7 @@ define <4 x i64> @test_pbroadcastq_ymm(<
; GENERIC-LABEL: test_pbroadcastq_ymm:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [4:0.50]
+; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [5:1.00]
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2052,7 +2052,7 @@ define <8 x i16> @test_pbroadcastw(<8 x
; GENERIC-LABEL: test_pbroadcastw:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [4:0.50]
+; GENERIC-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [5:1.00]
; GENERIC-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
@@ -2101,7 +2101,7 @@ define <16 x i16> @test_pbroadcastw_ymm(
; GENERIC-LABEL: test_pbroadcastw_ymm:
; GENERIC: # %bb.0:
; GENERIC-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [1:1.00]
-; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [4:0.50]
+; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [5:1.00]
; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
More information about the llvm-commits mailing list