<div dir="ltr">Aren't these implemented as basically pure loads on Intel CPUs? They are fully handled on the load port, right?</div><div class="gmail_extra"><br clear="all"><div><div class="gmail_signature" data-smartmail="gmail_signature">~Craig</div></div>
<br><div class="gmail_quote">On Wed, Mar 14, 2018 at 8:47 AM, Simon Pilgrim via llvm-commits <span dir="ltr">&lt;<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: rksimon<br>
Date: Wed Mar 14 08:47:08 2018<br>
New Revision: 327524<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=327524&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project?rev=327524&view=rev</a><br>
Log:<br>
[X86][AVX] Use WriteFShuffleLd for broadcast reg-mem instructions<br>
<br>
They shouldn't be treated as pure loads.<br>
<br>
Found while investigating D44428<br>
<br>
Modified:<br>
llvm/trunk/lib/Target/X86/<wbr>X86InstrSSE.td<br>
llvm/trunk/test/CodeGen/X86/<wbr>avx-schedule.ll<br>
llvm/trunk/test/CodeGen/X86/<wbr>avx2-schedule.ll<br>
<br>
Modified: llvm/trunk/lib/Target/X86/<wbr>X86InstrSSE.td<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=327524&r1=327523&r2=327524&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/lib/Target/<wbr>X86/X86InstrSSE.td?rev=327524&<wbr>r1=327523&r2=327524&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/lib/Target/X86/<wbr>X86InstrSSE.td (original)<br>
+++ llvm/trunk/lib/Target/X86/<wbr>X86InstrSSE.td Wed Mar 14 08:47:08 2018<br>
@@ -7482,7 +7482,8 @@ class avx2_broadcast_rr&lt;bits&lt;8&gt; opc, str<br>
<br>
let ExeDomain = SSEPackedSingle, Predicates = [HasAVX, NoVLX] in {<br>
def VBROADCASTSSrm : avx_broadcast_rm<0x18, "vbroadcastss", VR128,<br>
- f32mem, v4f32, loadf32, WriteLoad>;<br>
+ f32mem, v4f32, loadf32,<br>
+ WriteFShuffleLd>;<br>
def VBROADCASTSSYrm : avx_broadcast_rm<0x18, "vbroadcastss", VR256,<br>
f32mem, v8f32, loadf32,<br>
WriteFShuffleLd>, VEX_L;<br>
@@ -7518,7 +7519,7 @@ let mayLoad = 1, hasSideEffects = 0, Pre<br>
def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),<br>
(ins i128mem:$src),<br>
"vbroadcasti128\t{$src, $dst|$dst, $src}", []>,<br>
- Sched<[WriteLoad]>, VEX, VEX_L;<br>
+ Sched<[WriteShuffleLd]>, VEX, VEX_L;<br>
<br>
let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX],<br>
ExeDomain = SSEPackedSingle in<br>
@@ -7974,7 +7975,7 @@ multiclass avx2_broadcast&lt;bits&lt;8&gt; opc, s<br>
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),<br>
[(set VR128:$dst,<br>
(OpVT128 (X86VBroadcast (ld_frag addr:$src))))]>,<br>
- Sched<[WriteLoad]>, VEX;<br>
+ Sched<[WriteShuffleLd]>, VEX;<br>
def Yrr : AVX28I&lt;opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),<br>
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),<br>
[(set VR256:$dst,<br>
@@ -7984,7 +7985,7 @@ multiclass avx2_broadcast&lt;bits&lt;8&gt; opc, s<br>
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),<br>
[(set VR256:$dst,<br>
(OpVT256 (X86VBroadcast (ld_frag addr:$src))))]>,<br>
- Sched<[WriteLoad]>, VEX, VEX_L;<br>
+ Sched<[WriteShuffleLd]>, VEX, VEX_L;<br>
<br>
// Provide aliases for broadcast from the same register class that<br>
// automatically does the extract.<br>
<br>
Modified: llvm/trunk/test/CodeGen/X86/<wbr>avx-schedule.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=327524&r1=327523&r2=327524&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>CodeGen/X86/avx-schedule.ll?<wbr>rev=327524&r1=327523&r2=<wbr>327524&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/test/CodeGen/X86/<wbr>avx-schedule.ll (original)<br>
+++ llvm/trunk/test/CodeGen/X86/<wbr>avx-schedule.ll Wed Mar 14 08:47:08 2018<br>
@@ -859,7 +859,7 @@ define <4 x float> @test_broadcastss(flo<br>
;<br>
; BTVER2-LABEL: test_broadcastss:<br>
; BTVER2: # %bb.0:<br>
-; BTVER2-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [5:1.00]<br>
+; BTVER2-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:1.00]<br>
; BTVER2-NEXT: retq # sched: [4:1.00]<br>
;<br>
; ZNVER1-LABEL: test_broadcastss:<br>
<br>
Modified: llvm/trunk/test/CodeGen/X86/<wbr>avx2-schedule.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=327524&r1=327523&r2=327524&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>CodeGen/X86/avx2-schedule.ll?<wbr>rev=327524&r1=327523&r2=<wbr>327524&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/test/CodeGen/X86/<wbr>avx2-schedule.ll (original)<br>
+++ llvm/trunk/test/CodeGen/X86/<wbr>avx2-schedule.ll Wed Mar 14 08:47:08 2018<br>
@@ -9,7 +9,7 @@<br>
define <8 x i32> @test_broadcasti128(<8 x i32> %a0, <4 x i32> *%a1) {<br>
; GENERIC-LABEL: test_broadcasti128:<br>
; GENERIC: # %bb.0:<br>
-; GENERIC-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [4:0.50]<br>
+; GENERIC-NEXT: vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [5:1.00]<br>
; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]<br>
; GENERIC-NEXT: retq # sched: [1:1.00]<br>
;<br>
@@ -1762,7 +1762,7 @@ define <16 x i8> @test_pbroadcastb(<16 x<br>
; GENERIC-LABEL: test_pbroadcastb:<br>
; GENERIC: # %bb.0:<br>
; GENERIC-NEXT: vpbroadcastb %xmm0, %xmm0 # sched: [1:1.00]<br>
-; GENERIC-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [4:0.50]<br>
+; GENERIC-NEXT: vpbroadcastb (%rdi), %xmm1 # sched: [5:1.00]<br>
; GENERIC-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]<br>
; GENERIC-NEXT: retq # sched: [1:1.00]<br>
;<br>
@@ -1811,7 +1811,7 @@ define <32 x i8> @test_pbroadcastb_ymm(<<br>
; GENERIC-LABEL: test_pbroadcastb_ymm:<br>
; GENERIC: # %bb.0:<br>
; GENERIC-NEXT: vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00]<br>
-; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [4:0.50]<br>
+; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched: [5:1.00]<br>
; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]<br>
; GENERIC-NEXT: retq # sched: [1:1.00]<br>
;<br>
@@ -1860,7 +1860,7 @@ define <4 x i32> @test_pbroadcastd(<4 x<br>
; GENERIC-LABEL: test_pbroadcastd:<br>
; GENERIC: # %bb.0:<br>
; GENERIC-NEXT: vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]<br>
-; GENERIC-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [4:0.50]<br>
+; GENERIC-NEXT: vpbroadcastd (%rdi), %xmm1 # sched: [5:1.00]<br>
; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]<br>
; GENERIC-NEXT: retq # sched: [1:1.00]<br>
;<br>
@@ -1908,7 +1908,7 @@ define <8 x i32> @test_pbroadcastd_ymm(<<br>
; GENERIC-LABEL: test_pbroadcastd_ymm:<br>
; GENERIC: # %bb.0:<br>
; GENERIC-NEXT: vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00]<br>
-; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [4:0.50]<br>
+; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched: [5:1.00]<br>
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]<br>
; GENERIC-NEXT: retq # sched: [1:1.00]<br>
;<br>
@@ -1956,7 +1956,7 @@ define <2 x i64> @test_pbroadcastq(<2 x<br>
; GENERIC-LABEL: test_pbroadcastq:<br>
; GENERIC: # %bb.0:<br>
; GENERIC-NEXT: vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]<br>
-; GENERIC-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [4:0.50]<br>
+; GENERIC-NEXT: vpbroadcastq (%rdi), %xmm1 # sched: [5:1.00]<br>
; GENERIC-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]<br>
; GENERIC-NEXT: retq # sched: [1:1.00]<br>
;<br>
@@ -2004,7 +2004,7 @@ define <4 x i64> @test_pbroadcastq_ymm(<<br>
; GENERIC-LABEL: test_pbroadcastq_ymm:<br>
; GENERIC: # %bb.0:<br>
; GENERIC-NEXT: vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00]<br>
-; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [4:0.50]<br>
+; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched: [5:1.00]<br>
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]<br>
; GENERIC-NEXT: retq # sched: [1:1.00]<br>
;<br>
@@ -2052,7 +2052,7 @@ define <8 x i16> @test_pbroadcastw(<8 x<br>
; GENERIC-LABEL: test_pbroadcastw:<br>
; GENERIC: # %bb.0:<br>
; GENERIC-NEXT: vpbroadcastw %xmm0, %xmm0 # sched: [1:1.00]<br>
-; GENERIC-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [4:0.50]<br>
+; GENERIC-NEXT: vpbroadcastw (%rdi), %xmm1 # sched: [5:1.00]<br>
; GENERIC-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]<br>
; GENERIC-NEXT: retq # sched: [1:1.00]<br>
;<br>
@@ -2101,7 +2101,7 @@ define <16 x i16> @test_pbroadcastw_ymm(<br>
; GENERIC-LABEL: test_pbroadcastw_ymm:<br>
; GENERIC: # %bb.0:<br>
; GENERIC-NEXT: vpbroadcastw %xmm0, %ymm0 # sched: [1:1.00]<br>
-; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [4:0.50]<br>
+; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched: [5:1.00]<br>
; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]<br>
; GENERIC-NEXT: retq # sched: [1:1.00]<br>
;<br>
<br>
<br>
______________________________<wbr>_________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br></div>