<div dir="ltr">Aren't these implemented as basically pure loads on Intel CPUs? They are fully handled on the load port right?</div><div class="gmail_extra"><br clear="all"><div><div class="gmail_signature" data-smartmail="gmail_signature">~Craig</div></div>
<br><div class="gmail_quote">On Wed, Mar 14, 2018 at 8:47 AM, Simon Pilgrim via llvm-commits <span dir="ltr">&lt;<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: rksimon<br>
Date: Wed Mar 14 08:47:08 2018<br>
New Revision: 327524<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=327524&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project?rev=327524&view=rev</a><br>
Log:<br>
[X86][AVX] Use WriteFShuffleLd for broadcast reg-mem instructions<br>
<br>
They shouldn't be treated as pure loads.<br>
<br>
Found while investigating D44428<br>
<br>
Modified:<br>
    llvm/trunk/lib/Target/X86/<wbr>X86InstrSSE.td<br>
    llvm/trunk/test/CodeGen/X86/<wbr>avx-schedule.ll<br>
    llvm/trunk/test/CodeGen/X86/<wbr>avx2-schedule.ll<br>
<br>
Modified: llvm/trunk/lib/Target/X86/<wbr>X86InstrSSE.td<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=327524&r1=327523&r2=327524&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/lib/Target/<wbr>X86/X86InstrSSE.td?rev=327524&<wbr>r1=327523&r2=327524&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/lib/Target/X86/<wbr>X86InstrSSE.td (original)<br>
+++ llvm/trunk/lib/Target/X86/<wbr>X86InstrSSE.td Wed Mar 14 08:47:08 2018<br>
@@ -7482,7 +7482,8 @@ class avx2_broadcast_rr&lt;bits&lt;8&gt; opc, str<br>
<br>
 let ExeDomain = SSEPackedSingle, Predicates = [HasAVX, NoVLX] in {<br>
   def VBROADCASTSSrm  : avx_broadcast_rm&lt;0x18, "vbroadcastss", VR128,<br>
-                                             f32mem, v4f32, loadf32, WriteLoad&gt;;<br>
+                                             f32mem, v4f32, loadf32,<br>
+                                             WriteFShuffleLd&gt;;<br>
   def VBROADCASTSSYrm : avx_broadcast_rm&lt;0x18, "vbroadcastss", VR256,<br>
                                              f32mem, v8f32, loadf32,<br>
                                              WriteFShuffleLd&gt;, VEX_L;<br>
@@ -7518,7 +7519,7 @@ let mayLoad = 1, hasSideEffects = 0, Pre<br>
 def VBROADCASTI128 : AVX8I&lt;0x5A, MRMSrcMem, (outs VR256:$dst),<br>
                            (ins i128mem:$src),<br>
                            "vbroadcasti128\t{$src, $dst|$dst, $src}", []&gt;,<br>
-                           Sched&lt;[WriteLoad]&gt;, VEX, VEX_L;<br>
+                           Sched&lt;[WriteShuffleLd]&gt;, VEX, VEX_L;<br>
<br>
 let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX],<br>
     ExeDomain = SSEPackedSingle in<br>
@@ -7974,7 +7975,7 @@ multiclass avx2_broadcast&lt;bits&lt;8&gt; opc, s<br>
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),<br>
                   [(set VR128:$dst,<br>
                    (OpVT128 (X86VBroadcast (ld_frag addr:$src))))]&gt;,<br>
-                  Sched&lt;[WriteLoad]&gt;, VEX;<br>
+                  Sched&lt;[WriteShuffleLd]&gt;, VEX;<br>
     def Yrr : AVX28I&lt;opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),<br>
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),<br>
                    [(set VR256:$dst,<br>
@@ -7984,7 +7985,7 @@ multiclass avx2_broadcast&lt;bits&lt;8&gt; opc, s<br>
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),<br>
                    [(set VR256:$dst,<br>
                     (OpVT256 (X86VBroadcast (ld_frag addr:$src))))]&gt;,<br>
-                   Sched&lt;[WriteLoad]&gt;, VEX, VEX_L;<br>
+                   Sched&lt;[WriteShuffleLd]&gt;, VEX, VEX_L;<br>
<br>
     // Provide aliases for broadcast from the same register class that<br>
     // automatically does the extract.<br>
<br>
Modified: llvm/trunk/test/CodeGen/X86/<wbr>avx-schedule.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=327524&r1=327523&r2=327524&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>CodeGen/X86/avx-schedule.ll?<wbr>rev=327524&r1=327523&r2=<wbr>327524&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/test/CodeGen/X86/<wbr>avx-schedule.ll (original)<br>
+++ llvm/trunk/test/CodeGen/X86/<wbr>avx-schedule.ll Wed Mar 14 08:47:08 2018<br>
@@ -859,7 +859,7 @@ define &lt;4 x float&gt; @test_broadcastss(flo<br>
 ;<br>
 ; BTVER2-LABEL: test_broadcastss:<br>
 ; BTVER2:       # %bb.0:<br>
-; BTVER2-NEXT:    vbroadcastss (%rdi), %xmm0 # sched: [5:1.00]<br>
+; BTVER2-NEXT:    vbroadcastss (%rdi), %xmm0 # sched: [6:1.00]<br>
 ; BTVER2-NEXT:    retq # sched: [4:1.00]<br>
 ;<br>
 ; ZNVER1-LABEL: test_broadcastss:<br>
<br>
Modified: llvm/trunk/test/CodeGen/X86/<wbr>avx2-schedule.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=327524&r1=327523&r2=327524&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>CodeGen/X86/avx2-schedule.ll?<wbr>rev=327524&r1=327523&r2=<wbr>327524&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/test/CodeGen/X86/<wbr>avx2-schedule.ll (original)<br>
+++ llvm/trunk/test/CodeGen/X86/<wbr>avx2-schedule.ll Wed Mar 14 08:47:08 2018<br>
@@ -9,7 +9,7 @@<br>
 define &lt;8 x i32&gt; @test_broadcasti128(&lt;8 x i32&gt; %a0, &lt;4 x i32&gt; *%a1) {<br>
 ; GENERIC-LABEL: test_broadcasti128:<br>
 ; GENERIC:       # %bb.0:<br>
-; GENERIC-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [4:0.50]<br>
+; GENERIC-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [5:1.00]<br>
 ; GENERIC-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]<br>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]<br>
 ;<br>
@@ -1762,7 +1762,7 @@ define &lt;16 x i8&gt; @test_pbroadcastb(&lt;16 x<br>
 ; GENERIC-LABEL: test_pbroadcastb:<br>
 ; GENERIC:       # %bb.0:<br>
 ; GENERIC-NEXT:    vpbroadcastb %xmm0, %xmm0 # sched: [1:1.00]<br>
-; GENERIC-NEXT:    vpbroadcastb (%rdi), %xmm1 # sched: [4:0.50]<br>
+; GENERIC-NEXT:    vpbroadcastb (%rdi), %xmm1 # sched: [5:1.00]<br>
 ; GENERIC-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]<br>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]<br>
 ;<br>
@@ -1811,7 +1811,7 @@ define &lt;32 x i8&gt; @test_pbroadcastb_ymm(&lt;<br>
 ; GENERIC-LABEL: test_pbroadcastb_ymm:<br>
 ; GENERIC:       # %bb.0:<br>
 ; GENERIC-NEXT:    vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00]<br>
-; GENERIC-NEXT:    vpbroadcastb (%rdi), %ymm1 # sched: [4:0.50]<br>
+; GENERIC-NEXT:    vpbroadcastb (%rdi), %ymm1 # sched: [5:1.00]<br>
 ; GENERIC-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]<br>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]<br>
 ;<br>
@@ -1860,7 +1860,7 @@ define &lt;4 x i32&gt; @test_pbroadcastd(&lt;4 x<br>
 ; GENERIC-LABEL: test_pbroadcastd:<br>
 ; GENERIC:       # %bb.0:<br>
 ; GENERIC-NEXT:    vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]<br>
-; GENERIC-NEXT:    vpbroadcastd (%rdi), %xmm1 # sched: [4:0.50]<br>
+; GENERIC-NEXT:    vpbroadcastd (%rdi), %xmm1 # sched: [5:1.00]<br>
 ; GENERIC-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]<br>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]<br>
 ;<br>
@@ -1908,7 +1908,7 @@ define &lt;8 x i32&gt; @test_pbroadcastd_ymm(&lt;<br>
 ; GENERIC-LABEL: test_pbroadcastd_ymm:<br>
 ; GENERIC:       # %bb.0:<br>
 ; GENERIC-NEXT:    vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00]<br>
-; GENERIC-NEXT:    vpbroadcastd (%rdi), %ymm1 # sched: [4:0.50]<br>
+; GENERIC-NEXT:    vpbroadcastd (%rdi), %ymm1 # sched: [5:1.00]<br>
 ; GENERIC-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]<br>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]<br>
 ;<br>
@@ -1956,7 +1956,7 @@ define &lt;2 x i64&gt; @test_pbroadcastq(&lt;2 x<br>
 ; GENERIC-LABEL: test_pbroadcastq:<br>
 ; GENERIC:       # %bb.0:<br>
 ; GENERIC-NEXT:    vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]<br>
-; GENERIC-NEXT:    vpbroadcastq (%rdi), %xmm1 # sched: [4:0.50]<br>
+; GENERIC-NEXT:    vpbroadcastq (%rdi), %xmm1 # sched: [5:1.00]<br>
 ; GENERIC-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]<br>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]<br>
 ;<br>
@@ -2004,7 +2004,7 @@ define &lt;4 x i64&gt; @test_pbroadcastq_ymm(&lt;<br>
 ; GENERIC-LABEL: test_pbroadcastq_ymm:<br>
 ; GENERIC:       # %bb.0:<br>
 ; GENERIC-NEXT:    vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00]<br>
-; GENERIC-NEXT:    vpbroadcastq (%rdi), %ymm1 # sched: [4:0.50]<br>
+; GENERIC-NEXT:    vpbroadcastq (%rdi), %ymm1 # sched: [5:1.00]<br>
 ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]<br>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]<br>
 ;<br>
@@ -2052,7 +2052,7 @@ define &lt;8 x i16&gt; @test_pbroadcastw(&lt;8 x<br>
 ; GENERIC-LABEL: test_pbroadcastw:<br>
 ; GENERIC:       # %bb.0:<br>
 ; GENERIC-NEXT:    vpbroadcastw %xmm0, %xmm0 # sched: [1:1.00]<br>
-; GENERIC-NEXT:    vpbroadcastw (%rdi), %xmm1 # sched: [4:0.50]<br>
+; GENERIC-NEXT:    vpbroadcastw (%rdi), %xmm1 # sched: [5:1.00]<br>
 ; GENERIC-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]<br>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]<br>
 ;<br>
@@ -2101,7 +2101,7 @@ define &lt;16 x i16&gt; @test_pbroadcastw_ymm(<br>
 ; GENERIC-LABEL: test_pbroadcastw_ymm:<br>
 ; GENERIC:       # %bb.0:<br>
 ; GENERIC-NEXT:    vpbroadcastw %xmm0, %ymm0 # sched: [1:1.00]<br>
-; GENERIC-NEXT:    vpbroadcastw (%rdi), %ymm1 # sched: [4:0.50]<br>
+; GENERIC-NEXT:    vpbroadcastw (%rdi), %ymm1 # sched: [5:1.00]<br>
 ; GENERIC-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]<br>
 ; GENERIC-NEXT:    retq # sched: [1:1.00]<br>
 ;<br>
<br>
<br>
______________________________<wbr>_________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br></div>