<div dir="ltr">Does AMD implement them as shuffles or plain loads? I guess I'm really wondering if they should be classified as their own thing if their implementation varies between Intel and AMD (pretending for the moment that there is some future world where we don't just override everything for Intel CPUs).</div><div class="gmail_extra"><br clear="all"><div><div class="gmail_signature" data-smartmail="gmail_signature">~Craig</div></div>
<br><div class="gmail_quote">On Wed, Mar 14, 2018 at 2:00 PM, Simon Pilgrim <span dir="ltr">&lt;<a href="mailto:llvm-dev@redking.me.uk" target="_blank">llvm-dev@redking.me.uk</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
<div text="#000000" bgcolor="#FFFFFF">
<p>And they are overridden as such by those models - I was trying to get
them to match what we do for MOVDDUP (a broadcast by another
name...), which on older CPUs passes through both the load and shuffle
units.<br>
</p>
The relevant Intel models seem to have gone for an exhaustive
scheduler model, covering most instructions with custom InstRW
entries, while the AMD models and older Intel models all rely on the
classes a lot more.<br>
<br>
<div class="m_8308829856561103656moz-cite-prefix">On 14/03/2018 17:40, Craig Topper
wrote:<br>
</div>
<blockquote type="cite">
<div dir="ltr">Aren't these implemented as basically pure loads on
Intel CPUs? They are fully handled on the load port, right?</div>
<div class="gmail_extra"><br clear="all">
<div>
<div class="m_8308829856561103656gmail_signature" data-smartmail="gmail_signature">~Craig</div>
</div>
<br>
<div class="gmail_quote">On Wed, Mar 14, 2018 at 8:47 AM, Simon
Pilgrim via llvm-commits <span dir="ltr">&lt;<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>&gt;</span>
wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author:
rksimon<br>
Date: Wed Mar 14 08:47:08 2018<br>
New Revision: 327524<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=327524&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-pr<wbr>oject?rev=327524&view=rev</a><br>
Log:<br>
[X86][AVX] Use WriteFShuffleLd for broadcast reg-mem
instructions<br>
<br>
They shouldn't be treated as pure loads.<br>
<br>
Found while investigating D44428<br>
<br>
Modified:<br>
llvm/trunk/lib/Target/X86/X86I<wbr>nstrSSE.td<br>
llvm/trunk/test/CodeGen/X86/av<wbr>x-schedule.ll<br>
llvm/trunk/test/CodeGen/X86/av<wbr>x2-schedule.ll<br>
<br>
Modified: llvm/trunk/lib/Target/X86/X86I<wbr>nstrSSE.td<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=327524&r1=327523&r2=327524&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-pr<wbr>oject/llvm/trunk/lib/Target/X8<wbr>6/X86InstrSSE.td?rev=327524&r1<wbr>=327523&r2=327524&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/lib/Target/X86/X86I<wbr>nstrSSE.td (original)<br>
+++ llvm/trunk/lib/Target/X86/X86I<wbr>nstrSSE.td Wed Mar 14
08:47:08 2018<br>
@@ -7482,7 +7482,8 @@ class
avx2_broadcast_rr&lt;bits&lt;8&gt; opc, str<br>
<br>
let ExeDomain = SSEPackedSingle, Predicates = [HasAVX,
NoVLX] in {<br>
def VBROADCASTSSrm : avx_broadcast_rm<0x18,
"vbroadcastss", VR128,<br>
- f32mem, v4f32,
loadf32, WriteLoad>;<br>
+ f32mem, v4f32,
loadf32,<br>
+
WriteFShuffleLd>;<br>
def VBROADCASTSSYrm : avx_broadcast_rm<0x18,
"vbroadcastss", VR256,<br>
f32mem, v8f32,
loadf32,<br>
WriteFShuffleLd>, VEX_L;<br>
@@ -7518,7 +7519,7 @@ let mayLoad = 1, hasSideEffects = 0,
Pre<br>
def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs
VR256:$dst),<br>
(ins i128mem:$src),<br>
"vbroadcasti128\t{$src,
$dst|$dst, $src}", []>,<br>
- Sched<[WriteLoad]>, VEX,
VEX_L;<br>
+ Sched<[WriteShuffleLd]>,
VEX, VEX_L;<br>
<br>
let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX],<br>
ExeDomain = SSEPackedSingle in<br>
@@ -7974,7 +7975,7 @@ multiclass
avx2_broadcast&lt;bits&lt;8&gt; opc, s<br>
!strconcat(OpcodeStr, "\t{$src,
$dst|$dst, $src}"),<br>
[(set VR128:$dst,<br>
(OpVT128 (X86VBroadcast (ld_frag
addr:$src))))]>,<br>
- Sched<[WriteLoad]>, VEX;<br>
+ Sched<[WriteShuffleLd]>, VEX;<br>
def Yrr : AVX28I&lt;opc, MRMSrcReg, (outs VR256:$dst),
(ins VR128:$src),<br>
!strconcat(OpcodeStr, "\t{$src,
$dst|$dst, $src}"),<br>
[(set VR256:$dst,<br>
@@ -7984,7 +7985,7 @@ multiclass
avx2_broadcast&lt;bits&lt;8&gt; opc, s<br>
!strconcat(OpcodeStr, "\t{$src,
$dst|$dst, $src}"),<br>
[(set VR256:$dst,<br>
(OpVT256 (X86VBroadcast (ld_frag
addr:$src))))]>,<br>
- Sched<[WriteLoad]>, VEX, VEX_L;<br>
+ Sched<[WriteShuffleLd]>, VEX,
VEX_L;<br>
<br>
// Provide aliases for broadcast from the same register
class that<br>
// automatically does the extract.<br>
<br>
Modified: llvm/trunk/test/CodeGen/X86/av<wbr>x-schedule.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=327524&r1=327523&r2=327524&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-pr<wbr>oject/llvm/trunk/test/CodeGen/<wbr>X86/avx-schedule.ll?rev=<wbr>327524&r1=327523&r2=327524&<wbr>view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/test/CodeGen/X86/av<wbr>x-schedule.ll
(original)<br>
+++ llvm/trunk/test/CodeGen/X86/av<wbr>x-schedule.ll Wed Mar
14 08:47:08 2018<br>
@@ -859,7 +859,7 @@ define <4 x float>
@test_broadcastss(flo<br>
;<br>
; BTVER2-LABEL: test_broadcastss:<br>
; BTVER2: # %bb.0:<br>
-; BTVER2-NEXT: vbroadcastss (%rdi), %xmm0 # sched:
[5:1.00]<br>
+; BTVER2-NEXT: vbroadcastss (%rdi), %xmm0 # sched:
[6:1.00]<br>
; BTVER2-NEXT: retq # sched: [4:1.00]<br>
;<br>
; ZNVER1-LABEL: test_broadcastss:<br>
<br>
Modified: llvm/trunk/test/CodeGen/X86/av<wbr>x2-schedule.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=327524&r1=327523&r2=327524&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-pr<wbr>oject/llvm/trunk/test/CodeGen/<wbr>X86/avx2-schedule.ll?rev=<wbr>327524&r1=327523&r2=327524&<wbr>view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/test/CodeGen/X86/av<wbr>x2-schedule.ll
(original)<br>
+++ llvm/trunk/test/CodeGen/X86/av<wbr>x2-schedule.ll Wed
Mar 14 08:47:08 2018<br>
@@ -9,7 +9,7 @@<br>
define <8 x i32> @test_broadcasti128(<8 x i32>
%a0, <4 x i32> *%a1) {<br>
; GENERIC-LABEL: test_broadcasti128:<br>
; GENERIC: # %bb.0:<br>
-; GENERIC-NEXT: vbroadcasti128 {{.*#+}} ymm1 =
mem[0,1,0,1] sched: [4:0.50]<br>
+; GENERIC-NEXT: vbroadcasti128 {{.*#+}} ymm1 =
mem[0,1,0,1] sched: [5:1.00]<br>
; GENERIC-NEXT: vpaddd %ymm0, %ymm1, %ymm0 # sched:
[3:1.00]<br>
; GENERIC-NEXT: retq # sched: [1:1.00]<br>
;<br>
@@ -1762,7 +1762,7 @@ define <16 x i8>
@test_pbroadcastb(<16 x<br>
; GENERIC-LABEL: test_pbroadcastb:<br>
; GENERIC: # %bb.0:<br>
; GENERIC-NEXT: vpbroadcastb %xmm0, %xmm0 # sched:
[1:1.00]<br>
-; GENERIC-NEXT: vpbroadcastb (%rdi), %xmm1 # sched:
[4:0.50]<br>
+; GENERIC-NEXT: vpbroadcastb (%rdi), %xmm1 # sched:
[5:1.00]<br>
; GENERIC-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched:
[1:0.50]<br>
; GENERIC-NEXT: retq # sched: [1:1.00]<br>
;<br>
@@ -1811,7 +1811,7 @@ define <32 x i8>
@test_pbroadcastb_ymm(<<br>
; GENERIC-LABEL: test_pbroadcastb_ymm:<br>
; GENERIC: # %bb.0:<br>
; GENERIC-NEXT: vpbroadcastb %xmm0, %ymm0 # sched:
[1:1.00]<br>
-; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched:
[4:0.50]<br>
+; GENERIC-NEXT: vpbroadcastb (%rdi), %ymm1 # sched:
[5:1.00]<br>
; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched:
[3:1.00]<br>
; GENERIC-NEXT: retq # sched: [1:1.00]<br>
;<br>
@@ -1860,7 +1860,7 @@ define <4 x i32>
@test_pbroadcastd(<4 x<br>
; GENERIC-LABEL: test_pbroadcastd:<br>
; GENERIC: # %bb.0:<br>
; GENERIC-NEXT: vpbroadcastd %xmm0, %xmm0 # sched:
[1:1.00]<br>
-; GENERIC-NEXT: vpbroadcastd (%rdi), %xmm1 # sched:
[4:0.50]<br>
+; GENERIC-NEXT: vpbroadcastd (%rdi), %xmm1 # sched:
[5:1.00]<br>
; GENERIC-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched:
[1:0.50]<br>
; GENERIC-NEXT: retq # sched: [1:1.00]<br>
;<br>
@@ -1908,7 +1908,7 @@ define <8 x i32>
@test_pbroadcastd_ymm(<<br>
; GENERIC-LABEL: test_pbroadcastd_ymm:<br>
; GENERIC: # %bb.0:<br>
; GENERIC-NEXT: vpbroadcastd %xmm0, %ymm0 # sched:
[1:1.00]<br>
-; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched:
[4:0.50]<br>
+; GENERIC-NEXT: vpbroadcastd (%rdi), %ymm1 # sched:
[5:1.00]<br>
; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched:
[3:1.00]<br>
; GENERIC-NEXT: retq # sched: [1:1.00]<br>
;<br>
@@ -1956,7 +1956,7 @@ define <2 x i64>
@test_pbroadcastq(<2 x<br>
; GENERIC-LABEL: test_pbroadcastq:<br>
; GENERIC: # %bb.0:<br>
; GENERIC-NEXT: vpbroadcastq %xmm0, %xmm0 # sched:
[1:1.00]<br>
-; GENERIC-NEXT: vpbroadcastq (%rdi), %xmm1 # sched:
[4:0.50]<br>
+; GENERIC-NEXT: vpbroadcastq (%rdi), %xmm1 # sched:
[5:1.00]<br>
; GENERIC-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched:
[1:0.50]<br>
; GENERIC-NEXT: retq # sched: [1:1.00]<br>
;<br>
@@ -2004,7 +2004,7 @@ define <4 x i64>
@test_pbroadcastq_ymm(<<br>
; GENERIC-LABEL: test_pbroadcastq_ymm:<br>
; GENERIC: # %bb.0:<br>
; GENERIC-NEXT: vpbroadcastq %xmm0, %ymm0 # sched:
[1:1.00]<br>
-; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched:
[4:0.50]<br>
+; GENERIC-NEXT: vpbroadcastq (%rdi), %ymm1 # sched:
[5:1.00]<br>
; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched:
[3:1.00]<br>
; GENERIC-NEXT: retq # sched: [1:1.00]<br>
;<br>
@@ -2052,7 +2052,7 @@ define <8 x i16>
@test_pbroadcastw(<8 x<br>
; GENERIC-LABEL: test_pbroadcastw:<br>
; GENERIC: # %bb.0:<br>
; GENERIC-NEXT: vpbroadcastw %xmm0, %xmm0 # sched:
[1:1.00]<br>
-; GENERIC-NEXT: vpbroadcastw (%rdi), %xmm1 # sched:
[4:0.50]<br>
+; GENERIC-NEXT: vpbroadcastw (%rdi), %xmm1 # sched:
[5:1.00]<br>
; GENERIC-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched:
[1:0.50]<br>
; GENERIC-NEXT: retq # sched: [1:1.00]<br>
;<br>
@@ -2101,7 +2101,7 @@ define <16 x i16>
@test_pbroadcastw_ymm(<br>
; GENERIC-LABEL: test_pbroadcastw_ymm:<br>
; GENERIC: # %bb.0:<br>
; GENERIC-NEXT: vpbroadcastw %xmm0, %ymm0 # sched:
[1:1.00]<br>
-; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched:
[4:0.50]<br>
+; GENERIC-NEXT: vpbroadcastw (%rdi), %ymm1 # sched:
[5:1.00]<br>
; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched:
[3:1.00]<br>
; GENERIC-NEXT: retq # sched: [1:1.00]<br>
;<br>
<br>
<br>
______________________________<wbr>_________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-commits</a><br>
</blockquote>
</div>
<br>
</div>
</blockquote>
<br>
</div>
</blockquote></div><br></div>