[llvm] r327524 - [X86][AVX] Use WriteFShuffleLd for broadcast reg-mem instructions

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 14 14:26:27 PDT 2018


Yes, AFAICT most, if not all, AMD CPUs do pass broadcasts through an 
FPU/shuffle unit - they don't just use the LS units. That includes the 
broadcastf128 (and sometimes insertf128) cases.

The way this is going, we're going to end up with a WriteBroadcastLd 
too; I don't really mind cleaning up and expanding the scheduler classes 
instead of relying on overloads - it's less work in the long run and 
makes the models more manageable - I just wish we could stop having to 
add them to cpu models that don't support them (WriteFMA, WriteVarShift 
etc.).

On 14/03/2018 21:06, Craig Topper wrote:
> Does AMD implement them as shuffles or plain loads? I guess I'm really 
> wondering if they should be classified as their own thing if their 
> implementation varies between Intel and AMD. Pretending for the moment 
> that there is some future world where we don't just override 
> everything for Intel CPUs.
>
> ~Craig
>
> On Wed, Mar 14, 2018 at 2:00 PM, Simon Pilgrim <llvm-dev at redking.me.uk 
> <mailto:llvm-dev at redking.me.uk>> wrote:
>
>     And are overloaded as such by those models - I was trying to get
>     them to match what we do for MOVDDUP (a broadcast by another
>     name....) which on older cpus passes through load and shuffle units.
>
>     The relevant Intel models seem to have gone for an exhaustive
>     scheduler model, covering most instructions with custom InstrRW;
>     while AMD models and older Intel models all rely on the classes a
>     lot more.
>
>     On 14/03/2018 17:40, Craig Topper wrote:
>>     Aren't these implemented as basically pure loads on Intel CPUs?
>>     They are fully handled on the load port right?
>>
>>     ~Craig
>>
>>     On Wed, Mar 14, 2018 at 8:47 AM, Simon Pilgrim via llvm-commits
>>     <llvm-commits at lists.llvm.org
>>     <mailto:llvm-commits at lists.llvm.org>> wrote:
>>
>>         Author: rksimon
>>         Date: Wed Mar 14 08:47:08 2018
>>         New Revision: 327524
>>
>>         URL: http://llvm.org/viewvc/llvm-project?rev=327524&view=rev
>>         <http://llvm.org/viewvc/llvm-project?rev=327524&view=rev>
>>         Log:
>>         [X86][AVX] Use WriteFShuffleLd for broadcast reg-mem instructions
>>
>>         They shouldn't be treated as pure loads.
>>
>>         Found while investigating D44428
>>
>>         Modified:
>>             llvm/trunk/lib/Target/X86/X86InstrSSE.td
>>             llvm/trunk/test/CodeGen/X86/avx-schedule.ll
>>             llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
>>
>>         Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
>>         URL:
>>         http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=327524&r1=327523&r2=327524&view=diff
>>         <http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=327524&r1=327523&r2=327524&view=diff>
>>         ==============================================================================
>>         --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
>>         +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Mar 14
>>         08:47:08 2018
>>         @@ -7482,7 +7482,8 @@ class avx2_broadcast_rr<bits<8> opc, str
>>
>>          let ExeDomain = SSEPackedSingle, Predicates = [HasAVX,
>>         NoVLX] in {
>>            def VBROADCASTSSrm  : avx_broadcast_rm<0x18,
>>         "vbroadcastss", VR128,
>>         -  f32mem, v4f32, loadf32, WriteLoad>;
>>         +  f32mem, v4f32, loadf32,
>>         +  WriteFShuffleLd>;
>>            def VBROADCASTSSYrm : avx_broadcast_rm<0x18,
>>         "vbroadcastss", VR256,
>>         f32mem, v8f32, loadf32,
>>         WriteFShuffleLd>, VEX_L;
>>         @@ -7518,7 +7519,7 @@ let mayLoad = 1, hasSideEffects = 0, Pre
>>          def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
>>                                     (ins i128mem:$src),
>>         "vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
>>         -  Sched<[WriteLoad]>, VEX, VEX_L;
>>         +  Sched<[WriteShuffleLd]>, VEX, VEX_L;
>>
>>          let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX],
>>              ExeDomain = SSEPackedSingle in
>>         @@ -7974,7 +7975,7 @@ multiclass avx2_broadcast<bits<8> opc, s
>>                            !strconcat(OpcodeStr, "\t{$src, $dst|$dst,
>>         $src}"),
>>                            [(set VR128:$dst,
>>                             (OpVT128 (X86VBroadcast (ld_frag
>>         addr:$src))))]>,
>>         -                  Sched<[WriteLoad]>, VEX;
>>         +                  Sched<[WriteShuffleLd]>, VEX;
>>              def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins
>>         VR128:$src),
>>                             !strconcat(OpcodeStr, "\t{$src,
>>         $dst|$dst, $src}"),
>>                             [(set VR256:$dst,
>>         @@ -7984,7 +7985,7 @@ multiclass avx2_broadcast<bits<8> opc, s
>>                             !strconcat(OpcodeStr, "\t{$src,
>>         $dst|$dst, $src}"),
>>                             [(set VR256:$dst,
>>                              (OpVT256 (X86VBroadcast (ld_frag
>>         addr:$src))))]>,
>>         -                   Sched<[WriteLoad]>, VEX, VEX_L;
>>         +                   Sched<[WriteShuffleLd]>, VEX, VEX_L;
>>
>>              // Provide aliases for broadcast from the same register
>>         class that
>>              // automatically does the extract.
>>
>>         Modified: llvm/trunk/test/CodeGen/X86/avx-schedule.ll
>>         URL:
>>         http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=327524&r1=327523&r2=327524&view=diff
>>         <http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=327524&r1=327523&r2=327524&view=diff>
>>         ==============================================================================
>>         --- llvm/trunk/test/CodeGen/X86/avx-schedule.ll (original)
>>         +++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll Wed Mar 14
>>         08:47:08 2018
>>         @@ -859,7 +859,7 @@ define <4 x float> @test_broadcastss(flo
>>          ;
>>          ; BTVER2-LABEL: test_broadcastss:
>>          ; BTVER2:       # %bb.0:
>>         -; BTVER2-NEXT:    vbroadcastss (%rdi), %xmm0 # sched: [5:1.00]
>>         +; BTVER2-NEXT:    vbroadcastss (%rdi), %xmm0 # sched: [6:1.00]
>>          ; BTVER2-NEXT:    retq # sched: [4:1.00]
>>          ;
>>          ; ZNVER1-LABEL: test_broadcastss:
>>
>>         Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
>>         URL:
>>         http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=327524&r1=327523&r2=327524&view=diff
>>         <http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=327524&r1=327523&r2=327524&view=diff>
>>         ==============================================================================
>>         --- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
>>         +++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Wed Mar 14
>>         08:47:08 2018
>>         @@ -9,7 +9,7 @@
>>          define <8 x i32> @test_broadcasti128(<8 x i32> %a0, <4 x
>>         i32> *%a1) {
>>          ; GENERIC-LABEL: test_broadcasti128:
>>          ; GENERIC:       # %bb.0:
>>         -; GENERIC-NEXT:    vbroadcasti128 {{.*#+}} ymm1 =
>>         mem[0,1,0,1] sched: [4:0.50]
>>         +; GENERIC-NEXT:    vbroadcasti128 {{.*#+}} ymm1 =
>>         mem[0,1,0,1] sched: [5:1.00]
>>          ; GENERIC-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
>>          ; GENERIC-NEXT:    retq # sched: [1:1.00]
>>          ;
>>         @@ -1762,7 +1762,7 @@ define <16 x i8> @test_pbroadcastb(<16 x
>>          ; GENERIC-LABEL: test_pbroadcastb:
>>          ; GENERIC:       # %bb.0:
>>          ; GENERIC-NEXT:    vpbroadcastb %xmm0, %xmm0 # sched: [1:1.00]
>>         -; GENERIC-NEXT:    vpbroadcastb (%rdi), %xmm1 # sched: [4:0.50]
>>         +; GENERIC-NEXT:    vpbroadcastb (%rdi), %xmm1 # sched: [5:1.00]
>>          ; GENERIC-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
>>          ; GENERIC-NEXT:    retq # sched: [1:1.00]
>>          ;
>>         @@ -1811,7 +1811,7 @@ define <32 x i8> @test_pbroadcastb_ymm(<
>>          ; GENERIC-LABEL: test_pbroadcastb_ymm:
>>          ; GENERIC:       # %bb.0:
>>          ; GENERIC-NEXT:    vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00]
>>         -; GENERIC-NEXT:    vpbroadcastb (%rdi), %ymm1 # sched: [4:0.50]
>>         +; GENERIC-NEXT:    vpbroadcastb (%rdi), %ymm1 # sched: [5:1.00]
>>          ; GENERIC-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
>>          ; GENERIC-NEXT:    retq # sched: [1:1.00]
>>          ;
>>         @@ -1860,7 +1860,7 @@ define <4 x i32> @test_pbroadcastd(<4 x
>>          ; GENERIC-LABEL: test_pbroadcastd:
>>          ; GENERIC:       # %bb.0:
>>          ; GENERIC-NEXT:    vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
>>         -; GENERIC-NEXT:    vpbroadcastd (%rdi), %xmm1 # sched: [4:0.50]
>>         +; GENERIC-NEXT:    vpbroadcastd (%rdi), %xmm1 # sched: [5:1.00]
>>          ; GENERIC-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
>>          ; GENERIC-NEXT:    retq # sched: [1:1.00]
>>          ;
>>         @@ -1908,7 +1908,7 @@ define <8 x i32> @test_pbroadcastd_ymm(<
>>          ; GENERIC-LABEL: test_pbroadcastd_ymm:
>>          ; GENERIC:       # %bb.0:
>>          ; GENERIC-NEXT:    vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00]
>>         -; GENERIC-NEXT:    vpbroadcastd (%rdi), %ymm1 # sched: [4:0.50]
>>         +; GENERIC-NEXT:    vpbroadcastd (%rdi), %ymm1 # sched: [5:1.00]
>>          ; GENERIC-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
>>          ; GENERIC-NEXT:    retq # sched: [1:1.00]
>>          ;
>>         @@ -1956,7 +1956,7 @@ define <2 x i64> @test_pbroadcastq(<2 x
>>          ; GENERIC-LABEL: test_pbroadcastq:
>>          ; GENERIC:       # %bb.0:
>>          ; GENERIC-NEXT:    vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
>>         -; GENERIC-NEXT:    vpbroadcastq (%rdi), %xmm1 # sched: [4:0.50]
>>         +; GENERIC-NEXT:    vpbroadcastq (%rdi), %xmm1 # sched: [5:1.00]
>>          ; GENERIC-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
>>          ; GENERIC-NEXT:    retq # sched: [1:1.00]
>>          ;
>>         @@ -2004,7 +2004,7 @@ define <4 x i64> @test_pbroadcastq_ymm(<
>>          ; GENERIC-LABEL: test_pbroadcastq_ymm:
>>          ; GENERIC:       # %bb.0:
>>          ; GENERIC-NEXT:    vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00]
>>         -; GENERIC-NEXT:    vpbroadcastq (%rdi), %ymm1 # sched: [4:0.50]
>>         +; GENERIC-NEXT:    vpbroadcastq (%rdi), %ymm1 # sched: [5:1.00]
>>          ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
>>          ; GENERIC-NEXT:    retq # sched: [1:1.00]
>>          ;
>>         @@ -2052,7 +2052,7 @@ define <8 x i16> @test_pbroadcastw(<8 x
>>          ; GENERIC-LABEL: test_pbroadcastw:
>>          ; GENERIC:       # %bb.0:
>>          ; GENERIC-NEXT:    vpbroadcastw %xmm0, %xmm0 # sched: [1:1.00]
>>         -; GENERIC-NEXT:    vpbroadcastw (%rdi), %xmm1 # sched: [4:0.50]
>>         +; GENERIC-NEXT:    vpbroadcastw (%rdi), %xmm1 # sched: [5:1.00]
>>          ; GENERIC-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
>>          ; GENERIC-NEXT:    retq # sched: [1:1.00]
>>          ;
>>         @@ -2101,7 +2101,7 @@ define <16 x i16> @test_pbroadcastw_ymm(
>>          ; GENERIC-LABEL: test_pbroadcastw_ymm:
>>          ; GENERIC:       # %bb.0:
>>          ; GENERIC-NEXT:    vpbroadcastw %xmm0, %ymm0 # sched: [1:1.00]
>>         -; GENERIC-NEXT:    vpbroadcastw (%rdi), %ymm1 # sched: [4:0.50]
>>         +; GENERIC-NEXT:    vpbroadcastw (%rdi), %ymm1 # sched: [5:1.00]
>>          ; GENERIC-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
>>          ; GENERIC-NEXT:    retq # sched: [1:1.00]
>>          ;
>>
>>
>>         _______________________________________________
>>         llvm-commits mailing list
>>         llvm-commits at lists.llvm.org <mailto:llvm-commits at lists.llvm.org>
>>         http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>         <http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits>
>>
>>
>
>

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180314/39174d87/attachment.html>


More information about the llvm-commits mailing list