[llvm] r327524 - [X86][AVX] Use WriteFShuffleLd for broadcast reg-mem instructions

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 14 14:00:44 PDT 2018


They are, and they are overloaded as such by those models - I was trying to 
get them to match what we do for MOVDDUP (a broadcast by another name...), 
which on older CPUs passes through both the load and shuffle units.

The relevant Intel models seem to have gone for an exhaustive scheduler 
model, covering most instructions with custom InstRW entries, while the AMD 
models and older Intel models all rely on the scheduling classes a lot more.

On 14/03/2018 17:40, Craig Topper wrote:
> Aren't these implemented as basically pure loads on Intel CPUs? They 
> are fully handled on the load port right?
>
> ~Craig
>
> On Wed, Mar 14, 2018 at 8:47 AM, Simon Pilgrim via llvm-commits 
> <llvm-commits at lists.llvm.org <mailto:llvm-commits at lists.llvm.org>> wrote:
>
>     Author: rksimon
>     Date: Wed Mar 14 08:47:08 2018
>     New Revision: 327524
>
>     URL: http://llvm.org/viewvc/llvm-project?rev=327524&view=rev
>     <http://llvm.org/viewvc/llvm-project?rev=327524&view=rev>
>     Log:
>     [X86][AVX] Use WriteFShuffleLd for broadcast reg-mem instructions
>
>     They shouldn't be treated as pure loads.
>
>     Found while investigating D44428
>
>     Modified:
>         llvm/trunk/lib/Target/X86/X86InstrSSE.td
>         llvm/trunk/test/CodeGen/X86/avx-schedule.ll
>         llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
>
>     Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
>     URL:
>     http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=327524&r1=327523&r2=327524&view=diff
>     <http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=327524&r1=327523&r2=327524&view=diff>
>     ==============================================================================
>     --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
>     +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Mar 14 08:47:08 2018
>     @@ -7482,7 +7482,8 @@ class avx2_broadcast_rr<bits<8> opc, str
>
>      let ExeDomain = SSEPackedSingle, Predicates = [HasAVX, NoVLX] in {
>        def VBROADCASTSSrm  : avx_broadcast_rm<0x18, "vbroadcastss", VR128,
>     -                                             f32mem, v4f32,
>     loadf32, WriteLoad>;
>     +                                             f32mem, v4f32, loadf32,
>     +  WriteFShuffleLd>;
>        def VBROADCASTSSYrm : avx_broadcast_rm<0x18, "vbroadcastss", VR256,
>                                                   f32mem, v8f32, loadf32,
>     WriteFShuffleLd>, VEX_L;
>     @@ -7518,7 +7519,7 @@ let mayLoad = 1, hasSideEffects = 0, Pre
>      def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
>                                 (ins i128mem:$src),
>                                 "vbroadcasti128\t{$src, $dst|$dst,
>     $src}", []>,
>     -                           Sched<[WriteLoad]>, VEX, VEX_L;
>     +                           Sched<[WriteShuffleLd]>, VEX, VEX_L;
>
>      let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX],
>          ExeDomain = SSEPackedSingle in
>     @@ -7974,7 +7975,7 @@ multiclass avx2_broadcast<bits<8> opc, s
>                        !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
>                        [(set VR128:$dst,
>                         (OpVT128 (X86VBroadcast (ld_frag addr:$src))))]>,
>     -                  Sched<[WriteLoad]>, VEX;
>     +                  Sched<[WriteShuffleLd]>, VEX;
>          def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins
>     VR128:$src),
>                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst,
>     $src}"),
>                         [(set VR256:$dst,
>     @@ -7984,7 +7985,7 @@ multiclass avx2_broadcast<bits<8> opc, s
>                         !strconcat(OpcodeStr, "\t{$src, $dst|$dst,
>     $src}"),
>                         [(set VR256:$dst,
>                          (OpVT256 (X86VBroadcast (ld_frag addr:$src))))]>,
>     -                   Sched<[WriteLoad]>, VEX, VEX_L;
>     +                   Sched<[WriteShuffleLd]>, VEX, VEX_L;
>
>          // Provide aliases for broadcast from the same register class
>     that
>          // automatically does the extract.
>
>     Modified: llvm/trunk/test/CodeGen/X86/avx-schedule.ll
>     URL:
>     http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=327524&r1=327523&r2=327524&view=diff
>     <http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=327524&r1=327523&r2=327524&view=diff>
>     ==============================================================================
>     --- llvm/trunk/test/CodeGen/X86/avx-schedule.ll (original)
>     +++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll Wed Mar 14
>     08:47:08 2018
>     @@ -859,7 +859,7 @@ define <4 x float> @test_broadcastss(flo
>      ;
>      ; BTVER2-LABEL: test_broadcastss:
>      ; BTVER2:       # %bb.0:
>     -; BTVER2-NEXT:    vbroadcastss (%rdi), %xmm0 # sched: [5:1.00]
>     +; BTVER2-NEXT:    vbroadcastss (%rdi), %xmm0 # sched: [6:1.00]
>      ; BTVER2-NEXT:    retq # sched: [4:1.00]
>      ;
>      ; ZNVER1-LABEL: test_broadcastss:
>
>     Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
>     URL:
>     http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=327524&r1=327523&r2=327524&view=diff
>     <http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=327524&r1=327523&r2=327524&view=diff>
>     ==============================================================================
>     --- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
>     +++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Wed Mar 14
>     08:47:08 2018
>     @@ -9,7 +9,7 @@
>      define <8 x i32> @test_broadcasti128(<8 x i32> %a0, <4 x i32> *%a1) {
>      ; GENERIC-LABEL: test_broadcasti128:
>      ; GENERIC:       # %bb.0:
>     -; GENERIC-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1]
>     sched: [4:0.50]
>     +; GENERIC-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1]
>     sched: [5:1.00]
>      ; GENERIC-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
>      ; GENERIC-NEXT:    retq # sched: [1:1.00]
>      ;
>     @@ -1762,7 +1762,7 @@ define <16 x i8> @test_pbroadcastb(<16 x
>      ; GENERIC-LABEL: test_pbroadcastb:
>      ; GENERIC:       # %bb.0:
>      ; GENERIC-NEXT:    vpbroadcastb %xmm0, %xmm0 # sched: [1:1.00]
>     -; GENERIC-NEXT:    vpbroadcastb (%rdi), %xmm1 # sched: [4:0.50]
>     +; GENERIC-NEXT:    vpbroadcastb (%rdi), %xmm1 # sched: [5:1.00]
>      ; GENERIC-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
>      ; GENERIC-NEXT:    retq # sched: [1:1.00]
>      ;
>     @@ -1811,7 +1811,7 @@ define <32 x i8> @test_pbroadcastb_ymm(<
>      ; GENERIC-LABEL: test_pbroadcastb_ymm:
>      ; GENERIC:       # %bb.0:
>      ; GENERIC-NEXT:    vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00]
>     -; GENERIC-NEXT:    vpbroadcastb (%rdi), %ymm1 # sched: [4:0.50]
>     +; GENERIC-NEXT:    vpbroadcastb (%rdi), %ymm1 # sched: [5:1.00]
>      ; GENERIC-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
>      ; GENERIC-NEXT:    retq # sched: [1:1.00]
>      ;
>     @@ -1860,7 +1860,7 @@ define <4 x i32> @test_pbroadcastd(<4 x
>      ; GENERIC-LABEL: test_pbroadcastd:
>      ; GENERIC:       # %bb.0:
>      ; GENERIC-NEXT:    vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
>     -; GENERIC-NEXT:    vpbroadcastd (%rdi), %xmm1 # sched: [4:0.50]
>     +; GENERIC-NEXT:    vpbroadcastd (%rdi), %xmm1 # sched: [5:1.00]
>      ; GENERIC-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
>      ; GENERIC-NEXT:    retq # sched: [1:1.00]
>      ;
>     @@ -1908,7 +1908,7 @@ define <8 x i32> @test_pbroadcastd_ymm(<
>      ; GENERIC-LABEL: test_pbroadcastd_ymm:
>      ; GENERIC:       # %bb.0:
>      ; GENERIC-NEXT:    vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00]
>     -; GENERIC-NEXT:    vpbroadcastd (%rdi), %ymm1 # sched: [4:0.50]
>     +; GENERIC-NEXT:    vpbroadcastd (%rdi), %ymm1 # sched: [5:1.00]
>      ; GENERIC-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
>      ; GENERIC-NEXT:    retq # sched: [1:1.00]
>      ;
>     @@ -1956,7 +1956,7 @@ define <2 x i64> @test_pbroadcastq(<2 x
>      ; GENERIC-LABEL: test_pbroadcastq:
>      ; GENERIC:       # %bb.0:
>      ; GENERIC-NEXT:    vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
>     -; GENERIC-NEXT:    vpbroadcastq (%rdi), %xmm1 # sched: [4:0.50]
>     +; GENERIC-NEXT:    vpbroadcastq (%rdi), %xmm1 # sched: [5:1.00]
>      ; GENERIC-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
>      ; GENERIC-NEXT:    retq # sched: [1:1.00]
>      ;
>     @@ -2004,7 +2004,7 @@ define <4 x i64> @test_pbroadcastq_ymm(<
>      ; GENERIC-LABEL: test_pbroadcastq_ymm:
>      ; GENERIC:       # %bb.0:
>      ; GENERIC-NEXT:    vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00]
>     -; GENERIC-NEXT:    vpbroadcastq (%rdi), %ymm1 # sched: [4:0.50]
>     +; GENERIC-NEXT:    vpbroadcastq (%rdi), %ymm1 # sched: [5:1.00]
>      ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
>      ; GENERIC-NEXT:    retq # sched: [1:1.00]
>      ;
>     @@ -2052,7 +2052,7 @@ define <8 x i16> @test_pbroadcastw(<8 x
>      ; GENERIC-LABEL: test_pbroadcastw:
>      ; GENERIC:       # %bb.0:
>      ; GENERIC-NEXT:    vpbroadcastw %xmm0, %xmm0 # sched: [1:1.00]
>     -; GENERIC-NEXT:    vpbroadcastw (%rdi), %xmm1 # sched: [4:0.50]
>     +; GENERIC-NEXT:    vpbroadcastw (%rdi), %xmm1 # sched: [5:1.00]
>      ; GENERIC-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
>      ; GENERIC-NEXT:    retq # sched: [1:1.00]
>      ;
>     @@ -2101,7 +2101,7 @@ define <16 x i16> @test_pbroadcastw_ymm(
>      ; GENERIC-LABEL: test_pbroadcastw_ymm:
>      ; GENERIC:       # %bb.0:
>      ; GENERIC-NEXT:    vpbroadcastw %xmm0, %ymm0 # sched: [1:1.00]
>     -; GENERIC-NEXT:    vpbroadcastw (%rdi), %ymm1 # sched: [4:0.50]
>     +; GENERIC-NEXT:    vpbroadcastw (%rdi), %ymm1 # sched: [5:1.00]
>      ; GENERIC-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
>      ; GENERIC-NEXT:    retq # sched: [1:1.00]
>      ;
>
>
>     _______________________________________________
>     llvm-commits mailing list
>     llvm-commits at lists.llvm.org <mailto:llvm-commits at lists.llvm.org>
>     http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>     <http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits>
>
>

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180314/192b5087/attachment.html>


More information about the llvm-commits mailing list