[llvm] r327524 - [X86][AVX] Use WriteFShuffleLd for broadcast reg-mem instructions

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 14 14:06:29 PDT 2018


Does AMD implement them as shuffles or plain loads? I guess I'm really
wondering if they should be classified as their own thing if their
implementation varies between Intel and AMD -- pretending, for the moment,
that there is some future world where we don't just override everything for
Intel CPUs.

~Craig

On Wed, Mar 14, 2018 at 2:00 PM, Simon Pilgrim <llvm-dev at redking.me.uk>
wrote:

> And are overloaded as such by those models - I was trying to get them to
> match what we do for MOVDDUP (a broadcast by another name...) which on
> older CPUs passes through load and shuffle units.
> The relevant Intel models seem to have gone for an exhaustive scheduler
> model, covering most instructions with custom InstrRW; while AMD models and
> older Intel models all rely on the classes a lot more.
>
> On 14/03/2018 17:40, Craig Topper wrote:
>
> Aren't these implemented as basically pure loads on Intel CPUs? They are
> fully handled on the load port, right?
>
> ~Craig
>
> On Wed, Mar 14, 2018 at 8:47 AM, Simon Pilgrim via llvm-commits <
> llvm-commits at lists.llvm.org> wrote:
>
>> Author: rksimon
>> Date: Wed Mar 14 08:47:08 2018
>> New Revision: 327524
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=327524&view=rev
>> Log:
>> [X86][AVX] Use WriteFShuffleLd for broadcast reg-mem instructions
>>
>> They shouldn't be treated as pure loads.
>>
>> Found while investigating D44428
>>
>> Modified:
>>     llvm/trunk/lib/Target/X86/X86InstrSSE.td
>>     llvm/trunk/test/CodeGen/X86/avx-schedule.ll
>>     llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
>>
>> Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=327524&r1=327523&r2=327524&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
>> +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Mar 14 08:47:08 2018
>> @@ -7482,7 +7482,8 @@ class avx2_broadcast_rr<bits<8> opc, str
>>
>>  let ExeDomain = SSEPackedSingle, Predicates = [HasAVX, NoVLX] in {
>>    def VBROADCASTSSrm  : avx_broadcast_rm<0x18, "vbroadcastss", VR128,
>> -                                             f32mem, v4f32, loadf32, WriteLoad>;
>> +                                             f32mem, v4f32, loadf32,
>> +                                             WriteFShuffleLd>;
>>    def VBROADCASTSSYrm : avx_broadcast_rm<0x18, "vbroadcastss", VR256,
>>                                               f32mem, v8f32, loadf32,
>>                                               WriteFShuffleLd>, VEX_L;
>> @@ -7518,7 +7519,7 @@ let mayLoad = 1, hasSideEffects = 0, Pre
>>  def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
>>                             (ins i128mem:$src),
>>                             "vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
>> -                           Sched<[WriteLoad]>, VEX, VEX_L;
>> +                           Sched<[WriteShuffleLd]>, VEX, VEX_L;
>>
>>  let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX],
>>      ExeDomain = SSEPackedSingle in
>> @@ -7974,7 +7975,7 @@ multiclass avx2_broadcast<bits<8> opc, s
>>                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
>>                    [(set VR128:$dst,
>>                     (OpVT128 (X86VBroadcast (ld_frag addr:$src))))]>,
>> -                  Sched<[WriteLoad]>, VEX;
>> +                  Sched<[WriteShuffleLd]>, VEX;
>>      def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
>>                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
>>                     [(set VR256:$dst,
>> @@ -7984,7 +7985,7 @@ multiclass avx2_broadcast<bits<8> opc, s
>>                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
>>                     [(set VR256:$dst,
>>                      (OpVT256 (X86VBroadcast (ld_frag addr:$src))))]>,
>> -                   Sched<[WriteLoad]>, VEX, VEX_L;
>> +                   Sched<[WriteShuffleLd]>, VEX, VEX_L;
>>
>>      // Provide aliases for broadcast from the same register class that
>>      // automatically does the extract.
>>
>> Modified: llvm/trunk/test/CodeGen/X86/avx-schedule.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=327524&r1=327523&r2=327524&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/test/CodeGen/X86/avx-schedule.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll Wed Mar 14 08:47:08 2018
>> @@ -859,7 +859,7 @@ define <4 x float> @test_broadcastss(flo
>>  ;
>>  ; BTVER2-LABEL: test_broadcastss:
>>  ; BTVER2:       # %bb.0:
>> -; BTVER2-NEXT:    vbroadcastss (%rdi), %xmm0 # sched: [5:1.00]
>> +; BTVER2-NEXT:    vbroadcastss (%rdi), %xmm0 # sched: [6:1.00]
>>  ; BTVER2-NEXT:    retq # sched: [4:1.00]
>>  ;
>>  ; ZNVER1-LABEL: test_broadcastss:
>>
>> Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=327524&r1=327523&r2=327524&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Wed Mar 14 08:47:08 2018
>> @@ -9,7 +9,7 @@
>>  define <8 x i32> @test_broadcasti128(<8 x i32> %a0, <4 x i32> *%a1) {
>>  ; GENERIC-LABEL: test_broadcasti128:
>>  ; GENERIC:       # %bb.0:
>> -; GENERIC-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [4:0.50]
>> +; GENERIC-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [5:1.00]
>>  ; GENERIC-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
>>  ; GENERIC-NEXT:    retq # sched: [1:1.00]
>>  ;
>> @@ -1762,7 +1762,7 @@ define <16 x i8> @test_pbroadcastb(<16 x
>>  ; GENERIC-LABEL: test_pbroadcastb:
>>  ; GENERIC:       # %bb.0:
>>  ; GENERIC-NEXT:    vpbroadcastb %xmm0, %xmm0 # sched: [1:1.00]
>> -; GENERIC-NEXT:    vpbroadcastb (%rdi), %xmm1 # sched: [4:0.50]
>> +; GENERIC-NEXT:    vpbroadcastb (%rdi), %xmm1 # sched: [5:1.00]
>>  ; GENERIC-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
>>  ; GENERIC-NEXT:    retq # sched: [1:1.00]
>>  ;
>> @@ -1811,7 +1811,7 @@ define <32 x i8> @test_pbroadcastb_ymm(<
>>  ; GENERIC-LABEL: test_pbroadcastb_ymm:
>>  ; GENERIC:       # %bb.0:
>>  ; GENERIC-NEXT:    vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00]
>> -; GENERIC-NEXT:    vpbroadcastb (%rdi), %ymm1 # sched: [4:0.50]
>> +; GENERIC-NEXT:    vpbroadcastb (%rdi), %ymm1 # sched: [5:1.00]
>>  ; GENERIC-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
>>  ; GENERIC-NEXT:    retq # sched: [1:1.00]
>>  ;
>> @@ -1860,7 +1860,7 @@ define <4 x i32> @test_pbroadcastd(<4 x
>>  ; GENERIC-LABEL: test_pbroadcastd:
>>  ; GENERIC:       # %bb.0:
>>  ; GENERIC-NEXT:    vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
>> -; GENERIC-NEXT:    vpbroadcastd (%rdi), %xmm1 # sched: [4:0.50]
>> +; GENERIC-NEXT:    vpbroadcastd (%rdi), %xmm1 # sched: [5:1.00]
>>  ; GENERIC-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
>>  ; GENERIC-NEXT:    retq # sched: [1:1.00]
>>  ;
>> @@ -1908,7 +1908,7 @@ define <8 x i32> @test_pbroadcastd_ymm(<
>>  ; GENERIC-LABEL: test_pbroadcastd_ymm:
>>  ; GENERIC:       # %bb.0:
>>  ; GENERIC-NEXT:    vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00]
>> -; GENERIC-NEXT:    vpbroadcastd (%rdi), %ymm1 # sched: [4:0.50]
>> +; GENERIC-NEXT:    vpbroadcastd (%rdi), %ymm1 # sched: [5:1.00]
>>  ; GENERIC-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
>>  ; GENERIC-NEXT:    retq # sched: [1:1.00]
>>  ;
>> @@ -1956,7 +1956,7 @@ define <2 x i64> @test_pbroadcastq(<2 x
>>  ; GENERIC-LABEL: test_pbroadcastq:
>>  ; GENERIC:       # %bb.0:
>>  ; GENERIC-NEXT:    vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
>> -; GENERIC-NEXT:    vpbroadcastq (%rdi), %xmm1 # sched: [4:0.50]
>> +; GENERIC-NEXT:    vpbroadcastq (%rdi), %xmm1 # sched: [5:1.00]
>>  ; GENERIC-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
>>  ; GENERIC-NEXT:    retq # sched: [1:1.00]
>>  ;
>> @@ -2004,7 +2004,7 @@ define <4 x i64> @test_pbroadcastq_ymm(<
>>  ; GENERIC-LABEL: test_pbroadcastq_ymm:
>>  ; GENERIC:       # %bb.0:
>>  ; GENERIC-NEXT:    vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00]
>> -; GENERIC-NEXT:    vpbroadcastq (%rdi), %ymm1 # sched: [4:0.50]
>> +; GENERIC-NEXT:    vpbroadcastq (%rdi), %ymm1 # sched: [5:1.00]
>>  ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
>>  ; GENERIC-NEXT:    retq # sched: [1:1.00]
>>  ;
>> @@ -2052,7 +2052,7 @@ define <8 x i16> @test_pbroadcastw(<8 x
>>  ; GENERIC-LABEL: test_pbroadcastw:
>>  ; GENERIC:       # %bb.0:
>>  ; GENERIC-NEXT:    vpbroadcastw %xmm0, %xmm0 # sched: [1:1.00]
>> -; GENERIC-NEXT:    vpbroadcastw (%rdi), %xmm1 # sched: [4:0.50]
>> +; GENERIC-NEXT:    vpbroadcastw (%rdi), %xmm1 # sched: [5:1.00]
>>  ; GENERIC-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
>>  ; GENERIC-NEXT:    retq # sched: [1:1.00]
>>  ;
>> @@ -2101,7 +2101,7 @@ define <16 x i16> @test_pbroadcastw_ymm(
>>  ; GENERIC-LABEL: test_pbroadcastw_ymm:
>>  ; GENERIC:       # %bb.0:
>>  ; GENERIC-NEXT:    vpbroadcastw %xmm0, %ymm0 # sched: [1:1.00]
>> -; GENERIC-NEXT:    vpbroadcastw (%rdi), %ymm1 # sched: [4:0.50]
>> +; GENERIC-NEXT:    vpbroadcastw (%rdi), %ymm1 # sched: [5:1.00]
>>  ; GENERIC-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
>>  ; GENERIC-NEXT:    retq # sched: [1:1.00]
>>  ;
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>
>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180314/c3163405/attachment.html>


More information about the llvm-commits mailing list