[llvm] r327524 - [X86][AVX] Use WriteFShuffleLd for broadcast reg-mem instructions

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 14 10:40:23 PDT 2018


Aren't these basically implemented as pure loads on Intel CPUs? They are
fully handled on the load port, right?

~Craig

On Wed, Mar 14, 2018 at 8:47 AM, Simon Pilgrim via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

> Author: rksimon
> Date: Wed Mar 14 08:47:08 2018
> New Revision: 327524
>
> URL: http://llvm.org/viewvc/llvm-project?rev=327524&view=rev
> Log:
> [X86][AVX] Use WriteFShuffleLd for broadcast reg-mem instructions
>
> They shouldn't be treated as pure loads.
>
> Found while investigating D44428
>
> Modified:
>     llvm/trunk/lib/Target/X86/X86InstrSSE.td
>     llvm/trunk/test/CodeGen/X86/avx-schedule.ll
>     llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=327524&r1=327523&r2=327524&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
> +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Wed Mar 14 08:47:08 2018
> @@ -7482,7 +7482,8 @@ class avx2_broadcast_rr<bits<8> opc, str
>
>  let ExeDomain = SSEPackedSingle, Predicates = [HasAVX, NoVLX] in {
>    def VBROADCASTSSrm  : avx_broadcast_rm<0x18, "vbroadcastss", VR128,
> -                                             f32mem, v4f32, loadf32, WriteLoad>;
> +                                             f32mem, v4f32, loadf32,
> +                                             WriteFShuffleLd>;
>    def VBROADCASTSSYrm : avx_broadcast_rm<0x18, "vbroadcastss", VR256,
>                                               f32mem, v8f32, loadf32,
>                                               WriteFShuffleLd>, VEX_L;
> @@ -7518,7 +7519,7 @@ let mayLoad = 1, hasSideEffects = 0, Pre
>  def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
>                             (ins i128mem:$src),
>                             "vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
> -                           Sched<[WriteLoad]>, VEX, VEX_L;
> +                           Sched<[WriteShuffleLd]>, VEX, VEX_L;
>
>  let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX],
>      ExeDomain = SSEPackedSingle in
> @@ -7974,7 +7975,7 @@ multiclass avx2_broadcast<bits<8> opc, s
>                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
>                    [(set VR128:$dst,
>                     (OpVT128 (X86VBroadcast (ld_frag addr:$src))))]>,
> -                  Sched<[WriteLoad]>, VEX;
> +                  Sched<[WriteShuffleLd]>, VEX;
>      def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
>                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
>                     [(set VR256:$dst,
> @@ -7984,7 +7985,7 @@ multiclass avx2_broadcast<bits<8> opc, s
>                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
>                     [(set VR256:$dst,
>                      (OpVT256 (X86VBroadcast (ld_frag addr:$src))))]>,
> -                   Sched<[WriteLoad]>, VEX, VEX_L;
> +                   Sched<[WriteShuffleLd]>, VEX, VEX_L;
>
>      // Provide aliases for broadcast from the same register class that
>      // automatically does the extract.
>
> Modified: llvm/trunk/test/CodeGen/X86/avx-schedule.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=327524&r1=327523&r2=327524&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx-schedule.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx-schedule.ll Wed Mar 14 08:47:08 2018
> @@ -859,7 +859,7 @@ define <4 x float> @test_broadcastss(flo
>  ;
>  ; BTVER2-LABEL: test_broadcastss:
>  ; BTVER2:       # %bb.0:
> -; BTVER2-NEXT:    vbroadcastss (%rdi), %xmm0 # sched: [5:1.00]
> +; BTVER2-NEXT:    vbroadcastss (%rdi), %xmm0 # sched: [6:1.00]
>  ; BTVER2-NEXT:    retq # sched: [4:1.00]
>  ;
>  ; ZNVER1-LABEL: test_broadcastss:
>
> Modified: llvm/trunk/test/CodeGen/X86/avx2-schedule.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=327524&r1=327523&r2=327524&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx2-schedule.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx2-schedule.ll Wed Mar 14 08:47:08 2018
> @@ -9,7 +9,7 @@
>  define <8 x i32> @test_broadcasti128(<8 x i32> %a0, <4 x i32> *%a1) {
>  ; GENERIC-LABEL: test_broadcasti128:
>  ; GENERIC:       # %bb.0:
> -; GENERIC-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [4:0.50]
> +; GENERIC-NEXT:    vbroadcasti128 {{.*#+}} ymm1 = mem[0,1,0,1] sched: [5:1.00]
>  ; GENERIC-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
>  ; GENERIC-NEXT:    retq # sched: [1:1.00]
>  ;
> @@ -1762,7 +1762,7 @@ define <16 x i8> @test_pbroadcastb(<16 x
>  ; GENERIC-LABEL: test_pbroadcastb:
>  ; GENERIC:       # %bb.0:
>  ; GENERIC-NEXT:    vpbroadcastb %xmm0, %xmm0 # sched: [1:1.00]
> -; GENERIC-NEXT:    vpbroadcastb (%rdi), %xmm1 # sched: [4:0.50]
> +; GENERIC-NEXT:    vpbroadcastb (%rdi), %xmm1 # sched: [5:1.00]
>  ; GENERIC-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
>  ; GENERIC-NEXT:    retq # sched: [1:1.00]
>  ;
> @@ -1811,7 +1811,7 @@ define <32 x i8> @test_pbroadcastb_ymm(<
>  ; GENERIC-LABEL: test_pbroadcastb_ymm:
>  ; GENERIC:       # %bb.0:
>  ; GENERIC-NEXT:    vpbroadcastb %xmm0, %ymm0 # sched: [1:1.00]
> -; GENERIC-NEXT:    vpbroadcastb (%rdi), %ymm1 # sched: [4:0.50]
> +; GENERIC-NEXT:    vpbroadcastb (%rdi), %ymm1 # sched: [5:1.00]
>  ; GENERIC-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
>  ; GENERIC-NEXT:    retq # sched: [1:1.00]
>  ;
> @@ -1860,7 +1860,7 @@ define <4 x i32> @test_pbroadcastd(<4 x
>  ; GENERIC-LABEL: test_pbroadcastd:
>  ; GENERIC:       # %bb.0:
>  ; GENERIC-NEXT:    vpbroadcastd %xmm0, %xmm0 # sched: [1:1.00]
> -; GENERIC-NEXT:    vpbroadcastd (%rdi), %xmm1 # sched: [4:0.50]
> +; GENERIC-NEXT:    vpbroadcastd (%rdi), %xmm1 # sched: [5:1.00]
>  ; GENERIC-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
>  ; GENERIC-NEXT:    retq # sched: [1:1.00]
>  ;
> @@ -1908,7 +1908,7 @@ define <8 x i32> @test_pbroadcastd_ymm(<
>  ; GENERIC-LABEL: test_pbroadcastd_ymm:
>  ; GENERIC:       # %bb.0:
>  ; GENERIC-NEXT:    vpbroadcastd %xmm0, %ymm0 # sched: [1:1.00]
> -; GENERIC-NEXT:    vpbroadcastd (%rdi), %ymm1 # sched: [4:0.50]
> +; GENERIC-NEXT:    vpbroadcastd (%rdi), %ymm1 # sched: [5:1.00]
>  ; GENERIC-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
>  ; GENERIC-NEXT:    retq # sched: [1:1.00]
>  ;
> @@ -1956,7 +1956,7 @@ define <2 x i64> @test_pbroadcastq(<2 x
>  ; GENERIC-LABEL: test_pbroadcastq:
>  ; GENERIC:       # %bb.0:
>  ; GENERIC-NEXT:    vpbroadcastq %xmm0, %xmm0 # sched: [1:1.00]
> -; GENERIC-NEXT:    vpbroadcastq (%rdi), %xmm1 # sched: [4:0.50]
> +; GENERIC-NEXT:    vpbroadcastq (%rdi), %xmm1 # sched: [5:1.00]
>  ; GENERIC-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
>  ; GENERIC-NEXT:    retq # sched: [1:1.00]
>  ;
> @@ -2004,7 +2004,7 @@ define <4 x i64> @test_pbroadcastq_ymm(<
>  ; GENERIC-LABEL: test_pbroadcastq_ymm:
>  ; GENERIC:       # %bb.0:
>  ; GENERIC-NEXT:    vpbroadcastq %xmm0, %ymm0 # sched: [1:1.00]
> -; GENERIC-NEXT:    vpbroadcastq (%rdi), %ymm1 # sched: [4:0.50]
> +; GENERIC-NEXT:    vpbroadcastq (%rdi), %ymm1 # sched: [5:1.00]
>  ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
>  ; GENERIC-NEXT:    retq # sched: [1:1.00]
>  ;
> @@ -2052,7 +2052,7 @@ define <8 x i16> @test_pbroadcastw(<8 x
>  ; GENERIC-LABEL: test_pbroadcastw:
>  ; GENERIC:       # %bb.0:
>  ; GENERIC-NEXT:    vpbroadcastw %xmm0, %xmm0 # sched: [1:1.00]
> -; GENERIC-NEXT:    vpbroadcastw (%rdi), %xmm1 # sched: [4:0.50]
> +; GENERIC-NEXT:    vpbroadcastw (%rdi), %xmm1 # sched: [5:1.00]
>  ; GENERIC-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50]
>  ; GENERIC-NEXT:    retq # sched: [1:1.00]
>  ;
> @@ -2101,7 +2101,7 @@ define <16 x i16> @test_pbroadcastw_ymm(
>  ; GENERIC-LABEL: test_pbroadcastw_ymm:
>  ; GENERIC:       # %bb.0:
>  ; GENERIC-NEXT:    vpbroadcastw %xmm0, %ymm0 # sched: [1:1.00]
> -; GENERIC-NEXT:    vpbroadcastw (%rdi), %ymm1 # sched: [4:0.50]
> +; GENERIC-NEXT:    vpbroadcastw (%rdi), %ymm1 # sched: [5:1.00]
>  ; GENERIC-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00]
>  ; GENERIC-NEXT:    retq # sched: [1:1.00]
>  ;
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180314/5d1ed356/attachment.html>


More information about the llvm-commits mailing list