<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  </head>
  <body text="#000000" bgcolor="#FFFFFF">
    <p>And are overloaded as such by those models - I was trying to get
      them to match what we do for MOVDDUP (a broadcast by another
      name...), which on older CPUs passes through load and shuffle
      units.<br>
    </p>
    The relevant Intel models seem to have gone for an exhaustive
    scheduler model, covering most instructions with custom InstrRW,
    while AMD models and older Intel models all rely on the classes a
    lot more.<br>
    <br>
    <div class="moz-cite-prefix">On 14/03/2018 17:40, Craig Topper
      wrote:<br>
    </div>
    <blockquote type="cite"
cite="mid:CAF7ks-NfESgiRzyvQV-L97t=KHyU12QEfz4WOz52UMheeATRTw@mail.gmail.com">
      <div dir="ltr">Aren't these implemented as basically pure loads on
        Intel CPUs? They are fully handled on the load port right?</div>
      <div class="gmail_extra"><br clear="all">
        <div>
          <div class="gmail_signature" data-smartmail="gmail_signature">~Craig</div>
        </div>
        <br>
        <div class="gmail_quote">On Wed, Mar 14, 2018 at 8:47 AM, Simon
          Pilgrim via llvm-commits <span dir="ltr">&lt;<a
              href="mailto:llvm-commits@lists.llvm.org" target="_blank"
              moz-do-not-send="true">llvm-commits@lists.llvm.org</a>&gt;</span>
          wrote:<br>
          <blockquote class="gmail_quote" style="margin:0 0 0
            .8ex;border-left:1px #ccc solid;padding-left:1ex">Author:
            rksimon<br>
            Date: Wed Mar 14 08:47:08 2018<br>
            New Revision: 327524<br>
            <br>
            URL: <a
              href="http://llvm.org/viewvc/llvm-project?rev=327524&amp;view=rev"
              rel="noreferrer" target="_blank" moz-do-not-send="true">http://llvm.org/viewvc/llvm-<wbr>project?rev=327524&amp;view=rev</a><br>
            Log:<br>
            [X86][AVX] Use WriteFShuffleLd for broadcast reg-mem
            instructions<br>
            <br>
            They shouldn't be treated as pure loads.<br>
            <br>
            Found while investigating D44428<br>
            <br>
            Modified:<br>
                llvm/trunk/lib/Target/X86/<wbr>X86InstrSSE.td<br>
                llvm/trunk/test/CodeGen/X86/<wbr>avx-schedule.ll<br>
                llvm/trunk/test/CodeGen/X86/<wbr>avx2-schedule.ll<br>
            <br>
            Modified: llvm/trunk/lib/Target/X86/<wbr>X86InstrSSE.td<br>
            URL: <a
href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=327524&r1=327523&r2=327524&view=diff"
              rel="noreferrer" target="_blank" moz-do-not-send="true">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/lib/Target/<wbr>X86/X86InstrSSE.td?rev=327524&<wbr>r1=327523&r2=327524&view=diff</a><br>
            ==============================<wbr>==============================<wbr>==================<br>
            --- llvm/trunk/lib/Target/X86/<wbr>X86InstrSSE.td (original)<br>
            +++ llvm/trunk/lib/Target/X86/<wbr>X86InstrSSE.td Wed Mar 14
            08:47:08 2018<br>
            @@ -7482,7 +7482,8 @@ class
            avx2_broadcast_rr&lt;bits&lt;8&gt; opc, str<br>
            <br>
             let ExeDomain = SSEPackedSingle, Predicates = [HasAVX,
            NoVLX] in {<br>
               def VBROADCASTSSrm  : avx_broadcast_rm<0x18,
            "vbroadcastss", VR128,<br>
            -                                             f32mem, v4f32,
            loadf32, WriteLoad>;<br>
            +                                             f32mem, v4f32,
            loadf32,<br>
            +                                           
             WriteFShuffleLd>;<br>
               def VBROADCASTSSYrm : avx_broadcast_rm<0x18,
            "vbroadcastss", VR256,<br>
                                                          f32mem, v8f32,
            loadf32,<br>
                                                         
            WriteFShuffleLd>, VEX_L;<br>
            @@ -7518,7 +7519,7 @@ let mayLoad = 1, hasSideEffects = 0,
            Pre<br>
             def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs
            VR256:$dst),<br>
                                        (ins i128mem:$src),<br>
                                        "vbroadcasti128\t{$src,
            $dst|$dst, $src}", []>,<br>
            -                           Sched<[WriteLoad]>, VEX,
            VEX_L;<br>
            +                           Sched<[WriteShuffleLd]>,
            VEX, VEX_L;<br>
            <br>
             let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX],<br>
                 ExeDomain = SSEPackedSingle in<br>
            @@ -7974,7 +7975,7 @@ multiclass
            avx2_broadcast&lt;bits&lt;8&gt; opc, s<br>
                               !strconcat(OpcodeStr, "\t{$src,
            $dst|$dst, $src}"),<br>
                               [(set VR128:$dst,<br>
                                (OpVT128 (X86VBroadcast (ld_frag
            addr:$src))))]>,<br>
            -                  Sched<[WriteLoad]>, VEX;<br>
            +                  Sched<[WriteShuffleLd]>, VEX;<br>
                 def Yrr : AVX28I&lt;opc, MRMSrcReg, (outs VR256:$dst),
            (ins VR128:$src),<br>
                                !strconcat(OpcodeStr, "\t{$src,
            $dst|$dst, $src}"),<br>
                                [(set VR256:$dst,<br>
            @@ -7984,7 +7985,7 @@ multiclass
            avx2_broadcast&lt;bits&lt;8&gt; opc, s<br>
                                !strconcat(OpcodeStr, "\t{$src,
            $dst|$dst, $src}"),<br>
                                [(set VR256:$dst,<br>
                                 (OpVT256 (X86VBroadcast (ld_frag
            addr:$src))))]>,<br>
            -                   Sched<[WriteLoad]>, VEX, VEX_L;<br>
            +                   Sched<[WriteShuffleLd]>, VEX,
            VEX_L;<br>
            <br>
                 // Provide aliases for broadcast from the same register
            class that<br>
                 // automatically does the extract.<br>
            <br>
            Modified: llvm/trunk/test/CodeGen/X86/<wbr>avx-schedule.ll<br>
            URL: <a
href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-schedule.ll?rev=327524&r1=327523&r2=327524&view=diff"
              rel="noreferrer" target="_blank" moz-do-not-send="true">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>CodeGen/X86/avx-schedule.ll?<wbr>rev=327524&r1=327523&r2=<wbr>327524&view=diff</a><br>
            ==============================<wbr>==============================<wbr>==================<br>
            --- llvm/trunk/test/CodeGen/X86/<wbr>avx-schedule.ll
            (original)<br>
            +++ llvm/trunk/test/CodeGen/X86/<wbr>avx-schedule.ll Wed Mar
            14 08:47:08 2018<br>
            @@ -859,7 +859,7 @@ define <4 x float>
            @test_broadcastss(flo<br>
             ;<br>
             ; BTVER2-LABEL: test_broadcastss:<br>
             ; BTVER2:       # %bb.0:<br>
            -; BTVER2-NEXT:    vbroadcastss (%rdi), %xmm0 # sched:
            [5:1.00]<br>
            +; BTVER2-NEXT:    vbroadcastss (%rdi), %xmm0 # sched:
            [6:1.00]<br>
             ; BTVER2-NEXT:    retq # sched: [4:1.00]<br>
             ;<br>
             ; ZNVER1-LABEL: test_broadcastss:<br>
            <br>
            Modified: llvm/trunk/test/CodeGen/X86/<wbr>avx2-schedule.ll<br>
            URL: <a
href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-schedule.ll?rev=327524&r1=327523&r2=327524&view=diff"
              rel="noreferrer" target="_blank" moz-do-not-send="true">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>CodeGen/X86/avx2-schedule.ll?<wbr>rev=327524&r1=327523&r2=<wbr>327524&view=diff</a><br>
            ==============================<wbr>==============================<wbr>==================<br>
            --- llvm/trunk/test/CodeGen/X86/<wbr>avx2-schedule.ll
            (original)<br>
            +++ llvm/trunk/test/CodeGen/X86/<wbr>avx2-schedule.ll Wed
            Mar 14 08:47:08 2018<br>
            @@ -9,7 +9,7 @@<br>
             define <8 x i32> @test_broadcasti128(<8 x i32>
            %a0, <4 x i32> *%a1) {<br>
             ; GENERIC-LABEL: test_broadcasti128:<br>
             ; GENERIC:       # %bb.0:<br>
            -; GENERIC-NEXT:    vbroadcasti128 {{.*#+}} ymm1 =
            mem[0,1,0,1] sched: [4:0.50]<br>
            +; GENERIC-NEXT:    vbroadcasti128 {{.*#+}} ymm1 =
            mem[0,1,0,1] sched: [5:1.00]<br>
             ; GENERIC-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # sched:
            [3:1.00]<br>
             ; GENERIC-NEXT:    retq # sched: [1:1.00]<br>
             ;<br>
            @@ -1762,7 +1762,7 @@ define <16 x i8>
            @test_pbroadcastb(<16 x<br>
             ; GENERIC-LABEL: test_pbroadcastb:<br>
             ; GENERIC:       # %bb.0:<br>
             ; GENERIC-NEXT:    vpbroadcastb %xmm0, %xmm0 # sched:
            [1:1.00]<br>
            -; GENERIC-NEXT:    vpbroadcastb (%rdi), %xmm1 # sched:
            [4:0.50]<br>
            +; GENERIC-NEXT:    vpbroadcastb (%rdi), %xmm1 # sched:
            [5:1.00]<br>
             ; GENERIC-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # sched:
            [1:0.50]<br>
             ; GENERIC-NEXT:    retq # sched: [1:1.00]<br>
             ;<br>
            @@ -1811,7 +1811,7 @@ define <32 x i8>
            @test_pbroadcastb_ymm(<<br>
             ; GENERIC-LABEL: test_pbroadcastb_ymm:<br>
             ; GENERIC:       # %bb.0:<br>
             ; GENERIC-NEXT:    vpbroadcastb %xmm0, %ymm0 # sched:
            [1:1.00]<br>
            -; GENERIC-NEXT:    vpbroadcastb (%rdi), %ymm1 # sched:
            [4:0.50]<br>
            +; GENERIC-NEXT:    vpbroadcastb (%rdi), %ymm1 # sched:
            [5:1.00]<br>
             ; GENERIC-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # sched:
            [3:1.00]<br>
             ; GENERIC-NEXT:    retq # sched: [1:1.00]<br>
             ;<br>
            @@ -1860,7 +1860,7 @@ define <4 x i32>
            @test_pbroadcastd(<4 x<br>
             ; GENERIC-LABEL: test_pbroadcastd:<br>
             ; GENERIC:       # %bb.0:<br>
             ; GENERIC-NEXT:    vpbroadcastd %xmm0, %xmm0 # sched:
            [1:1.00]<br>
            -; GENERIC-NEXT:    vpbroadcastd (%rdi), %xmm1 # sched:
            [4:0.50]<br>
            +; GENERIC-NEXT:    vpbroadcastd (%rdi), %xmm1 # sched:
            [5:1.00]<br>
             ; GENERIC-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # sched:
            [1:0.50]<br>
             ; GENERIC-NEXT:    retq # sched: [1:1.00]<br>
             ;<br>
            @@ -1908,7 +1908,7 @@ define <8 x i32>
            @test_pbroadcastd_ymm(<<br>
             ; GENERIC-LABEL: test_pbroadcastd_ymm:<br>
             ; GENERIC:       # %bb.0:<br>
             ; GENERIC-NEXT:    vpbroadcastd %xmm0, %ymm0 # sched:
            [1:1.00]<br>
            -; GENERIC-NEXT:    vpbroadcastd (%rdi), %ymm1 # sched:
            [4:0.50]<br>
            +; GENERIC-NEXT:    vpbroadcastd (%rdi), %ymm1 # sched:
            [5:1.00]<br>
             ; GENERIC-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # sched:
            [3:1.00]<br>
             ; GENERIC-NEXT:    retq # sched: [1:1.00]<br>
             ;<br>
            @@ -1956,7 +1956,7 @@ define <2 x i64>
            @test_pbroadcastq(<2 x<br>
             ; GENERIC-LABEL: test_pbroadcastq:<br>
             ; GENERIC:       # %bb.0:<br>
             ; GENERIC-NEXT:    vpbroadcastq %xmm0, %xmm0 # sched:
            [1:1.00]<br>
            -; GENERIC-NEXT:    vpbroadcastq (%rdi), %xmm1 # sched:
            [4:0.50]<br>
            +; GENERIC-NEXT:    vpbroadcastq (%rdi), %xmm1 # sched:
            [5:1.00]<br>
             ; GENERIC-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # sched:
            [1:0.50]<br>
             ; GENERIC-NEXT:    retq # sched: [1:1.00]<br>
             ;<br>
            @@ -2004,7 +2004,7 @@ define <4 x i64>
            @test_pbroadcastq_ymm(<<br>
             ; GENERIC-LABEL: test_pbroadcastq_ymm:<br>
             ; GENERIC:       # %bb.0:<br>
             ; GENERIC-NEXT:    vpbroadcastq %xmm0, %ymm0 # sched:
            [1:1.00]<br>
            -; GENERIC-NEXT:    vpbroadcastq (%rdi), %ymm1 # sched:
            [4:0.50]<br>
            +; GENERIC-NEXT:    vpbroadcastq (%rdi), %ymm1 # sched:
            [5:1.00]<br>
             ; GENERIC-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # sched:
            [3:1.00]<br>
             ; GENERIC-NEXT:    retq # sched: [1:1.00]<br>
             ;<br>
            @@ -2052,7 +2052,7 @@ define <8 x i16>
            @test_pbroadcastw(<8 x<br>
             ; GENERIC-LABEL: test_pbroadcastw:<br>
             ; GENERIC:       # %bb.0:<br>
             ; GENERIC-NEXT:    vpbroadcastw %xmm0, %xmm0 # sched:
            [1:1.00]<br>
            -; GENERIC-NEXT:    vpbroadcastw (%rdi), %xmm1 # sched:
            [4:0.50]<br>
            +; GENERIC-NEXT:    vpbroadcastw (%rdi), %xmm1 # sched:
            [5:1.00]<br>
             ; GENERIC-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # sched:
            [1:0.50]<br>
             ; GENERIC-NEXT:    retq # sched: [1:1.00]<br>
             ;<br>
            @@ -2101,7 +2101,7 @@ define <16 x i16>
            @test_pbroadcastw_ymm(<br>
             ; GENERIC-LABEL: test_pbroadcastw_ymm:<br>
             ; GENERIC:       # %bb.0:<br>
             ; GENERIC-NEXT:    vpbroadcastw %xmm0, %ymm0 # sched:
            [1:1.00]<br>
            -; GENERIC-NEXT:    vpbroadcastw (%rdi), %ymm1 # sched:
            [4:0.50]<br>
            +; GENERIC-NEXT:    vpbroadcastw (%rdi), %ymm1 # sched:
            [5:1.00]<br>
             ; GENERIC-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # sched:
            [3:1.00]<br>
             ; GENERIC-NEXT:    retq # sched: [1:1.00]<br>
             ;<br>
            <br>
            <br>
            ______________________________<wbr>_________________<br>
            llvm-commits mailing list<br>
            <a href="mailto:llvm-commits@lists.llvm.org"
              moz-do-not-send="true">llvm-commits@lists.llvm.org</a><br>
            <a
              href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits"
              rel="noreferrer" target="_blank" moz-do-not-send="true">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-commits</a><br>
          </blockquote>
        </div>
        <br>
      </div>
    </blockquote>
    <br>
  </body>
</html>