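<!doctype html>
<html lang="en">
    <head>
      <meta charset="utf-8">
      <title>[Bug 40984] @llvm.maxnum creates very inefficient code for skylake-avx512</title>
      <base href="https://bugs.llvm.org/">
    </head>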
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW - @llvm.maxnum creates very inefficient code for skylake-avx512"
   href="https://bugs.llvm.org/show_bug.cgi?id=40984">40984</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>@llvm.maxnum creates very inefficient code for skylake-avx512
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>libraries
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>trunk
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>All
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>enhancement
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>Backend: X86
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>schnetter@gmail.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>craig.topper@gmail.com, llvm-bugs@lists.llvm.org, llvm-dev@redking.me.uk, spatel+llvm@rotateright.com
          </td>
        </tr></table>
      <p>
        <div>
        <pre>@llvm.maxnum with vector arguments creates very inefficient code on AVX512
architectures. It creates good code for AVX2, using the ymm registers. However,
on AVX512, it falls back to using xmm registers instead of zmm registers.

This is an example (see also &lt;<a href="https://godbolt.org/z/oGnYqO">https://godbolt.org/z/oGnYqO</a>&gt;):



declare &lt;16 x float&gt; @llvm.maxnum.v16f32(&lt;16 x float&gt;, &lt;16 x float&gt;)

;  @ /home/eschnetter/.julia/packages/SIMD/5ugK9/src/SIMD.jl:1012 within `max'
define void @julia_max_12430({ &lt;16 x float&gt; }* noalias nocapture sret, { &lt;16 x
float&gt; } addrspace(11)* nocapture nonnull readonly dereferenceable(64), { &lt;16 x
float&gt; } addrspace(11)* nocapture nonnull readonly dereferenceable(64)) {
top:
; ┌ @ /home/eschnetter/.julia/packages/SIMD/5ugK9/src/SIMD.jl:538 within
`llvmwrap' @ /home/eschnetter/.julia/packages/SIMD/5ugK9/src/SIMD.jl:538
; │┌ @ /home/eschnetter/.julia/packages/SIMD/5ugK9/src/SIMD.jl:557 within
`macro expansion'
; ││┌ @ sysimg.jl:18 within `getproperty'
     %3 = getelementptr inbounds { &lt;16 x float&gt; }, { &lt;16 x float&gt; }
addrspace(11)* %1, i64 0, i32 0
     %4 = getelementptr inbounds { &lt;16 x float&gt; }, { &lt;16 x float&gt; }
addrspace(11)* %2, i64 0, i32 0
; ││└
    %5 = load &lt;16 x float&gt;, &lt;16 x float&gt; addrspace(11)* %3, align 16
    %6 = load &lt;16 x float&gt;, &lt;16 x float&gt; addrspace(11)* %4, align 16
    %res.i = call &lt;16 x float&gt; @llvm.maxnum.v16f32(&lt;16 x float&gt; %5, &lt;16 x
float&gt; %6)
; └└
  %.sroa.0.0..sroa_idx = getelementptr inbounds { &lt;16 x float&gt; }, { &lt;16 x
float&gt; }* %0, i64 0, i32 0
  store &lt;16 x float&gt; %res.i, &lt;16 x float&gt;* %.sroa.0.0..sroa_idx, align 64
  ret void
}</pre>
        </div>
      </p>


      <hr>
      <span>You are receiving this mail because:</span>

      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>