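<!doctype html>
<html lang="en">
    <head>
      <meta charset="utf-8">
      <title>Bug 29114 - ext_vector_type(3) generates scalar divides</title>
      <base href="https://llvm.org/bugs/" />
    </head>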
    <body><table border="1" cellspacing="0" cellpadding="8">
        <tr>
          <th>Bug ID</th>
          <td><a class="bz_bug_link 
          bz_status_NEW "
   title="NEW --- - ext_vector_type(3) generates scalar divides"
   href="https://llvm.org/bugs/show_bug.cgi?id=29114">29114</a>
          </td>
        </tr>

        <tr>
          <th>Summary</th>
          <td>ext_vector_type(3) generates scalar divides
          </td>
        </tr>

        <tr>
          <th>Product</th>
          <td>clang
          </td>
        </tr>

        <tr>
          <th>Version</th>
          <td>3.9
          </td>
        </tr>

        <tr>
          <th>Hardware</th>
          <td>PC
          </td>
        </tr>

        <tr>
          <th>OS</th>
          <td>All
          </td>
        </tr>

        <tr>
          <th>Status</th>
          <td>NEW
          </td>
        </tr>

        <tr>
          <th>Severity</th>
          <td>normal
          </td>
        </tr>

        <tr>
          <th>Priority</th>
          <td>P
          </td>
        </tr>

        <tr>
          <th>Component</th>
          <td>LLVM Codegen
          </td>
        </tr>

        <tr>
          <th>Assignee</th>
          <td>unassignedclangbugs@nondot.org
          </td>
        </tr>

        <tr>
          <th>Reporter</th>
          <td>tim@moomalade.com
          </td>
        </tr>

        <tr>
          <th>CC</th>
          <td>llvm-bugs@lists.llvm.org
          </td>
        </tr>

        <tr>
          <th>Classification</th>
          <td>Unclassified
          </td>
        </tr></table>
      <p>
        <div>
        <pre>Created <span class=""><a href="attachment.cgi?id=17034" name="attach_17034" title="Example program showing good and bad vector div generation">attachment 17034</a> <a href="attachment.cgi?id=17034&amp;action=edit" title="Example program showing good and bad vector div generation">[details]</a></span>
Example program showing good and bad vector div generation

Performing a scalar divide on an ext_vector_type(3) produces 3 div
instructions, rather than one. Addition, multiplication, subtraction all
produce one instruction.

This can be worked around by converting the value to an ext_vector_type(4),
dividing, then converting back to ext_vector_type(3).

Reproduces on Windows LLVM 3.9 and 4.0 snapshot and OSX with XCode clang 7.3.0.

Example: 
This code will generate 3 divide instructions.

typedef float vec3 __attribute__((ext_vector_type(3)));
vec3 vec3_divide_bad(vec3 v, float d) {
  return v / d;
}

/*
a.out`vec3_divide_bad:
a.out[0x100000f60] &lt;+0&gt;:  push   rbp
a.out[0x100000f61] &lt;+1&gt;:  mov    rbp, rsp
a.out[0x100000f64] &lt;+4&gt;:  movshdup xmm2, xmm0                ; xmm2 =
xmm0[1,1,3,3]
a.out[0x100000f68] &lt;+8&gt;:  divss  xmm2, xmm1
a.out[0x100000f6c] &lt;+12&gt;: movaps xmm3, xmm0
a.out[0x100000f6f] &lt;+15&gt;: shufpd xmm3, xmm3, 0x1           ; xmm3 = xmm3[1,0]
a.out[0x100000f74] &lt;+20&gt;: divss  xmm3, xmm1
a.out[0x100000f78] &lt;+24&gt;: divss  xmm0, xmm1
a.out[0x100000f7c] &lt;+28&gt;: unpcklps xmm0, xmm3                ; xmm0 =
xmm0[0],xmm3[0],xmm0[1],xmm3[1]
a.out[0x100000f7f] &lt;+31&gt;: unpcklps xmm0, xmm2                ; xmm0 =
xmm0[0],xmm2[0],xmm0[1],xmm2[1]
a.out[0x100000f82] &lt;+34&gt;: pop    rbp
a.out[0x100000f83] &lt;+35&gt;: ret
a.out[0x100000f84] &lt;+36&gt;: nop    word ptr cs:[rax + rax]
*/

While this code will generate one divide instruction:

typedef float vec3 __attribute__((ext_vector_type(3)));
typedef float vec4 __attribute__((ext_vector_type(4)));
vec3 vec3_divide_good(vec3 v, float d) {
  return ((vec4){v.x,v.y,v.z,0.0f} / d).xyz;
}


/*
a.out`vec3_divide_good:
a.out[0x100000f90] &lt;+0&gt;:  push   rbp
a.out[0x100000f91] &lt;+1&gt;:  mov    rbp, rsp
a.out[0x100000f94] &lt;+4&gt;:  xorps  xmm2, xmm2
a.out[0x100000f97] &lt;+7&gt;:  shufps xmm2, xmm0, 0x23          ; xmm2 =
xmm2[3,0],xmm0[2,0]
a.out[0x100000f9b] &lt;+11&gt;: shufps xmm0, xmm2, 0x24          ; xmm0 =
xmm0[0,1],xmm2[2,0]
a.out[0x100000f9f] &lt;+15&gt;: shufps xmm1, xmm1, 0x0           ; xmm1 =
xmm1[0,0,0,0]
a.out[0x100000fa3] &lt;+19&gt;: divps  xmm0, xmm1
a.out[0x100000fa6] &lt;+22&gt;: pop    rbp
a.out[0x100000fa7] &lt;+23&gt;: ret
a.out[0x100000fa8] &lt;+24&gt;: nop    dword ptr [rax + rax]
*/</pre>
        </div>
      </p>
      <hr>
      <span>You are receiving this mail because:</span>
      
      <ul>
          <li>You are on the CC list for the bug.</li>
      </ul>
    </body>
</html>