[llvm-bugs] [Bug 29114] New: ext_vector_type(3) generates scalar divides

via llvm-bugs llvm-bugs at lists.llvm.org
Tue Aug 23 20:10:22 PDT 2016


https://llvm.org/bugs/show_bug.cgi?id=29114

            Bug ID: 29114
           Summary: ext_vector_type(3) generates scalar divides
           Product: clang
           Version: 3.9
          Hardware: PC
                OS: All
            Status: NEW
          Severity: normal
          Priority: P
         Component: LLVM Codegen
          Assignee: unassignedclangbugs at nondot.org
          Reporter: tim at moomalade.com
                CC: llvm-bugs at lists.llvm.org
    Classification: Unclassified

Created attachment 17034
  --> https://llvm.org/bugs/attachment.cgi?id=17034&action=edit
Example program showing good and bad vector div generation

Dividing an ext_vector_type(3) value by a scalar produces three scalar divide
instructions (divss) rather than one packed divide (divps). Addition,
multiplication, and subtraction each produce a single instruction.

This can be worked around by converting the value to an ext_vector_type(4),
dividing, then converting back to ext_vector_type(3).

Reproduces on Windows with LLVM 3.9 and the 4.0 snapshot, and on OS X with
Xcode clang 7.3.0.

Example: 
This code will generate 3 divide instructions.

typedef float vec3 __attribute__((ext_vector_type(3)));
vec3 vec3_divide_bad(vec3 v, float d) {
  return v / d;
}

/*
a.out`vec3_divide_bad:
a.out[0x100000f60] <+0>:  push   rbp
a.out[0x100000f61] <+1>:  mov    rbp, rsp
a.out[0x100000f64] <+4>:  movshdup xmm2, xmm0                ; xmm2 = xmm0[1,1,3,3]
a.out[0x100000f68] <+8>:  divss  xmm2, xmm1
a.out[0x100000f6c] <+12>: movaps xmm3, xmm0
a.out[0x100000f6f] <+15>: shufpd xmm3, xmm3, 0x1           ; xmm3 = xmm3[1,0]
a.out[0x100000f74] <+20>: divss  xmm3, xmm1
a.out[0x100000f78] <+24>: divss  xmm0, xmm1
a.out[0x100000f7c] <+28>: unpcklps xmm0, xmm3                ; xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
a.out[0x100000f7f] <+31>: unpcklps xmm0, xmm2                ; xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
a.out[0x100000f82] <+34>: pop    rbp
a.out[0x100000f83] <+35>: ret
a.out[0x100000f84] <+36>: nop    word ptr cs:[rax + rax]
*/

By contrast, this code generates a single divide instruction:

typedef float vec3 __attribute__((ext_vector_type(3)));
typedef float vec4 __attribute__((ext_vector_type(4)));
vec3 vec3_divide_good(vec3 v, float d) {
  return ((vec4){v.x,v.y,v.z,0.0f} / d).xyz;
}


/*
a.out`vec3_divide_good:
a.out[0x100000f90] <+0>:  push   rbp
a.out[0x100000f91] <+1>:  mov    rbp, rsp
a.out[0x100000f94] <+4>:  xorps  xmm2, xmm2
a.out[0x100000f97] <+7>:  shufps xmm2, xmm0, 0x23          ; xmm2 = xmm2[3,0],xmm0[2,0]
a.out[0x100000f9b] <+11>: shufps xmm0, xmm2, 0x24          ; xmm0 = xmm0[0,1],xmm2[2,0]
a.out[0x100000f9f] <+15>: shufps xmm1, xmm1, 0x0           ; xmm1 = xmm1[0,0,0,0]
a.out[0x100000fa3] <+19>: divps  xmm0, xmm1
a.out[0x100000fa6] <+22>: pop    rbp
a.out[0x100000fa7] <+23>: ret
a.out[0x100000fa8] <+24>: nop    dword ptr [rax + rax]
*/

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20160824/3464b0f1/attachment.html>


More information about the llvm-bugs mailing list