[llvm-bugs] [Bug 36734] New: llvm.experimental.vector.reduce.{fadd, fmul} incorrect for non-unit accumulators
via llvm-bugs
llvm-bugs at lists.llvm.org
Wed Mar 14 09:15:36 PDT 2018
https://bugs.llvm.org/show_bug.cgi?id=36734
Bug ID: 36734
Summary: llvm.experimental.vector.reduce.{fadd,fmul} incorrect
for non-unit accumulators
Product: new-bugs
Version: unspecified
Hardware: PC
OS: All
Status: NEW
Severity: enhancement
Priority: P
Component: new bugs
Assignee: unassignedbugs at nondot.org
Reporter: gonzalobg88 at gmail.com
CC: chandlerc at gmail.com, hfinkel at anl.gov,
llvm-bugs at lists.llvm.org, llvm-dev at redking.me.uk
This IR:
declare float @llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float, <4 x
float>)
declare float @llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float, <4 x
float>)
define internal float
@_ZN32simd_intrinsic_generic_reduction3foo17ha7e2b586cf5567bdE(<4 x float>*
noalias nocapture dereferenceable(16)) unnamed_addr #0 {
%2 = alloca float, align 4
%3 = load <4 x float>, <4 x float>* %0, align 16
%4 = call fast float
@llvm.experimental.vector.reduce.fadd.f32.f32.v4f32(float -1.000000e+00, <4 x
float> %3)
store float %4, float* %2, align 4
%5 = load float, float* %2, align 4
br label %6
; <label>:6: ; preds = %1
ret float %5
}
define internal float
@_ZN32simd_intrinsic_generic_reduction3bar17he2463f63ae652611E(<4 x float>*
noalias nocapture dereferenceable(16)) unnamed_addr #0 {
%2 = alloca float, align 4
%3 = load <4 x float>, <4 x float>* %0, align 16
%4 = call fast float
@llvm.experimental.vector.reduce.fmul.f32.f32.v4f32(float -1.000000e+00, <4 x
float> %3)
store float %4, float* %2, align 4
%5 = load float, float* %2, align 4
br label %6
; <label>:6: ; preds = %1
ret float %5
}
lowers to:
simd_intrinsic_generic_reduction::foo: # @simd_intrinsic_generic_reduction::foo
movaps xmm0, xmmword ptr [rdi]
movaps xmm1, xmm0
movhlps xmm1, xmm1 # xmm1 = xmm1[1,1]
addps xmm1, xmm0
movaps xmm0, xmm1
shufps xmm0, xmm0, 229 # xmm0 = xmm0[1,1,2,3]
addps xmm0, xmm1
movss dword ptr [rsp - 4], xmm0
ret
simd_intrinsic_generic_reduction::bar: # @simd_intrinsic_generic_reduction::bar
movaps xmm0, xmmword ptr [rdi]
movaps xmm1, xmm0
movhlps xmm1, xmm1 # xmm1 = xmm1[1,1]
mulps xmm1, xmm0
movaps xmm0, xmm1
shufps xmm0, xmm0, 229 # xmm0 = xmm0[1,1,2,3]
mulps xmm0, xmm1
movss dword ptr [rsp - 4], xmm0
ret
which is incorrect for any non-unit accumulator, i.e. any accumulator other
than the identity of the operation (0 for fadd, 1 for fmul).
For example, here the accumulator is -1, and for the input (1, -2, 3, 4) the
fadd reduction should produce -1 + 1 - 2 + 3 + 4 = 5, but it produces 6 (the
accumulator is never added to the result). In other words, these intrinsics
only appear to work correctly when the accumulator is 0 for fadd and 1 for fmul.
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20180314/1a24be4b/attachment-0001.html>
More information about the llvm-bugs
mailing list