<html>
<head>
<base href="https://bugs.llvm.org/">
</head>
<body><span class="vcard"><a class="email" href="mailto:lebedev.ri@gmail.com" title="Roman Lebedev &lt;lebedev.ri@gmail.com&gt;"> <span class="fn">Roman Lebedev</span></a>
</span> changed
<a class="bz_bug_link
bz_status_RESOLVED bz_closed"
title="RESOLVED FIXED - Failure to convert 'sub' reduction to negated 'add' reduction"
href="https://bugs.llvm.org/show_bug.cgi?id=49858">bug 49858</a>
<br>
<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>What</th>
<th>Removed</th>
<th>Added</th>
</tr>
<tr>
<td style="text-align:right;">Status</td>
<td>NEW
</td>
<td>RESOLVED
</td>
</tr>
<tr>
<td style="text-align:right;">Fixed By Commit(s)</td>
<td>
</td>
<td>31d219d2997fed1b7dc97e0adf170d5aaf65883e
</td>
</tr>
<tr>
<td style="text-align:right;">Resolution</td>
<td>---
</td>
<td>FIXED
</td>
</tr></table>
<p>
<div>
<b><a class="bz_bug_link
bz_status_RESOLVED bz_closed"
title="RESOLVED FIXED - Failure to convert 'sub' reduction to negated 'add' reduction"
href="https://bugs.llvm.org/show_bug.cgi?id=49858#c2">Comment # 2</a>
on <a class="bz_bug_link
bz_status_RESOLVED bz_closed"
title="RESOLVED FIXED - Failure to convert 'sub' reduction to negated 'add' reduction"
href="https://bugs.llvm.org/show_bug.cgi?id=49858">bug 49858</a>
from <span class="vcard"><a class="email" href="mailto:lebedev.ri@gmail.com" title="Roman Lebedev &lt;lebedev.ri@gmail.com&gt;"> <span class="fn">Roman Lebedev</span></a>
</span></b>
<pre>Fixed by instcombine transform in 31d219d2997fed1b7dc97e0adf170d5aaf65883e.
We end with expected vectorization/assembly afterwards:
$ ./bin/opt -O3 /tmp/test.ll -S
; ModuleID = '/tmp/test.ll'
source_filename = "./example.cpp"
target datalayout =
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: norecurse nounwind readonly uwtable willreturn mustprogress
define dso_local i32 @_Z5sub32PKi(i32* nocapture readonly %0)
local_unnamed_addr #0 {
%2 = bitcast i32* %0 to &lt;16 x i32&gt;*
%3 = load &lt;16 x i32&gt;, &lt;16 x i32&gt;* %2, align 4, !tbaa !2
%4 = call i32 @llvm.vector.reduce.add.v16i32(&lt;16 x i32&gt; %3)
%5 = sub i32 0, %4
ret i32 %5
}
; Function Attrs: norecurse nounwind readonly uwtable willreturn mustprogress
define dso_local i32 @_Z5sub32PKii(i32* nocapture readonly %0, i32 %1)
local_unnamed_addr #0 {
%3 = bitcast i32* %0 to &lt;16 x i32&gt;*
%4 = load &lt;16 x i32&gt;, &lt;16 x i32&gt;* %3, align 4, !tbaa !2
%5 = call i32 @llvm.vector.reduce.add.v16i32(&lt;16 x i32&gt; %4)
%6 = sub i32 %1, %5
ret i32 %6
}
; Function Attrs: nofree nosync nounwind readnone willreturn
declare i32 @llvm.vector.reduce.add.v16i32(&lt;16 x i32&gt;) #1
attributes #0 = { norecurse nounwind readonly uwtable willreturn mustprogress
"frame-pointer"="none" "no-trapping-math"="true"
"stack-protector-buffer-size"="8" "target-cpu"="haswell"
"target-features"="+avx,+avx2,+bmi,+bmi2,+cx16,+cx8,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
}
attributes #1 = { nofree nosync nounwind readnone willreturn }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 13.0.0 (<a href="https://github.com/llvm/llvm-project.git">https://github.com/llvm/llvm-project.git</a> 30b3aab3299a1b6e4e262866e88f0aac0ecdee09)"}
!2 = !{!3, !3, i64 0}
!3 = !{!"int", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C++ TBAA"}
$ ./bin/opt -O3 /tmp/test.ll -S | ./bin/llc -
.text
.file "example.cpp"
.globl _Z5sub32PKi # -- Begin function _Z5sub32PKi
.p2align 4, 0x90
.type _Z5sub32PKi,@function
_Z5sub32PKi: # @_Z5sub32PKi
.cfi_startproc
# %bb.0:
vmovdqu (%rdi), %ymm0
vpaddd 32(%rdi), %ymm0, %ymm0
vextracti128 $1, %ymm0, %xmm1
vpaddd %xmm1, %xmm0, %xmm0
vpshufd $238, %xmm0, %xmm1 # xmm1 = xmm0[2,3,2,3]
vpaddd %xmm1, %xmm0, %xmm0
vpshufd $85, %xmm0, %xmm1 # xmm1 = xmm0[1,1,1,1]
vpaddd %xmm1, %xmm0, %xmm0
vmovd %xmm0, %eax
negl %eax
vzeroupper
retq
.Lfunc_end0:
.size _Z5sub32PKi, .Lfunc_end0-_Z5sub32PKi
.cfi_endproc
# -- End function
.globl _Z5sub32PKii # -- Begin function _Z5sub32PKii
.p2align 4, 0x90
.type _Z5sub32PKii,@function
_Z5sub32PKii: # @_Z5sub32PKii
.cfi_startproc
# %bb.0:
movl %esi, %eax
vmovdqu (%rdi), %ymm0
vpaddd 32(%rdi), %ymm0, %ymm0
vextracti128 $1, %ymm0, %xmm1
vpaddd %xmm1, %xmm0, %xmm0
vpshufd $238, %xmm0, %xmm1 # xmm1 = xmm0[2,3,2,3]
vpaddd %xmm1, %xmm0, %xmm0
vpshufd $85, %xmm0, %xmm1 # xmm1 = xmm0[1,1,1,1]
vpaddd %xmm1, %xmm0, %xmm0
vmovd %xmm0, %ecx
subl %ecx, %eax
vzeroupper
retq
.Lfunc_end1:
.size _Z5sub32PKii, .Lfunc_end1-_Z5sub32PKii
.cfi_endproc
# -- End function
.ident "clang version 13.0.0 (<a href="https://github.com/llvm/llvm-project.git">https://github.com/llvm/llvm-project.git</a> 30b3aab3299a1b6e4e262866e88f0aac0ecdee09)"
.section ".note.GNU-stack","",@progbits</pre>
</div>
</p>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>