<!doctype html>
<html lang="en">
<head>
  <!-- Encoding declaration comes first so it is seen before any text content. -->
  <meta charset="utf-8">
  <title>Bug 34474 - Failure to multiply 'pow2 +/-1' vectors with shl+add/sub</title>
  <!-- Relative URLs in this document resolve against the bug tracker. -->
  <base href="https://bugs.llvm.org/">
</head>
<body>
<!-- Bug field summary: one row per field, with the field name as the row header.
     The border/cellspacing/cellpadding attributes are left in place because this
     document carries no stylesheet that could take over their job. -->
<table border="1" cellspacing="0" cellpadding="8">
  <tr>
    <th scope="row">Bug ID</th>
    <td><a class="bz_bug_link bz_status_NEW" href="https://bugs.llvm.org/show_bug.cgi?id=34474" title="NEW - Failure to multiply 'pow2 +/-1' vectors with shl+add/sub">34474</a></td>
  </tr>
  <tr>
    <th scope="row">Summary</th>
    <td>Failure to multiply 'pow2 +/-1' vectors with shl+add/sub</td>
  </tr>
  <tr>
    <th scope="row">Product</th>
    <td>libraries</td>
  </tr>
  <tr>
    <th scope="row">Version</th>
    <td>trunk</td>
  </tr>
  <tr>
    <th scope="row">Hardware</th>
    <td>PC</td>
  </tr>
  <tr>
    <th scope="row">OS</th>
    <td>Windows NT</td>
  </tr>
  <tr>
    <th scope="row">Status</th>
    <td>NEW</td>
  </tr>
  <tr>
    <th scope="row">Severity</th>
    <td>enhancement</td>
  </tr>
  <tr>
    <th scope="row">Priority</th>
    <td>P</td>
  </tr>
  <tr>
    <th scope="row">Component</th>
    <td>Common Code Generator Code</td>
  </tr>
  <tr>
    <th scope="row">Assignee</th>
    <td>unassignedbugs@nondot.org</td>
  </tr>
  <tr>
    <th scope="row">Reporter</th>
    <td>llvm-dev@redking.me.uk</td>
  </tr>
  <tr>
    <th scope="row">CC</th>
    <td>llvm-bugs@lists.llvm.org, spatel+llvm@rotateright.com</td>
  </tr>
</table>
<!-- Bug description, shown verbatim. Every literal "<" and ">" in the listings is
     written as &lt; / &gt; so that header names and vector types are displayed as
     text instead of being parsed as tags. -->
<div>
<pre>For cases where we don't have a legal multiply this is a definite win, and it
might be still a win for other cases by avoiding slow multiplies/constant
loads.
I'm most interested in the x86/SSE splat cases but a common solution might be
useful as well.
#include &lt;stdint.h&gt;
#include &lt;x86intrin.h&gt;
int64_t mul7(int64_t x) {
return x * 7;
}
int64_t mul17(int64_t x) {
return x * 17;
}
__m128i mul7(__m128i x) {
return x * (__v2di) {7,7};
}
__m128i mul17(__m128i x) {
return x * (__v2di) {17,17};
}
IR:
define i64 @mul7(i64){
%2 = mul nsw i64 %0, 7
ret i64 %2
}
define i64 @mul17(i64) {
%2 = mul nsw i64 %0, 17
ret i64 %2
}
define &lt;2 x i64&gt; @mul7(&lt;2 x i64&gt;) {
%2 = mul &lt;2 x i64&gt; %0, &lt;i64 7, i64 7&gt;
ret &lt;2 x i64&gt; %2
}
define &lt;2 x i64&gt; @mul17(&lt;2 x i64&gt;) {
%2 = mul &lt;2 x i64&gt; %0, &lt;i64 17, i64 17&gt;
ret &lt;2 x i64&gt; %2
}
llc -mcpu=btver2
mul7(long):
leaq (,%rdi,8), %rax
subq %rdi, %rax
retq
mul17(long):
movq %rdi, %rax
shlq $4, %rax
leaq (%rax,%rdi), %rax
retq
.LCPI2_0:
.quad 7 # 0x7
.quad 7 # 0x7
mul7(long long __vector(2)):
vmovdqa .LCPI2_0(%rip), %xmm1 # xmm1 = [7,7]
vpmuludq %xmm1, %xmm0, %xmm2
vpsrlq $32, %xmm0, %xmm0
vpmuludq %xmm1, %xmm0, %xmm0
vpsllq $32, %xmm0, %xmm0
vpaddq %xmm0, %xmm2, %xmm0
retq
.LCPI3_0:
.quad 17 # 0x11
.quad 17 # 0x11
mul17(long long __vector(2)):
vmovdqa .LCPI3_0(%rip), %xmm1 # xmm1 = [17,17]
vpmuludq %xmm1, %xmm0, %xmm2
vpsrlq $32, %xmm0, %xmm0
vpmuludq %xmm1, %xmm0, %xmm0
vpsllq $32, %xmm0, %xmm0
vpaddq %xmm0, %xmm2, %xmm0
retq</pre>
</div>
<hr>
<!-- Notification footer: tells the recipient why this message was sent to them. -->
<span>You are receiving this mail because:</span>
<ul>
  <li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>