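<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Bug 47437 - No vectorization - should use pmaddwd</title>
<base href="https://bugs.llvm.org/">
</head>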
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW - No vectorization - should use pmaddwd"
href="https://bugs.llvm.org/show_bug.cgi?id=47437">47437</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>No vectorization - should use pmaddwd
</td>
</tr>
<tr>
<th>Product</th>
<td>libraries
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>Linux
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>enhancement
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>Loop Optimizer
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>david.bolvansky@gmail.com
</td>
</tr>
<tr>
<th>CC</th>
<td>llvm-bugs@lists.llvm.org
</td>
</tr></table>
<div>
<pre>typedef short int int16_t;
typedef int int32_t;
void
test_muladd (int32_t * restrict d1,
const int16_t * restrict s1,
const int16_t * restrict s2,
int n)
{
int i;
for (i = 0; i &lt; n; i++) {
d1[i] = ((s1[i*2 + 0] * s2[i*2 + 0])
+ (s1[i*2 + 1] * s2[i*2 + 1]));
}
}
Clang -O3:
test_muladd: # @test_muladd
test ecx, ecx
jle .LBB0_7
mov r8d, ecx
cmp ecx, 1
jne .LBB0_3
xor eax, eax
jmp .LBB0_5
.LBB0_3:
mov r9d, r8d
and r9d, -2
xor eax, eax
.LBB0_4: # =>This Inner Loop Header: Depth=1
movsx r10d, word ptr [rsi + 4*rax]
movsx r11d, word ptr [rdx + 4*rax]
imul r11d, r10d
movsx r10d, word ptr [rsi + 4*rax + 2]
movsx ecx, word ptr [rdx + 4*rax + 2]
imul ecx, r10d
add ecx, r11d
mov dword ptr [rdi + 4*rax], ecx
movsx r10d, word ptr [rsi + 4*rax + 4]
movsx r11d, word ptr [rdx + 4*rax + 4]
imul r11d, r10d
movsx r10d, word ptr [rsi + 4*rax + 6]
movsx ecx, word ptr [rdx + 4*rax + 6]
imul ecx, r10d
add ecx, r11d
mov dword ptr [rdi + 4*rax + 4], ecx
add rax, 2
cmp r9, rax
jne .LBB0_4
.LBB0_5:
test r8b, 1
je .LBB0_7
movsx r8d, word ptr [rsi + 4*rax]
movsx ecx, word ptr [rdx + 4*rax]
imul ecx, r8d
movsx esi, word ptr [rsi + 4*rax + 2]
movsx edx, word ptr [rdx + 4*rax + 2]
imul edx, esi
add edx, ecx
mov dword ptr [rdi + 4*rax], edx
.LBB0_7:
ret
ICC 19 produces:
..B1.16: # Preds ..B1.11 ..B1.16
movdqu xmm0, XMMWORD PTR [rsi+rdx*4] #12.15
movdqu xmm1, XMMWORD PTR [16+rsi+rdx*4] #12.15
pmaddwd xmm0, XMMWORD PTR [r8+rdx*4] #12.29
pmaddwd xmm1, XMMWORD PTR [16+r8+rdx*4] #12.29
movdqu XMMWORD PTR [rdi+rdx*4], xmm0 #12.5
movdqu XMMWORD PTR [16+rdi+rdx*4], xmm1 #12.5
add rdx, 8 #11.3
cmp rdx, rcx #11.3
jb ..B1.16
Looking at the -Rpass-missed logs, this looks like a cost model issue.
Godbolt:
<a href="https://godbolt.org/z/E8Mf99">https://godbolt.org/z/E8Mf99</a></pre>
</div>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>