[llvm-bugs] [Bug 47437] New: No vectorization - should use pmaddwd
via llvm-bugs
llvm-bugs at lists.llvm.org
Sun Sep 6 06:39:43 PDT 2020
https://bugs.llvm.org/show_bug.cgi?id=47437
Bug ID: 47437
Summary: No vectorization - should use pmaddwd
Product: libraries
Version: trunk
Hardware: PC
OS: Linux
Status: NEW
Severity: enhancement
Priority: P
Component: Loop Optimizer
Assignee: unassignedbugs at nondot.org
Reporter: david.bolvansky at gmail.com
CC: llvm-bugs at lists.llvm.org
/* Local fixed-width aliases; this snippet does not include <stdint.h>. */
typedef short int int16_t;
typedef int int32_t;
/*
 * test_muladd - pairwise multiply-add of two int16_t arrays.
 *
 * For every i in [0, n) stores
 *     d1[i] = s1[2*i] * s2[2*i] + s1[2*i + 1] * s2[2*i + 1]
 *
 * d1: output array, written at indices 0 .. n-1.
 * s1: first input array, read at indices 0 .. 2*n-1.
 * s2: second input array, read at indices 0 .. 2*n-1.
 * n:  number of output elements; the loop body does not run when n <= 0.
 *
 * All three pointers are restrict-qualified, so the caller promises that
 * the arrays do not overlap.
 *
 * This is the pattern that the report's summary says should be vectorized
 * with pmaddwd.
 */
void
test_muladd (int32_t * restrict d1,
const int16_t * restrict s1,
const int16_t * restrict s2,
int n)
{
int i;
for (i = 0; i < n; i++) {
/* Two adjacent 16-bit products summed into one 32-bit result. */
d1[i] = ((s1[i*2 + 0] * s2[i*2 + 0])
+ (s1[i*2 + 1] * s2[i*2 + 1]));
}
}
Clang -O3:
# Scalar code for test_muladd(d1, s1, s2, n), Intel syntax.
# Register roles as used below (System V AMD64 argument order):
#   rdi = d1 (int32_t output), rsi = s1, rdx = s2 (int16_t inputs), ecx = n
#   rax = loop index i
# No vector instructions are used: the main loop is the scalar body
# unrolled by two, followed by a one-iteration remainder for odd n.
test_muladd: # @test_muladd
test ecx, ecx
jle .LBB0_7              # n <= 0: nothing to do
mov r8d, ecx             # keep n in r8d; ecx is reused as scratch below
cmp ecx, 1
jne .LBB0_3              # n != 1: set up the unrolled loop
xor eax, eax             # n == 1: i = 0, skip the loop and go to the remainder
jmp .LBB0_5
.LBB0_3:
mov r9d, r8d
and r9d, -2              # r9 = n with the low bit cleared (trip count of the x2 loop)
xor eax, eax             # i = 0
.LBB0_4: # =>This Inner Loop Header: Depth=1
# First unrolled iteration: d1[i] = s1[2i]*s2[2i] + s1[2i+1]*s2[2i+1].
# The byte offset 4*rax equals element index 2*i for 2-byte elements.
movsx r10d, word ptr [rsi + 4*rax]
movsx r11d, word ptr [rdx + 4*rax]
imul r11d, r10d
movsx r10d, word ptr [rsi + 4*rax + 2]
movsx ecx, word ptr [rdx + 4*rax + 2]
imul ecx, r10d
add ecx, r11d
mov dword ptr [rdi + 4*rax], ecx
# Second unrolled iteration: the same computation for d1[i+1].
movsx r10d, word ptr [rsi + 4*rax + 4]
movsx r11d, word ptr [rdx + 4*rax + 4]
imul r11d, r10d
movsx r10d, word ptr [rsi + 4*rax + 6]
movsx ecx, word ptr [rdx + 4*rax + 6]
imul ecx, r10d
add ecx, r11d
mov dword ptr [rdi + 4*rax + 4], ecx
add rax, 2               # i += 2
cmp r9, rax
jne .LBB0_4              # loop until i reaches the even part of n
.LBB0_5:
test r8b, 1
je .LBB0_7               # n even: no remainder iteration
# Remainder: one final element d1[i] when n is odd.
movsx r8d, word ptr [rsi + 4*rax]
movsx ecx, word ptr [rdx + 4*rax]
imul ecx, r8d
movsx esi, word ptr [rsi + 4*rax + 2]   # rsi and rdx are overwritten here; they are not read again
movsx edx, word ptr [rdx + 4*rax + 2]
imul edx, esi
add edx, ecx
mov dword ptr [rdi + 4*rax], edx
.LBB0_7:
ret
ICC 19 produces:
..B1.16: # Preds ..B1.11 ..B1.16
movdqu xmm0, XMMWORD PTR [rsi+rdx*4] #12.15
movdqu xmm1, XMMWORD PTR [16+rsi+rdx*4] #12.15
pmaddwd xmm0, XMMWORD PTR [r8+rdx*4] #12.29
pmaddwd xmm1, XMMWORD PTR [16+r8+rdx*4] #12.29
movdqu XMMWORD PTR [rdi+rdx*4], xmm0 #12.5
movdqu XMMWORD PTR [16+rdi+rdx*4], xmm1 #12.5
add rdx, 8 #11.3
cmp rdx, rcx #11.3
jb ..B1.16
Looking at the -Rpass-missed logs, this looks like a cost model issue.
Godbolt:
https://godbolt.org/z/E8Mf99
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20200906/c9f79fa8/attachment-0001.html>
More information about the llvm-bugs
mailing list