[llvm-bugs] [Bug 39460] New: [Loop Vectorizer] Bad code generated for stores in AVX 512
via llvm-bugs
llvm-bugs at lists.llvm.org
Fri Oct 26 14:18:16 PDT 2018
https://bugs.llvm.org/show_bug.cgi?id=39460
Bug ID: 39460
Summary: [Loop Vectorizer] Bad code generated for stores in AVX 512
Product: tools
Version: trunk
Hardware: PC
OS: All
Status: NEW
Severity: enhancement
Priority: P
Component: opt
Assignee: unassignedbugs at nondot.org
Reporter: anna at azul.com
CC: llvm-bugs at lists.llvm.org
For the test case below, we generate bad code after vectorization:
cat repro.ll
; ModuleID = 'trunk2.ll'
source_filename = "trunk2.ll"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: uwtable
; Reproducer: a single loop that performs three pointer stores per iteration.
; Both the stored value and the base address are undef; only the shape of the
; address computation matters here.
define void @ham() #0 !prof !0 {
bb:
br label %bb1
bb1: ; preds = %bb1, %bb
; Induction variable: starts at 2 on entry and advances by 6 per iteration
; (see %tmp10 below), so it is always even.
%tmp = phi i64 [ %tmp10, %bb1 ], [ 2, %bb ]
; Store 1: element index %tmp / 2.
%tmp2 = lshr exact i64 %tmp, 1
%tmp3 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)*
addrspace(1)* undef, i64 %tmp2
store i8 addrspace(1)* undef, i8 addrspace(1)* addrspace(1)* %tmp3, align 8
; Store 2: element index (%tmp + 2) / 2, i.e. one element after store 1.
%tmp4 = add nuw nsw i64 %tmp, 2
%tmp5 = lshr exact i64 %tmp4, 1
%tmp6 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)*
addrspace(1)* undef, i64 %tmp5
store i8 addrspace(1)* undef, i8 addrspace(1)* addrspace(1)* %tmp6, align 8
; Store 3: element index (%tmp + 4) / 2, i.e. two elements after store 1.
; Together the three stores cover three adjacent elements, and the starting
; index moves forward by 3 elements each iteration.
%tmp7 = add nuw nsw i64 %tmp, 4
%tmp8 = lshr exact i64 %tmp7, 1
%tmp9 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)*
addrspace(1)* undef, i64 %tmp8
store i8 addrspace(1)* undef, i8 addrspace(1)* addrspace(1)* %tmp9, align 8
; Step the induction variable by 6 and keep looping while the new value is
; (unsigned) below 302; with a start of 2 this gives 50 iterations.
%tmp10 = add nuw nsw i64 %tmp, 6
%tmp11 = icmp ult i64 %tmp10, 302
br i1 %tmp11, label %bb1, label %bb12
bb12: ; preds = %bb1
; The loop exit is never expected to continue past this point.
unreachable
}
attributes #0 = { uwtable "target-cpu"="skylake-avx512"
"target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,-ptwrite,+xsavec,+popcnt,+aes,-avx512bitalg,-movdiri,+xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-pconfig,+clwb,+avx512f,-clzero,+pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-movdir64b,-sse4a,+avx512bw,+clflushopt,+xsave,-avx512vbmi2,+64bit,+avx512vl,+invpcid,+avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,+avx512dq,+adx,-avx512pf,+sse3"
}
!0 = !{!"function_entry_count", i64 32768}
To reproduce on ToT: opt -loop-vectorize repro.ll -S
--------------------------
The vectorized code contains the following snippet:
%induction = add <8 x i64> %broadcast.splat, <i64 0, i64 6, i64 12, i64 18, i64
24, i64 30, i64 36, i64 42>
%1 = add i64 %offset.idx, 0
%2 = lshr exact i64 %1, 1
%3 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)*
undef, i64 %2
%4 = add nuw nsw i64 %1, 2
%5 = lshr exact i64 %4, 1
%6 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)*
undef, i64 %5
%7 = add nuw nsw i64 %1, 4
%8 = lshr exact i64 %7, 1
%9 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)*
undef, i64 %8
%10 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)*
%9, i32 -2
%11 = bitcast i8 addrspace(1)* addrspace(1)* %10 to <24 x i8 addrspace(1)*>
addrspace(1)*
store <24 x i8 addrspace(1)*> undef, <24 x i8 addrspace(1)*> addrspace(1)*
%11, align 8
%index.next = add i64 %index, 8
%12 = icmp eq i64 %index.next, 48
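As we can see, the loop is vectorized with VF = 8, but the store is emitted as a single <24 x i8 addrspace(1)*> (24 = 3 x VF, apparently the three scalar stores of each iteration merged into one wide store).
We should be generating <8 x i8 addrspace(1)*> vectors instead.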
LV debug shows:
LV: We can vectorize this loop!
LV: The Smallest and Widest types: 64 / 64 bits.
LV: The Widest register safe to use is: 512 bits.
...
LV: Vector loop of width 8 costs: 2.
LV: Selecting VF: 8.
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20181026/04bf8ad1/attachment.html>
More information about the llvm-bugs
mailing list