[llvm-bugs] [Bug 39460] New: [Loop Vectorizer] Bad code generated for stores in AVX 512

Fri Oct 26 14:18:16 PDT 2018

https://bugs.llvm.org/show_bug.cgi?id=39460

            Bug ID: 39460
           Summary: [Loop Vectorizer] Bad code generated for stores in AVX
                    512
           Product: tools
           Version: trunk
          Hardware: PC
                OS: All
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: opt
          Assignee: unassignedbugs at nondot.org
          Reporter: anna at azul.com
                CC: llvm-bugs at lists.llvm.org

For the test case below, the loop vectorizer generates bad code:

cat repro.ll 
; ModuleID = 'trunk2.ll'
source_filename = "trunk2.ll"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
target triple = "x86_64-unknown-linux-gnu"

; Reproducer: a single-block loop that, on every iteration, stores undef
; pointers to three adjacent slots of an (undef) addrspace(1) pointer array.
; Function Attrs: uwtable
define void @ham() #0 !prof !0 {
bb:
  br label %bb1

; Loop body. %tmp is the induction variable: it starts at 2 and advances by 6
; per iteration (see %tmp10), so it is always even and the `lshr exact ... 1`
; shifts below never drop a set bit.
bb1:                                              ; preds = %bb1, %bb
  %tmp = phi i64 [ %tmp10, %bb1 ], [ 2, %bb ]
  ; First store: element index %tmp / 2.
  %tmp2 = lshr exact i64 %tmp, 1
  %tmp3 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)*
addrspace(1)* undef, i64 %tmp2
  store i8 addrspace(1)* undef, i8 addrspace(1)* addrspace(1)* %tmp3, align 8
  ; Second store: element index (%tmp + 2) / 2, i.e. the next element.
  %tmp4 = add nuw nsw i64 %tmp, 2
  %tmp5 = lshr exact i64 %tmp4, 1
  %tmp6 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)*
addrspace(1)* undef, i64 %tmp5
  store i8 addrspace(1)* undef, i8 addrspace(1)* addrspace(1)* %tmp6, align 8
  ; Third store: element index (%tmp + 4) / 2, i.e. two elements past the first.
  %tmp7 = add nuw nsw i64 %tmp, 4
  %tmp8 = lshr exact i64 %tmp7, 1
  %tmp9 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)*
addrspace(1)* undef, i64 %tmp8
  store i8 addrspace(1)* undef, i8 addrspace(1)* addrspace(1)* %tmp9, align 8
  ; Step the induction variable by 6 (three elements after the shift) and keep
  ; looping while the next value is below 302 (unsigned compare).
  %tmp10 = add nuw nsw i64 %tmp, 6
  %tmp11 = icmp ult i64 %tmp10, 302
  br i1 %tmp11, label %bb1, label %bb12

; Loop exit: marked unreachable in this reduced test case.
bb12:                                             ; preds = %bb1
  unreachable
}

; Attribute group #0 (used by @ham): targets skylake-avx512 with the AVX-512
; F/BW/VL/CD/DQ feature sets enabled.
attributes #0 = { uwtable "target-cpu"="skylake-avx512"
"target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,-ptwrite,+xsavec,+popcnt,+aes,-avx512bitalg,-movdiri,+xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-pconfig,+clwb,+avx512f,-clzero,+pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-movdir64b,-sse4a,+avx512bw,+clflushopt,+xsave,-avx512vbmi2,+64bit,+avx512vl,+invpcid,+avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,+avx512dq,+adx,-avx512pf,+sse3"
}

; Profile metadata attached to @ham via !prof: function entry count of 32768.
!0 = !{!"function_entry_count", i64 32768}

To reproduce on ToT: opt -loop-vectorize repro.ll -S 
--------------------------
The vectorized code contains the following snippet:
%induction = add <8 x i64> %broadcast.splat, <i64 0, i64 6, i64 12, i64 18, i64
24, i64 30, i64 36, i64 42>
  %1 = add i64 %offset.idx, 0
  %2 = lshr exact i64 %1, 1
  %3 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)*
undef, i64 %2
  %4 = add nuw nsw i64 %1, 2
  %5 = lshr exact i64 %4, 1
  %6 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)*
undef, i64 %5
  %7 = add nuw nsw i64 %1, 4
  %8 = lshr exact i64 %7, 1
  %9 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)*
undef, i64 %8
  %10 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)*
%9, i32 -2
  %11 = bitcast i8 addrspace(1)* addrspace(1)* %10 to <24 x i8 addrspace(1)*>
addrspace(1)*
  store <24 x i8 addrspace(1)*> undef, <24 x i8 addrspace(1)*> addrspace(1)*
%11, align 8
  %index.next = add i64 %index, 8
  %12 = icmp eq i64 %index.next, 48

As we can see, the VF for the loop is 8, but the generated store uses
<24 x i8 addrspace(1)*>. We should be generating <8 x i8 addrspace(1)*> vectors instead.

LV debug shows:
LV: We can vectorize this loop!
LV: The Smallest and Widest types: 64 / 64 bits.
LV: The Widest register safe to use is: 512 bits.
...
LV: Vector loop of width 8 costs: 2.
LV: Selecting VF: 8.

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20181026/04bf8ad1/attachment.html>