[llvm-bugs] [Bug 34438] New: LoopVectorizer bails out when ShortTripCount=8, MaxVF=16

via llvm-bugs llvm-bugs at lists.llvm.org
Sun Sep 3 06:40:17 PDT 2017


https://bugs.llvm.org/show_bug.cgi?id=34438

            Bug ID: 34438
           Summary: LoopVectorizer bails out when ShortTripCount=8,
                    MaxVF=16
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Windows NT
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Loop Optimizer
          Assignee: unassignedbugs at nondot.org
          Reporter: zvi.rackover at intel.com
                CC: llvm-bugs at lists.llvm.org

For this case:

 ; Module-level target settings for the reproducer. The triple is the same one
 ; passed via -mtriple on the opt command lines quoted in this report.
 target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"

 ; Reproducer: a single-block loop with a constant trip count of 8 that
 ; computes A[i] = A[i] + B[i] on floats.
 ;
 ; This loop should be vectorized even though its trip count is below the
 ; threshold, because no scalar iterations are needed. This should also work
 ; when MaxVF > TripCount.
 ;
 define void @small_tc(float* noalias nocapture %A, float* noalias nocapture
readonly %B) {
 entry:
   br label %for.body

 for.body:
   ; Induction variable i: 0 on entry, i + 1 on every back edge.
   %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
   ; %0 = B[i]
   %arrayidx = getelementptr inbounds float, float* %B, i64 %indvars.iv
   %0 = load float, float* %arrayidx, align 4, !llvm.mem.parallel_loop_access
!3
   ; %1 = A[i]
   %arrayidx2 = getelementptr inbounds float, float* %A, i64 %indvars.iv
   %1 = load float, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access
!3
   ; A[i] = B[i] + A[i]
   %add = fadd fast float %0, %1
   store float %add, float* %arrayidx2, align 4, !llvm.mem.parallel_loop_access
!3
   ; Advance i and leave the loop once i + 1 == 8, i.e. after 8 iterations.
   %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
   %exitcond = icmp eq i64 %indvars.iv.next, 8
   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !4

 for.end:
   ret void
 }

 ; Self-referential metadata nodes used by @small_tc:
 ;   !3 is the operand of !llvm.mem.parallel_loop_access on the loads and store.
 ;   !4 is the !llvm.loop attachment on the loop's back-edge branch.
 !3 = !{!3}
 !4 = !{!4}


The loop is vectorized with:
opt -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=core-i7
(ShortTripCount=8, VF=8)

The loop is not vectorized with:
(ShortTripCount=8, VF=16)
opt -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=skx

It appears that we are bailing out here:
*********************************************
  if (TC % MaxVF != 0) {
     // If the trip count that we found modulo the vectorization factor is not
     // zero then we require a tail.
     // FIXME: look for a smaller MaxVF that does divide TC rather than give
     //        up.
     // FIXME: return None if loop requiresScalarEpilog(<MaxVF>), or look for a
     //        smaller MaxVF that does not require a scalar epilog.

    ...

     return None;
   }
***********************************************
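It looks like implementing the first FIXME (searching for a smaller MaxVF that
divides TC) is the solution: here TC=8 and MaxVF=16, so VF=8 would divide the
trip count and no tail would be required.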

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20170903/23f7aa4c/attachment.html>


More information about the llvm-bugs mailing list