[LLVMbugs] [Bug 19045] New: loop vectorizer ops lose 'fast' flag

Tue Mar 4 14:51:33 PST 2014

http://llvm.org/bugs/show_bug.cgi?id=19045

            Bug ID: 19045
           Summary: loop vectorizer ops lose 'fast' flag
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Linux
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Loop Optimizer
          Assignee: unassignedbugs at nondot.org
          Reporter: hfinkel at anl.gov
                CC: llvmbugs at cs.uiuc.edu
    Classification: Unclassified

If I compile this with clang -O3 -ffast-math on x86_64:

float foo(float *restrict s) {
  float q = 0;
  for (int i = 0; i < 1600; ++i)
    q += s[i];

  return q;
}

we get this:

define float @foo(float* noalias nocapture readonly %s) #0 {
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %q.04 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds float* %s, i64 %indvars.iv
  %0 = load float* %arrayidx, align 4
  %add = fadd fast float %q.04, %0
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1600
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  %add.lcssa = phi float [ %add, %for.body ]
  ret float %add.lcssa
}

attributes #0 = { nounwind readonly uwtable }

Vectorizing this gives the following:

...
  %8 = fadd <4 x float> %vec.phi, %wide.load
  %9 = fadd <4 x float> %vec.phi9, %wide.load12
...

and for the reduction:

middle.block:                                     ; preds = %vector.body
  %.lcssa14 = phi <4 x float> [ %5, %vector.body ]
  %.lcssa = phi <4 x float> [ %4, %vector.body ]
  %bin.rdx = fadd <4 x float> %.lcssa14, %.lcssa
  %rdx.shuf = shufflevector <4 x float> %bin.rdx, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %bin.rdx10 = fadd <4 x float> %bin.rdx, %rdx.shuf
  %rdx.shuf11 = shufflevector <4 x float> %bin.rdx10, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %bin.rdx12 = fadd <4 x float> %bin.rdx10, %rdx.shuf11
  %7 = extractelement <4 x float> %bin.rdx12, i32 0
  ret float %7

Note that these 'fadd' instructions are missing the 'fast' flag, even though
the original reduction operation had it. The flag is lost in three places: on
the vectorized 'fadd' instructions in the loop body, on the 'fadd' that
combines the partial sums produced by unrolling, and on the 'fadd'
instructions of the final horizontal reduction.

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20140304/b27efef1/attachment.html>