<html>

    <head>

      <base href="https://llvm.org/bugs/" />

    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW --- - failure to convert FP addition loop into multiplication with fast-math"

   href="https://llvm.org/bugs/show_bug.cgi?id=27894">27894</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>failure to convert FP addition loop into multiplication with fast-math

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>libraries

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>trunk

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>PC

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>All

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>normal

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>Loop Optimizer

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>spatel+llvm@rotateright.com

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>llvm-bugs@lists.llvm.org

          </td>

        </tr>

        <tr>

          <th>Classification</th>

          <td>Unclassified

          </td>

        </tr></table>

      <p>

        <div>

        <pre>Another example in the loop vectorizer code explosion series (<a class="bz_bug_link 

          bz_status_NEW "

   title="NEW --- - Code explosion when LoopVectorize vectorizes a loop with a stride of 8 floats, part 2"

   href="show_bug.cgi?id=27881">bug 27881</a>, <a class="bz_bug_link 

          bz_status_RESOLVED  bz_closed"

   title="RESOLVED FIXED - Code explosion when LoopVectorize vectorizes a loop with a stride of 8 floats"

   href="show_bug.cgi?id=27826">bug 27826</a>) can be seen with:

float multiply_the_hard_way(int n) {

  float sum = 0.0f;

  for (int i=0; i&lt;n; i++)

    sum += 7.0f;

  return sum;

}

------------------------------------------------------------------------------

This may look ridiculous in simplified form, but the problem could easily exist

in real code or slightly more real code like <a class="bz_bug_link 

          bz_status_NEW "

   title="NEW --- - Code explosion when LoopVectorize vectorizes a loop with a stride of 8 floats, part 2"

   href="show_bug.cgi?id=27881">bug 27881</a>.

This may be considered a bug before it ever gets to the vectorizer as discussed

recently here:

<a href="http://lists.llvm.org/pipermail/llvm-dev/2016-May/099724.html">http://lists.llvm.org/pipermail/llvm-dev/2016-May/099724.html</a>

I.e., if these were integers, we'd convert this into an imul.

------------------------------------------------------------------------------

$ ./clang -O2 multiply_the_hard_way.c -S  -ffast-math -emit-llvm -o -

; ModuleID = 'multiply_the_hard_way.c'

source_filename = "multiply_the_hard_way.c"

target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

target triple = "x86_64-apple-macosx10.11.0"

; Function Attrs: norecurse nounwind readnone ssp uwtable

define float @multiple_the_hard_way(i32 %n) #0 {

entry:

  %cmp5 = icmp sgt i32 %n, 0

  br i1 %cmp5, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:                               ; preds = %entry

  %min.iters.check = icmp ult i32 %n, 8

  br i1 %min.iters.check, label %for.body.preheader15, label %min.iters.checked

for.body.preheader15:                             ; preds = %middle.block,

%min.iters.checked, %for.body.preheader

  %i.07.ph = phi i32 [ 0, %min.iters.checked ], [ 0, %for.body.preheader ], [

%n.vec, %middle.block ]

  %sum.06.ph = phi float [ 0.000000e+00, %min.iters.checked ], [ 0.000000e+00,

%for.body.preheader ], [ %8, %middle.block ]

  br label %for.body

min.iters.checked:                                ; preds = %for.body.preheader

  %n.vec = and i32 %n, -8

  %cmp.zero = icmp eq i32 %n.vec, 0

  br i1 %cmp.zero, label %for.body.preheader15, label %vector.body.preheader

vector.body.preheader:                            ; preds = %min.iters.checked

  %0 = add i32 %n.vec, -8

  %1 = lshr exact i32 %0, 3

  %2 = add nuw nsw i32 %1, 1

  %xtraiter = and i32 %2, 7

  %3 = icmp ult i32 %0, 56

  br i1 %3, label %middle.block.unr-lcssa, label %vector.body.preheader.new

vector.body.preheader.new:                        ; preds =

%vector.body.preheader

  %unroll_iter = sub nsw i32 %2, %xtraiter

  br label %vector.body

vector.body:                                      ; preds = %vector.body,

%vector.body.preheader.new

  %vec.phi = phi <4 x float> [ zeroinitializer, %vector.body.preheader.new ], [

%4, %vector.body ]

  %vec.phi9 = phi <4 x float> [ zeroinitializer, %vector.body.preheader.new ],

[ %5, %vector.body ]

  %niter = phi i32 [ %unroll_iter, %vector.body.preheader.new ], [

%niter.nsub.7, %vector.body ]

  %4 = fadd fast &lt;4 x float&gt; %vec.phi, &lt;float 5.600000e+01, float 5.600000e+01,

float 5.600000e+01, float 5.600000e+01&gt;

  %5 = fadd fast &lt;4 x float&gt; %vec.phi9, &lt;float 5.600000e+01, float

5.600000e+01, float 5.600000e+01, float 5.600000e+01&gt;

  %niter.nsub.7 = add i32 %niter, -8

  %niter.ncmp.7 = icmp eq i32 %niter.nsub.7, 0

  br i1 %niter.ncmp.7, label %middle.block.unr-lcssa.loopexit, label

%vector.body, !llvm.loop !2

middle.block.unr-lcssa.loopexit:                  ; preds = %vector.body

  %.lcssa20 = phi <4 x float> [ %5, %vector.body ]

  %.lcssa19 = phi <4 x float> [ %4, %vector.body ]

  br label %middle.block.unr-lcssa

middle.block.unr-lcssa:                           ; preds =

%middle.block.unr-lcssa.loopexit, %vector.body.preheader

  %.lcssa16.ph = phi <4 x float> [ undef, %vector.body.preheader ], [

%.lcssa20, %middle.block.unr-lcssa.loopexit ]

  %.lcssa.ph = phi <4 x float> [ undef, %vector.body.preheader ], [ %.lcssa19,

%middle.block.unr-lcssa.loopexit ]

  %vec.phi.unr = phi <4 x float> [ zeroinitializer, %vector.body.preheader ], [

%.lcssa19, %middle.block.unr-lcssa.loopexit ]

  %vec.phi9.unr = phi <4 x float> [ zeroinitializer, %vector.body.preheader ],

[ %.lcssa20, %middle.block.unr-lcssa.loopexit ]

  %lcmp.mod = icmp eq i32 %xtraiter, 0

  br i1 %lcmp.mod, label %middle.block, label %vector.body.epil.preheader

vector.body.epil.preheader:                       ; preds =

%middle.block.unr-lcssa

  br label %vector.body.epil

vector.body.epil:                                 ; preds = %vector.body.epil,

%vector.body.epil.preheader

  %vec.phi.epil = phi <4 x float> [ %6, %vector.body.epil ], [ %vec.phi.unr,

%vector.body.epil.preheader ]

  %vec.phi9.epil = phi <4 x float> [ %7, %vector.body.epil ], [ %vec.phi9.unr,

%vector.body.epil.preheader ]

  %epil.iter = phi i32 [ %epil.iter.sub, %vector.body.epil ], [ %xtraiter,

%vector.body.epil.preheader ]

  %6 = fadd fast &lt;4 x float&gt; %vec.phi.epil, &lt;float 7.000000e+00, float

7.000000e+00, float 7.000000e+00, float 7.000000e+00&gt;

  %7 = fadd fast &lt;4 x float&gt; %vec.phi9.epil, &lt;float 7.000000e+00, float

7.000000e+00, float 7.000000e+00, float 7.000000e+00&gt;

  %epil.iter.sub = add i32 %epil.iter, -1

  %epil.iter.cmp = icmp eq i32 %epil.iter.sub, 0

  br i1 %epil.iter.cmp, label %middle.block.epilog-lcssa, label

%vector.body.epil, !llvm.loop !5

middle.block.epilog-lcssa:                        ; preds = %vector.body.epil

  %.lcssa22 = phi <4 x float> [ %7, %vector.body.epil ]

  %.lcssa21 = phi <4 x float> [ %6, %vector.body.epil ]

  br label %middle.block

middle.block:                                     ; preds =

%middle.block.unr-lcssa, %middle.block.epilog-lcssa

  %.lcssa16 = phi <4 x float> [ %.lcssa16.ph, %middle.block.unr-lcssa ], [

%.lcssa22, %middle.block.epilog-lcssa ]

  %.lcssa = phi <4 x float> [ %.lcssa.ph, %middle.block.unr-lcssa ], [

%.lcssa21, %middle.block.epilog-lcssa ]

  %bin.rdx = fadd fast <4 x float> %.lcssa16, %.lcssa

  %rdx.shuf = shufflevector &lt;4 x float&gt; %bin.rdx, &lt;4 x float&gt; undef, &lt;4 x i32&gt;

&lt;i32 2, i32 3, i32 undef, i32 undef&gt;

  %bin.rdx12 = fadd fast <4 x float> %bin.rdx, %rdx.shuf

  %rdx.shuf13 = shufflevector &lt;4 x float&gt; %bin.rdx12, &lt;4 x float&gt; undef, &lt;4 x

i32&gt; &lt;i32 1, i32 undef, i32 undef, i32 undef&gt;

  %bin.rdx14 = fadd fast <4 x float> %bin.rdx12, %rdx.shuf13

  %8 = extractelement <4 x float> %bin.rdx14, i32 0

  %cmp.n = icmp eq i32 %n.vec, %n

  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader15

for.cond.cleanup.loopexit:                        ; preds = %for.body

  %add.lcssa = phi float [ %add, %for.body ]

  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds =

%for.cond.cleanup.loopexit, %middle.block, %entry

  %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %8, %middle.block ], [

%add.lcssa, %for.cond.cleanup.loopexit ]

  ret float %sum.0.lcssa

for.body:                                         ; preds =

%for.body.preheader15, %for.body

  %i.07 = phi i32 [ %inc, %for.body ], [ %i.07.ph, %for.body.preheader15 ]

  %sum.06 = phi float [ %add, %for.body ], [ %sum.06.ph, %for.body.preheader15

]

  %add = fadd fast float %sum.06, 7.000000e+00

  %inc = add nuw nsw i32 %i.07, 1

  %exitcond = icmp eq i32 %inc, %n

  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body,

!llvm.loop !7

}

attributes #0 = { norecurse nounwind readnone ssp uwtable

"disable-tail-calls"="false" "less-precise-fpmad"="false"

"no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"

"no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true"

"stack-protector-buffer-size"="8" "target-cpu"="core2"

"target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3,+x87"

"unsafe-fp-math"="true" "use-soft-float"="false" }

!llvm.module.flags = !{!0}

!llvm.ident = !{!1}

!0 = !{i32 1, !"PIC Level", i32 2}

!1 = !{!"clang version 3.9.0 (trunk 270847)"}

!2 = distinct !{!2, !3, !4}

!3 = !{!"llvm.loop.vectorize.width", i32 1}

!4 = !{!"llvm.loop.interleave.count", i32 1}

!5 = distinct !{!5, !6}

!6 = !{!"llvm.loop.unroll.disable"}

!7 = distinct !{!7, !8, !3, !4}

!8 = !{!"llvm.loop.unroll.runtime.disable"}</pre>

        </div>

      </p>

      <hr>

      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>