[llvm] r222451 - Fix a trip-count overflow issue in LoopUnroll.

Michael Zolotukhin mzolotukhin at apple.com
Thu Nov 20 12:19:55 PST 2014


Author: mzolotukhin
Date: Thu Nov 20 14:19:55 2014
New Revision: 222451

URL: http://llvm.org/viewvc/llvm-project?rev=222451&view=rev
Log:
Fix a trip-count overflow issue in LoopUnroll.

Currently LoopUnroll generates a prologue loop before the main loop
body to execute the first N % UnrollFactor iterations. This loop is also
used when the trip count can overflow, which is determined by a runtime check.

However, we've been mistakenly optimizing this loop into straight-line code
for UnrollFactor = 2, not taking into account that it also serves as a safe
version of the loop if its trip count overflows.

Added:
    llvm/trunk/test/Transforms/LoopUnroll/tripcount-overflow.ll
Modified:
    llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp
    llvm/trunk/test/Transforms/LoopUnroll/runtime-loop1.ll

Modified: llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp?rev=222451&r1=222450&r2=222451&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp Thu Nov 20 14:19:55 2014
@@ -295,6 +295,10 @@ bool llvm::UnrollRuntimeLoopProlog(Loop
   if (isa<SCEVCouldNotCompute>(BECount) || !BECount->getType()->isIntegerTy())
     return false;
 
+  // If BECount is all-ones (UINT_MAX), BECount + 1 wraps around to zero.
+  if (BECount->isAllOnesValue())
+    return false;
+
   // Add 1 since the backedge count doesn't include the first loop iteration
   const SCEV *TripCountSC =
     SE->getAddExpr(BECount, SE->getConstant(BECount->getType(), 1));
@@ -357,11 +361,16 @@ bool llvm::UnrollRuntimeLoopProlog(Loop
   std::vector<BasicBlock *> NewBlocks;
   ValueToValueMapTy VMap;
 
+  // If unroll count is 2 and we can't overflow in tripcount computation (which
+  // is BECount + 1), then we don't need a loop for prologue, and we can unroll
+  // it. We can be sure that we don't overflow only if tripcount is a constant.
+  bool UnrollPrologue = (Count == 2 && isa<ConstantInt>(TripCount));
+
   // Clone all the basic blocks in the loop. If Count is 2, we don't clone
   // the loop, otherwise we create a cloned loop to execute the extra
   // iterations. This function adds the appropriate CFG connections.
-  CloneLoopBlocks(L, ModVal, Count == 2, PH, PEnd, NewBlocks, LoopBlocks, VMap,
-                  LI);
+  CloneLoopBlocks(L, ModVal, UnrollPrologue, PH, PEnd, NewBlocks, LoopBlocks,
+                  VMap, LI);
 
   // Insert the cloned blocks into function just before the original loop
   F->getBasicBlockList().splice(PEnd, F->getBasicBlockList(), NewBlocks[0],

Modified: llvm/trunk/test/Transforms/LoopUnroll/runtime-loop1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/runtime-loop1.ll?rev=222451&r1=222450&r2=222451&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/runtime-loop1.ll (original)
+++ llvm/trunk/test/Transforms/LoopUnroll/runtime-loop1.ll Thu Nov 20 14:19:55 2014
@@ -3,7 +3,7 @@
 ; This tests that setting the unroll count works
 
 ; CHECK: for.body.prol:
-; CHECK: br label %for.body.preheader.split
+; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split
 ; CHECK: for.body:
 ; CHECK: br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body
 ; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body

Added: llvm/trunk/test/Transforms/LoopUnroll/tripcount-overflow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnroll/tripcount-overflow.ll?rev=222451&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnroll/tripcount-overflow.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnroll/tripcount-overflow.ll Thu Nov 20 14:19:55 2014
@@ -0,0 +1,30 @@
+; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; When the prologue is fully unrolled, the branch at its end is unconditional.
+; Unrolling it is illegal if we can't prove that BECount+1 doesn't overflow,
+; like in this example, where it comes from an argument.
+;
+; This test is based on an example from here:
+; http://stackoverflow.com/questions/23838661/why-is-clang-optimizing-this-code-out
+;
+; CHECK: while.body.prol:
+; CHECK: br i1
+; CHECK: entry.split:
+
+; Function Attrs: nounwind readnone ssp uwtable
+define i32 @foo(i32 %N) #0 {
+entry:
+  br label %while.body
+
+while.body:                                       ; preds = %while.body, %entry
+  %i = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+  %cmp = icmp eq i32 %i, %N
+  %inc = add i32 %i, 1
+  br i1 %cmp, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body
+  ret i32 %i
+}
+
+attributes #0 = { nounwind readnone ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }





More information about the llvm-commits mailing list