[llvm] d1aa075 - [LoopFlatten] Fix assertion failure

Thu Aug 19 05:20:34 PDT 2021

Author: Rosie Sumpter
Date: 2021-08-19T13:18:57+01:00
New Revision: d1aa075129a94232bce6d47306f8c719ceef9142

URL: https://github.com/llvm/llvm-project/commit/d1aa075129a94232bce6d47306f8c719ceef9142
DIFF: https://github.com/llvm/llvm-project/commit/d1aa075129a94232bce6d47306f8c719ceef9142.diff

LOG: [LoopFlatten] Fix assertion failure

There is an assertion failure in computeOverflowForUnsignedMul
(used in checkOverflow) due to the inner and outer trip counts
having different types. This occurs when the IV has been widened,
but the loop components are not successfully rediscovered.
This is fixed by some refactoring of the code in findLoopComponents
which identifies the trip count of the loop.

Differential Revision: https://reviews.llvm.org/D108107

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LoopFlatten.cpp
    llvm/test/Transforms/LoopFlatten/loop-flatten-negative.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
index 3343bdd0b573..c8a4bbdc6851 100644

--- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -496,17 +496,27 @@ static OverflowResult checkOverflow(FlattenInfo &FI, DominatorTree *DT,
   for (Value *V : FI.LinearIVUses) {
     for (Value *U : V->users()) {
       if (auto *GEP = dyn_cast<GetElementPtrInst>(U)) {
-        // The IV is used as the operand of a GEP, and the IV is at least as
-        // wide as the address space of the GEP. In this case, the GEP would
-        // wrap around the address space before the IV increment wraps, which
-        // would be UB.
-        if (GEP->isInBounds() &&
-            V->getType()->getIntegerBitWidth() >=
-                DL.getPointerTypeSizeInBits(GEP->getType())) {
-          LLVM_DEBUG(
-              dbgs() << "use of linear IV would be UB if overflow occurred: ";
-              GEP->dump());
-          return OverflowResult::NeverOverflows;
+        for (Value *GEPUser : U->users()) {
+          Instruction *GEPUserInst = dyn_cast<Instruction>(GEPUser);
+          if (!isa<LoadInst>(GEPUserInst) &&
+              !(isa<StoreInst>(GEPUserInst) &&
+                GEP == GEPUserInst->getOperand(1)))
+            continue;
+          if (!isGuaranteedToExecuteForEveryIteration(GEPUserInst,
+                                                      FI.InnerLoop))
+            continue;
+          // The IV is used as the operand of a GEP which dominates the loop
+          // latch, and the IV is at least as wide as the address space of the
+          // GEP. In this case, the GEP would wrap around the address space
+          // before the IV increment wraps, which would be UB.
+          if (GEP->isInBounds() &&
+              V->getType()->getIntegerBitWidth() >=
+                  DL.getPointerTypeSizeInBits(GEP->getType())) {
+            LLVM_DEBUG(
+                dbgs() << "use of linear IV would be UB if overflow occurred: ";
+                GEP->dump());
+            return OverflowResult::NeverOverflows;
+          }
         }
       }
     }

diff  --git a/llvm/test/Transforms/LoopFlatten/loop-flatten-negative.ll b/llvm/test/Transforms/LoopFlatten/loop-flatten-negative.ll
index cee15ee8a44a..4bf3129e877a 100644
--- a/llvm/test/Transforms/LoopFlatten/loop-flatten-negative.ll
+++ b/llvm/test/Transforms/LoopFlatten/loop-flatten-negative.ll
@@ -786,6 +786,54 @@ for.empty:
   ret void
 } 
 
+; GEP doesn't dominate the loop latch so can't guarantee N*M won't overflow.
+ at first = global i32 1, align 4
+ at a = external global [0 x i8], align 1
+define void @overflow(i32 %lim, i8* %a) {
+entry:
+  %cmp17.not = icmp eq i32 %lim, 0
+  br i1 %cmp17.not, label %for.cond.cleanup, label %for.cond1.preheader.preheader
+
+for.cond1.preheader.preheader:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:
+  %i.018 = phi i32 [ %inc6, %for.cond.cleanup3 ], [ 0, %for.cond1.preheader.preheader ]
+  %mul = mul i32 %i.018, 100000
+  br label %for.body4
+
+for.cond.cleanup.loopexit:
+  br label %for.cond.cleanup
+
+for.cond.cleanup:
+  ret void
+
+for.cond.cleanup3:
+  %inc6 = add i32 %i.018, 1
+  %cmp = icmp ult i32 %inc6, %lim
+  br i1 %cmp, label %for.cond1.preheader, label %for.cond.cleanup.loopexit
+
+for.body4:
+  %j.016 = phi i32 [ 0, %for.cond1.preheader ], [ %inc, %if.end ]
+  %add = add i32 %j.016, %mul
+  %0 = load i32, i32* @first, align 4
+  %tobool.not = icmp eq i32 %0, 0
+  br i1 %tobool.not, label %if.end, label %if.then
+
+if.then:
+  %arrayidx = getelementptr inbounds [0 x i8], [0 x i8]* @a, i32 0, i32 %add
+  %1 = load i8, i8* %arrayidx, align 1
+  tail call void asm sideeffect "", "r"(i8 %1)
+  store i32 0, i32* @first, align 4
+  br label %if.end
+
+if.end:
+  tail call void asm sideeffect "", "r"(i32 %add)
+  %inc = add nuw nsw i32 %j.016, 1
+  %cmp2 = icmp ult i32 %j.016, 99999
+  br i1 %cmp2, label %for.body4, label %for.cond.cleanup3
+}
+
 declare void @objc_enumerationMutation(i8*)
 declare dso_local void @f(i32*)
 declare dso_local void @g(...)