[llvm] [LoopVectorize] Refine runtime memory check costs when there is an outer loop (PR #76034)

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 22 04:52:42 PST 2023


================
@@ -2091,16 +2091,45 @@ class GeneratedRTChecks {
         LLVM_DEBUG(dbgs() << "  " << C << "  for " << I << "\n");
         RTCheckCost += C;
       }
-    if (MemCheckBlock)
+    if (MemCheckBlock) {
+      InstructionCost MemCheckCost = 0;
       for (Instruction &I : *MemCheckBlock) {
         if (MemCheckBlock->getTerminator() == &I)
           continue;
         InstructionCost C =
             TTI->getInstructionCost(&I, TTI::TCK_RecipThroughput);
         LLVM_DEBUG(dbgs() << "  " << C << "  for " << I << "\n");
-        RTCheckCost += C;
+        MemCheckCost += C;
+      }
+
+      // If the runtime memory checks are being created inside an outer loop
+      // we should find out if these checks are outer loop invariant. If so,
+      // the checks will be hoisted out and so the effective cost will reduce
+      // according to the outer loop trip count.
+      if (OuterLoop) {
+        ScalarEvolution *SE = MemCheckExp.getSE();
+        const SCEV *Cond = SE->getSCEV(MemRuntimeCheckCond);
+        if (SE->isLoopInvariant(Cond, OuterLoop)) {
+          if (std::optional<unsigned> OuterTC =
+                  getSmallBestKnownTC(*SE, OuterLoop))
----------------
fhahn wrote:

Hmm, not sure if this is the right function to use here, as it will use the maximum trip count if no info is present.

For something like the example below, it will divide by something like 2^32, so it is likely to over-estimate real trip counts in a lot of cases by quite a large margin:

```
define void @outer_no_tc(ptr nocapture noundef %a, ptr nocapture noundef readonly %b, i32 noundef %m, i64 noundef %n) {
entry:
  %e = zext i32 %m to i64
  %cmp = icmp ugt i64 %e, 0
   br i1 %cmp, label %outer.loop, label %outer.exit

 outer.loop:
   %outer.iv = phi i64 [ %outer.iv.next, %inner.exit ], [ 0, %entry ]
   %mul.us = mul nsw i64 %outer.iv, %n
   br label %inner.loop

 inner.loop:
   %inner.iv = phi i64 [ 0, %outer.loop ], [ %inner.iv.next, %inner.loop ]
   %add.us = add nuw nsw i64 %inner.iv, %mul.us
   %arrayidx.us = getelementptr inbounds i8, ptr %b, i64 %add.us
   %0 = load i8, ptr %arrayidx.us, align 1
   %arrayidx7.us = getelementptr inbounds i8, ptr %a, i64 %add.us
   %1 = load i8, ptr %arrayidx7.us, align 1
   %add9.us = add i8 %1, %0
   store i8 %add9.us, ptr %arrayidx7.us, align 1
   %inner.iv.next = add nuw nsw i64 %inner.iv, 1
   %exitcond.not = icmp eq i64 %inner.iv.next, %n
   br i1 %exitcond.not, label %inner.exit, label %inner.loop

 inner.exit:
   %outer.iv.next = add nuw nsw i64 %outer.iv, 1
   %exitcond27.not = icmp eq i64 %outer.iv.next, %e
   br i1 %exitcond27.not, label %outer.exit, label %outer.loop

 outer.exit:
   ret void
 }
```

https://github.com/llvm/llvm-project/pull/76034


More information about the llvm-commits mailing list