[llvm] [LV] Ignore some costs when loop gets fully unrolled (PR #106699)

Fri Dec 6 06:13:37 PST 2024

================
@@ -2652,6 +2652,25 @@ static Value *getExpandedStep(const InductionDescriptor &ID,
   return I->second;
 }
 
+/// Knowing that loop \p L executes a single vector iteration, add instructions
+/// that will get simplified and thus should not have any cost to \p
+/// InstsToIgnore.
+static void addFullyUnrolledInstructionsToIgnore(
+    Loop *L, const LoopVectorizationLegality::InductionList &IL,
+    SmallPtrSetImpl<Instruction *> &InstsToIgnore) {
+  auto *Cmp = L->getLatchCmpInst();
+  if (Cmp)
----------------
david-arm wrote:

I was actually thinking about something else @igogo-x86:

```
define i1 @foo(ptr nocapture noundef %dst, ptr nocapture noundef readonly %src) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds nuw i32, ptr %src, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds nuw i32, ptr %dst, i64 %indvars.iv
  %1 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  store i32 %add, ptr %arrayidx2, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, 16
  br i1 %exitcond.not, label %exit, label %for.body

exit:
  ret i1 %exitcond.not
}
```

This is a case where in theory the final comparison is used after the loop. That's what I was hoping you could try out. Anyway, I ran this with the following command:

opt -mcpu=neoverse-v1 -p loop-vectorize,loop-unroll -force-vector-width=4 -force-vector-interleave=1 -S -debug-only=loop-vectorize < foo.ll

and it looks like we do indeed unroll the loop and the comparison disappears. Similarly, we unroll this and the comparison disappears:

```
define i1 @foo(ptr nocapture noundef %dst, ptr nocapture noundef %p, ptr nocapture noundef readonly %src) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds nuw i32, ptr %src, i64 %indvars.iv
  %0 = load i32, ptr %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds nuw i32, ptr %dst, i64 %indvars.iv
  %1 = load i32, ptr %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  store i32 %add, ptr %arrayidx2, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, 16
  %arrayidx3 = getelementptr inbounds nuw i1, ptr %p, i64 %indvars.iv
  store i1 %exitcond.not, ptr %arrayidx3, align 4
  br i1 %exitcond.not, label %exit, label %for.body

exit:
  ret i1 false
}
```

In this case I'm happy that the comparison disappears, although I don't think an extra use check would have done any harm.

https://github.com/llvm/llvm-project/pull/106699