[llvm] [AArch64] Enable unrolling for small multi-exit loops (PR #131998)

Tue Apr 8 01:47:28 PDT 2025

================
@@ -4528,6 +4528,95 @@ getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
   }
 }
 
+static bool shouldUnrollLoopWithInstruction(Instruction &I,
+                                            AArch64TTIImpl &TTI) {
+  // Don't unroll loops containing instructions that produce a vector type.
+  if (I.getType()->isVectorTy())
+    return false;
+
+  if (isa<CallBase>(I)) {
+    if (isa<CallInst>(I) || isa<InvokeInst>(I))
+      if (const Function *F = cast<CallBase>(I).getCalledFunction())
+        if (!TTI.isLoweredToCall(F))
+          return true;
+    return false;
+  }
+
+  return true;
+}
+
+// Returns true only if every instruction in the loop L:
+//  1. Is accepted by shouldUnrollLoopWithInstruction,
+//  2. Has a valid TCK_CodeSize cost,
+//  3. Keeps the running total cost within Budget.
+// On success, the total is written to *FinalSize if it is non-null.
+static bool canUnrollLoopWithinBudget(Loop *L, AArch64TTIImpl &TTI,
+                                      InstructionCost Budget,
+                                      unsigned *FinalSize) {
+  // Accumulate the code-size cost of every instruction in the loop.
+  InstructionCost LoopCost = 0;
+
+  for (auto *BB : L->getBlocks()) {
+    for (auto &I : *BB) {
+      if (!shouldUnrollLoopWithInstruction(I, TTI))
+        return false;
+
+      SmallVector<const Value *, 4> Operands(I.operand_values());
+      InstructionCost Cost =
+          TTI.getInstructionCost(&I, Operands, TTI::TCK_CodeSize);
+      // This can happen with intrinsics that don't currently have a cost model
+      // or for some operations that require SVE.
+      if (!Cost.isValid())
+        return false;
+
+      LoopCost += Cost;
+      if (LoopCost > Budget)
+        return false;
+    }
+  }
+
+  if (FinalSize)
+    *FinalSize = *LoopCost.getValue();
+  return true;
+}
+
+static bool shouldUnrollMultiExitLoop(Loop *L, ScalarEvolution &SE,
+                                      AArch64TTIImpl &TTI) {
+  // Only consider loops with unknown trip counts for which we can determine
+  // a symbolic expression. Multi-exit loops with small known trip counts will
+  // likely be unrolled anyway.
+  const SCEV *BTC = SE.getSymbolicMaxBackedgeTakenCount(L);
+  if (isa<SCEVConstant>(BTC) || isa<SCEVCouldNotCompute>(BTC))
+    return false;
+
+  // It might not be worth unrolling loops with low max trip counts. Restrict
+  // this to max trip counts > 32 for now.
+  unsigned MaxTC = SE.getSmallConstantMaxTripCount(L);
+  if (MaxTC > 0 && MaxTC <= 32)
+    return false;
+
+  if (findStringMetadataForLoop(L, "llvm.loop.isvectorized"))
+    return false;
----------------
fhahn wrote:

I think this is probably dead code, as the current version of `canUnrollLoopWithinBudget` forbids loops with vector instructions. This is a change to the current behavior for the Apple unrolling preferences, which allowed loops with vector code (e.g. intrinsics), but not loops vectorized by LV, which should already be unrolled by LV.

If there are enough vector execution units, I don't think there's a need to disallow vector instructions from unrolling, but that may require more target info, so keeping the bail-out in `shouldUnrollLoopWithInstruction` is fine for now.

https://github.com/llvm/llvm-project/pull/131998