[llvm] Unroll loops apple (PR #149358)
Ahmad Yasin via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 17 10:04:47 PDT 2025
https://github.com/ayasin-a created https://github.com/llvm/llvm-project/pull/149358
Enhance the heuristics in `getAppleRuntimeUnrollPreferences` to allow a few more loops to be unrolled.
Specifically, this patch adjusts two checks:
I. Tune the loop size budget from 8 to 9
II. Include immediate users of loaded values in the load/store dependency predicate
>From 7b46981ceca0054efdedf6c52ebb784018471ef0 Mon Sep 17 00:00:00 2001
From: Ahmad Yasin <ahmad.yasin at apple.com>
Date: Wed, 9 Jul 2025 16:27:30 +0300
Subject: [PATCH 1/2] tune budget for runtime-unroll
---
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 90d3d92d6bbf5..1eb6589ed8ca9 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4808,7 +4808,7 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
if (Header == L->getLoopLatch()) {
// Estimate the size of the loop.
unsigned Size;
- if (!isLoopSizeWithinBudget(L, TTI, 8, &Size))
+ if (!isLoopSizeWithinBudget(L, TTI, 9, &Size))
return;
SmallPtrSet<Value *, 8> LoadedValues;
>From eb21acc1bca702f935f408e64a0fb34f4870ea37 Mon Sep 17 00:00:00 2001
From: Ahmad Yasin <ahmad.yasin at apple.com>
Date: Wed, 9 Jul 2025 22:21:59 +0300
Subject: [PATCH 2/2] Include immediate users of loaded values in the
load/stores dependencies predicate
---
.../AArch64/AArch64TargetTransformInfo.cpp | 19 +++++++++++++------
1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 1eb6589ed8ca9..6d97ae7c8c5e7 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4811,7 +4811,7 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
if (!isLoopSizeWithinBudget(L, TTI, 9, &Size))
return;
- SmallPtrSet<Value *, 8> LoadedValues;
+ SmallPtrSet<Value *, 8> LoadedValuesPlus;
SmallVector<StoreInst *> Stores;
for (auto *BB : L->blocks()) {
for (auto &I : *BB) {
@@ -4821,9 +4821,16 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
const SCEV *PtrSCEV = SE.getSCEV(Ptr);
if (SE.isLoopInvariant(PtrSCEV, L))
continue;
- if (isa<LoadInst>(&I))
- LoadedValues.insert(&I);
- else
+ if (isa<LoadInst>(&I)) {
+ LoadedValuesPlus.insert(&I);
+ // Included 1st users of loaded values
+ for (auto *U : I.users()) {
+ auto *Inst = dyn_cast<Instruction>(U);
+ if (!Inst || Inst->getParent() != BB)
+ continue;
+ LoadedValuesPlus.insert(U);
+ }
+ } else
Stores.push_back(cast<StoreInst>(&I));
}
}
@@ -4846,8 +4853,8 @@ getAppleRuntimeUnrollPreferences(Loop *L, ScalarEvolution &SE,
UC++;
}
- if (BestUC == 1 || none_of(Stores, [&LoadedValues](StoreInst *SI) {
- return LoadedValues.contains(SI->getOperand(0));
+ if (BestUC == 1 || none_of(Stores, [&LoadedValuesPlus](StoreInst *SI) {
+ return LoadedValuesPlus.contains(SI->getOperand(0));
}))
return;
More information about the llvm-commits
mailing list