[llvm] [SystemZ] Add inlining heuristic for IV update in callee. (PR #113135)
Jonas Paulsson via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 29 03:20:17 PDT 2024
https://github.com/JonPsson1 updated https://github.com/llvm/llvm-project/pull/113135
>From 300c4de4c24bd8790abcf6b191ba9646b4522873 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Wed, 16 Oct 2024 17:45:36 +0200
Subject: [PATCH 1/3] Inline callee with IV update with caller alloca.
---
.../SystemZ/SystemZTargetTransformInfo.cpp | 113 ++++++++++++++++++
1 file changed, 113 insertions(+)
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 7e5728c40950ad..aa8b77e749b713 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -74,6 +74,79 @@ static void countNumMemAccesses(const Value *Ptr, unsigned &NumStores,
}
}
+static void getNumGEPIndexUses(const Value *V, unsigned &NumGEPIdxUses) {
+ for (const User *U : V->users()) {
+ if (const auto *LI = dyn_cast<LoadInst>(U)) {
+ assert(isa<AllocaInst>(V) && LI->getType()->isIntegerTy() &&
+ "Expected a load only from the alloca, with integer type.");
+ getNumGEPIndexUses(LI, NumGEPIdxUses);
+ }
+ else if (const auto *SExtI = dyn_cast<SExtInst>(U))
+ getNumGEPIndexUses(SExtI, NumGEPIdxUses);
+ else if (const auto *ZExtI = dyn_cast<ZExtInst>(U))
+ getNumGEPIndexUses(ZExtI, NumGEPIdxUses);
+ else if (const auto *TruncI = dyn_cast<TruncInst>(U))
+ getNumGEPIndexUses(TruncI, NumGEPIdxUses); // XXX Effective?
+ else if (isa<GetElementPtrInst>(U))
+ NumGEPIdxUses++;
+ }
+}
+
+// Only one load? one store?
+// Only constants?
+static bool looksLikeIVUpdate(const Function *F, const Value *Arg) {
+ assert(Arg->getType()->isPointerTy() && "Expecting alloca (ptr).");
+ // Load *Arg -> Add/Sub Constant -> Store *Arg
+ unsigned IVUpdates = 0;
+ unsigned NumLoads = 0;
+ unsigned NumStores = 0;
+ unsigned NumCalls = 0;
+ unsigned NumOthers = 0;
+ for (const User *U : Arg->users()) {
+ if (const auto *LI = dyn_cast<LoadInst>(U)) {
+ if (!LI->getType()->isIntegerTy()) {
+ NumOthers++;
+ continue;
+ }
+ NumLoads++;
+ for (const User *U2 : LI->users()) {
+ const Instruction *LdUser = cast<Instruction>(U2);
+ if ((LdUser->getOpcode() == Instruction::Add) // &&
+ // (isa<Constant>(LdUser->getOperand(0)) ||
+ // isa<Constant>(LdUser->getOperand(1))))
+ ||
+ (LdUser->getOpcode() == Instruction::Sub) // &&
+ // isa<Constant>(LdUser->getOperand(1)))
+ ) {
+ for (const User *U3 : LdUser->users())
+ if (const auto *SI = dyn_cast<StoreInst>(U3))
+ if (SI->getPointerOperand() == Arg)
+ IVUpdates++;
+ }
+ }
+ }
+ else if (isa<StoreInst>(U))
+ NumStores++;
+ else if (isa<CallBase>(U))
+ NumCalls++;
+ else
+ NumOthers++;
+ }
+ dbgs() << " IVUpdates: " << IVUpdates
+ << " NumLoads: " << NumLoads
+ << " NumStores: " << NumStores
+ << " NumCalls: " << NumCalls
+ << " NumOthers: " << NumOthers;
+
+ return IVUpdates > 0; // && NumStores == IVUpdates;
+}
+
+#include "llvm/IR/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+static cl::opt<bool> ONLYINLOOPS("onlyinloops", cl::init(false));
+static cl::opt<unsigned> NUMGEPIDX("numgepidx", cl::init(0));
+static cl::opt<bool> ONLYIVUPDATES("onlyivupdates", cl::init(true));
+
unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
unsigned Bonus = 0;
const Function *Caller = CB->getParent()->getParent();
@@ -82,6 +155,46 @@ unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
return 0;
const Module *M = Caller->getParent();
+ // if (strcmp(Caller->getName().data(), "_Z6searchP7state_tiiiii") == 0 &&
+ // strcmp(Callee->getName().data(), "_ZL15remove_one_fastPiS_S_i") == 0) {
+ // if (!ONLYINLOOPS)
+ // return 1000;
+ // else {
+ // DominatorTree DT(*(const_cast<Function *>(Caller)));
+ // LoopInfo LI(DT);
+ // const BasicBlock *CallBB = CB->getParent();
+ // if (LI.getLoopFor(CallBB) != nullptr)
+ // return 1000;
+ // }
+ // }
+
+ // Give bonus for an integer alloca which is used as a GEP index in
+ // caller....XXX
+ for (unsigned OpIdx = 0; OpIdx != Callee->arg_size(); ++OpIdx) {
+ Value *CallerArg = CB->getArgOperand(OpIdx);
+ Argument *CalleeArg = Callee->getArg(OpIdx);
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(CallerArg))
+ if (AI->getAllocatedType()->isIntegerTy() && !AI->isArrayAllocation()) {
+ unsigned NumGEPIdxUses = 0;
+ getNumGEPIndexUses(CallerArg, NumGEPIdxUses);
+ dbgs() << "ALLOCA-ARG: "
+ << Caller->getName().data() << " / "
+ << Callee->getName().data() << " : "
+ << *AI->getAllocatedType()
+ << " NumGEPIdxUses: " << NumGEPIdxUses
+ << " IV: ";
+ bool IsIVUpdate = looksLikeIVUpdate(Callee, CalleeArg);
+ DominatorTree DT(*(const_cast<Function *>(Caller)));
+ LoopInfo LI(DT);
+ bool InLoop = (LI.getLoopFor(CB->getParent()) != nullptr);
+ dbgs() << " InLoop: " << InLoop << "\n";
+ if ((NumGEPIdxUses > NUMGEPIDX) &&
+ (IsIVUpdate || !ONLYIVUPDATES) &&
+ (InLoop || !ONLYINLOOPS))
+ return 1000;
+ }
+ }
+
// Increase the threshold if an incoming argument is used only as a memcpy
// source.
for (const Argument &Arg : Callee->args()) {
>From 604030bd4245ed45d40160047662b817e19b3382 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Fri, 18 Oct 2024 09:29:37 +0200
Subject: [PATCH 2/3] cleanup
---
.../SystemZ/SystemZTargetTransformInfo.cpp | 123 +++++-------------
.../CodeGen/SystemZ/inline-thresh-adjust.ll | 28 ++++
2 files changed, 58 insertions(+), 93 deletions(-)
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index aa8b77e749b713..128135199acf41 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -85,68 +85,29 @@ static void getNumGEPIndexUses(const Value *V, unsigned &NumGEPIdxUses) {
getNumGEPIndexUses(SExtI, NumGEPIdxUses);
else if (const auto *ZExtI = dyn_cast<ZExtInst>(U))
getNumGEPIndexUses(ZExtI, NumGEPIdxUses);
- else if (const auto *TruncI = dyn_cast<TruncInst>(U))
- getNumGEPIndexUses(TruncI, NumGEPIdxUses); // XXX Effective?
else if (isa<GetElementPtrInst>(U))
NumGEPIdxUses++;
}
}
-// Only one load? one store?
-// Only constants?
+// Return true if Arg is used in a Load; Add/Sub; Store sequence.
static bool looksLikeIVUpdate(const Function *F, const Value *Arg) {
- assert(Arg->getType()->isPointerTy() && "Expecting alloca (ptr).");
- // Load *Arg -> Add/Sub Constant -> Store *Arg
- unsigned IVUpdates = 0;
- unsigned NumLoads = 0;
- unsigned NumStores = 0;
- unsigned NumCalls = 0;
- unsigned NumOthers = 0;
- for (const User *U : Arg->users()) {
- if (const auto *LI = dyn_cast<LoadInst>(U)) {
- if (!LI->getType()->isIntegerTy()) {
- NumOthers++;
- continue;
- }
- NumLoads++;
- for (const User *U2 : LI->users()) {
- const Instruction *LdUser = cast<Instruction>(U2);
- if ((LdUser->getOpcode() == Instruction::Add) // &&
- // (isa<Constant>(LdUser->getOperand(0)) ||
- // isa<Constant>(LdUser->getOperand(1))))
- ||
- (LdUser->getOpcode() == Instruction::Sub) // &&
- // isa<Constant>(LdUser->getOperand(1)))
- ) {
- for (const User *U3 : LdUser->users())
- if (const auto *SI = dyn_cast<StoreInst>(U3))
- if (SI->getPointerOperand() == Arg)
- IVUpdates++;
+  assert(Arg->getType()->isPointerTy() && "Expecting ptr arg.");
+  // Search for: integer load through Arg -> add/sub of the loaded value ->
+  // store of that result back through Arg. The first match is sufficient.
+  for (const User *PtrUser : Arg->users()) {
+    const auto *Ld = dyn_cast<LoadInst>(PtrUser);
+    if (!Ld || !Ld->getType()->isIntegerTy())
+      continue;
+    for (const User *ValUser : Ld->users()) {
+      const unsigned Opc = cast<Instruction>(ValUser)->getOpcode();
+      if (Opc != Instruction::Add && Opc != Instruction::Sub)
+        continue;
+      // The add/sub result must be written back to the same pointer.
+      for (const User *ResUser : ValUser->users()) {
+        const auto *St = dyn_cast<StoreInst>(ResUser);
+        if (St && St->getPointerOperand() == Arg)
+          return true;
+      }
+    }
}
- }
- }
- else if (isa<StoreInst>(U))
- NumStores++;
- else if (isa<CallBase>(U))
- NumCalls++;
- else
- NumOthers++;
- }
- dbgs() << " IVUpdates: " << IVUpdates
- << " NumLoads: " << NumLoads
- << " NumStores: " << NumStores
- << " NumCalls: " << NumCalls
- << " NumOthers: " << NumOthers;
-
- return IVUpdates > 0; // && NumStores == IVUpdates;
+  // No load/add-or-sub/store chain through Arg was found.
+  return false;
}
-#include "llvm/IR/Dominators.h"
-#include "llvm/Analysis/LoopInfo.h"
-static cl::opt<bool> ONLYINLOOPS("onlyinloops", cl::init(false));
-static cl::opt<unsigned> NUMGEPIDX("numgepidx", cl::init(0));
-static cl::opt<bool> ONLYIVUPDATES("onlyivupdates", cl::init(true));
-
unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
unsigned Bonus = 0;
const Function *Caller = CB->getParent()->getParent();
@@ -155,46 +116,6 @@ unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
return 0;
const Module *M = Caller->getParent();
- // if (strcmp(Caller->getName().data(), "_Z6searchP7state_tiiiii") == 0 &&
- // strcmp(Callee->getName().data(), "_ZL15remove_one_fastPiS_S_i") == 0) {
- // if (!ONLYINLOOPS)
- // return 1000;
- // else {
- // DominatorTree DT(*(const_cast<Function *>(Caller)));
- // LoopInfo LI(DT);
- // const BasicBlock *CallBB = CB->getParent();
- // if (LI.getLoopFor(CallBB) != nullptr)
- // return 1000;
- // }
- // }
-
- // Give bonus for an integer alloca which is used as a GEP index in
- // caller....XXX
- for (unsigned OpIdx = 0; OpIdx != Callee->arg_size(); ++OpIdx) {
- Value *CallerArg = CB->getArgOperand(OpIdx);
- Argument *CalleeArg = Callee->getArg(OpIdx);
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(CallerArg))
- if (AI->getAllocatedType()->isIntegerTy() && !AI->isArrayAllocation()) {
- unsigned NumGEPIdxUses = 0;
- getNumGEPIndexUses(CallerArg, NumGEPIdxUses);
- dbgs() << "ALLOCA-ARG: "
- << Caller->getName().data() << " / "
- << Callee->getName().data() << " : "
- << *AI->getAllocatedType()
- << " NumGEPIdxUses: " << NumGEPIdxUses
- << " IV: ";
- bool IsIVUpdate = looksLikeIVUpdate(Callee, CalleeArg);
- DominatorTree DT(*(const_cast<Function *>(Caller)));
- LoopInfo LI(DT);
- bool InLoop = (LI.getLoopFor(CB->getParent()) != nullptr);
- dbgs() << " InLoop: " << InLoop << "\n";
- if ((NumGEPIdxUses > NUMGEPIDX) &&
- (IsIVUpdate || !ONLYIVUPDATES) &&
- (InLoop || !ONLYINLOOPS))
- return 1000;
- }
- }
-
// Increase the threshold if an incoming argument is used only as a memcpy
// source.
for (const Argument &Arg : Callee->args()) {
@@ -244,6 +165,22 @@ unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
Bonus += NumStores * 50;
Bonus = std::min(Bonus, unsigned(1000));
+ // Give bonus for an integer alloca which is used as a GEP index in caller
+ // and is updated like an IV in memory in callee. This will help LSR.
+ for (unsigned OpIdx = 0; OpIdx != Callee->arg_size(); ++OpIdx) {
+ Value *CallerArg = CB->getArgOperand(OpIdx);
+ Argument *CalleeArg = Callee->getArg(OpIdx);
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(CallerArg))
+ if (AI->getAllocatedType()->isIntegerTy() && !AI->isArrayAllocation()) {
+ unsigned NumGEPIdxUses = 0;
+ getNumGEPIndexUses(AI, NumGEPIdxUses);
+ if (NumGEPIdxUses && looksLikeIVUpdate(Callee, CalleeArg)) {
+ Bonus = 1000;
+ break;
+ }
+ }
+ }
+
LLVM_DEBUG(if (Bonus)
dbgs() << "++ SZTTI Adding inlining bonus: " << Bonus << "\n";);
return Bonus;
diff --git a/llvm/test/CodeGen/SystemZ/inline-thresh-adjust.ll b/llvm/test/CodeGen/SystemZ/inline-thresh-adjust.ll
index f7c83c7af7021b..80ef5463451c39 100644
--- a/llvm/test/CodeGen/SystemZ/inline-thresh-adjust.ll
+++ b/llvm/test/CodeGen/SystemZ/inline-thresh-adjust.ll
@@ -163,3 +163,31 @@ if.end100: ; preds = %if.else79, %if.then
%cmp181 = fcmp olt double %mul155, 0.000000e+00
br label %common.ret
}
+
+; Check that the inlining threshold is incremented for a function that is
+; accessing an alloca (used for addressing) of the caller by making a simple
+; IV update.
+;
+; CHECK: Inlining calls in: Caller3
+; CHECK: ++ SZTTI Adding inlining bonus: 1000
+
+define dso_local void @Caller3(ptr %Dst) {
+  ; %A is the integer alloca whose address is passed to @Callee3 below.
+  %A = alloca i32
+  store i32 0, ptr %A
+  br label %loop
+
+loop:
+  ; The value loaded from %A is sign-extended and used as a GEP index.
+  %L = load i32, ptr %A
+  %SE = sext i32 %L to i64
+  %GEP = getelementptr [240 x i32], ptr %Dst, i64 0, i64 %SE
+  ; Store with the i32 element type of the indexed array, so the write stays
+  ; within the addressed element.
+  store i32 0, ptr %GEP
+  call void @Callee3(ptr %A)
+  br label %loop
+}
+
+define void @Callee3(ptr %0) { ; called from @Caller3 with the address of its i32 alloca
+ %L = load i32, ptr %0 ; read the current value through the pointer argument
+ %A = add nsw i32 %L, 1 ; add one to the loaded value
+ store i32 %A, ptr %0 ; write the sum back through the same pointer
+ ret void
+}
>From 7860adcfb1505ddef3bbe9cc5488f9610d8e58f4 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Tue, 22 Oct 2024 15:20:53 +0200
Subject: [PATCH 3/3] Updated per review
---
.../SystemZ/SystemZTargetTransformInfo.cpp | 28 ++++++++++---------
1 file changed, 15 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 128135199acf41..e3eee6bb45100b 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -74,20 +74,17 @@ static void countNumMemAccesses(const Value *Ptr, unsigned &NumStores,
}
}
-static void getNumGEPIndexUses(const Value *V, unsigned &NumGEPIdxUses) {
+// Return true if V, directly or through a chain of sign/zero extensions, is
+// an operand of at least one GetElementPtrInst. Every user is examined: an
+// extension that does not lead to a GEP must not end the search, because a
+// later user may still be (or lead to) a GEP.
+static bool usedAsGEPIndex(const Value *V) {
+  // The call site passes any load of the alloca without checking its type,
+  // so return false for non-integer values instead of asserting.
+  if (!V->getType()->isIntegerTy())
+    return false;
for (const User *U : V->users()) {
- if (const auto *LI = dyn_cast<LoadInst>(U)) {
- assert(isa<AllocaInst>(V) && LI->getType()->isIntegerTy() &&
- "Expected a load only from the alloca, with integer type.");
- getNumGEPIndexUses(LI, NumGEPIdxUses);
- }
- else if (const auto *SExtI = dyn_cast<SExtInst>(U))
- getNumGEPIndexUses(SExtI, NumGEPIdxUses);
- else if (const auto *ZExtI = dyn_cast<ZExtInst>(U))
- getNumGEPIndexUses(ZExtI, NumGEPIdxUses);
- else if (isa<GetElementPtrInst>(U))
- NumGEPIdxUses++;
+    // V is an integer here, so a GEP user means V is one of its indices.
+    if (isa<GetElementPtrInst>(U))
+      return true;
+    // Look through extensions, but keep scanning the remaining users when
+    // this particular extension does not feed a GEP.
+    if ((isa<SExtInst>(U) || isa<ZExtInst>(U)) && usedAsGEPIndex(U))
+      return true;
}
+  return false;
}
// Return true if Arg is used in a Load; Add/Sub; Store sequence.
@@ -172,9 +169,14 @@ unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
Argument *CalleeArg = Callee->getArg(OpIdx);
if (const AllocaInst *AI = dyn_cast<AllocaInst>(CallerArg))
if (AI->getAllocatedType()->isIntegerTy() && !AI->isArrayAllocation()) {
- unsigned NumGEPIdxUses = 0;
- getNumGEPIndexUses(AI, NumGEPIdxUses);
- if (NumGEPIdxUses && looksLikeIVUpdate(Callee, CalleeArg)) {
+ bool UsedAsGEPIndex = false;
+ for (const User *U : AI->users())
+ if (const auto *LI = dyn_cast<LoadInst>(U))
+ if (usedAsGEPIndex(LI)) {
+ UsedAsGEPIndex = true;
+ break;
+ }
+ if (UsedAsGEPIndex && looksLikeIVUpdate(Callee, CalleeArg)) {
Bonus = 1000;
break;
}
More information about the llvm-commits
mailing list