[llvm] [SystemZ] Add inlining heuristic for IV update in callee. (PR #113135)

Jonas Paulsson via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 29 03:20:17 PDT 2024


https://github.com/JonPsson1 updated https://github.com/llvm/llvm-project/pull/113135

>From 300c4de4c24bd8790abcf6b191ba9646b4522873 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Wed, 16 Oct 2024 17:45:36 +0200
Subject: [PATCH 1/3] Inline callee with IV update with caller alloca.

---
 .../SystemZ/SystemZTargetTransformInfo.cpp    | 113 ++++++++++++++++++
 1 file changed, 113 insertions(+)

diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 7e5728c40950ad..aa8b77e749b713 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -74,6 +74,79 @@ static void countNumMemAccesses(const Value *Ptr, unsigned &NumStores,
     }
 }
 
+static void getNumGEPIndexUses(const Value *V, unsigned &NumGEPIdxUses) {
+  for (const User *U : V->users()) {
+    if (const auto *LI = dyn_cast<LoadInst>(U)) {
+      assert(isa<AllocaInst>(V) && LI->getType()->isIntegerTy() &&
+             "Expected a load only from the alloca, with integer type.");
+      getNumGEPIndexUses(LI, NumGEPIdxUses);
+    }
+    else if (const auto *SExtI = dyn_cast<SExtInst>(U))
+      getNumGEPIndexUses(SExtI, NumGEPIdxUses);
+    else if (const auto *ZExtI = dyn_cast<ZExtInst>(U))
+      getNumGEPIndexUses(ZExtI, NumGEPIdxUses);
+    else if (const auto *TruncI = dyn_cast<TruncInst>(U))
+      getNumGEPIndexUses(TruncI, NumGEPIdxUses);  // XXX Effective?
+    else if (isa<GetElementPtrInst>(U))
+      NumGEPIdxUses++;
+  }
+}
+
+// Only one load? one store?
+// Only constants?
+static bool looksLikeIVUpdate(const Function *F, const Value *Arg) {
+  assert(Arg->getType()->isPointerTy() && "Expecting alloca (ptr).");
+  // Load *Arg -> Add/Sub Constant -> Store *Arg
+  unsigned IVUpdates = 0;
+  unsigned NumLoads = 0;
+  unsigned NumStores = 0;
+  unsigned NumCalls = 0;
+  unsigned NumOthers = 0;
+  for (const User *U : Arg->users()) {
+    if (const auto *LI = dyn_cast<LoadInst>(U)) {
+      if (!LI->getType()->isIntegerTy()) {
+        NumOthers++;
+        continue;
+      }
+      NumLoads++;
+      for (const User *U2 : LI->users()) {
+        const Instruction *LdUser = cast<Instruction>(U2);
+        if ((LdUser->getOpcode() == Instruction::Add) // &&
+             // (isa<Constant>(LdUser->getOperand(0)) ||
+             //  isa<Constant>(LdUser->getOperand(1))))
+            ||
+            (LdUser->getOpcode() == Instruction::Sub) // &&
+             // isa<Constant>(LdUser->getOperand(1)))
+          ) {
+          for (const User *U3 : LdUser->users())
+            if (const auto *SI = dyn_cast<StoreInst>(U3))
+              if (SI->getPointerOperand() == Arg)
+                IVUpdates++;
+        }
+      }
+    }
+    else if (isa<StoreInst>(U))
+      NumStores++;
+    else if (isa<CallBase>(U))
+      NumCalls++;
+    else
+      NumOthers++;
+  }
+  dbgs() << " IVUpdates: " << IVUpdates
+         << " NumLoads: " << NumLoads
+         << " NumStores: " << NumStores
+         << " NumCalls: " << NumCalls
+         << " NumOthers: " << NumOthers;
+
+  return IVUpdates > 0; //  && NumStores == IVUpdates;
+}
+
+#include "llvm/IR/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+static cl::opt<bool> ONLYINLOOPS("onlyinloops", cl::init(false));
+static cl::opt<unsigned> NUMGEPIDX("numgepidx", cl::init(0));
+static cl::opt<bool> ONLYIVUPDATES("onlyivupdates", cl::init(true));
+
 unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
   unsigned Bonus = 0;
   const Function *Caller = CB->getParent()->getParent();
@@ -82,6 +155,46 @@ unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
     return 0;
   const Module *M = Caller->getParent();
 
+  // if (strcmp(Caller->getName().data(), "_Z6searchP7state_tiiiii") == 0 &&
+  //     strcmp(Callee->getName().data(), "_ZL15remove_one_fastPiS_S_i") == 0) {
+  //   if (!ONLYINLOOPS)
+  //     return 1000;
+  //   else {
+  //     DominatorTree DT(*(const_cast<Function *>(Caller)));
+  //     LoopInfo LI(DT);
+  //     const BasicBlock *CallBB = CB->getParent();
+  //     if (LI.getLoopFor(CallBB) != nullptr)
+  //       return 1000;
+  //   }
+  // }
+
+  // Give bonus for an integer alloca which is used as a GEP index in
+  // caller....XXX
+  for (unsigned OpIdx = 0; OpIdx != Callee->arg_size(); ++OpIdx) {
+    Value *CallerArg = CB->getArgOperand(OpIdx);
+    Argument *CalleeArg = Callee->getArg(OpIdx);
+    if (const AllocaInst *AI = dyn_cast<AllocaInst>(CallerArg))
+      if (AI->getAllocatedType()->isIntegerTy() && !AI->isArrayAllocation()) {
+        unsigned NumGEPIdxUses = 0;
+        getNumGEPIndexUses(CallerArg, NumGEPIdxUses);
+        dbgs() << "ALLOCA-ARG: "
+               << Caller->getName().data() << " / "
+               << Callee->getName().data() << " : "
+               << *AI->getAllocatedType()
+               << " NumGEPIdxUses: " << NumGEPIdxUses
+               << " IV: ";
+        bool IsIVUpdate = looksLikeIVUpdate(Callee, CalleeArg);
+        DominatorTree DT(*(const_cast<Function *>(Caller)));
+        LoopInfo LI(DT);
+        bool InLoop = (LI.getLoopFor(CB->getParent()) != nullptr);
+        dbgs() << " InLoop: " << InLoop << "\n";
+        if ((NumGEPIdxUses > NUMGEPIDX) &&
+            (IsIVUpdate || !ONLYIVUPDATES) &&
+            (InLoop || !ONLYINLOOPS))
+          return 1000;
+      }
+  }
+
   // Increase the threshold if an incoming argument is used only as a memcpy
   // source.
   for (const Argument &Arg : Callee->args()) {

>From 604030bd4245ed45d40160047662b817e19b3382 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Fri, 18 Oct 2024 09:29:37 +0200
Subject: [PATCH 2/3] cleanup

---
 .../SystemZ/SystemZTargetTransformInfo.cpp    | 123 +++++-------------
 .../CodeGen/SystemZ/inline-thresh-adjust.ll   |  28 ++++
 2 files changed, 58 insertions(+), 93 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index aa8b77e749b713..128135199acf41 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -85,68 +85,29 @@ static void getNumGEPIndexUses(const Value *V, unsigned &NumGEPIdxUses) {
       getNumGEPIndexUses(SExtI, NumGEPIdxUses);
     else if (const auto *ZExtI = dyn_cast<ZExtInst>(U))
       getNumGEPIndexUses(ZExtI, NumGEPIdxUses);
-    else if (const auto *TruncI = dyn_cast<TruncInst>(U))
-      getNumGEPIndexUses(TruncI, NumGEPIdxUses);  // XXX Effective?
     else if (isa<GetElementPtrInst>(U))
       NumGEPIdxUses++;
   }
 }
 
-// Only one load? one store?
-// Only constants?
+// Return true if Arg is used in a Load; Add/Sub; Store sequence.
 static bool looksLikeIVUpdate(const Function *F, const Value *Arg) {
-  assert(Arg->getType()->isPointerTy() && "Expecting alloca (ptr).");
-  // Load *Arg -> Add/Sub Constant -> Store *Arg
-  unsigned IVUpdates = 0;
-  unsigned NumLoads = 0;
-  unsigned NumStores = 0;
-  unsigned NumCalls = 0;
-  unsigned NumOthers = 0;
-  for (const User *U : Arg->users()) {
-    if (const auto *LI = dyn_cast<LoadInst>(U)) {
-      if (!LI->getType()->isIntegerTy()) {
-        NumOthers++;
-        continue;
-      }
-      NumLoads++;
-      for (const User *U2 : LI->users()) {
-        const Instruction *LdUser = cast<Instruction>(U2);
-        if ((LdUser->getOpcode() == Instruction::Add) // &&
-             // (isa<Constant>(LdUser->getOperand(0)) ||
-             //  isa<Constant>(LdUser->getOperand(1))))
-            ||
-            (LdUser->getOpcode() == Instruction::Sub) // &&
-             // isa<Constant>(LdUser->getOperand(1)))
-          ) {
-          for (const User *U3 : LdUser->users())
-            if (const auto *SI = dyn_cast<StoreInst>(U3))
-              if (SI->getPointerOperand() == Arg)
-                IVUpdates++;
+  assert(Arg->getType()->isPointerTy() && "Expecting ptr arg.");
+  for (const User *ArgU : Arg->users())
+    if (const auto *LoadI = dyn_cast<LoadInst>(ArgU))
+      if (LoadI->getType()->isIntegerTy())
+        for (const User *LdU : LoadI->users()) {
+          const Instruction *AddSubI = cast<Instruction>(LdU);
+          if (AddSubI->getOpcode() == Instruction::Add ||
+              AddSubI->getOpcode() == Instruction::Sub)
+            for (const User *ASU : AddSubI->users())
+              if (const auto *StoreI = dyn_cast<StoreInst>(ASU))
+                if (StoreI->getPointerOperand() == Arg)
+                  return true;
         }
-      }
-    }
-    else if (isa<StoreInst>(U))
-      NumStores++;
-    else if (isa<CallBase>(U))
-      NumCalls++;
-    else
-      NumOthers++;
-  }
-  dbgs() << " IVUpdates: " << IVUpdates
-         << " NumLoads: " << NumLoads
-         << " NumStores: " << NumStores
-         << " NumCalls: " << NumCalls
-         << " NumOthers: " << NumOthers;
-
-  return IVUpdates > 0; //  && NumStores == IVUpdates;
+  return false;
 }
 
-#include "llvm/IR/Dominators.h"
-#include "llvm/Analysis/LoopInfo.h"
-static cl::opt<bool> ONLYINLOOPS("onlyinloops", cl::init(false));
-static cl::opt<unsigned> NUMGEPIDX("numgepidx", cl::init(0));
-static cl::opt<bool> ONLYIVUPDATES("onlyivupdates", cl::init(true));
-
 unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
   unsigned Bonus = 0;
   const Function *Caller = CB->getParent()->getParent();
@@ -155,46 +116,6 @@ unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
     return 0;
   const Module *M = Caller->getParent();
 
-  // if (strcmp(Caller->getName().data(), "_Z6searchP7state_tiiiii") == 0 &&
-  //     strcmp(Callee->getName().data(), "_ZL15remove_one_fastPiS_S_i") == 0) {
-  //   if (!ONLYINLOOPS)
-  //     return 1000;
-  //   else {
-  //     DominatorTree DT(*(const_cast<Function *>(Caller)));
-  //     LoopInfo LI(DT);
-  //     const BasicBlock *CallBB = CB->getParent();
-  //     if (LI.getLoopFor(CallBB) != nullptr)
-  //       return 1000;
-  //   }
-  // }
-
-  // Give bonus for an integer alloca which is used as a GEP index in
-  // caller....XXX
-  for (unsigned OpIdx = 0; OpIdx != Callee->arg_size(); ++OpIdx) {
-    Value *CallerArg = CB->getArgOperand(OpIdx);
-    Argument *CalleeArg = Callee->getArg(OpIdx);
-    if (const AllocaInst *AI = dyn_cast<AllocaInst>(CallerArg))
-      if (AI->getAllocatedType()->isIntegerTy() && !AI->isArrayAllocation()) {
-        unsigned NumGEPIdxUses = 0;
-        getNumGEPIndexUses(CallerArg, NumGEPIdxUses);
-        dbgs() << "ALLOCA-ARG: "
-               << Caller->getName().data() << " / "
-               << Callee->getName().data() << " : "
-               << *AI->getAllocatedType()
-               << " NumGEPIdxUses: " << NumGEPIdxUses
-               << " IV: ";
-        bool IsIVUpdate = looksLikeIVUpdate(Callee, CalleeArg);
-        DominatorTree DT(*(const_cast<Function *>(Caller)));
-        LoopInfo LI(DT);
-        bool InLoop = (LI.getLoopFor(CB->getParent()) != nullptr);
-        dbgs() << " InLoop: " << InLoop << "\n";
-        if ((NumGEPIdxUses > NUMGEPIDX) &&
-            (IsIVUpdate || !ONLYIVUPDATES) &&
-            (InLoop || !ONLYINLOOPS))
-          return 1000;
-      }
-  }
-
   // Increase the threshold if an incoming argument is used only as a memcpy
   // source.
   for (const Argument &Arg : Callee->args()) {
@@ -244,6 +165,22 @@ unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
     Bonus += NumStores * 50;
   Bonus = std::min(Bonus, unsigned(1000));
 
+  // Give bonus for an integer alloca which is used as a GEP index in caller
+  // and is updated like an IV in memory in callee. This will help LSR.
+  for (unsigned OpIdx = 0; OpIdx != Callee->arg_size(); ++OpIdx) {
+    Value *CallerArg = CB->getArgOperand(OpIdx);
+    Argument *CalleeArg = Callee->getArg(OpIdx);
+    if (const AllocaInst *AI = dyn_cast<AllocaInst>(CallerArg))
+      if (AI->getAllocatedType()->isIntegerTy() && !AI->isArrayAllocation()) {
+        unsigned NumGEPIdxUses = 0;
+        getNumGEPIndexUses(AI, NumGEPIdxUses);
+        if (NumGEPIdxUses && looksLikeIVUpdate(Callee, CalleeArg)) {
+          Bonus = 1000;
+          break;
+        }
+      }
+  }
+
   LLVM_DEBUG(if (Bonus)
                dbgs() << "++ SZTTI Adding inlining bonus: " << Bonus << "\n";);
   return Bonus;
diff --git a/llvm/test/CodeGen/SystemZ/inline-thresh-adjust.ll b/llvm/test/CodeGen/SystemZ/inline-thresh-adjust.ll
index f7c83c7af7021b..80ef5463451c39 100644
--- a/llvm/test/CodeGen/SystemZ/inline-thresh-adjust.ll
+++ b/llvm/test/CodeGen/SystemZ/inline-thresh-adjust.ll
@@ -163,3 +163,31 @@ if.end100:                                        ; preds = %if.else79, %if.then
   %cmp181 = fcmp olt double %mul155, 0.000000e+00
   br label %common.ret
 }
+
+; Check that the inlining threshold is incremented for a function that is
+; accessing an alloca (used for addressing) of the caller by making a simple
+; IV update.
+;
+; CHECK: Inlining calls in: Caller3
+; CHECK: ++ SZTTI Adding inlining bonus: 1000
+
+define dso_local void @Caller3(ptr %Dst) {
+  %A = alloca i32
+  store i32 0, ptr %A
+  br label %loop
+
+loop:
+  %L = load i32, ptr %A
+  %SE = sext i32 %L to i64
+  %GEP = getelementptr  [240 x i32], ptr %Dst, i64 0, i64 %SE
+  store i64 0, ptr %GEP
+  call void @Callee3(ptr %A)
+  br label %loop
+}
+
+define void @Callee3(ptr %0) {
+  %L = load i32, ptr %0
+  %A = add nsw i32 %L, 1
+  store i32 %A, ptr %0
+  ret void
+}

>From 7860adcfb1505ddef3bbe9cc5488f9610d8e58f4 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Tue, 22 Oct 2024 15:20:53 +0200
Subject: [PATCH 3/3] Updated per review

---
 .../SystemZ/SystemZTargetTransformInfo.cpp    | 28 ++++++++++---------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 128135199acf41..e3eee6bb45100b 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -74,20 +74,17 @@ static void countNumMemAccesses(const Value *Ptr, unsigned &NumStores,
     }
 }
 
-static void getNumGEPIndexUses(const Value *V, unsigned &NumGEPIdxUses) {
+static bool usedAsGEPIndex(const Value *V) { // Looks through sext/zext.
+  assert(V->getType()->isIntegerTy() && "Expected an integer value.");
   for (const User *U : V->users()) {
-    if (const auto *LI = dyn_cast<LoadInst>(U)) {
-      assert(isa<AllocaInst>(V) && LI->getType()->isIntegerTy() &&
-             "Expected a load only from the alloca, with integer type.");
-      getNumGEPIndexUses(LI, NumGEPIdxUses);
+    if (isa<SExtInst>(U) || isa<ZExtInst>(U)) {
+      if (usedAsGEPIndex(U)) // If not, keep looking at the other users.
+        return true;
     }
-    else if (const auto *SExtI = dyn_cast<SExtInst>(U))
-      getNumGEPIndexUses(SExtI, NumGEPIdxUses);
-    else if (const auto *ZExtI = dyn_cast<ZExtInst>(U))
-      getNumGEPIndexUses(ZExtI, NumGEPIdxUses);
     else if (isa<GetElementPtrInst>(U))
-      NumGEPIdxUses++;
+      return true;
   }
+  return false;
 }
 
 // Return true if Arg is used in a Load; Add/Sub; Store sequence.
@@ -172,9 +169,14 @@ unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
     Argument *CalleeArg = Callee->getArg(OpIdx);
     if (const AllocaInst *AI = dyn_cast<AllocaInst>(CallerArg))
       if (AI->getAllocatedType()->isIntegerTy() && !AI->isArrayAllocation()) {
-        unsigned NumGEPIdxUses = 0;
-        getNumGEPIndexUses(AI, NumGEPIdxUses);
-        if (NumGEPIdxUses && looksLikeIVUpdate(Callee, CalleeArg)) {
+        bool UsedAsGEPIndex = false;
+        for (const User *U : AI->users())
+          if (const auto *LI = dyn_cast<LoadInst>(U))
+            if (usedAsGEPIndex(LI)) {
+              UsedAsGEPIndex = true;
+              break;
+            }
+        if (UsedAsGEPIndex && looksLikeIVUpdate(Callee, CalleeArg)) {
           Bonus = 1000;
           break;
         }



More information about the llvm-commits mailing list