[llvm] [SystemZ] Add inlining heuristic for IV update in callee. (PR #113135)

Jonas Paulsson via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 21 02:03:14 PDT 2024


https://github.com/JonPsson1 created https://github.com/llvm/llvm-project/pull/113135

Give a bonus in adjustInliningThreshold() for an integer alloca which is used
as a GEP index in the caller and passed to the callee, where it is updated in
memory like an induction variable (IV). Inlining the callee helps LSR (Loop
Strength Reduction), as it can then get the SCEV representing the increment.

In the majority of cases, these calls are inside loops, as one might expect
given that the callee is updating the value with a simple 'load, add +1, store'
sequence.

This was also tried with any pointer (not just pointers to an alloca), but this
did not seem worth much (minor, mixed results), so the patch is instead kept
more specific.

This was found in deepsjeng (search.cpp), where a slight regression was noted
after the general threshold multiplier was recently removed (76aa370).

@dominik-steenken 

>From 5cb39ef1b1e1728ef1993dbdebf82cb21a4e48ca Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Wed, 16 Oct 2024 17:45:36 +0200
Subject: [PATCH 1/2] Inline callee with IV update with caller alloca.

---
 .../SystemZ/SystemZTargetTransformInfo.cpp    | 113 ++++++++++++++++++
 1 file changed, 113 insertions(+)

diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 7e5728c40950ad..aa8b77e749b713 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -74,6 +74,79 @@ static void countNumMemAccesses(const Value *Ptr, unsigned &NumStores,
     }
 }
 
+static void getNumGEPIndexUses(const Value *V, unsigned &NumGEPIdxUses) {
+  for (const User *U : V->users()) {
+    if (const auto *LI = dyn_cast<LoadInst>(U)) {
+      assert(isa<AllocaInst>(V) && LI->getType()->isIntegerTy() &&
+             "Expected a load only from the alloca, with integer type.");
+      getNumGEPIndexUses(LI, NumGEPIdxUses);
+    }
+    else if (const auto *SExtI = dyn_cast<SExtInst>(U))
+      getNumGEPIndexUses(SExtI, NumGEPIdxUses);
+    else if (const auto *ZExtI = dyn_cast<ZExtInst>(U))
+      getNumGEPIndexUses(ZExtI, NumGEPIdxUses);
+    else if (const auto *TruncI = dyn_cast<TruncInst>(U))
+      getNumGEPIndexUses(TruncI, NumGEPIdxUses);  // XXX Effective?
+    else if (isa<GetElementPtrInst>(U))
+      NumGEPIdxUses++;
+  }
+}
+
+// Only one load? one store?
+// Only constants?
+static bool looksLikeIVUpdate(const Function *F, const Value *Arg) {
+  assert(Arg->getType()->isPointerTy() && "Expecting alloca (ptr).");
+  // Load *Arg -> Add/Sub Constant -> Store *Arg
+  unsigned IVUpdates = 0;
+  unsigned NumLoads = 0;
+  unsigned NumStores = 0;
+  unsigned NumCalls = 0;
+  unsigned NumOthers = 0;
+  for (const User *U : Arg->users()) {
+    if (const auto *LI = dyn_cast<LoadInst>(U)) {
+      if (!LI->getType()->isIntegerTy()) {
+        NumOthers++;
+        continue;
+      }
+      NumLoads++;
+      for (const User *U2 : LI->users()) {
+        const Instruction *LdUser = cast<Instruction>(U2);
+        if ((LdUser->getOpcode() == Instruction::Add) // &&
+             // (isa<Constant>(LdUser->getOperand(0)) ||
+             //  isa<Constant>(LdUser->getOperand(1))))
+            ||
+            (LdUser->getOpcode() == Instruction::Sub) // &&
+             // isa<Constant>(LdUser->getOperand(1)))
+          ) {
+          for (const User *U3 : LdUser->users())
+            if (const auto *SI = dyn_cast<StoreInst>(U3))
+              if (SI->getPointerOperand() == Arg)
+                IVUpdates++;
+        }
+      }
+    }
+    else if (isa<StoreInst>(U))
+      NumStores++;
+    else if (isa<CallBase>(U))
+      NumCalls++;
+    else
+      NumOthers++;
+  }
+  dbgs() << " IVUpdates: " << IVUpdates
+         << " NumLoads: " << NumLoads
+         << " NumStores: " << NumStores
+         << " NumCalls: " << NumCalls
+         << " NumOthers: " << NumOthers;
+
+  return IVUpdates > 0; //  && NumStores == IVUpdates;
+}
+
+#include "llvm/IR/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+static cl::opt<bool> ONLYINLOOPS("onlyinloops", cl::init(false));
+static cl::opt<unsigned> NUMGEPIDX("numgepidx", cl::init(0));
+static cl::opt<bool> ONLYIVUPDATES("onlyivupdates", cl::init(true));
+
 unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
   unsigned Bonus = 0;
   const Function *Caller = CB->getParent()->getParent();
@@ -82,6 +155,46 @@ unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
     return 0;
   const Module *M = Caller->getParent();
 
+  // if (strcmp(Caller->getName().data(), "_Z6searchP7state_tiiiii") == 0 &&
+  //     strcmp(Callee->getName().data(), "_ZL15remove_one_fastPiS_S_i") == 0) {
+  //   if (!ONLYINLOOPS)
+  //     return 1000;
+  //   else {
+  //     DominatorTree DT(*(const_cast<Function *>(Caller)));
+  //     LoopInfo LI(DT);
+  //     const BasicBlock *CallBB = CB->getParent();
+  //     if (LI.getLoopFor(CallBB) != nullptr)
+  //       return 1000;
+  //   }
+  // }
+
+  // Give bonus for an integer alloca which is used as a GEP index in
+  // caller....XXX
+  for (unsigned OpIdx = 0; OpIdx != Callee->arg_size(); ++OpIdx) {
+    Value *CallerArg = CB->getArgOperand(OpIdx);
+    Argument *CalleeArg = Callee->getArg(OpIdx);
+    if (const AllocaInst *AI = dyn_cast<AllocaInst>(CallerArg))
+      if (AI->getAllocatedType()->isIntegerTy() && !AI->isArrayAllocation()) {
+        unsigned NumGEPIdxUses = 0;
+        getNumGEPIndexUses(CallerArg, NumGEPIdxUses);
+        dbgs() << "ALLOCA-ARG: "
+               << Caller->getName().data() << " / "
+               << Callee->getName().data() << " : "
+               << *AI->getAllocatedType()
+               << " NumGEPIdxUses: " << NumGEPIdxUses
+               << " IV: ";
+        bool IsIVUpdate = looksLikeIVUpdate(Callee, CalleeArg);
+        DominatorTree DT(*(const_cast<Function *>(Caller)));
+        LoopInfo LI(DT);
+        bool InLoop = (LI.getLoopFor(CB->getParent()) != nullptr);
+        dbgs() << " InLoop: " << InLoop << "\n";
+        if ((NumGEPIdxUses > NUMGEPIDX) &&
+            (IsIVUpdate || !ONLYIVUPDATES) &&
+            (InLoop || !ONLYINLOOPS))
+          return 1000;
+      }
+  }
+
   // Increase the threshold if an incoming argument is used only as a memcpy
   // source.
   for (const Argument &Arg : Callee->args()) {

>From e0030496a2510604ae2f2262729bed433499f945 Mon Sep 17 00:00:00 2001
From: Jonas Paulsson <paulson1 at linux.ibm.com>
Date: Fri, 18 Oct 2024 09:29:37 +0200
Subject: [PATCH 2/2] cleanup

---
 .../SystemZ/SystemZTargetTransformInfo.cpp    | 123 +++++-------------
 .../CodeGen/SystemZ/inline-thresh-adjust.ll   |  28 ++++
 2 files changed, 58 insertions(+), 93 deletions(-)

diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index aa8b77e749b713..128135199acf41 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -85,68 +85,29 @@ static void getNumGEPIndexUses(const Value *V, unsigned &NumGEPIdxUses) {
       getNumGEPIndexUses(SExtI, NumGEPIdxUses);
     else if (const auto *ZExtI = dyn_cast<ZExtInst>(U))
       getNumGEPIndexUses(ZExtI, NumGEPIdxUses);
-    else if (const auto *TruncI = dyn_cast<TruncInst>(U))
-      getNumGEPIndexUses(TruncI, NumGEPIdxUses);  // XXX Effective?
     else if (isa<GetElementPtrInst>(U))
       NumGEPIdxUses++;
   }
 }
 
-// Only one load? one store?
-// Only constants?
+// Return true if Arg is used in a Load; Add/Sub; Store sequence.
 static bool looksLikeIVUpdate(const Function *F, const Value *Arg) {
-  assert(Arg->getType()->isPointerTy() && "Expecting alloca (ptr).");
-  // Load *Arg -> Add/Sub Constant -> Store *Arg
-  unsigned IVUpdates = 0;
-  unsigned NumLoads = 0;
-  unsigned NumStores = 0;
-  unsigned NumCalls = 0;
-  unsigned NumOthers = 0;
-  for (const User *U : Arg->users()) {
-    if (const auto *LI = dyn_cast<LoadInst>(U)) {
-      if (!LI->getType()->isIntegerTy()) {
-        NumOthers++;
-        continue;
-      }
-      NumLoads++;
-      for (const User *U2 : LI->users()) {
-        const Instruction *LdUser = cast<Instruction>(U2);
-        if ((LdUser->getOpcode() == Instruction::Add) // &&
-             // (isa<Constant>(LdUser->getOperand(0)) ||
-             //  isa<Constant>(LdUser->getOperand(1))))
-            ||
-            (LdUser->getOpcode() == Instruction::Sub) // &&
-             // isa<Constant>(LdUser->getOperand(1)))
-          ) {
-          for (const User *U3 : LdUser->users())
-            if (const auto *SI = dyn_cast<StoreInst>(U3))
-              if (SI->getPointerOperand() == Arg)
-                IVUpdates++;
+  assert(Arg->getType()->isPointerTy() && "Expecting ptr arg.");
+  for (const User *ArgU : Arg->users())
+    if (const auto *LoadI = dyn_cast<LoadInst>(ArgU))
+      if (LoadI->getType()->isIntegerTy())
+        for (const User *LdU : LoadI->users()) {
+          const Instruction *AddSubI = cast<Instruction>(LdU);
+          if (AddSubI->getOpcode() == Instruction::Add ||
+              AddSubI->getOpcode() == Instruction::Sub)
+            for (const User *ASU : AddSubI->users())
+              if (const auto *StoreI = dyn_cast<StoreInst>(ASU))
+                if (StoreI->getPointerOperand() == Arg)
+                  return true;
         }
-      }
-    }
-    else if (isa<StoreInst>(U))
-      NumStores++;
-    else if (isa<CallBase>(U))
-      NumCalls++;
-    else
-      NumOthers++;
-  }
-  dbgs() << " IVUpdates: " << IVUpdates
-         << " NumLoads: " << NumLoads
-         << " NumStores: " << NumStores
-         << " NumCalls: " << NumCalls
-         << " NumOthers: " << NumOthers;
-
-  return IVUpdates > 0; //  && NumStores == IVUpdates;
+  return false;
 }
 
-#include "llvm/IR/Dominators.h"
-#include "llvm/Analysis/LoopInfo.h"
-static cl::opt<bool> ONLYINLOOPS("onlyinloops", cl::init(false));
-static cl::opt<unsigned> NUMGEPIDX("numgepidx", cl::init(0));
-static cl::opt<bool> ONLYIVUPDATES("onlyivupdates", cl::init(true));
-
 unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
   unsigned Bonus = 0;
   const Function *Caller = CB->getParent()->getParent();
@@ -155,46 +116,6 @@ unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
     return 0;
   const Module *M = Caller->getParent();
 
-  // if (strcmp(Caller->getName().data(), "_Z6searchP7state_tiiiii") == 0 &&
-  //     strcmp(Callee->getName().data(), "_ZL15remove_one_fastPiS_S_i") == 0) {
-  //   if (!ONLYINLOOPS)
-  //     return 1000;
-  //   else {
-  //     DominatorTree DT(*(const_cast<Function *>(Caller)));
-  //     LoopInfo LI(DT);
-  //     const BasicBlock *CallBB = CB->getParent();
-  //     if (LI.getLoopFor(CallBB) != nullptr)
-  //       return 1000;
-  //   }
-  // }
-
-  // Give bonus for an integer alloca which is used as a GEP index in
-  // caller....XXX
-  for (unsigned OpIdx = 0; OpIdx != Callee->arg_size(); ++OpIdx) {
-    Value *CallerArg = CB->getArgOperand(OpIdx);
-    Argument *CalleeArg = Callee->getArg(OpIdx);
-    if (const AllocaInst *AI = dyn_cast<AllocaInst>(CallerArg))
-      if (AI->getAllocatedType()->isIntegerTy() && !AI->isArrayAllocation()) {
-        unsigned NumGEPIdxUses = 0;
-        getNumGEPIndexUses(CallerArg, NumGEPIdxUses);
-        dbgs() << "ALLOCA-ARG: "
-               << Caller->getName().data() << " / "
-               << Callee->getName().data() << " : "
-               << *AI->getAllocatedType()
-               << " NumGEPIdxUses: " << NumGEPIdxUses
-               << " IV: ";
-        bool IsIVUpdate = looksLikeIVUpdate(Callee, CalleeArg);
-        DominatorTree DT(*(const_cast<Function *>(Caller)));
-        LoopInfo LI(DT);
-        bool InLoop = (LI.getLoopFor(CB->getParent()) != nullptr);
-        dbgs() << " InLoop: " << InLoop << "\n";
-        if ((NumGEPIdxUses > NUMGEPIDX) &&
-            (IsIVUpdate || !ONLYIVUPDATES) &&
-            (InLoop || !ONLYINLOOPS))
-          return 1000;
-      }
-  }
-
   // Increase the threshold if an incoming argument is used only as a memcpy
   // source.
   for (const Argument &Arg : Callee->args()) {
@@ -244,6 +165,22 @@ unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
     Bonus += NumStores * 50;
   Bonus = std::min(Bonus, unsigned(1000));
 
+  // Give bonus for an integer alloca which is used as a GEP index in caller
+  // and is updated like an IV in memory in callee. This will help LSR.
+  for (unsigned OpIdx = 0; OpIdx != Callee->arg_size(); ++OpIdx) {
+    Value *CallerArg = CB->getArgOperand(OpIdx);
+    Argument *CalleeArg = Callee->getArg(OpIdx);
+    if (const AllocaInst *AI = dyn_cast<AllocaInst>(CallerArg))
+      if (AI->getAllocatedType()->isIntegerTy() && !AI->isArrayAllocation()) {
+        unsigned NumGEPIdxUses = 0;
+        getNumGEPIndexUses(AI, NumGEPIdxUses);
+        if (NumGEPIdxUses && looksLikeIVUpdate(Callee, CalleeArg)) {
+          Bonus = 1000;
+          break;
+        }
+      }
+  }
+
   LLVM_DEBUG(if (Bonus)
                dbgs() << "++ SZTTI Adding inlining bonus: " << Bonus << "\n";);
   return Bonus;
diff --git a/llvm/test/CodeGen/SystemZ/inline-thresh-adjust.ll b/llvm/test/CodeGen/SystemZ/inline-thresh-adjust.ll
index f7c83c7af7021b..80ef5463451c39 100644
--- a/llvm/test/CodeGen/SystemZ/inline-thresh-adjust.ll
+++ b/llvm/test/CodeGen/SystemZ/inline-thresh-adjust.ll
@@ -163,3 +163,31 @@ if.end100:                                        ; preds = %if.else79, %if.then
   %cmp181 = fcmp olt double %mul155, 0.000000e+00
   br label %common.ret
 }
+
+; Check that the inlining threshold is incremented for a function that is
+; accessing an alloca (used for addressing) of the caller by making a simple
+; IV update.
+;
+; CHECK: Inlining calls in: Caller3
+; CHECK: ++ SZTTI Adding inlining bonus: 1000
+
+define dso_local void @Caller3(ptr %Dst) {
+  %A = alloca i32
+  store i32 0, ptr %A
+  br label %loop
+
+loop:
+  %L = load i32, ptr %A
+  %SE = sext i32 %L to i64
+  %GEP = getelementptr  [240 x i32], ptr %Dst, i64 0, i64 %SE
+  store i64 0, ptr %GEP
+  call void @Callee3(ptr %A)
+  br label %loop
+}
+
+define void @Callee3(ptr %0) {
+  %L = load i32, ptr %0
+  %A = add nsw i32 %L, 1
+  store i32 %A, ptr %0
+  ret void
+}



More information about the llvm-commits mailing list