[llvm] [SystemZ] Add inlining heuristic for IV update in callee. (PR #113135)

via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 21 02:03:50 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-systemz

Author: Jonas Paulsson (JonPsson1)

<details>
<summary>Changes</summary>

Give a bonus in adjustInliningThreshold() for an integer alloca which is used as
a GEP index in the caller and passed to the callee, where it is updated like an
IV in memory. This will help LSR, as it can then get the SCEV representing the
increment.

In the majority of cases, these calls are inside loops, as one might expect
given that the callee updates the value with a simple 'load, add +1, store'
sequence.

This was also tried with any pointer (not just pointers to an alloca), but this
did not seem worth much (minor, mixed results), so the patch is instead kept
more specific.

This was found in deepsjeng (search.cpp), as a slight regression was noted
after removing the general threshold multiplier recently (76aa370).

@<!-- -->dominik-steenken 

---
Full diff: https://github.com/llvm/llvm-project/pull/113135.diff


2 Files Affected:

- (modified) llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp (+50) 
- (modified) llvm/test/CodeGen/SystemZ/inline-thresh-adjust.ll (+28) 


``````````diff
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 7e5728c40950ad..128135199acf41 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -74,6 +74,40 @@ static void countNumMemAccesses(const Value *Ptr, unsigned &NumStores,
     }
 }
 
+static void getNumGEPIndexUses(const Value *V, unsigned &NumGEPIdxUses) {
+  for (const User *U : V->users()) {
+    if (const auto *LI = dyn_cast<LoadInst>(U)) {
+      assert(isa<AllocaInst>(V) && LI->getType()->isIntegerTy() &&
+             "Expected a load only from the alloca, with integer type.");
+      getNumGEPIndexUses(LI, NumGEPIdxUses);
+    }
+    else if (const auto *SExtI = dyn_cast<SExtInst>(U))
+      getNumGEPIndexUses(SExtI, NumGEPIdxUses);
+    else if (const auto *ZExtI = dyn_cast<ZExtInst>(U))
+      getNumGEPIndexUses(ZExtI, NumGEPIdxUses);
+    else if (isa<GetElementPtrInst>(U))
+      NumGEPIdxUses++;
+  }
+}
+
+// Return true if Arg is used in a Load; Add/Sub; Store sequence.
+static bool looksLikeIVUpdate(const Function *F, const Value *Arg) {
+  assert(Arg->getType()->isPointerTy() && "Expecting ptr arg.");
+  for (const User *ArgU : Arg->users())
+    if (const auto *LoadI = dyn_cast<LoadInst>(ArgU))
+      if (LoadI->getType()->isIntegerTy())
+        for (const User *LdU : LoadI->users()) {
+          const Instruction *AddSubI = cast<Instruction>(LdU);
+          if (AddSubI->getOpcode() == Instruction::Add ||
+              AddSubI->getOpcode() == Instruction::Sub)
+            for (const User *ASU : AddSubI->users())
+              if (const auto *StoreI = dyn_cast<StoreInst>(ASU))
+                if (StoreI->getPointerOperand() == Arg)
+                  return true;
+        }
+  return false;
+}
+
 unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
   unsigned Bonus = 0;
   const Function *Caller = CB->getParent()->getParent();
@@ -131,6 +165,22 @@ unsigned SystemZTTIImpl::adjustInliningThreshold(const CallBase *CB) const {
     Bonus += NumStores * 50;
   Bonus = std::min(Bonus, unsigned(1000));
 
+  // Give bonus for an integer alloca which is used as a GEP index in caller
+  // and is updated like an IV in memory in callee. This will help LSR.
+  for (unsigned OpIdx = 0; OpIdx != Callee->arg_size(); ++OpIdx) {
+    Value *CallerArg = CB->getArgOperand(OpIdx);
+    Argument *CalleeArg = Callee->getArg(OpIdx);
+    if (const AllocaInst *AI = dyn_cast<AllocaInst>(CallerArg))
+      if (AI->getAllocatedType()->isIntegerTy() && !AI->isArrayAllocation()) {
+        unsigned NumGEPIdxUses = 0;
+        getNumGEPIndexUses(AI, NumGEPIdxUses);
+        if (NumGEPIdxUses && looksLikeIVUpdate(Callee, CalleeArg)) {
+          Bonus = 1000;
+          break;
+        }
+      }
+  }
+
   LLVM_DEBUG(if (Bonus)
                dbgs() << "++ SZTTI Adding inlining bonus: " << Bonus << "\n";);
   return Bonus;
diff --git a/llvm/test/CodeGen/SystemZ/inline-thresh-adjust.ll b/llvm/test/CodeGen/SystemZ/inline-thresh-adjust.ll
index f7c83c7af7021b..80ef5463451c39 100644
--- a/llvm/test/CodeGen/SystemZ/inline-thresh-adjust.ll
+++ b/llvm/test/CodeGen/SystemZ/inline-thresh-adjust.ll
@@ -163,3 +163,31 @@ if.end100:                                        ; preds = %if.else79, %if.then
   %cmp181 = fcmp olt double %mul155, 0.000000e+00
   br label %common.ret
 }
+
+; Check that the inlining threshold is incremented for a function that is
+; accessing an alloca (used for addressing) of the caller by making a simple
+; IV update.
+;
+; CHECK: Inlining calls in: Caller3
+; CHECK: ++ SZTTI Adding inlining bonus: 1000
+
+define dso_local void @Caller3(ptr %Dst) {
+  %A = alloca i32
+  store i32 0, ptr %A
+  br label %loop
+
+loop:
+  %L = load i32, ptr %A
+  %SE = sext i32 %L to i64
+  %GEP = getelementptr  [240 x i32], ptr %Dst, i64 0, i64 %SE
+  store i64 0, ptr %GEP
+  call void @Callee3(ptr %A)
+  br label %loop
+}
+
+define void @Callee3(ptr %0) {
+  %L = load i32, ptr %0
+  %A = add nsw i32 %L, 1
+  store i32 %A, ptr %0
+  ret void
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/113135


More information about the llvm-commits mailing list