[llvm] [ArgPromotion] Handle pointer arguments of recursive calls (PR #78735)

Vedant Paranjape via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 11 06:11:20 PDT 2024


https://github.com/vedantparanjape-amd updated https://github.com/llvm/llvm-project/pull/78735

>From e00aa9910c42699fceac6f1af591a8c94bc16b71 Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedant.paranjape at amd.com>
Date: Thu, 18 Jan 2024 19:52:06 +0000
Subject: [PATCH 01/19] [ArgPromotion] Handle pointer arguments of recursive
 calls

Argument promotion currently does not promote pointer arguments that are
passed along in recursive calls. This patch adds support for
self-recursive functions, i.e. functions whose SCC has size 1. Because
value tracking through recursive calls in an SCC of size greater than 1
is complex, we bail out in such cases.
---
 llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 62 +++++++++++++++++-
 .../argpromotion-recursion-pr1259.ll          | 65 +++++++++++++++++++
 2 files changed, 124 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/Transforms/ArgumentPromotion/argpromotion-recursion-pr1259.ll

diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index f7d86bcdd0de9..1b7d3c145a65a 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -447,6 +447,7 @@ static bool allCallersPassValidPointerForArgument(Argument *Arg,
 /// parts it can be promoted into.
 static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
                          unsigned MaxElements, bool IsRecursive,
+                         bool IsSelfRecursive,
                          SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) {
   // Quick exit for unused arguments
   if (Arg->use_empty())
@@ -611,13 +612,61 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
       // unknown users
     }
 
+    auto *CB = dyn_cast<CallBase>(V);
+    Value *PtrArg = dyn_cast<Value>(U);
+    if (IsSelfRecursive && CB && PtrArg) {
+      Type *PtrTy = PtrArg->getType();
+      Align PtrAlign = PtrArg->getPointerAlignment(DL);
+      APInt Offset(DL.getIndexTypeSizeInBits(PtrArg->getType()), 0);
+      PtrArg = PtrArg->stripAndAccumulateConstantOffsets(
+          DL, Offset,
+          /* AllowNonInbounds= */ true);
+      if (PtrArg != Arg)
+        return false;
+
+      if (Offset.getSignificantBits() >= 64)
+        return false;
+
+      int64_t Off = Offset.getSExtValue();
+      auto Pair = ArgParts.try_emplace(Off, ArgPart{PtrTy, PtrAlign, nullptr});
+      ArgPart &Part = Pair.first->second;
+
+      // We limit promotion to only promoting up to a fixed number of elements
+      // of the aggregate.
+      if (MaxElements > 0 && ArgParts.size() > MaxElements) {
+        LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
+                          << "more than " << MaxElements << " parts\n");
+        return false;
+      }
+
+      Part.Alignment = std::max(Part.Alignment, PtrAlign);
+      continue;
+    }
     // Unknown user.
     LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                       << "unknown user " << *V << "\n");
     return false;
   }
 
-  if (NeededDerefBytes || NeededAlign > 1) {
+  // Incase of functions with recursive calls, this check will fail when it
+  // tries to look at the first caller of this function. The caller may or may
+  // not have a load, incase it doesn't load the pointer being passed, this
+  // check will fail. So, it's safe to skip the check incase we know that we
+  // are dealing with a recursive call.
+  //
+  // def fun(ptr %a) {
+  //   ...
+  //   %loadres = load i32, ptr %a, align 4
+  //   %res = call i32 @fun(ptr %a)
+  //   ...
+  // }
+  //
+  // def bar(ptr %x) {
+  //   ...
+  //   %resbar = call i32 @fun(ptr %x)
+  //   ...
+  // }
+  if (!IsRecursive && (NeededDerefBytes || NeededAlign > 1)) {
     // Try to prove a required deref / aligned requirement.
     if (!allCallersPassValidPointerForArgument(Arg, NeededAlign,
                                                NeededDerefBytes)) {
@@ -700,6 +749,10 @@ static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F,
 /// calls the DoPromotion method.
 static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
                                   unsigned MaxElements, bool IsRecursive) {
+  // Due to complexity of handling cases where the SCC has more than one
+  // component. We want to limit argument promotion of recursive calls to
+  // just functions that directly call themselves.
+  bool IsSelfRecursive = false;
   // Don't perform argument promotion for naked functions; otherwise we can end
   // up removing parameters that are seemingly 'not used' as they are referred
   // to in the assembly.
@@ -745,8 +798,10 @@ static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
     if (CB->isMustTailCall())
       return nullptr;
 
-    if (CB->getFunction() == F)
+    if (CB->getFunction() == F) {
       IsRecursive = true;
+      IsSelfRecursive = true;
+    }
   }
 
   // Can't change signature of musttail caller
@@ -780,7 +835,8 @@ static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
     // If we can promote the pointer to its value.
     SmallVector<OffsetAndArgPart, 4> ArgParts;
 
-    if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
+    if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, IsSelfRecursive,
+                     ArgParts)) {
       SmallVector<Type *, 4> Types;
       for (const auto &Pair : ArgParts)
         Types.push_back(Pair.second.Ty);
diff --git a/llvm/test/Transforms/ArgumentPromotion/argpromotion-recursion-pr1259.ll b/llvm/test/Transforms/ArgumentPromotion/argpromotion-recursion-pr1259.ll
new file mode 100644
index 0000000000000..19bb4492171fc
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/argpromotion-recursion-pr1259.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
+define internal i32 @foo(ptr %x, i32 %n, i32 %m) {
+; CHECK-LABEL: define internal i32 @foo(
+; CHECK-SAME: i32 [[X_0_VAL:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+; CHECK:       [[COND_TRUE]]:
+; CHECK-NEXT:    br label %[[RETURN:.*]]
+; CHECK:       [[COND_FALSE]]:
+; CHECK-NEXT:    [[SUBVAL:%.*]] = sub i32 [[N]], 1
+; CHECK-NEXT:    [[CALLRET:%.*]] = call i32 @foo(i32 [[X_0_VAL]], i32 [[SUBVAL]], i32 [[X_0_VAL]])
+; CHECK-NEXT:    [[SUBVAL2:%.*]] = sub i32 [[N]], 2
+; CHECK-NEXT:    [[CALLRET2:%.*]] = call i32 @foo(i32 [[X_0_VAL]], i32 [[SUBVAL2]], i32 [[M]])
+; CHECK-NEXT:    [[CMP2:%.*]] = add i32 [[CALLRET]], [[CALLRET2]]
+; CHECK-NEXT:    br label %[[RETURN]]
+; CHECK:       [[COND_NEXT:.*]]:
+; CHECK-NEXT:    br label %[[RETURN]]
+; CHECK:       [[RETURN]]:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[X_0_VAL]], %[[COND_TRUE]] ], [ [[CMP2]], %[[COND_FALSE]] ], [ undef, %[[COND_NEXT]] ]
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+;
+entry:
+  %cmp = icmp ne i32 %n, 0
+  br i1 %cmp, label %cond_true, label %cond_false
+
+cond_true:                                        ; preds = %entry
+  %val = load i32, ptr %x, align 4
+  br label %return
+
+cond_false:                                       ; preds = %entry
+  %val2 = load i32, ptr %x, align 4
+  %subval = sub i32 %n, 1
+  %callret = call i32 @foo(ptr %x, i32 %subval, i32 %val2)
+  %subval2 = sub i32 %n, 2
+  %callret2 = call i32 @foo(ptr %x, i32 %subval2, i32 %m)
+  %cmp2 = add i32 %callret, %callret2
+  br label %return
+
+cond_next:                                        ; No predecessors!
+  br label %return
+
+return:                                           ; preds = %cond_next, %cond_false, %cond_true
+  %retval.0 = phi i32 [ %val, %cond_true ], [ %cmp2, %cond_false ], [ undef, %cond_next ]
+  ret i32 %retval.0
+}
+
+define i32 @bar(ptr %x, i32 %n, i32 %m) {
+; CHECK-LABEL: define i32 @bar(
+; CHECK-SAME: ptr [[X:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[X_VAL:%.*]] = load i32, ptr [[X]], align 4
+; CHECK-NEXT:    [[CALLRET3:%.*]] = call i32 @foo(i32 [[X_VAL]], i32 [[N]], i32 [[M]])
+; CHECK-NEXT:    br label %[[RETURN:.*]]
+; CHECK:       [[RETURN]]:
+; CHECK-NEXT:    ret i32 [[CALLRET3]]
+;
+entry:
+  %callret3 = call i32 @foo(ptr %x, i32 %n, i32 %m)
+  br label %return
+
+return:                                           ; preds = %entry
+  ret i32 %callret3
+}

>From 3f7d63259a5e6ffd4fe34365fa7b78e56bf2ca58 Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedant.paranjape at amd.com>
Date: Mon, 3 Jun 2024 08:23:17 +0000
Subject: [PATCH 02/19] Add a check to stop recursive promotion of ptr args

---
 llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 31 ++++++++++++-------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 1b7d3c145a65a..7165a654570f8 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -447,7 +447,6 @@ static bool allCallersPassValidPointerForArgument(Argument *Arg,
 /// parts it can be promoted into.
 static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
                          unsigned MaxElements, bool IsRecursive,
-                         bool IsSelfRecursive,
                          SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) {
   // Quick exit for unused arguments
   if (Arg->use_empty())
@@ -614,7 +613,7 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
 
     auto *CB = dyn_cast<CallBase>(V);
     Value *PtrArg = dyn_cast<Value>(U);
-    if (IsSelfRecursive && CB && PtrArg) {
+    if (IsRecursive && CB && PtrArg) {
       Type *PtrTy = PtrArg->getType();
       Align PtrAlign = PtrArg->getPointerAlignment(DL);
       APInt Offset(DL.getIndexTypeSizeInBits(PtrArg->getType()), 0);
@@ -627,6 +626,23 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
       if (Offset.getSignificantBits() >= 64)
         return false;
 
+      // If this is a recursive function and one of the argument types is a
+      // pointer that isn't loaded to a non pointer type, it can lead to
+      // recursive promotion. Look for any Load candidates above the function
+      // call that load a non pointer type from this argument pointer. If we
+      // don't find even one such use, return false. For reference, you can
+      // refer to Transforms/ArgumentPromotion/pr42028-recursion.ll and
+      // Transforms/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll
+      // testcases.
+      bool doesPointerResolve = false;
+      for (auto Load : Loads)
+        if (Load->getPointerOperand() == PtrArg &&
+            !Load->getType()->isPointerTy())
+          doesPointerResolve = true;
+
+      if (!doesPointerResolve)
+        return false;
+
       int64_t Off = Offset.getSExtValue();
       auto Pair = ArgParts.try_emplace(Off, ArgPart{PtrTy, PtrAlign, nullptr});
       ArgPart &Part = Pair.first->second;
@@ -749,10 +765,6 @@ static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F,
 /// calls the DoPromotion method.
 static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
                                   unsigned MaxElements, bool IsRecursive) {
-  // Due to complexity of handling cases where the SCC has more than one
-  // component. We want to limit argument promotion of recursive calls to
-  // just functions that directly call themselves.
-  bool IsSelfRecursive = false;
   // Don't perform argument promotion for naked functions; otherwise we can end
   // up removing parameters that are seemingly 'not used' as they are referred
   // to in the assembly.
@@ -798,10 +810,8 @@ static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
     if (CB->isMustTailCall())
       return nullptr;
 
-    if (CB->getFunction() == F) {
+    if (CB->getFunction() == F)
       IsRecursive = true;
-      IsSelfRecursive = true;
-    }
   }
 
   // Can't change signature of musttail caller
@@ -835,8 +845,7 @@ static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
     // If we can promote the pointer to its value.
     SmallVector<OffsetAndArgPart, 4> ArgParts;
 
-    if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, IsSelfRecursive,
-                     ArgParts)) {
+    if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
       SmallVector<Type *, 4> Types;
       for (const auto &Pair : ArgParts)
         Types.push_back(Pair.second.Ty);

>From d3d687254177213768fc0909bfad7152b04b7f69 Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedant.paranjape at amd.com>
Date: Fri, 7 Jun 2024 02:55:01 +0000
Subject: [PATCH 03/19] Address review comments

---
 llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 7165a654570f8..6a70ba9ea3f5b 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -612,7 +612,7 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
     }
 
     auto *CB = dyn_cast<CallBase>(V);
-    Value *PtrArg = dyn_cast<Value>(U);
+    Value *PtrArg = cast<Value>(U);
     if (IsRecursive && CB && PtrArg) {
       Type *PtrTy = PtrArg->getType();
       Align PtrAlign = PtrArg->getPointerAlignment(DL);
@@ -664,7 +664,7 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
     return false;
   }
 
-  // Incase of functions with recursive calls, this check will fail when it
+  // In case of functions with recursive calls, this check will fail when it
   // tries to look at the first caller of this function. The caller may or may
   // not have a load, incase it doesn't load the pointer being passed, this
   // check will fail. So, it's safe to skip the check incase we know that we

>From ba01424e8d383bd2c83fc8ec982e7e424c2f66d0 Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedant.paranjape at amd.com>
Date: Mon, 10 Jun 2024 05:37:06 +0000
Subject: [PATCH 04/19] Address review 2 comments

---
 llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 21 ++++++++++++-------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 6a70ba9ea3f5b..42e91b11e336c 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -423,13 +423,16 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
 
 /// Return true if we can prove that all callees pass in a valid pointer for the
 /// specified function argument.
-static bool allCallersPassValidPointerForArgument(Argument *Arg,
-                                                  Align NeededAlign,
-                                                  uint64_t NeededDerefBytes) {
+static bool allCallersPassValidPointerForArgument(
+    Argument *Arg, SmallPtrSet<CallBase *, 4> &RecursiveCalls,
+    Align NeededAlign, uint64_t NeededDerefBytes) {
   Function *Callee = Arg->getParent();
   const DataLayout &DL = Callee->getDataLayout();
   APInt Bytes(64, NeededDerefBytes);
 
+  if (RecursiveCalls.size())
+    return true;
+
   // Check if the argument itself is marked dereferenceable and aligned.
   if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL))
     return true;
@@ -571,6 +574,7 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
   SmallVector<const Use *, 16> Worklist;
   SmallPtrSet<const Use *, 16> Visited;
   SmallVector<LoadInst *, 16> Loads;
+  SmallPtrSet<CallBase *, 4> RecursiveCalls;
   auto AppendUses = [&](const Value *V) {
     for (const Use &U : V->uses())
       if (Visited.insert(&U).second)
@@ -644,8 +648,9 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
         return false;
 
       int64_t Off = Offset.getSExtValue();
-      auto Pair = ArgParts.try_emplace(Off, ArgPart{PtrTy, PtrAlign, nullptr});
-      ArgPart &Part = Pair.first->second;
+      if (Off)
+        LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
+                          << "pointer offset is not equal to zero\n");
 
       // We limit promotion to only promoting up to a fixed number of elements
       // of the aggregate.
@@ -655,7 +660,7 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
         return false;
       }
 
-      Part.Alignment = std::max(Part.Alignment, PtrAlign);
+      RecursiveCalls.insert(CB);
       continue;
     }
     // Unknown user.
@@ -682,9 +687,9 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
   //   %resbar = call i32 @fun(ptr %x)
   //   ...
   // }
-  if (!IsRecursive && (NeededDerefBytes || NeededAlign > 1)) {
+  if (NeededDerefBytes || NeededAlign > 1) {
     // Try to prove a required deref / aligned requirement.
-    if (!allCallersPassValidPointerForArgument(Arg, NeededAlign,
+    if (!allCallersPassValidPointerForArgument(Arg, RecursiveCalls, NeededAlign,
                                                NeededDerefBytes)) {
       LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                         << "not dereferenceable or aligned\n");

>From a183702b2de4e4ba404e85bdf5df47cf83df6996 Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedant.paranjape at amd.com>
Date: Mon, 10 Jun 2024 05:43:01 +0000
Subject: [PATCH 05/19] minor nitpick

---
 llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 42e91b11e336c..2f3aa99fb8f37 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -619,8 +619,7 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
     Value *PtrArg = cast<Value>(U);
     if (IsRecursive && CB && PtrArg) {
       Type *PtrTy = PtrArg->getType();
-      Align PtrAlign = PtrArg->getPointerAlignment(DL);
-      APInt Offset(DL.getIndexTypeSizeInBits(PtrArg->getType()), 0);
+      APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
       PtrArg = PtrArg->stripAndAccumulateConstantOffsets(
           DL, Offset,
           /* AllowNonInbounds= */ true);

>From d5bbcf05c2bef761a791840faca7eaf000a8152c Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedant.paranjape at amd.com>
Date: Wed, 12 Jun 2024 10:28:03 +0000
Subject: [PATCH 06/19] Address review 3 comments

---
 llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 2f3aa99fb8f37..041e7333b109f 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -430,8 +430,8 @@ static bool allCallersPassValidPointerForArgument(
   const DataLayout &DL = Callee->getDataLayout();
   APInt Bytes(64, NeededDerefBytes);
 
-  if (RecursiveCalls.size())
-    return true;
+  // if (RecursiveCalls.size())
+  //   return true;
 
   // Check if the argument itself is marked dereferenceable and aligned.
   if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL))
@@ -441,6 +441,13 @@ static bool allCallersPassValidPointerForArgument(
   // direct callees.
   return all_of(Callee->users(), [&](User *U) {
     CallBase &CB = cast<CallBase>(*U);
+    if (RecursiveCalls.contains(&CB))
+      return true;
+
+    // if (RecursiveCalls.size() &&
+    //     CB.getCalledFunction()->getName() == Callee->getName())
+    //   return true;
+
     return isDereferenceableAndAlignedPointer(CB.getArgOperand(Arg->getArgNo()),
                                               NeededAlign, Bytes, DL);
   });
@@ -647,9 +654,11 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
         return false;
 
       int64_t Off = Offset.getSExtValue();
-      if (Off)
+      if (Off) {
         LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                           << "pointer offset is not equal to zero\n");
+        return false;
+      }
 
       // We limit promotion to only promoting up to a fixed number of elements
       // of the aggregate.

>From b7c41292553098e2606785e63893639b23a84e92 Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedant.paranjape at amd.com>
Date: Wed, 12 Jun 2024 19:05:55 +0000
Subject: [PATCH 07/19] Address review 4 comments

---
 llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 041e7333b109f..e217bac2174de 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -660,6 +660,13 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
         return false;
       }
 
+      unsigned int ArgNo = Arg->getArgNo();
+      if (CB->getArgOperand(ArgNo) != Arg) {
+        LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
+                          << "arg position is different in callee\n");
+        return false;
+      }
+
       // We limit promotion to only promoting up to a fixed number of elements
       // of the aggregate.
       if (MaxElements > 0 && ArgParts.size() > MaxElements) {

>From 979dd21a96471f85ef136a5c88233a7ecbdccf9d Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedant.paranjape at amd.com>
Date: Wed, 12 Jun 2024 19:52:08 +0000
Subject: [PATCH 08/19] Refactor and add comments, also address review comments

---
 llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 85 ++++++++++---------
 .../argpromotion-recursion-pr1259.ll          |  8 +-
 2 files changed, 49 insertions(+), 44 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index e217bac2174de..3d1b1cf4f53e1 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -421,6 +421,26 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
   return NF;
 }
 
+/// Returns true if the Ptr is loaded by any Load in the vector of
+/// Loads, and if the Loaded value is not a pointer.
+static bool checkIfPointerIsDereferenced(SmallVector<LoadInst *, 16> &Loads,
+                                         const Value *Ptr) {
+  // If this is a recursive function and one of the argument types is a
+  // pointer that isn't loaded to a non pointer type, it can lead to
+  // recursive promotion. Look for any Load candidates above the function
+  // call that load a non pointer type from this argument pointer. If we
+  // don't find even one such use, return false. For reference, you can
+  // refer to Transforms/ArgumentPromotion/pr42028-recursion.ll and
+  // Transforms/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll
+  // testcases.
+  bool doesPointerResolve = false;
+  for (auto Load : Loads)
+    if (Load->getPointerOperand() == Ptr && !Load->getType()->isPointerTy())
+      doesPointerResolve = true;
+
+  return doesPointerResolve;
+}
+
 /// Return true if we can prove that all callees pass in a valid pointer for the
 /// specified function argument.
 static bool allCallersPassValidPointerForArgument(
@@ -430,9 +450,6 @@ static bool allCallersPassValidPointerForArgument(
   const DataLayout &DL = Callee->getDataLayout();
   APInt Bytes(64, NeededDerefBytes);
 
-  // if (RecursiveCalls.size())
-  //   return true;
-
   // Check if the argument itself is marked dereferenceable and aligned.
   if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL))
     return true;
@@ -441,13 +458,33 @@ static bool allCallersPassValidPointerForArgument(
   // direct callees.
   return all_of(Callee->users(), [&](User *U) {
     CallBase &CB = cast<CallBase>(*U);
+    // In case of functions with recursive calls, this check
+    // (isDereferenceableAndAlignedPointer) will fail when it tries to look at
+    // the first caller of this function. The caller may or may not have a load,
+    // incase it doesn't load the pointer being passed, this check will fail.
+    // So, it's safe to skip the check incase we know that we are dealing with a
+    // recursive call. For example we have a IR given below.
+    //
+    // def fun(ptr %a) {
+    //   ...
+    //   %loadres = load i32, ptr %a, align 4
+    //   %res = call i32 @fun(ptr %a)
+    //   ...
+    // }
+    //
+    // def bar(ptr %x) {
+    //   ...
+    //   %resbar = call i32 @fun(ptr %x)
+    //   ...
+    // }
+    //
+    // Since we record processed recursive calls, we check if the current
+    // CallBase has been processed before. If yes it means that it is a
+    // recursive call and we can skip the check just for this call. So, just
+    // return true.
     if (RecursiveCalls.contains(&CB))
       return true;
 
-    // if (RecursiveCalls.size() &&
-    //     CB.getCalledFunction()->getName() == Callee->getName())
-    //   return true;
-
     return isDereferenceableAndAlignedPointer(CB.getArgOperand(Arg->getArgNo()),
                                               NeededAlign, Bytes, DL);
   });
@@ -636,21 +673,7 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
       if (Offset.getSignificantBits() >= 64)
         return false;
 
-      // If this is a recursive function and one of the argument types is a
-      // pointer that isn't loaded to a non pointer type, it can lead to
-      // recursive promotion. Look for any Load candidates above the function
-      // call that load a non pointer type from this argument pointer. If we
-      // don't find even one such use, return false. For reference, you can
-      // refer to Transforms/ArgumentPromotion/pr42028-recursion.ll and
-      // Transforms/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll
-      // testcases.
-      bool doesPointerResolve = false;
-      for (auto Load : Loads)
-        if (Load->getPointerOperand() == PtrArg &&
-            !Load->getType()->isPointerTy())
-          doesPointerResolve = true;
-
-      if (!doesPointerResolve)
+      if (!checkIfPointerIsDereferenced(Loads, PtrArg))
         return false;
 
       int64_t Off = Offset.getSExtValue();
@@ -684,24 +707,6 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
     return false;
   }
 
-  // In case of functions with recursive calls, this check will fail when it
-  // tries to look at the first caller of this function. The caller may or may
-  // not have a load, incase it doesn't load the pointer being passed, this
-  // check will fail. So, it's safe to skip the check incase we know that we
-  // are dealing with a recursive call.
-  //
-  // def fun(ptr %a) {
-  //   ...
-  //   %loadres = load i32, ptr %a, align 4
-  //   %res = call i32 @fun(ptr %a)
-  //   ...
-  // }
-  //
-  // def bar(ptr %x) {
-  //   ...
-  //   %resbar = call i32 @fun(ptr %x)
-  //   ...
-  // }
   if (NeededDerefBytes || NeededAlign > 1) {
     // Try to prove a required deref / aligned requirement.
     if (!allCallersPassValidPointerForArgument(Arg, RecursiveCalls, NeededAlign,
diff --git a/llvm/test/Transforms/ArgumentPromotion/argpromotion-recursion-pr1259.ll b/llvm/test/Transforms/ArgumentPromotion/argpromotion-recursion-pr1259.ll
index 19bb4492171fc..e160dbad92e7b 100644
--- a/llvm/test/Transforms/ArgumentPromotion/argpromotion-recursion-pr1259.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/argpromotion-recursion-pr1259.ll
@@ -18,7 +18,7 @@ define internal i32 @foo(ptr %x, i32 %n, i32 %m) {
 ; CHECK:       [[COND_NEXT:.*]]:
 ; CHECK-NEXT:    br label %[[RETURN]]
 ; CHECK:       [[RETURN]]:
-; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[X_0_VAL]], %[[COND_TRUE]] ], [ [[CMP2]], %[[COND_FALSE]] ], [ undef, %[[COND_NEXT]] ]
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[X_0_VAL]], %[[COND_TRUE]] ], [ [[CMP2]], %[[COND_FALSE]] ], [ poison, %[[COND_NEXT]] ]
 ; CHECK-NEXT:    ret i32 [[RETVAL_0]]
 ;
 entry:
@@ -42,13 +42,13 @@ cond_next:                                        ; No predecessors!
   br label %return
 
 return:                                           ; preds = %cond_next, %cond_false, %cond_true
-  %retval.0 = phi i32 [ %val, %cond_true ], [ %cmp2, %cond_false ], [ undef, %cond_next ]
+  %retval.0 = phi i32 [ %val, %cond_true ], [ %cmp2, %cond_false ], [ poison, %cond_next ]
   ret i32 %retval.0
 }
 
-define i32 @bar(ptr %x, i32 %n, i32 %m) {
+define i32 @bar(ptr align(4) dereferenceable(4) %x, i32 %n, i32 %m) {
 ; CHECK-LABEL: define i32 @bar(
-; CHECK-SAME: ptr [[X:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
+; CHECK-SAME: ptr align 4 dereferenceable(4) [[X:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    [[X_VAL:%.*]] = load i32, ptr [[X]], align 4
 ; CHECK-NEXT:    [[CALLRET3:%.*]] = call i32 @foo(i32 [[X_VAL]], i32 [[N]], i32 [[M]])

>From 4457b7ab4640e3741407346ce25b6c41e9da280f Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedant.paranjape at amd.com>
Date: Thu, 13 Jun 2024 21:31:46 +0000
Subject: [PATCH 09/19] Add check for self recursion

---
 llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 3d1b1cf4f53e1..9f413f255e5a2 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -494,6 +494,7 @@ static bool allCallersPassValidPointerForArgument(
 /// parts it can be promoted into.
 static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
                          unsigned MaxElements, bool IsRecursive,
+                         bool isSelfRecursive,
                          SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) {
   // Quick exit for unused arguments
   if (Arg->use_empty())
@@ -661,7 +662,7 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
 
     auto *CB = dyn_cast<CallBase>(V);
     Value *PtrArg = cast<Value>(U);
-    if (IsRecursive && CB && PtrArg) {
+    if (isSelfRecursive && CB && PtrArg) {
       Type *PtrTy = PtrArg->getType();
       APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
       PtrArg = PtrArg->stripAndAccumulateConstantOffsets(
@@ -673,8 +674,8 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
       if (Offset.getSignificantBits() >= 64)
         return false;
 
-      if (!checkIfPointerIsDereferenced(Loads, PtrArg))
-        return false;
+      // if (!checkIfPointerIsDereferenced(Loads, PtrArg))
+      //   return false;
 
       int64_t Off = Offset.getSExtValue();
       if (Off) {
@@ -790,6 +791,10 @@ static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F,
 /// calls the DoPromotion method.
 static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
                                   unsigned MaxElements, bool IsRecursive) {
+  // Due to complexity of handling cases where the SCC has more than one
+  // component. We want to limit argument promotion of recursive calls to
+  // just functions that directly call themselves.
+  bool IsSelfRecursive = false;
   // Don't perform argument promotion for naked functions; otherwise we can end
   // up removing parameters that are seemingly 'not used' as they are referred
   // to in the assembly.
@@ -835,8 +840,10 @@ static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
     if (CB->isMustTailCall())
       return nullptr;
 
-    if (CB->getFunction() == F)
+    if (CB->getFunction() == F) {
       IsRecursive = true;
+      IsSelfRecursive = true;
+    }
   }
 
   // Can't change signature of musttail caller
@@ -870,7 +877,8 @@ static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
     // If we can promote the pointer to its value.
     SmallVector<OffsetAndArgPart, 4> ArgParts;
 
-    if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
+    if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, IsSelfRecursive,
+                     ArgParts)) {
       SmallVector<Type *, 4> Types;
       for (const auto &Pair : ArgParts)
         Types.push_back(Pair.second.Ty);

>From d9ca1244617affa5fc36d9206b472f74856e5f97 Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedant.paranjape at amd.com>
Date: Thu, 13 Jun 2024 21:45:26 +0000
Subject: [PATCH 10/19] Add check if call is recursion

---
 llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 9f413f255e5a2..6265cdd46b0f9 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -662,7 +662,8 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
 
     auto *CB = dyn_cast<CallBase>(V);
     Value *PtrArg = cast<Value>(U);
-    if (isSelfRecursive && CB && PtrArg) {
+    if (isSelfRecursive && CB && PtrArg &&
+        CB->getCalledFunction() == CB->getFunction()) {
       Type *PtrTy = PtrArg->getType();
       APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
       PtrArg = PtrArg->stripAndAccumulateConstantOffsets(

>From 169eee2bde99351da3a04e0d0e49d7b371dbbf99 Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedant.paranjape at amd.com>
Date: Thu, 13 Jun 2024 21:54:53 +0000
Subject: [PATCH 11/19] remove isSelfRecursive check

---
 llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 26 +------------------
 1 file changed, 1 insertion(+), 25 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 6265cdd46b0f9..0761b711c0240 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -421,26 +421,6 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
   return NF;
 }
 
-/// Returns true if the Ptr is loaded by any Load in the vector of
-/// Loads, and if the Loaded value is not a pointer.
-static bool checkIfPointerIsDereferenced(SmallVector<LoadInst *, 16> &Loads,
-                                         const Value *Ptr) {
-  // If this is a recursive function and one of the argument types is a
-  // pointer that isn't loaded to a non pointer type, it can lead to
-  // recursive promotion. Look for any Load candidates above the function
-  // call that load a non pointer type from this argument pointer. If we
-  // don't find even one such use, return false. For reference, you can
-  // refer to Transforms/ArgumentPromotion/pr42028-recursion.ll and
-  // Transforms/ArgumentPromotion/2008-09-08-CGUpdateSelfEdge.ll
-  // testcases.
-  bool doesPointerResolve = false;
-  for (auto Load : Loads)
-    if (Load->getPointerOperand() == Ptr && !Load->getType()->isPointerTy())
-      doesPointerResolve = true;
-
-  return doesPointerResolve;
-}
-
 /// Return true if we can prove that all callees pass in a valid pointer for the
 /// specified function argument.
 static bool allCallersPassValidPointerForArgument(
@@ -662,8 +642,7 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
 
     auto *CB = dyn_cast<CallBase>(V);
     Value *PtrArg = cast<Value>(U);
-    if (isSelfRecursive && CB && PtrArg &&
-        CB->getCalledFunction() == CB->getFunction()) {
+    if (CB && PtrArg && CB->getCalledFunction() == CB->getFunction()) {
       Type *PtrTy = PtrArg->getType();
       APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
       PtrArg = PtrArg->stripAndAccumulateConstantOffsets(
@@ -675,9 +654,6 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
       if (Offset.getSignificantBits() >= 64)
         return false;
 
-      // if (!checkIfPointerIsDereferenced(Loads, PtrArg))
-      //   return false;
-
       int64_t Off = Offset.getSExtValue();
       if (Off) {
         LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "

>From 9dc850457d89fc0c5b8d2dbbf3b39b805214a2df Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedant.paranjape at amd.com>
Date: Thu, 13 Jun 2024 22:19:58 +0000
Subject: [PATCH 12/19] Add check for argument number as well in recursive
 calls

---
 llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 0761b711c0240..2af1114dd0f92 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -662,7 +662,7 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
       }
 
       unsigned int ArgNo = Arg->getArgNo();
-      if (CB->getArgOperand(ArgNo) != Arg) {
+      if (CB->getArgOperand(ArgNo) != Arg || U->getOperandNo() != ArgNo) {
         LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                           << "arg position is different in callee\n");
         return false;

>From fb7b1ad2210dcdec388c7b746ff5ad37fbad0565 Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedant.paranjape at amd.com>
Date: Thu, 13 Jun 2024 22:31:37 +0000
Subject: [PATCH 13/19] Added more testcases

---
 .../recursion-arg-position-pr1259.ll          | 68 +++++++++++++++++++
 .../recursion-same-arg-twice-pr1259.ll        | 68 +++++++++++++++++++
 2 files changed, 136 insertions(+)
 create mode 100644 llvm/test/Transforms/ArgumentPromotion/recursion-arg-position-pr1259.ll
 create mode 100644 llvm/test/Transforms/ArgumentPromotion/recursion-same-arg-twice-pr1259.ll

diff --git a/llvm/test/Transforms/ArgumentPromotion/recursion-arg-position-pr1259.ll b/llvm/test/Transforms/ArgumentPromotion/recursion-arg-position-pr1259.ll
new file mode 100644
index 0000000000000..32e1a6f045079
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/recursion-arg-position-pr1259.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
+define internal i32 @foo(ptr %x, ptr %y, i32 %n, i32 %m) {
+; CHECK-LABEL: define internal i32 @foo(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+; CHECK:       [[COND_TRUE]]:
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[X]], align 4
+; CHECK-NEXT:    br label %[[RETURN:.*]]
+; CHECK:       [[COND_FALSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = load i32, ptr [[X]], align 4
+; CHECK-NEXT:    [[VAL3:%.*]] = load i32, ptr [[Y]], align 4
+; CHECK-NEXT:    [[SUBVAL:%.*]] = sub i32 [[N]], [[VAL3]]
+; CHECK-NEXT:    [[CALLRET:%.*]] = call i32 @foo(ptr [[X]], ptr [[Y]], i32 [[SUBVAL]], i32 [[VAL2]])
+; CHECK-NEXT:    [[SUBVAL2:%.*]] = sub i32 [[N]], 2
+; CHECK-NEXT:    [[CALLRET2:%.*]] = call i32 @foo(ptr [[Y]], ptr [[X]], i32 [[SUBVAL2]], i32 [[M]])
+; CHECK-NEXT:    [[CMP2:%.*]] = add i32 [[CALLRET]], [[CALLRET2]]
+; CHECK-NEXT:    br label %[[RETURN]]
+; CHECK:       [[COND_NEXT:.*]]:
+; CHECK-NEXT:    br label %[[RETURN]]
+; CHECK:       [[RETURN]]:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[VAL]], %[[COND_TRUE]] ], [ [[CMP2]], %[[COND_FALSE]] ], [ poison, %[[COND_NEXT]] ]
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+;
+entry:
+  %cmp = icmp ne i32 %n, 0
+  br i1 %cmp, label %cond_true, label %cond_false
+
+cond_true:                                        ; preds = %entry
+  %val = load i32, ptr %x, align 4
+  br label %return
+
+cond_false:                                       ; preds = %entry
+  %val2 = load i32, ptr %x, align 4
+  %val3 = load i32, ptr %y, align 4
+  %subval = sub i32 %n, %val3
+  %callret = call i32 @foo(ptr %x, ptr %y, i32 %subval, i32 %val2)
+  %subval2 = sub i32 %n, 2
+  %callret2 = call i32 @foo(ptr %y, ptr %x, i32 %subval2, i32 %m)
+  %cmp2 = add i32 %callret, %callret2
+  br label %return
+
+cond_next:                                        ; No predecessors!
+  br label %return
+
+return:                                           ; preds = %cond_next, %cond_false, %cond_true
+  %retval.0 = phi i32 [ %val, %cond_true ], [ %cmp2, %cond_false ], [ poison, %cond_next ]
+  ret i32 %retval.0
+}
+
+define i32 @bar(ptr align(4) dereferenceable(4) %x, ptr align(4) dereferenceable(4) %y, i32 %n, i32 %m) {
+; CHECK-LABEL: define i32 @bar(
+; CHECK-SAME: ptr align 4 dereferenceable(4) [[X:%.*]], ptr align 4 dereferenceable(4) [[Y:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CALLRET3:%.*]] = call i32 @foo(ptr [[X]], ptr [[Y]], i32 [[N]], i32 [[M]])
+; CHECK-NEXT:    br label %[[RETURN:.*]]
+; CHECK:       [[RETURN]]:
+; CHECK-NEXT:    ret i32 [[CALLRET3]]
+;
+entry:
+  %callret3 = call i32 @foo(ptr %x, ptr %y, i32 %n, i32 %m)
+  br label %return
+
+return:                                           ; preds = %entry
+  ret i32 %callret3
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/recursion-same-arg-twice-pr1259.ll b/llvm/test/Transforms/ArgumentPromotion/recursion-same-arg-twice-pr1259.ll
new file mode 100644
index 0000000000000..983bfaa0bb596
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/recursion-same-arg-twice-pr1259.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
+define internal i32 @foo(ptr %x, ptr %y, i32 %n, i32 %m) {
+; CHECK-LABEL: define internal i32 @foo(
+; CHECK-SAME: ptr [[X:%.*]], ptr [[Y:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+; CHECK:       [[COND_TRUE]]:
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[X]], align 4
+; CHECK-NEXT:    br label %[[RETURN:.*]]
+; CHECK:       [[COND_FALSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = load i32, ptr [[X]], align 4
+; CHECK-NEXT:    [[VAL3:%.*]] = load i32, ptr [[Y]], align 4
+; CHECK-NEXT:    [[SUBVAL:%.*]] = sub i32 [[N]], [[VAL3]]
+; CHECK-NEXT:    [[CALLRET:%.*]] = call i32 @foo(ptr [[X]], ptr [[Y]], i32 [[SUBVAL]], i32 [[VAL2]])
+; CHECK-NEXT:    [[SUBVAL2:%.*]] = sub i32 [[N]], 2
+; CHECK-NEXT:    [[CALLRET2:%.*]] = call i32 @foo(ptr [[X]], ptr [[X]], i32 [[SUBVAL2]], i32 [[M]])
+; CHECK-NEXT:    [[CMP2:%.*]] = add i32 [[CALLRET]], [[CALLRET2]]
+; CHECK-NEXT:    br label %[[RETURN]]
+; CHECK:       [[COND_NEXT:.*]]:
+; CHECK-NEXT:    br label %[[RETURN]]
+; CHECK:       [[RETURN]]:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[VAL]], %[[COND_TRUE]] ], [ [[CMP2]], %[[COND_FALSE]] ], [ poison, %[[COND_NEXT]] ]
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+;
+entry:
+  %cmp = icmp ne i32 %n, 0
+  br i1 %cmp, label %cond_true, label %cond_false
+
+cond_true:                                        ; preds = %entry
+  %val = load i32, ptr %x, align 4
+  br label %return
+
+cond_false:                                       ; preds = %entry
+  %val2 = load i32, ptr %x, align 4
+  %val3 = load i32, ptr %y, align 4
+  %subval = sub i32 %n, %val3
+  %callret = call i32 @foo(ptr %x, ptr %y, i32 %subval, i32 %val2)
+  %subval2 = sub i32 %n, 2
+  %callret2 = call i32 @foo(ptr %x, ptr %x, i32 %subval2, i32 %m)
+  %cmp2 = add i32 %callret, %callret2
+  br label %return
+
+cond_next:                                        ; No predecessors!
+  br label %return
+
+return:                                           ; preds = %cond_next, %cond_false, %cond_true
+  %retval.0 = phi i32 [ %val, %cond_true ], [ %cmp2, %cond_false ], [ poison, %cond_next ]
+  ret i32 %retval.0
+}
+
+define i32 @bar(ptr align(4) dereferenceable(4) %x, ptr align(4) dereferenceable(4) %y, i32 %n, i32 %m) {
+; CHECK-LABEL: define i32 @bar(
+; CHECK-SAME: ptr align 4 dereferenceable(4) [[X:%.*]], ptr align 4 dereferenceable(4) [[Y:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CALLRET3:%.*]] = call i32 @foo(ptr [[X]], ptr [[Y]], i32 [[N]], i32 [[M]])
+; CHECK-NEXT:    br label %[[RETURN:.*]]
+; CHECK:       [[RETURN]]:
+; CHECK-NEXT:    ret i32 [[CALLRET3]]
+;
+entry:
+  %callret3 = call i32 @foo(ptr %x, ptr %y, i32 %n, i32 %m)
+  br label %return
+
+return:                                           ; preds = %entry
+  ret i32 %callret3
+}

>From 42fa56e85fb1809de9f1a68f19dbe0ffe7ca4084 Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedant.paranjape at amd.com>
Date: Fri, 14 Jun 2024 16:34:39 +0000
Subject: [PATCH 14/19] Add more tests, and move them to a folder

---
 .../argpromotion-recursion-pr1259.ll          |   0
 .../recursion-arg-position-pr1259.ll          |   0
 .../recursion/recursion-mixed-calls.ll        |  84 +++++++++++
 .../recursion/recursion-non-zero-offset.ll    | 134 ++++++++++++++++++
 .../recursion-same-arg-twice-pr1259.ll        |   0
 5 files changed, 218 insertions(+)
 rename llvm/test/Transforms/ArgumentPromotion/{ => recursion}/argpromotion-recursion-pr1259.ll (100%)
 rename llvm/test/Transforms/ArgumentPromotion/{ => recursion}/recursion-arg-position-pr1259.ll (100%)
 create mode 100644 llvm/test/Transforms/ArgumentPromotion/recursion/recursion-mixed-calls.ll
 create mode 100644 llvm/test/Transforms/ArgumentPromotion/recursion/recursion-non-zero-offset.ll
 rename llvm/test/Transforms/ArgumentPromotion/{ => recursion}/recursion-same-arg-twice-pr1259.ll (100%)

diff --git a/llvm/test/Transforms/ArgumentPromotion/argpromotion-recursion-pr1259.ll b/llvm/test/Transforms/ArgumentPromotion/recursion/argpromotion-recursion-pr1259.ll
similarity index 100%
rename from llvm/test/Transforms/ArgumentPromotion/argpromotion-recursion-pr1259.ll
rename to llvm/test/Transforms/ArgumentPromotion/recursion/argpromotion-recursion-pr1259.ll
diff --git a/llvm/test/Transforms/ArgumentPromotion/recursion-arg-position-pr1259.ll b/llvm/test/Transforms/ArgumentPromotion/recursion/recursion-arg-position-pr1259.ll
similarity index 100%
rename from llvm/test/Transforms/ArgumentPromotion/recursion-arg-position-pr1259.ll
rename to llvm/test/Transforms/ArgumentPromotion/recursion/recursion-arg-position-pr1259.ll
diff --git a/llvm/test/Transforms/ArgumentPromotion/recursion/recursion-mixed-calls.ll b/llvm/test/Transforms/ArgumentPromotion/recursion/recursion-mixed-calls.ll
new file mode 100644
index 0000000000000..0ec4137aadeb4
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/recursion/recursion-mixed-calls.ll
@@ -0,0 +1,84 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
+define internal i32 @zoo(ptr %x, i32 %m) {
+; CHECK-LABEL: define internal i32 @zoo(
+; CHECK-SAME: i32 [[X_0_VAL:%.*]], i32 [[M:%.*]]) {
+; CHECK-NEXT:    [[RESZOO:%.*]] = add i32 [[X_0_VAL]], [[M]]
+; CHECK-NEXT:    ret i32 [[X_0_VAL]]
+;
+  %valzoo = load i32, ptr %x, align 4
+  %reszoo = add i32 %valzoo, %m
+  ret i32 %valzoo
+}
+
+define internal i32 @foo(ptr %x, ptr %y, i32 %n, i32 %m) {
+; CHECK-LABEL: define internal i32 @foo(
+; CHECK-SAME: ptr [[X:%.*]], i32 [[Y_0_VAL:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+; CHECK:       [[COND_TRUE]]:
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[X]], align 4
+; CHECK-NEXT:    br label %[[RETURN:.*]]
+; CHECK:       [[COND_FALSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = load i32, ptr [[X]], align 4
+; CHECK-NEXT:    [[SUBVAL:%.*]] = sub i32 [[N]], [[Y_0_VAL]]
+; CHECK-NEXT:    [[CALLRET:%.*]] = call i32 @foo(ptr [[X]], i32 [[Y_0_VAL]], i32 [[SUBVAL]], i32 [[VAL2]])
+; CHECK-NEXT:    [[SUBVAL2:%.*]] = sub i32 [[N]], 2
+; CHECK-NEXT:    [[CALLRET2:%.*]] = call i32 @foo(ptr [[X]], i32 [[Y_0_VAL]], i32 [[SUBVAL2]], i32 [[M]])
+; CHECK-NEXT:    [[CMP1:%.*]] = add i32 [[CALLRET]], [[CALLRET2]]
+; CHECK-NEXT:    [[X_VAL:%.*]] = load i32, ptr [[X]], align 4
+; CHECK-NEXT:    [[CALLRETFINAL:%.*]] = call i32 @zoo(i32 [[X_VAL]], i32 [[M]])
+; CHECK-NEXT:    [[CMP2:%.*]] = add i32 [[CMP1]], [[CALLRETFINAL]]
+; CHECK-NEXT:    br label %[[RETURN]]
+; CHECK:       [[COND_NEXT:.*]]:
+; CHECK-NEXT:    br label %[[RETURN]]
+; CHECK:       [[RETURN]]:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[VAL]], %[[COND_TRUE]] ], [ [[CMP2]], %[[COND_FALSE]] ], [ poison, %[[COND_NEXT]] ]
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+;
+entry:
+  %cmp = icmp ne i32 %n, 0
+  br i1 %cmp, label %cond_true, label %cond_false
+
+cond_true:                                        ; preds = %entry
+  %val = load i32, ptr %x, align 4
+  br label %return
+
+cond_false:                                       ; preds = %entry
+  %val2 = load i32, ptr %x, align 4
+  %val3 = load i32, ptr %y, align 4
+  %subval = sub i32 %n, %val3
+  %callret = call i32 @foo(ptr %x, ptr %y, i32 %subval, i32 %val2)
+  %subval2 = sub i32 %n, 2
+  %callret2 = call i32 @foo(ptr %x, ptr %y, i32 %subval2, i32 %m)
+  %cmp1 = add i32 %callret, %callret2
+  %callretfinal = call i32 @zoo(ptr %x, i32 %m)
+  %cmp2 = add i32 %cmp1, %callretfinal
+  br label %return
+
+cond_next:                                        ; No predecessors!
+  br label %return
+
+return:                                           ; preds = %cond_next, %cond_false, %cond_true
+  %retval.0 = phi i32 [ %val, %cond_true ], [ %cmp2, %cond_false ], [ poison, %cond_next ]
+  ret i32 %retval.0
+}
+
+define i32 @bar(ptr align(4) dereferenceable(4) %x, ptr align(4) dereferenceable(4) %y, i32 %n, i32 %m) {
+; CHECK-LABEL: define i32 @bar(
+; CHECK-SAME: ptr align 4 dereferenceable(4) [[X:%.*]], ptr align 4 dereferenceable(4) [[Y:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[Y_VAL:%.*]] = load i32, ptr [[Y]], align 4
+; CHECK-NEXT:    [[CALLRET3:%.*]] = call i32 @foo(ptr [[X]], i32 [[Y_VAL]], i32 [[N]], i32 [[M]])
+; CHECK-NEXT:    br label %[[RETURN:.*]]
+; CHECK:       [[RETURN]]:
+; CHECK-NEXT:    ret i32 [[CALLRET3]]
+;
+entry:
+  %callret3 = call i32 @foo(ptr %x, ptr %y, i32 %n, i32 %m)
+  br label %return
+
+return:                                           ; preds = %entry
+  ret i32 %callret3
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/recursion/recursion-non-zero-offset.ll b/llvm/test/Transforms/ArgumentPromotion/recursion/recursion-non-zero-offset.ll
new file mode 100644
index 0000000000000..805414de17f13
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/recursion/recursion-non-zero-offset.ll
@@ -0,0 +1,134 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
+define internal i32 @foo(ptr %x, i32 %n, i32 %m) {
+; CHECK-LABEL: define internal i32 @foo(
+; CHECK-SAME: i32 [[X_0_VAL:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+; CHECK:       [[COND_TRUE]]:
+; CHECK-NEXT:    br label %[[RETURN:.*]]
+; CHECK:       [[COND_FALSE]]:
+; CHECK-NEXT:    [[SUBVAL:%.*]] = sub i32 [[N]], 1
+; CHECK-NEXT:    [[CALLRET:%.*]] = call i32 @foo(i32 [[X_0_VAL]], i32 [[SUBVAL]], i32 [[X_0_VAL]])
+; CHECK-NEXT:    [[SUBVAL2:%.*]] = sub i32 [[N]], 2
+; CHECK-NEXT:    [[CALLRET2:%.*]] = call i32 @foo(i32 [[X_0_VAL]], i32 [[SUBVAL2]], i32 [[M]])
+; CHECK-NEXT:    [[CMP2:%.*]] = add i32 [[CALLRET]], [[CALLRET2]]
+; CHECK-NEXT:    br label %[[RETURN]]
+; CHECK:       [[COND_NEXT:.*]]:
+; CHECK-NEXT:    br label %[[RETURN]]
+; CHECK:       [[RETURN]]:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[X_0_VAL]], %[[COND_TRUE]] ], [ [[CMP2]], %[[COND_FALSE]] ], [ poison, %[[COND_NEXT]] ]
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+;
+entry:
+  %cmp = icmp ne i32 %n, 0
+  br i1 %cmp, label %cond_true, label %cond_false
+
+cond_true:                                        ; preds = %entry
+  %val = load i32, ptr %x, align 4
+  br label %return
+
+cond_false:                                       ; preds = %entry
+  %val2 = load i32, ptr %x, align 4
+  %subval = sub i32 %n, 1
+  %callret = call i32 @foo(ptr %x, i32 %subval, i32 %val2)
+  %subval2 = sub i32 %n, 2
+  %callret2 = call i32 @foo(ptr %x, i32 %subval2, i32 %m)
+  %cmp2 = add i32 %callret, %callret2
+  br label %return
+
+cond_next:                                        ; No predecessors!
+  br label %return
+
+return:                                           ; preds = %cond_next, %cond_false, %cond_true
+  %retval.0 = phi i32 [ %val, %cond_true ], [ %cmp2, %cond_false ], [ poison, %cond_next ]
+  ret i32 %retval.0
+}
+
+define i32 @bar(ptr align(4) dereferenceable(4) %x, i32 %n, i32 %m) {
+; CHECK-LABEL: define i32 @bar(
+; CHECK-SAME: ptr align 4 dereferenceable(4) [[X:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[GEPVAL:%.*]] = getelementptr ptr, ptr [[X]], i32 0
+; CHECK-NEXT:    [[GEPVAL_VAL:%.*]] = load i32, ptr [[GEPVAL]], align 4
+; CHECK-NEXT:    [[CALLRET3:%.*]] = call i32 @foo(i32 [[GEPVAL_VAL]], i32 [[N]], i32 [[M]])
+; CHECK-NEXT:    br label %[[RETURN:.*]]
+; CHECK:       [[RETURN]]:
+; CHECK-NEXT:    ret i32 [[CALLRET3]]
+;
+entry:
+  %gepval = getelementptr ptr, ptr %x, i32 0
+  %callret3 = call i32 @foo(ptr %gepval, i32 %n, i32 %m)
+  br label %return
+
+return:                                           ; preds = %entry
+  ret i32 %callret3
+}
+
+define internal i32 @foo2(ptr %x, i32 %n, i32 %m) {
+; CHECK-LABEL: define internal i32 @foo2(
+; CHECK-SAME: ptr [[X:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[N]], 0
+; CHECK-NEXT:    br i1 [[CMP]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+; CHECK:       [[COND_TRUE]]:
+; CHECK-NEXT:    [[VAL:%.*]] = load i32, ptr [[X]], align 4
+; CHECK-NEXT:    br label %[[RETURN:.*]]
+; CHECK:       [[COND_FALSE]]:
+; CHECK-NEXT:    [[VAL2:%.*]] = load i32, ptr [[X]], align 4
+; CHECK-NEXT:    [[SUBVAL:%.*]] = sub i32 [[N]], 1
+; CHECK-NEXT:    [[CALLRET:%.*]] = call i32 @foo2(ptr [[X]], i32 [[SUBVAL]], i32 [[VAL2]])
+; CHECK-NEXT:    [[SUBVAL2:%.*]] = sub i32 [[N]], 2
+; CHECK-NEXT:    [[CALLRET2:%.*]] = call i32 @foo2(ptr [[X]], i32 [[SUBVAL2]], i32 [[M]])
+; CHECK-NEXT:    [[CMP2:%.*]] = add i32 [[CALLRET]], [[CALLRET2]]
+; CHECK-NEXT:    br label %[[RETURN]]
+; CHECK:       [[COND_NEXT:.*]]:
+; CHECK-NEXT:    br label %[[RETURN]]
+; CHECK:       [[RETURN]]:
+; CHECK-NEXT:    [[RETVAL_0:%.*]] = phi i32 [ [[VAL]], %[[COND_TRUE]] ], [ [[CMP2]], %[[COND_FALSE]] ], [ poison, %[[COND_NEXT]] ]
+; CHECK-NEXT:    ret i32 [[RETVAL_0]]
+;
+entry:
+  %cmp = icmp ne i32 %n, 0
+  br i1 %cmp, label %cond_true, label %cond_false
+
+cond_true:                                        ; preds = %entry
+  %val = load i32, ptr %x, align 4
+  br label %return
+
+cond_false:                                       ; preds = %entry
+  %val2 = load i32, ptr %x, align 4
+  %subval = sub i32 %n, 1
+  %callret = call i32 @foo2(ptr %x, i32 %subval, i32 %val2)
+  %subval2 = sub i32 %n, 2
+  %callret2 = call i32 @foo2(ptr %x, i32 %subval2, i32 %m)
+  %cmp2 = add i32 %callret, %callret2
+  br label %return
+
+cond_next:                                        ; No predecessors!
+  br label %return
+
+return:                                           ; preds = %cond_next, %cond_false, %cond_true
+  %retval.0 = phi i32 [ %val, %cond_true ], [ %cmp2, %cond_false ], [ poison, %cond_next ]
+  ret i32 %retval.0
+}
+
+define i32 @bar2(ptr align(4) dereferenceable(4) %x, i32 %n, i32 %m) {
+; CHECK-LABEL: define i32 @bar2(
+; CHECK-SAME: ptr align 4 dereferenceable(4) [[X:%.*]], i32 [[N:%.*]], i32 [[M:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[GEPVAL:%.*]] = getelementptr ptr, ptr [[X]], i32 4
+; CHECK-NEXT:    [[CALLRET3:%.*]] = call i32 @foo2(ptr [[GEPVAL]], i32 [[N]], i32 [[M]])
+; CHECK-NEXT:    br label %[[RETURN:.*]]
+; CHECK:       [[RETURN]]:
+; CHECK-NEXT:    ret i32 [[CALLRET3]]
+;
+entry:
+  %gepval = getelementptr ptr, ptr %x, i32 4
+  %callret3 = call i32 @foo2(ptr %gepval, i32 %n, i32 %m)
+  br label %return
+
+return:                                           ; preds = %entry
+  ret i32 %callret3
+}
diff --git a/llvm/test/Transforms/ArgumentPromotion/recursion-same-arg-twice-pr1259.ll b/llvm/test/Transforms/ArgumentPromotion/recursion/recursion-same-arg-twice-pr1259.ll
similarity index 100%
rename from llvm/test/Transforms/ArgumentPromotion/recursion-same-arg-twice-pr1259.ll
rename to llvm/test/Transforms/ArgumentPromotion/recursion/recursion-same-arg-twice-pr1259.ll

>From 78f9df5a2b6a2f81f8a144b46268e9b7cc6de152 Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedant.paranjape at amd.com>
Date: Mon, 8 Jul 2024 05:50:19 +0000
Subject: [PATCH 15/19] Added testcase for multiple elements

---
 .../aggregate-promote-recursive.ll            | 40 +++++++++++++++++++
 1 file changed, 40 insertions(+)
 create mode 100644 llvm/test/Transforms/ArgumentPromotion/aggregate-promote-recursive.ll

diff --git a/llvm/test/Transforms/ArgumentPromotion/aggregate-promote-recursive.ll b/llvm/test/Transforms/ArgumentPromotion/aggregate-promote-recursive.ll
new file mode 100644
index 0000000000000..011ebe4eee76e
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/aggregate-promote-recursive.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt < %s -passes=argpromotion -S | FileCheck %s
+
+%T = type { i32, i32, i32, i32 }
+@G = constant %T { i32 0, i32 0, i32 17, i32 25 }
+
+define internal i32 @test(ptr %p) {
+; CHECK-LABEL: define {{[^@]+}}@test
+; CHECK-SAME: (i32 [[P_8_VAL:%.*]], i32 [[P_12_VAL:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[V:%.*]] = add i32 [[P_12_VAL]], [[P_8_VAL]]
+; CHECK-NEXT:    [[RET:%.*]] = call i32 @test(i32 [[P_8_VAL]], i32 [[P_12_VAL]])
+; CHECK-NEXT:    [[ARET:%.*]] = add i32 [[V]], [[RET]]
+; CHECK-NEXT:    ret i32 [[ARET]]
+;
+entry:
+  %a.gep = getelementptr %T, ptr %p, i64 0, i32 3
+  %b.gep = getelementptr %T, ptr %p, i64 0, i32 2
+  %a = load i32, ptr %a.gep
+  %b = load i32, ptr %b.gep
+  %v = add i32 %a, %b
+  %ret = call i32 @test(ptr %p)
+  %aret = add i32 %v, %ret
+  ret i32 %aret
+}
+
+define i32 @caller() {
+; CHECK-LABEL: define {{[^@]+}}@caller() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr @G, i64 8
+; CHECK-NEXT:    [[G_VAL:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr @G, i64 12
+; CHECK-NEXT:    [[G_VAL1:%.*]] = load i32, ptr [[TMP1]], align 4
+; CHECK-NEXT:    [[V:%.*]] = call i32 @test(i32 [[G_VAL]], i32 [[G_VAL1]])
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %v = call i32 @test(ptr @G)
+  ret i32 %v
+}

>From d0968a57a3da08121c13070028790e657f729a80 Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedant.paranjape at amd.com>
Date: Thu, 11 Jul 2024 08:38:41 +0000
Subject: [PATCH 16/19] Removed IsSelfRecursive from findArgParts

---
 llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 2af1114dd0f92..e93754f61e083 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -474,7 +474,6 @@ static bool allCallersPassValidPointerForArgument(
 /// parts it can be promoted into.
 static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
                          unsigned MaxElements, bool IsRecursive,
-                         bool isSelfRecursive,
                          SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) {
   // Quick exit for unused arguments
   if (Arg->use_empty())
@@ -854,8 +853,7 @@ static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,
     // If we can promote the pointer to its value.
     SmallVector<OffsetAndArgPart, 4> ArgParts;
 
-    if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, IsSelfRecursive,
-                     ArgParts)) {
+    if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {
       SmallVector<Type *, 4> Types;
       for (const auto &Pair : ArgParts)
         Types.push_back(Pair.second.Ty);

>From 213c13400d6b5cc1842aff01ee9cfcca6cb706a5 Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedant.paranjape at amd.com>
Date: Thu, 11 Jul 2024 08:45:47 +0000
Subject: [PATCH 17/19] Use SmallPtrSetImpl in a helper function

---
 llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index e93754f61e083..c56f6a558d930 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -424,7 +424,7 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
 /// Return true if we can prove that all callees pass in a valid pointer for the
 /// specified function argument.
 static bool allCallersPassValidPointerForArgument(
-    Argument *Arg, SmallPtrSet<CallBase *, 4> &RecursiveCalls,
+    Argument *Arg, SmallPtrSetImpl<CallBase *, 4> &RecursiveCalls,
     Align NeededAlign, uint64_t NeededDerefBytes) {
   Function *Callee = Arg->getParent();
   const DataLayout &DL = Callee->getDataLayout();

>From 881c4cde5181794c3ebb1f884ccc85c0a3410dab Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedant.paranjape at amd.com>
Date: Thu, 11 Jul 2024 09:20:53 +0000
Subject: [PATCH 18/19] minor fix to SmallPtrSetImpl

---
 llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index c56f6a558d930..9e3bc33b11dc5 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -424,7 +424,7 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
 /// Return true if we can prove that all callees pass in a valid pointer for the
 /// specified function argument.
 static bool allCallersPassValidPointerForArgument(
-    Argument *Arg, SmallPtrSetImpl<CallBase *, 4> &RecursiveCalls,
+    Argument *Arg, SmallPtrSetImpl<CallBase *> &RecursiveCalls,
     Align NeededAlign, uint64_t NeededDerefBytes) {
   Function *Callee = Arg->getParent();
   const DataLayout &DL = Callee->getDataLayout();

>From 5a053f91ea4ed2a59a4b361bf904c4ccc0ed901e Mon Sep 17 00:00:00 2001
From: Vedant Paranjape <vedant.paranjape at amd.com>
Date: Thu, 11 Jul 2024 13:06:23 +0000
Subject: [PATCH 19/19] Remove use of stripAndAccumulateConstantOffsets

---
 llvm/lib/Transforms/IPO/ArgumentPromotion.cpp      | 14 +-------------
 .../{ => recursion}/aggregate-promote-recursive.ll |  0
 2 files changed, 1 insertion(+), 13 deletions(-)
 rename llvm/test/Transforms/ArgumentPromotion/{ => recursion}/aggregate-promote-recursive.ll (100%)

diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 9e3bc33b11dc5..c5b909c533fb3 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -642,19 +642,7 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
     auto *CB = dyn_cast<CallBase>(V);
     Value *PtrArg = cast<Value>(U);
     if (CB && PtrArg && CB->getCalledFunction() == CB->getFunction()) {
-      Type *PtrTy = PtrArg->getType();
-      APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
-      PtrArg = PtrArg->stripAndAccumulateConstantOffsets(
-          DL, Offset,
-          /* AllowNonInbounds= */ true);
-      if (PtrArg != Arg)
-        return false;
-
-      if (Offset.getSignificantBits() >= 64)
-        return false;
-
-      int64_t Off = Offset.getSExtValue();
-      if (Off) {
+      if (PtrArg != Arg) {
         LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "
                           << "pointer offset is not equal to zero\n");
         return false;
diff --git a/llvm/test/Transforms/ArgumentPromotion/aggregate-promote-recursive.ll b/llvm/test/Transforms/ArgumentPromotion/recursion/aggregate-promote-recursive.ll
similarity index 100%
rename from llvm/test/Transforms/ArgumentPromotion/aggregate-promote-recursive.ll
rename to llvm/test/Transforms/ArgumentPromotion/recursion/aggregate-promote-recursive.ll



More information about the llvm-commits mailing list