[llvm] [ArgPromotion] Promote allocas in calling functions (PR #110248)

via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 27 04:44:34 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Hari Limaye (hazzlim)

<details>
<summary>Changes</summary>

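Promote allocas in calling functions to SSA values where argument promotion exposes the opportunity. Once a pointer argument has been promoted, the call site passes the loaded value instead of the pointer, so an alloca in the caller that was previously passed to the callee may become promotable; any such alloca is now promoted with `PromoteMemToReg`.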

---

The patch is 22.95 KiB and is truncated to 20.00 KiB below. The full version is available at: https://github.com/llvm/llvm-project/pull/110248.diff


6 Files Affected:

- (modified) llvm/lib/Transforms/IPO/ArgumentPromotion.cpp (+17-2) 
- (modified) llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll (+1-4) 
- (modified) llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll (+42-48) 
- (modified) llvm/test/Transforms/ArgumentPromotion/actual-arguments.ll (+1-4) 
- (modified) llvm/test/Transforms/ArgumentPromotion/control-flow2.ll (+1-4) 
- (added) llvm/test/Transforms/ArgumentPromotion/promote-allocas-in-callers.ll (+110) 


``````````diff
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 90e8c39e5a90df..9ca1608bdfbad6 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -227,6 +227,7 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
     assert(CB.getCalledFunction() == F);
     const AttributeList &CallPAL = CB.getAttributes();
     IRBuilder<NoFolder> IRB(&CB);
+    SmallPtrSet<AllocaInst *, 4> Allocas;
 
     // Loop over the operands, inserting GEP and loads in the caller as
     // appropriate.
@@ -264,6 +265,9 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
           Args.push_back(LI);
           ArgAttrVec.push_back(AttributeSet());
         }
+
+        if (AllocaInst *Alloca = dyn_cast<AllocaInst>(V->stripPointerCasts()))
+          Allocas.insert(Alloca);
       } else {
         assert(ArgsToPromote.count(&*I) && I->use_empty());
         DeadArgs.emplace_back(AI->get());
@@ -305,9 +309,20 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
       NewCS->takeName(&CB);
     }
 
-    // Finally, remove the old call from the program, reducing the use-count of
-    // F.
+    // Remove the old call from the program, reducing the use-count of F.
     CB.eraseFromParent();
+
+    // See if there are any allocas that can now be promoted in the caller.
+    Allocas.remove_if([](auto *AI) { return !isAllocaPromotable(AI); });
+    if (!Allocas.empty()) {
+      Function *Caller = (*Allocas.begin())->getFunction();
+      auto &DT = FAM.getResult<DominatorTreeAnalysis>(*Caller);
+      auto &AC = FAM.getResult<AssumptionAnalysis>(*Caller);
+      SmallVector<AllocaInst *, 4> AllocasToPromote;
+      append_range(AllocasToPromote, Allocas);
+
+      PromoteMemToReg(AllocasToPromote, DT, &AC);
+    }
   }
 
   RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadArgs);
diff --git a/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
index daa4e1fb757d21..7d78fc8d900ee4 100644
--- a/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
@@ -26,10 +26,7 @@ define i32 @f(i32 %x) {
 ; CHECK-LABEL: define {{[^@]+}}@f
 ; CHECK-SAME: (i32 [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
-; CHECK-NEXT:    [[X_ADDR_VAL:%.*]] = load i32, ptr [[X_ADDR]], align 4
-; CHECK-NEXT:    [[TEMP1:%.*]] = call i32 @deref(i32 [[X_ADDR_VAL]])
+; CHECK-NEXT:    [[TEMP1:%.*]] = call i32 @deref(i32 [[X]])
 ; CHECK-NEXT:    ret i32 [[TEMP1]]
 ;
 entry:
diff --git a/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll
index 3373c09d5f91aa..00c07cc50a7ab9 100644
--- a/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll
@@ -8,9 +8,9 @@ target triple = "x86_64-unknown-linux-gnu"
 ; This should promote
 define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg, ptr readonly %arg1) #0 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT:    store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -21,12 +21,12 @@ bb:
 
 define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg) #0 {
 ; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
 ; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -45,9 +45,9 @@ bb:
 ; This should promote
 define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT:    store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -58,12 +58,12 @@ bb:
 
 define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
 ; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -82,9 +82,9 @@ bb:
 ; This should promote
 define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT:    store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -95,12 +95,12 @@ bb:
 
 define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg) #0 {
 ; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
 ; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -119,9 +119,9 @@ bb:
 ; This should promote
 define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg, ptr readonly %arg1) #0 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT:    store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -132,12 +132,12 @@ bb:
 
 define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
 ; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -156,10 +156,10 @@ bb:
 ; This should not promote
 define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]]
-; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]]
+; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
+; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -170,7 +170,7 @@ bb:
 
 define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg) #2 {
 ; CHECK-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
@@ -193,10 +193,10 @@ bb:
 ; This should not promote
 define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg, ptr readonly %arg1) #2 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]]) #[[ATTR2]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]]
-; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]]
+; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
+; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -207,7 +207,7 @@ bb:
 
 define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
@@ -230,9 +230,9 @@ bb:
 ; This should promote
 define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg, ptr readonly %arg1) #3 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR3:[0-9]+]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT:    store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -243,12 +243,12 @@ bb:
 
 define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg) #4 {
 ; CHECK-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR3]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
 ; CHECK-NEXT:    call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -267,9 +267,9 @@ bb:
 ; This should promote
 define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg, ptr readonly %arg1) #4 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR3]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT:    store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -280,12 +280,12 @@ bb:
 
 define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg) #3 {
 ; CHECK-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR3]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
 ; CHECK-NEXT:    call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -304,8 +304,8 @@ bb:
 ; If the arguments are scalar, its ok to promote.
 define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %X, ptr %Y) #2 {
 ; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
-; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
+; CHECK-SAME: (i32 [[X_0_VAL:%.*]], i32 [[Y_0_VAL:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_0_VAL]], [[Y_0_VAL]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
   %A = load i32, ptr %X
@@ -316,12 +316,9 @@ define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal51
 
 define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %B) #2 {
 ; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[B:%.*]])
-; CHECK-NEXT:    [[A:%.*]] = alloca i32
-; CHECK-NEXT:    store i32 1, ptr [[A]]
-; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, ptr [[A]]
-; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, ptr [[B]]
-; CHECK-NEXT:    [[C:%.*]] = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
+; CHECK-SAME: (ptr [[B:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, ptr [[B]], align 4
+; CHECK-NEXT:    [[C:%.*]] = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32 1, i32 [[B_VAL]])
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
   %A = alloca i32
@@ -333,8 +330,8 @@ define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr
 ; If the arguments are scalar, its ok to promote.
 define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %X, ptr %Y) #2 {
 ; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
-; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
+; CHECK-SAME: (i32 [[X_0_VAL:%.*]], i32 [[Y_0_VAL:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_0_VAL]], [[Y_0_VAL]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
   %A = load i32, ptr %X
@@ -345,12 +342,9 @@ define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal25
 
 define i32 @scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %B) #2 {
 ; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (ptr [[B:%.*]])
-; CHECK-NEXT:    [[A:%.*]] = alloca i32
-; CHECK-NEXT:    store i32 1, ptr [[A]]
-; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, ptr [[A]]
-; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, ptr [[B]]
-; CHECK-NEXT:    [[C:%.*]] = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
+; CHECK-SAME: (ptr [[B:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, ptr [[B]], align 4
+; CHECK-NEXT:    [[C:%.*]] = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32 1, i32 [[B_VAL]])
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
   %A = alloca i32
diff --git a/llvm/test/Transforms/ArgumentPromotion/actual-arguments.ll b/llvm/test/Transforms/ArgumentPromotion/actual-arguments.ll
index ca757a165fa4be..4d11ed125930cc 100644
--- a/llvm/test/Transforms/ArgumentPromotion/actual-arguments.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/actual-arguments.ll
@@ -207,10 +207,7 @@ define i32 @caller_safe_args_2(i64 %n, ptr %p) {
 ; CHECK-LABEL: define {{[^@]+}}@caller_safe_args_2
 ; CHECK-SAME: (i64 [[N:%.*]], ptr [[P:%.*]]) {
 ; CHECK-NEXT:    call void @memset(ptr [[P]], i64 0, i64 [[N]])
-; CHECK-NEXT:    [[CALLER_C:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 5, ptr [[CALLER_C]], align 4
-; CHECK-NEXT:    [[CALLER_C_VAL:%.*]] = load i32, ptr [[CALLER_C]], align 4
-; CHECK-NEXT:    [[RES:%.*]] = call i32 @test_can_promote_2(ptr [[P]], i32 [[CALLER_C_VAL]])
+; CHECK-NEXT:    [[RES:%.*]] = call i32 @test_can_promote_2(ptr [[P]], i32 5)
 ; CHECK-NEXT:    ret i32 [[RES]]
 ;
   call void @memset(ptr %p, i64 0, i64 %n)
diff --git a/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll b/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll
index 8df89033c0d8da..e62ecd1eba04bf 100644
--- a/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll
@@ -24,10 +24,7 @@ F:              ; preds = %0
 
 define i32 @foo() {
 ; CHECK-LABEL: define {{[^@]+}}@foo() {
-; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 17, ptr [[A]], align 4
-; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, ptr [[A]], align 4
-; CHECK-NEXT:    [[X:%.*]] = call i32 @callee(i1 false, i32 [[A_VAL]])
+; CHECK-NEXT:    [[X:%.*]] = call i32 @callee(i1 false, i32 17)
 ; CHECK-NEXT:    ret i32 [[X]]
 ;
   %A = alloca i32         ; <ptr> [#uses=2]
diff --git a/llvm/test/Transforms/ArgumentPromotion/promote-allocas-in-callers.ll b/llvm/test/Transforms/ArgumentPromotion/promote-allocas-in-callers.ll
new file mode 100644
index 00000000000000..fdb21f2267da63
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/promote-allocas-in-callers.ll
@@ -0,0 +1,110 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
+
+; Tests where we do not perform promotion of alloca in caller:
+
+define internal i32 @callee_nopromote_1(ptr nocapture readonly %p) {
+; CHECK-LABEL: define internal i32 @callee_nopromote_1(
+; CHECK-SAME: i32 [[P_0_VAL:%.*]]) {
+; CHECK-NEXT:    [[SUM:%.*]] = add i32 [[P_0_VAL]], [[P_0_VAL]]
+; CHECK-NEXT:    ret i32 [[SUM]]
+;
+  %p.val = load i32, ptr %p
+  %sum = add i32 %p.val, %p.val
+  ret i32 %sum
+}
+
+define i32 @caller_nopromote_1() {
+; CHECK-LABEL: define i32 @caller_nopromote_1() {
+; CHECK-NEXT:    [[P:...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/110248


More information about the llvm-commits mailing list