[llvm] [ArgPromotion] Promote allocas in calling functions (PR #110248)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 27 04:44:34 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Hari Limaye (hazzlim)
<details>
<summary>Changes</summary>
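Promote allocas in calling functions to SSA values (via `PromoteMemToReg`) where argument promotion exposes the opportunity: when a promoted pointer argument is backed by an alloca in the caller, and that alloca becomes promotable once the rewritten call no longer takes its address.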
---
Patch is 22.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/110248.diff
6 Files Affected:
- (modified) llvm/lib/Transforms/IPO/ArgumentPromotion.cpp (+17-2)
- (modified) llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll (+1-4)
- (modified) llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll (+42-48)
- (modified) llvm/test/Transforms/ArgumentPromotion/actual-arguments.ll (+1-4)
- (modified) llvm/test/Transforms/ArgumentPromotion/control-flow2.ll (+1-4)
- (added) llvm/test/Transforms/ArgumentPromotion/promote-allocas-in-callers.ll (+110)
``````````diff
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 90e8c39e5a90df..9ca1608bdfbad6 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -227,6 +227,7 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
assert(CB.getCalledFunction() == F);
const AttributeList &CallPAL = CB.getAttributes();
IRBuilder<NoFolder> IRB(&CB);
+ SmallPtrSet<AllocaInst *, 4> Allocas;
// Loop over the operands, inserting GEP and loads in the caller as
// appropriate.
@@ -264,6 +265,9 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
Args.push_back(LI);
ArgAttrVec.push_back(AttributeSet());
}
+
+ if (AllocaInst *Alloca = dyn_cast<AllocaInst>(V->stripPointerCasts()))
+ Allocas.insert(Alloca);
} else {
assert(ArgsToPromote.count(&*I) && I->use_empty());
DeadArgs.emplace_back(AI->get());
@@ -305,9 +309,20 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
NewCS->takeName(&CB);
}
- // Finally, remove the old call from the program, reducing the use-count of
- // F.
+ // Remove the old call from the program, reducing the use-count of F.
CB.eraseFromParent();
+
+ // See if there are any allocas that can now be promoted in the caller.
+ Allocas.remove_if([](auto *AI) { return !isAllocaPromotable(AI); });
+ if (!Allocas.empty()) {
+ Function *Caller = (*Allocas.begin())->getFunction();
+ auto &DT = FAM.getResult<DominatorTreeAnalysis>(*Caller);
+ auto &AC = FAM.getResult<AssumptionAnalysis>(*Caller);
+ SmallVector<AllocaInst *, 4> AllocasToPromote;
+ append_range(AllocasToPromote, Allocas);
+
+ PromoteMemToReg(AllocasToPromote, DT, &AC);
+ }
}
RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadArgs);
diff --git a/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
index daa4e1fb757d21..7d78fc8d900ee4 100644
--- a/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
@@ -26,10 +26,7 @@ define i32 @f(i32 %x) {
; CHECK-LABEL: define {{[^@]+}}@f
; CHECK-SAME: (i32 [[X:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
-; CHECK-NEXT: [[X_ADDR_VAL:%.*]] = load i32, ptr [[X_ADDR]], align 4
-; CHECK-NEXT: [[TEMP1:%.*]] = call i32 @deref(i32 [[X_ADDR_VAL]])
+; CHECK-NEXT: [[TEMP1:%.*]] = call i32 @deref(i32 [[X]])
; CHECK-NEXT: ret i32 [[TEMP1]]
;
entry:
diff --git a/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll
index 3373c09d5f91aa..00c07cc50a7ab9 100644
--- a/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll
@@ -8,9 +8,9 @@ target triple = "x86_64-unknown-linux-gnu"
; This should promote
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg, ptr readonly %arg1) #0 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -21,12 +21,12 @@ bb:
define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg) #0 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -45,9 +45,9 @@ bb:
; This should promote
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -58,12 +58,12 @@ bb:
define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg) #1 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -82,9 +82,9 @@ bb:
; This should promote
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -95,12 +95,12 @@ bb:
define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg) #0 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -119,9 +119,9 @@ bb:
; This should promote
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg, ptr readonly %arg1) #0 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -132,12 +132,12 @@ bb:
define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg) #1 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -156,10 +156,10 @@ bb:
; This should not promote
define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]]
-; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]]
+; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
+; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -170,7 +170,7 @@ bb:
define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg) #2 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
@@ -193,10 +193,10 @@ bb:
; This should not promote
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg, ptr readonly %arg1) #2 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]]
-; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]]
+; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
+; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -207,7 +207,7 @@ bb:
define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg) #1 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
@@ -230,9 +230,9 @@ bb:
; This should promote
define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg, ptr readonly %arg1) #3 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR3:[0-9]+]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -243,12 +243,12 @@ bb:
define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg) #4 {
; CHECK-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR3]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
; CHECK-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -267,9 +267,9 @@ bb:
; This should promote
define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg, ptr readonly %arg1) #4 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR3]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -280,12 +280,12 @@ bb:
define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg) #3 {
; CHECK-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR3]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
; CHECK-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -304,8 +304,8 @@ bb:
; If the arguments are scalar, its ok to promote.
define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %X, ptr %Y) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
-; CHECK-NEXT: [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
+; CHECK-SAME: (i32 [[X_0_VAL:%.*]], i32 [[Y_0_VAL:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[C:%.*]] = add i32 [[X_0_VAL]], [[Y_0_VAL]]
; CHECK-NEXT: ret i32 [[C]]
;
%A = load i32, ptr %X
@@ -316,12 +316,9 @@ define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal51
define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %B) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[B:%.*]])
-; CHECK-NEXT: [[A:%.*]] = alloca i32
-; CHECK-NEXT: store i32 1, ptr [[A]]
-; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A]]
-; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]]
-; CHECK-NEXT: [[C:%.*]] = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
+; CHECK-SAME: (ptr [[B:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]], align 4
+; CHECK-NEXT: [[C:%.*]] = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32 1, i32 [[B_VAL]])
; CHECK-NEXT: ret i32 [[C]]
;
%A = alloca i32
@@ -333,8 +330,8 @@ define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr
; If the arguments are scalar, its ok to promote.
define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %X, ptr %Y) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
-; CHECK-NEXT: [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
+; CHECK-SAME: (i32 [[X_0_VAL:%.*]], i32 [[Y_0_VAL:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[C:%.*]] = add i32 [[X_0_VAL]], [[Y_0_VAL]]
; CHECK-NEXT: ret i32 [[C]]
;
%A = load i32, ptr %X
@@ -345,12 +342,9 @@ define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal25
define i32 @scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %B) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (ptr [[B:%.*]])
-; CHECK-NEXT: [[A:%.*]] = alloca i32
-; CHECK-NEXT: store i32 1, ptr [[A]]
-; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A]]
-; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]]
-; CHECK-NEXT: [[C:%.*]] = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
+; CHECK-SAME: (ptr [[B:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]], align 4
+; CHECK-NEXT: [[C:%.*]] = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32 1, i32 [[B_VAL]])
; CHECK-NEXT: ret i32 [[C]]
;
%A = alloca i32
diff --git a/llvm/test/Transforms/ArgumentPromotion/actual-arguments.ll b/llvm/test/Transforms/ArgumentPromotion/actual-arguments.ll
index ca757a165fa4be..4d11ed125930cc 100644
--- a/llvm/test/Transforms/ArgumentPromotion/actual-arguments.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/actual-arguments.ll
@@ -207,10 +207,7 @@ define i32 @caller_safe_args_2(i64 %n, ptr %p) {
; CHECK-LABEL: define {{[^@]+}}@caller_safe_args_2
; CHECK-SAME: (i64 [[N:%.*]], ptr [[P:%.*]]) {
; CHECK-NEXT: call void @memset(ptr [[P]], i64 0, i64 [[N]])
-; CHECK-NEXT: [[CALLER_C:%.*]] = alloca i32, align 4
-; CHECK-NEXT: store i32 5, ptr [[CALLER_C]], align 4
-; CHECK-NEXT: [[CALLER_C_VAL:%.*]] = load i32, ptr [[CALLER_C]], align 4
-; CHECK-NEXT: [[RES:%.*]] = call i32 @test_can_promote_2(ptr [[P]], i32 [[CALLER_C_VAL]])
+; CHECK-NEXT: [[RES:%.*]] = call i32 @test_can_promote_2(ptr [[P]], i32 5)
; CHECK-NEXT: ret i32 [[RES]]
;
call void @memset(ptr %p, i64 0, i64 %n)
diff --git a/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll b/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll
index 8df89033c0d8da..e62ecd1eba04bf 100644
--- a/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/control-flow2.ll
@@ -24,10 +24,7 @@ F: ; preds = %0
define i32 @foo() {
; CHECK-LABEL: define {{[^@]+}}@foo() {
-; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
-; CHECK-NEXT: store i32 17, ptr [[A]], align 4
-; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A]], align 4
-; CHECK-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32 [[A_VAL]])
+; CHECK-NEXT: [[X:%.*]] = call i32 @callee(i1 false, i32 17)
; CHECK-NEXT: ret i32 [[X]]
;
%A = alloca i32 ; <ptr> [#uses=2]
diff --git a/llvm/test/Transforms/ArgumentPromotion/promote-allocas-in-callers.ll b/llvm/test/Transforms/ArgumentPromotion/promote-allocas-in-callers.ll
new file mode 100644
index 00000000000000..fdb21f2267da63
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/promote-allocas-in-callers.ll
@@ -0,0 +1,110 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=argpromotion < %s | FileCheck %s
+
+; Tests where we do not perform promotion of alloca in caller:
+
+define internal i32 @callee_nopromote_1(ptr nocapture readonly %p) {
+; CHECK-LABEL: define internal i32 @callee_nopromote_1(
+; CHECK-SAME: i32 [[P_0_VAL:%.*]]) {
+; CHECK-NEXT: [[SUM:%.*]] = add i32 [[P_0_VAL]], [[P_0_VAL]]
+; CHECK-NEXT: ret i32 [[SUM]]
+;
+ %p.val = load i32, ptr %p
+ %sum = add i32 %p.val, %p.val
+ ret i32 %sum
+}
+
+define i32 @caller_nopromote_1() {
+; CHECK-LABEL: define i32 @caller_nopromote_1() {
+; CHECK-NEXT: [[P:...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/110248
More information about the llvm-commits mailing list