[llvm] [ArgPromotion] Infer parameter attributes on functions (PR #110245)

via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 27 04:19:55 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Hari Limaye (hazzlim)

<details>
<summary>Changes</summary>

Add nocapture and readonly parameter attributes to function Arguments if we have determined that the Argument's only uses in the function are load instructions and recursive calls with the same Argument position.

This improves precision of alias analysis using the actual arguments of calling functions in cases where these attributes have not already been inferred.

---

Patch is 20.55 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/110245.diff


5 Files Affected:

- (modified) llvm/lib/Transforms/IPO/ArgumentPromotion.cpp (+4) 
- (modified) llvm/test/Transforms/ArgumentPromotion/X86/attributes.ll (+8-8) 
- (modified) llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll (+46-46) 
- (modified) llvm/test/Transforms/ArgumentPromotion/propagate-remove-dead-args.ll (+6-8) 
- (modified) llvm/test/Transforms/ArgumentPromotion/recursion/recursion-mixed-calls.ll (+1-1) 


``````````diff
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 90e8c39e5a90df..fb667bdda9a250 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -743,6 +743,10 @@ static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,
   // Okay, now we know that the argument is only used by load instructions, and
   // it is safe to unconditionally perform all of them.
 
+  // We can infer `nocapture readonly` as the argument is only used by loads.
+  Arg->getParent()->addParamAttr(Arg->getArgNo(), Attribute::NoCapture);
+  Arg->getParent()->addParamAttr(Arg->getArgNo(), Attribute::ReadOnly);
+
   // If we can determine that no call to the Function modifies the memory region
   // accessed through Arg, through alias analysis using actual arguments in the
   // callers, we know that it is guaranteed to be safe to promote the argument.
diff --git a/llvm/test/Transforms/ArgumentPromotion/X86/attributes.ll b/llvm/test/Transforms/ArgumentPromotion/X86/attributes.ll
index a64b7346d83618..ba47316e56de16 100644
--- a/llvm/test/Transforms/ArgumentPromotion/X86/attributes.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/X86/attributes.ll
@@ -7,10 +7,10 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define internal fastcc void @no_promote_avx2(ptr %arg, ptr readonly %arg1) #0 {
 ; CHECK-LABEL: define {{[^@]+}}@no_promote_avx2
-; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], ptr nocapture readonly [[ARG1:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP:%.*]] = load <4 x i64>, ptr [[ARG1]]
-; CHECK-NEXT:    store <4 x i64> [[TMP]], ptr [[ARG]]
+; CHECK-NEXT:    [[TMP:%.*]] = load <4 x i64>, ptr [[ARG1]], align 32
+; CHECK-NEXT:    store <4 x i64> [[TMP]], ptr [[ARG]], align 32
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -21,7 +21,7 @@ bb:
 
 define void @no_promote(ptr %arg) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@no_promote
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <4 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <4 x i64>, align 32
@@ -43,9 +43,9 @@ bb:
 
 define internal fastcc void @promote_avx2(ptr %arg, ptr readonly %arg1) #0 {
 ; CHECK-LABEL: define {{[^@]+}}@promote_avx2
-; CHECK-SAME: (ptr [[ARG:%.*]], <4 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <4 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    store <4 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT:    store <4 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 32
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -56,12 +56,12 @@ bb:
 
 define void @promote(ptr %arg) #0 {
 ; CHECK-LABEL: define {{[^@]+}}@promote
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <4 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <4 x i64>, align 32
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <4 x i64>, ptr [[TMP]]
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <4 x i64>, ptr [[TMP]], align 32
 ; CHECK-NEXT:    call fastcc void @promote_avx2(ptr [[TMP2]], <4 x i64> [[TMP_VAL]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32
 ; CHECK-NEXT:    store <4 x i64> [[TMP4]], ptr [[ARG]], align 2
diff --git a/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll
index 3373c09d5f91aa..de868193f5f146 100644
--- a/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll
@@ -8,9 +8,9 @@ target triple = "x86_64-unknown-linux-gnu"
 ; This should promote
 define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg, ptr readonly %arg1) #0 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT:    store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -21,12 +21,12 @@ bb:
 
 define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg) #0 {
 ; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
 ; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -45,9 +45,9 @@ bb:
 ; This should promote
 define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT:    store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -58,12 +58,12 @@ bb:
 
 define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
 ; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -82,9 +82,9 @@ bb:
 ; This should promote
 define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT:    store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -95,12 +95,12 @@ bb:
 
 define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg) #0 {
 ; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
 ; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -119,9 +119,9 @@ bb:
 ; This should promote
 define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg, ptr readonly %arg1) #0 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT:    store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -132,12 +132,12 @@ bb:
 
 define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
 ; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -156,10 +156,10 @@ bb:
 ; This should not promote
 define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], ptr nocapture readonly [[ARG1:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]]
-; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]]
+; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
+; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -170,7 +170,7 @@ bb:
 
 define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg) #2 {
 ; CHECK-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
@@ -193,10 +193,10 @@ bb:
 ; This should not promote
 define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg, ptr readonly %arg1) #2 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], ptr nocapture readonly [[ARG1:%.*]]) #[[ATTR2]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]]
-; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]]
+; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
+; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -207,7 +207,7 @@ bb:
 
 define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
@@ -230,9 +230,9 @@ bb:
 ; This should promote
 define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg, ptr readonly %arg1) #3 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR3:[0-9]+]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT:    store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -243,12 +243,12 @@ bb:
 
 define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg) #4 {
 ; CHECK-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR3]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
 ; CHECK-NEXT:    call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -267,9 +267,9 @@ bb:
 ; This should promote
 define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg, ptr readonly %arg1) #4 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR3]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT:    store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -280,12 +280,12 @@ bb:
 
 define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg) #3 {
 ; CHECK-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR3]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
 ; CHECK-NEXT:    call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -304,8 +304,8 @@ bb:
 ; If the arguments are scalar, its ok to promote.
 define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %X, ptr %Y) #2 {
 ; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
-; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
+; CHECK-SAME: (i32 [[X_0_VAL:%.*]], i32 [[Y_0_VAL:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_0_VAL]], [[Y_0_VAL]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
   %A = load i32, ptr %X
@@ -316,11 +316,11 @@ define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal51
 
 define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %B) #2 {
 ; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[B:%.*]])
-; CHECK-NEXT:    [[A:%.*]] = alloca i32
-; CHECK-NEXT:    store i32 1, ptr [[A]]
-; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, ptr [[A]]
-; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, ptr [[B]]
+; CHECK-SAME: (ptr [[B:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 1, ptr [[A]], align 4
+; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, ptr [[B]], align 4
 ; CHECK-NEXT:    [[C:%.*]] = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -333,8 +333,8 @@ define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr
 ; If the arguments are scalar, its ok to promote.
 define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %X, ptr %Y) #2 {
 ; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
-; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
+; CHECK-SAME: (i32 [[X_0_VAL:%.*]], i32 [[Y_0_VAL:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_0_VAL]], [[Y_0_VAL]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
   %A = load i32, ptr %X
@@ -345,11 +345,11 @@ define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal25
 
 define i32 @scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %B) #2 {
 ; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (ptr [[B:%.*]])
-; CHECK-NEXT:    [[A:%.*]] = alloca i32
-; CHECK-NEXT:    store i32 1, ptr [[A]]
-; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, ptr [[A]]
-; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, ptr [[B]]
+; CHECK-SAME: (ptr [[B:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 1, ptr [[A]], align 4
+; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, ptr [[B]], align 4
 ; CHECK-NEXT:    [[C:%.*]] = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
diff --git a/llvm/test/Transforms/ArgumentPromotion/propagate-remove-dead-args.ll b/llvm/test/Transforms/ArgumentPromotion/propagate-remove-dead-args.ll
index 87a14533cfda26..58b7644af9950c 100644
--- a/llvm/test/Transforms/ArgumentPromotion/propagate-remove-dead-args.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/propagate-remove-dead-args.ll
@@ -18,14 +18,11 @@ entry:
 
 define internal void @parent(ptr %this, ptr %p1, ptr %p2) {
 ; CHECK-LABEL: define internal void @parent
-; CHECK-SAME: (ptr [[P1:%.*]], ptr [[P2:%.*]]) {
+; CHECK-SAME: (ptr [[P1:%.*]], half [[P2_0_VAL:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[P2_VAL2:%.*]] = load half, ptr [[P2]], align 2
-; CHECK-NEXT:    call void @child(ptr [[P1]], half [[P2_VAL2]])
-; CHECK-NEXT:    [[P2_VAL1:%.*]] = load half, ptr [[P2]], align 2
-; CHECK-NEXT:    call void @child(ptr [[P1]], half [[P2_VAL1]])
-; CHECK-NEXT:    [[P2_VAL:%.*]] = load half, ptr [[P2]], align 2
-; CHECK-NEXT:    call void @child(ptr [[P1]], half [[P2_VAL]])
+; CHECK-NEXT:    call void @child(ptr [[P1]], half [[P2_0_VAL]])
+; CHECK-NEXT:    call void @child(ptr [[P1]], half [[P2_0_VAL]])
+; CHECK-NEXT:    call void @child(ptr [[P1]], half [[P2_0_VAL]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -46,7 +43,8 @@ define  void @grandparent() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[XPTR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[YPTR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    call void @parent(ptr [[XPTR]], ptr [[YPTR]])
+; CHECK-NEXT:    [[YPTR_VAL:%.*]] = load half, ptr [[YPTR]], align 2
+; CHECK-NEXT:    call void @parent(ptr [[XPTR]], half [[YPTR_VAL]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
diff --git a/llvm/test/Transforms/ArgumentPromotion/recursion/recursion-mixed-calls.ll b/llvm/test/Transforms/ArgumentPromotion/recursion/recursion-mixed-calls.ll
index 0ec4137aadeb4c..3dee8b032ee3a4 100644
--- a/llvm/test/Transforms/ArgumentPromotion/recursion/recursion-mixed-calls.ll
+++ b/llvm/test/Transforms/Argumen...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/110245


More information about the llvm-commits mailing list