[llvm] [X86][ArgPromotion] Do not assume large vectors or aggregates ABI compatible (PR #84105)

via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 5 18:41:03 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Phoebe Wang (phoebewang)

<details>
<summary>Changes</summary>



---

Patch is 49.78 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/84105.diff


3 Files Affected:

- (modified) llvm/lib/Target/X86/X86TargetTransformInfo.cpp (+8-6) 
- (modified) llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll (+50-50) 
- (modified) llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll (+96-102) 


``````````diff
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index d336ab9d309c4e..572e38f25ea9e3 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -6133,15 +6133,17 @@ bool X86TTIImpl::areTypesABICompatible(const Function *Caller,
   // incompatible.
   const TargetMachine &TM = getTLI()->getTargetMachine();
 
-  if (TM.getSubtarget<X86Subtarget>(*Caller).useAVX512Regs() ==
+  // AVX512 supports the largest vector length, so there is no ABI mismatch.
+  if (TM.getSubtarget<X86Subtarget>(*Caller).useAVX512Regs() &&
       TM.getSubtarget<X86Subtarget>(*Callee).useAVX512Regs())
     return true;
 
-  // Consider the arguments compatible if they aren't vectors or aggregates.
-  // FIXME: Look at the size of vectors.
-  // FIXME: Look at the element types of aggregates to see if there are vectors.
-  return llvm::none_of(Types,
-      [](Type *T) { return T->isVectorTy() || T->isAggregateType(); });
+  // Consider the arguments compatible iff they aren't large vectors or
+  // aggregates.
+  return llvm::none_of(Types, [this](Type *T) {
+    return (isa<FixedVectorType>(T) || T->isAggregateType()) &&
+           T->getPrimitiveSizeInBits() > getLoadStoreVecRegBitWidth(0);
+  });
 }
 
 X86TTIImpl::TTI::MemCmpExpansionOptions
diff --git a/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll
index 3373c09d5f91aa..a345204f402bed 100644
--- a/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll
@@ -8,9 +8,9 @@ target triple = "x86_64-unknown-linux-gnu"
 ; This should promote
 define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg, ptr readonly %arg1) #0 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT:    store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -21,12 +21,12 @@ bb:
 
 define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg) #0 {
 ; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
 ; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -45,9 +45,9 @@ bb:
 ; This should promote
 define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT:    store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -58,12 +58,12 @@ bb:
 
 define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
 ; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -82,9 +82,9 @@ bb:
 ; This should promote
 define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT:    store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -95,12 +95,12 @@ bb:
 
 define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg) #0 {
 ; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
 ; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -119,9 +119,9 @@ bb:
 ; This should promote
 define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg, ptr readonly %arg1) #0 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT:    store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -132,12 +132,12 @@ bb:
 
 define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
 ; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -156,10 +156,10 @@ bb:
 ; This should not promote
 define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]]
-; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]]
+; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
+; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -170,7 +170,7 @@ bb:
 
 define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg) #2 {
 ; CHECK-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
@@ -193,10 +193,10 @@ bb:
 ; This should not promote
 define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg, ptr readonly %arg1) #2 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]]) #[[ATTR2]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]]
-; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]]
+; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
+; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -207,7 +207,7 @@ bb:
 
 define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
@@ -227,12 +227,13 @@ bb:
   ret void
 }
 
-; This should promote
+; This should not promote
 define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg, ptr readonly %arg1) #3 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]]) #[[ATTR3:[0-9]+]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
+; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -243,13 +244,12 @@ bb:
 
 define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg) #4 {
 ; CHECK-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR4:[0-9]+]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
-; CHECK-NEXT:    call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT:    call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr [[TMP2]], ptr [[TMP]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
 ; CHECK-NEXT:    ret void
@@ -264,12 +264,13 @@ bb:
   ret void
 }
 
-; This should promote
+; This should not promote
 define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg, ptr readonly %arg1) #4 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]]) #[[ATTR4]] {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
+; CHECK-NEXT:    store <8 x i64> [[TMP]], ptr [[ARG]], align 64
 ; CHECK-NEXT:    ret void
 ;
 bb:
@@ -280,13 +281,12 @@ bb:
 
 define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg) #3 {
 ; CHECK-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR3]] {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
-; CHECK-NEXT:    call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT:    call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr [[TMP2]], ptr [[TMP]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
 ; CHECK-NEXT:    ret void
@@ -304,8 +304,8 @@ bb:
 ; If the arguments are scalar, its ok to promote.
 define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %X, ptr %Y) #2 {
 ; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
-; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
+; CHECK-SAME: (i32 [[X_0_VAL:%.*]], i32 [[Y_0_VAL:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_0_VAL]], [[Y_0_VAL]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
   %A = load i32, ptr %X
@@ -316,11 +316,11 @@ define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal51
 
 define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %B) #2 {
 ; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[B:%.*]])
-; CHECK-NEXT:    [[A:%.*]] = alloca i32
-; CHECK-NEXT:    store i32 1, ptr [[A]]
-; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, ptr [[A]]
-; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, ptr [[B]]
+; CHECK-SAME: (ptr [[B:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 1, ptr [[A]], align 4
+; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, ptr [[B]], align 4
 ; CHECK-NEXT:    [[C:%.*]] = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -333,8 +333,8 @@ define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr
 ; If the arguments are scalar, its ok to promote.
 define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %X, ptr %Y) #2 {
 ; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
-; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
+; CHECK-SAME: (i32 [[X_0_VAL:%.*]], i32 [[Y_0_VAL:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[C:%.*]] = add i32 [[X_0_VAL]], [[Y_0_VAL]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
   %A = load i32, ptr %X
@@ -345,11 +345,11 @@ define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal25
 
 define i32 @scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %B) #2 {
 ; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (ptr [[B:%.*]])
-; CHECK-NEXT:    [[A:%.*]] = alloca i32
-; CHECK-NEXT:    store i32 1, ptr [[A]]
-; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, ptr [[A]]
-; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, ptr [[B]]
+; CHECK-SAME: (ptr [[B:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 1, ptr [[A]], align 4
+; CHECK-NEXT:    [[A_VAL:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT:    [[B_VAL:%.*]] = load i32, ptr [[B]], align 4
 ; CHECK-NEXT:    [[C:%.*]] = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
index 321714849c8555..631a35b91fbf03 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
@@ -32,12 +32,12 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg)
 ; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR0]] {
 ; TUNIT-NEXT:  bb:
 ; TUNIT-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT:    [[TRUETMP2:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT:    call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5:[0-9]+]]
+; TUNIT-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; TUNIT-NEXT:    call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6:[0-9]+]]
 ; TUNIT-NEXT:    [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; TUNIT-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TRUETMP2]], <8 x i64> [[TMP0]]) #[[ATTR6:[0-9]+]]
-; TUNIT-NEXT:    [[TRUETMP4:%.*]] = load <8 x i64>, ptr [[TRUETMP2]], align 64
-; TUNIT-NEXT:    store <8 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
+; TUNIT-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7:[0-9]+]]
+; TUNIT-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
+; TUNIT-NEXT:    store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
@@ -45,12 +45,12 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg)
 ; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR0]] {
 ; CGSCC-NEXT:  bb:
 ; CGSCC-NEXT:    [[TMP:%.*]] = alloca <8 x i64>, align 32
-; CGSCC-NEXT:    [[TRUETMP2:%.*]] = alloca <8 x i64>, align 32
-; CGSCC-NEXT:    call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5:[0-9]+]]
+; CGSCC-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CGSCC-NEXT:    call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6:[0-9]+]]
 ; CGSCC-NEXT:    [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; CGSCC-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TRUETMP2]], <8 x i64> [[TMP0]]) #[[ATTR6:[0-9]+]]
-; CGSCC-NEXT:    [[TRUETMP4:%.*]] = load <8 x i64>, ptr [[TRUETMP2]], align 64
-; CGSCC-NEXT:    store <8 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
+; CGSCC-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7:[0-9]+]]
+; CGSCC-NEXT:    [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
+; CGSCC-NEXT...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/84105


More information about the llvm-commits mailing list