[llvm] [X86][ArgPromotion] Do not assume large vectors or aggregates ABI compatible (PR #84105)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 5 18:41:03 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Phoebe Wang (phoebewang)
<details>
<summary>Changes</summary>
---
Patch is 49.78 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/84105.diff
3 Files Affected:
- (modified) llvm/lib/Target/X86/X86TargetTransformInfo.cpp (+8-6)
- (modified) llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll (+50-50)
- (modified) llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll (+96-102)
``````````diff
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index d336ab9d309c4e..572e38f25ea9e3 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -6133,15 +6133,17 @@ bool X86TTIImpl::areTypesABICompatible(const Function *Caller,
// incompatible.
const TargetMachine &TM = getTLI()->getTargetMachine();
- if (TM.getSubtarget<X86Subtarget>(*Caller).useAVX512Regs() ==
+ // AVX512 supports the largest vector length, so no ABI compatible issue.
+ if (TM.getSubtarget<X86Subtarget>(*Caller).useAVX512Regs() &&
TM.getSubtarget<X86Subtarget>(*Callee).useAVX512Regs())
return true;
- // Consider the arguments compatible if they aren't vectors or aggregates.
- // FIXME: Look at the size of vectors.
- // FIXME: Look at the element types of aggregates to see if there are vectors.
- return llvm::none_of(Types,
- [](Type *T) { return T->isVectorTy() || T->isAggregateType(); });
+ // Consider the arguments compatible iff they aren't large vectors or
+ // aggregates.
+ return llvm::none_of(Types, [this](Type *T) {
+ return (isa<FixedVectorType>(T) || T->isAggregateType()) &&
+ T->getPrimitiveSizeInBits() > getLoadStoreVecRegBitWidth(0);
+ });
}
X86TTIImpl::TTI::MemCmpExpansionOptions
diff --git a/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll
index 3373c09d5f91aa..a345204f402bed 100644
--- a/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll
@@ -8,9 +8,9 @@ target triple = "x86_64-unknown-linux-gnu"
; This should promote
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg, ptr readonly %arg1) #0 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -21,12 +21,12 @@ bb:
define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg) #0 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -45,9 +45,9 @@ bb:
; This should promote
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -58,12 +58,12 @@ bb:
define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg) #1 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -82,9 +82,9 @@ bb:
; This should promote
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -95,12 +95,12 @@ bb:
define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg) #0 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -119,9 +119,9 @@ bb:
; This should promote
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg, ptr readonly %arg1) #0 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -132,12 +132,12 @@ bb:
define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg) #1 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -156,10 +156,10 @@ bb:
; This should not promote
define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]]
-; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]]
+; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
+; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -170,7 +170,7 @@ bb:
define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg) #2 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
@@ -193,10 +193,10 @@ bb:
; This should not promote
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg, ptr readonly %arg1) #2 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]]
-; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]]
+; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
+; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -207,7 +207,7 @@ bb:
define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg) #1 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
@@ -227,12 +227,13 @@ bb:
ret void
}
-; This should promote
+; This should not promote
define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg, ptr readonly %arg1) #3 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]]) #[[ATTR3:[0-9]+]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
+; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -243,13 +244,12 @@ bb:
define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg) #4 {
; CHECK-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR4:[0-9]+]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
-; CHECK-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr [[TMP2]], ptr [[TMP]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CHECK-NEXT: ret void
@@ -264,12 +264,13 @@ bb:
ret void
}
-; This should promote
+; This should not promote
define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg, ptr readonly %arg1) #4 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]]) #[[ATTR4]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
+; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -280,13 +281,12 @@ bb:
define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg) #3 {
; CHECK-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR3]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
-; CHECK-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr [[TMP2]], ptr [[TMP]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CHECK-NEXT: ret void
@@ -304,8 +304,8 @@ bb:
; If the arguments are scalar, its ok to promote.
define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %X, ptr %Y) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
-; CHECK-NEXT: [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
+; CHECK-SAME: (i32 [[X_0_VAL:%.*]], i32 [[Y_0_VAL:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[C:%.*]] = add i32 [[X_0_VAL]], [[Y_0_VAL]]
; CHECK-NEXT: ret i32 [[C]]
;
%A = load i32, ptr %X
@@ -316,11 +316,11 @@ define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal51
define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %B) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[B:%.*]])
-; CHECK-NEXT: [[A:%.*]] = alloca i32
-; CHECK-NEXT: store i32 1, ptr [[A]]
-; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A]]
-; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]]
+; CHECK-SAME: (ptr [[B:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 1, ptr [[A]], align 4
+; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]], align 4
; CHECK-NEXT: [[C:%.*]] = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
; CHECK-NEXT: ret i32 [[C]]
;
@@ -333,8 +333,8 @@ define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr
; If the arguments are scalar, its ok to promote.
define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %X, ptr %Y) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
-; CHECK-NEXT: [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
+; CHECK-SAME: (i32 [[X_0_VAL:%.*]], i32 [[Y_0_VAL:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[C:%.*]] = add i32 [[X_0_VAL]], [[Y_0_VAL]]
; CHECK-NEXT: ret i32 [[C]]
;
%A = load i32, ptr %X
@@ -345,11 +345,11 @@ define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal25
define i32 @scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %B) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (ptr [[B:%.*]])
-; CHECK-NEXT: [[A:%.*]] = alloca i32
-; CHECK-NEXT: store i32 1, ptr [[A]]
-; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A]]
-; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]]
+; CHECK-SAME: (ptr [[B:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 1, ptr [[A]], align 4
+; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]], align 4
; CHECK-NEXT: [[C:%.*]] = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
; CHECK-NEXT: ret i32 [[C]]
;
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
index 321714849c8555..631a35b91fbf03 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
@@ -32,12 +32,12 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg)
; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR0]] {
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: [[TRUETMP2:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5:[0-9]+]]
+; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6:[0-9]+]]
; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TRUETMP2]], <8 x i64> [[TMP0]]) #[[ATTR6:[0-9]+]]
-; TUNIT-NEXT: [[TRUETMP4:%.*]] = load <8 x i64>, ptr [[TRUETMP2]], align 64
-; TUNIT-NEXT: store <8 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
+; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7:[0-9]+]]
+; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
+; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
@@ -45,12 +45,12 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg)
; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR0]] {
; CGSCC-NEXT: bb:
; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
-; CGSCC-NEXT: [[TRUETMP2:%.*]] = alloca <8 x i64>, align 32
-; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5:[0-9]+]]
+; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
+; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6:[0-9]+]]
; CGSCC-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TRUETMP2]], <8 x i64> [[TMP0]]) #[[ATTR6:[0-9]+]]
-; CGSCC-NEXT: [[TRUETMP4:%.*]] = load <8 x i64>, ptr [[TRUETMP2]], align 64
-; CGSCC-NEXT: store <8 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
+; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7:[0-9]+]]
+; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
+; CGSCC-NEXT...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/84105
More information about the llvm-commits mailing list