[llvm] [X86][ArgPromotion] Do not assume large vectors or aggregates ABI compatible (PR #84105)
Phoebe Wang via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 5 18:40:32 PST 2024
https://github.com/phoebewang created https://github.com/llvm/llvm-project/pull/84105
None
>From 0eac945a9824f4b91c8030fb0f66c6c33b05d966 Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Wed, 6 Mar 2024 10:28:19 +0800
Subject: [PATCH 1/2] [X86] Pre-update test cases
---
.../X86/min-legal-vector-width.ll | 92 ++++++-------
.../X86/min-legal-vector-width.ll | 128 +++++++++---------
2 files changed, 110 insertions(+), 110 deletions(-)
diff --git a/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll
index 3373c09d5f91aa..3fabfebdb6b601 100644
--- a/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll
@@ -8,9 +8,9 @@ target triple = "x86_64-unknown-linux-gnu"
; This should promote
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg, ptr readonly %arg1) #0 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -21,12 +21,12 @@ bb:
define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg) #0 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -45,9 +45,9 @@ bb:
; This should promote
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -58,12 +58,12 @@ bb:
define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg) #1 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -82,9 +82,9 @@ bb:
; This should promote
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -95,12 +95,12 @@ bb:
define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg) #0 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -119,9 +119,9 @@ bb:
; This should promote
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg, ptr readonly %arg1) #0 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -132,12 +132,12 @@ bb:
define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg) #1 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -156,10 +156,10 @@ bb:
; This should not promote
define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]]
-; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]]
+; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
+; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -170,7 +170,7 @@ bb:
define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg) #2 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
@@ -193,10 +193,10 @@ bb:
; This should not promote
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg, ptr readonly %arg1) #2 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]]
-; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]]
+; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
+; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -207,7 +207,7 @@ bb:
define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg) #1 {
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
@@ -230,9 +230,9 @@ bb:
; This should promote
define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg, ptr readonly %arg1) #3 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR3:[0-9]+]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -243,12 +243,12 @@ bb:
define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg) #4 {
; CHECK-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR3]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
; CHECK-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -267,9 +267,9 @@ bb:
; This should promote
define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg, ptr readonly %arg1) #4 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR3]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: store <8 x i64> [[ARG1_VAL]], ptr [[ARG]]
+; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -280,12 +280,12 @@ bb:
define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg) #3 {
; CHECK-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]])
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR3]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
+; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
; CHECK-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -304,8 +304,8 @@ bb:
; If the arguments are scalar, its ok to promote.
define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %X, ptr %Y) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
-; CHECK-NEXT: [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
+; CHECK-SAME: (i32 [[X_0_VAL:%.*]], i32 [[Y_0_VAL:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[C:%.*]] = add i32 [[X_0_VAL]], [[Y_0_VAL]]
; CHECK-NEXT: ret i32 [[C]]
;
%A = load i32, ptr %X
@@ -316,11 +316,11 @@ define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal51
define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %B) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr [[B:%.*]])
-; CHECK-NEXT: [[A:%.*]] = alloca i32
-; CHECK-NEXT: store i32 1, ptr [[A]]
-; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A]]
-; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]]
+; CHECK-SAME: (ptr [[B:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 1, ptr [[A]], align 4
+; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]], align 4
; CHECK-NEXT: [[C:%.*]] = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
; CHECK-NEXT: ret i32 [[C]]
;
@@ -333,8 +333,8 @@ define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr
; If the arguments are scalar, its ok to promote.
define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %X, ptr %Y) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (i32 [[X_VAL:%.*]], i32 [[Y_VAL:%.*]])
-; CHECK-NEXT: [[C:%.*]] = add i32 [[X_VAL]], [[Y_VAL]]
+; CHECK-SAME: (i32 [[X_0_VAL:%.*]], i32 [[Y_0_VAL:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[C:%.*]] = add i32 [[X_0_VAL]], [[Y_0_VAL]]
; CHECK-NEXT: ret i32 [[C]]
;
%A = load i32, ptr %X
@@ -345,11 +345,11 @@ define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal25
define i32 @scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %B) #2 {
; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (ptr [[B:%.*]])
-; CHECK-NEXT: [[A:%.*]] = alloca i32
-; CHECK-NEXT: store i32 1, ptr [[A]]
-; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A]]
-; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]]
+; CHECK-SAME: (ptr [[B:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 1, ptr [[A]], align 4
+; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]], align 4
; CHECK-NEXT: [[C:%.*]] = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
; CHECK-NEXT: ret i32 [[C]]
;
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
index 321714849c8555..f0bcf68b6444b6 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
@@ -32,12 +32,12 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg)
; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR0]] {
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: [[TRUETMP2:%.*]] = alloca <8 x i64>, align 32
+; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5:[0-9]+]]
; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TRUETMP2]], <8 x i64> [[TMP0]]) #[[ATTR6:[0-9]+]]
-; TUNIT-NEXT: [[TRUETMP4:%.*]] = load <8 x i64>, ptr [[TRUETMP2]], align 64
-; TUNIT-NEXT: store <8 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
+; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6:[0-9]+]]
+; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
+; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
@@ -45,12 +45,12 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg)
; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR0]] {
; CGSCC-NEXT: bb:
; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
-; CGSCC-NEXT: [[TRUETMP2:%.*]] = alloca <8 x i64>, align 32
+; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5:[0-9]+]]
; CGSCC-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TRUETMP2]], <8 x i64> [[TMP0]]) #[[ATTR6:[0-9]+]]
-; CGSCC-NEXT: [[TRUETMP4:%.*]] = load <8 x i64>, ptr [[TRUETMP2]], align 64
-; CGSCC-NEXT: store <8 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
+; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6:[0-9]+]]
+; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
+; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CGSCC-NEXT: ret void
;
bb:
@@ -89,12 +89,12 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg)
; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: [[TRUETMP2:%.*]] = alloca <8 x i64>, align 32
+; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TRUETMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
-; TUNIT-NEXT: [[TRUETMP4:%.*]] = load <8 x i64>, ptr [[TRUETMP2]], align 64
-; TUNIT-NEXT: store <8 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
+; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
+; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
+; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
@@ -102,12 +102,12 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg)
; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT: bb:
; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
-; CGSCC-NEXT: [[TRUETMP2:%.*]] = alloca <8 x i64>, align 32
+; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
; CGSCC-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TRUETMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
-; CGSCC-NEXT: [[TRUETMP4:%.*]] = load <8 x i64>, ptr [[TRUETMP2]], align 64
-; CGSCC-NEXT: store <8 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
+; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
+; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
+; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CGSCC-NEXT: ret void
;
bb:
@@ -146,12 +146,12 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg)
; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR0]] {
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: [[TRUETMP2:%.*]] = alloca <8 x i64>, align 32
+; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TRUETMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
-; TUNIT-NEXT: [[TRUETMP4:%.*]] = load <8 x i64>, ptr [[TRUETMP2]], align 64
-; TUNIT-NEXT: store <8 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
+; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
+; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
+; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
@@ -159,12 +159,12 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg)
; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR0]] {
; CGSCC-NEXT: bb:
; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
-; CGSCC-NEXT: [[TRUETMP2:%.*]] = alloca <8 x i64>, align 32
+; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
; CGSCC-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TRUETMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
-; CGSCC-NEXT: [[TRUETMP4:%.*]] = load <8 x i64>, ptr [[TRUETMP2]], align 64
-; CGSCC-NEXT: store <8 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
+; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
+; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
+; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CGSCC-NEXT: ret void
;
bb:
@@ -203,12 +203,12 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg)
; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: [[TRUETMP2:%.*]] = alloca <8 x i64>, align 32
+; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TRUETMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
-; TUNIT-NEXT: [[TRUETMP4:%.*]] = load <8 x i64>, ptr [[TRUETMP2]], align 64
-; TUNIT-NEXT: store <8 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
+; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
+; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
+; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
@@ -216,12 +216,12 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg)
; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT: bb:
; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
-; CGSCC-NEXT: [[TRUETMP2:%.*]] = alloca <8 x i64>, align 32
+; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
; CGSCC-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TRUETMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
-; CGSCC-NEXT: [[TRUETMP4:%.*]] = load <8 x i64>, ptr [[TRUETMP2]], align 64
-; CGSCC-NEXT: store <8 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
+; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
+; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
+; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CGSCC-NEXT: ret void
;
bb:
@@ -258,11 +258,11 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg)
; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: [[TRUETMP2:%.*]] = alloca <8 x i64>, align 32
+; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
-; TUNIT-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TRUETMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]]
-; TUNIT-NEXT: [[TRUETMP4:%.*]] = load <8 x i64>, ptr [[TRUETMP2]], align 64
-; TUNIT-NEXT: store <8 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
+; TUNIT-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]]
+; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
+; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
@@ -270,11 +270,11 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg)
; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
; CGSCC-NEXT: bb:
; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
-; CGSCC-NEXT: [[TRUETMP2:%.*]] = alloca <8 x i64>, align 32
+; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
-; CGSCC-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TRUETMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]]
-; CGSCC-NEXT: [[TRUETMP4:%.*]] = load <8 x i64>, ptr [[TRUETMP2]], align 64
-; CGSCC-NEXT: store <8 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
+; CGSCC-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]]
+; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
+; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CGSCC-NEXT: ret void
;
bb:
@@ -311,11 +311,11 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg)
; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR1]] {
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: [[TRUETMP2:%.*]] = alloca <8 x i64>, align 32
+; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
-; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TRUETMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]]
-; TUNIT-NEXT: [[TRUETMP4:%.*]] = load <8 x i64>, ptr [[TRUETMP2]], align 64
-; TUNIT-NEXT: store <8 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
+; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]]
+; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
+; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
@@ -323,11 +323,11 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg)
; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR1]] {
; CGSCC-NEXT: bb:
; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
-; CGSCC-NEXT: [[TRUETMP2:%.*]] = alloca <8 x i64>, align 32
+; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
-; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TRUETMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]]
-; CGSCC-NEXT: [[TRUETMP4:%.*]] = load <8 x i64>, ptr [[TRUETMP2]], align 64
-; CGSCC-NEXT: store <8 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
+; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]]
+; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
+; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CGSCC-NEXT: ret void
;
bb:
@@ -366,12 +366,12 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg) #4 {
; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR3]] {
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: [[TRUETMP2:%.*]] = alloca <8 x i64>, align 32
+; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; TUNIT-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TRUETMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
-; TUNIT-NEXT: [[TRUETMP4:%.*]] = load <8 x i64>, ptr [[TRUETMP2]], align 64
-; TUNIT-NEXT: store <8 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
+; TUNIT-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
+; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
+; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
@@ -379,12 +379,12 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg) #4 {
; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR3]] {
; CGSCC-NEXT: bb:
; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
-; CGSCC-NEXT: [[TRUETMP2:%.*]] = alloca <8 x i64>, align 32
+; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
; CGSCC-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; CGSCC-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TRUETMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
-; CGSCC-NEXT: [[TRUETMP4:%.*]] = load <8 x i64>, ptr [[TRUETMP2]], align 64
-; CGSCC-NEXT: store <8 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
+; CGSCC-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
+; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
+; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CGSCC-NEXT: ret void
;
bb:
@@ -423,12 +423,12 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg) #3 {
; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR3]] {
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: [[TRUETMP2:%.*]] = alloca <8 x i64>, align 32
+; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; TUNIT-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TRUETMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
-; TUNIT-NEXT: [[TRUETMP4:%.*]] = load <8 x i64>, ptr [[TRUETMP2]], align 64
-; TUNIT-NEXT: store <8 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
+; TUNIT-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
+; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
+; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
@@ -436,12 +436,12 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg) #3 {
; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR3]] {
; CGSCC-NEXT: bb:
; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
-; CGSCC-NEXT: [[TRUETMP2:%.*]] = alloca <8 x i64>, align 32
+; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
; CGSCC-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; CGSCC-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TRUETMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
-; CGSCC-NEXT: [[TRUETMP4:%.*]] = load <8 x i64>, ptr [[TRUETMP2]], align 64
-; CGSCC-NEXT: store <8 x i64> [[TRUETMP4]], ptr [[ARG]], align 2
+; CGSCC-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
+; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
+; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CGSCC-NEXT: ret void
;
bb:
>From d4658502e040e3596501da559225f31ab47a4fba Mon Sep 17 00:00:00 2001
From: Phoebe Wang <phoebe.wang at intel.com>
Date: Wed, 6 Mar 2024 10:33:44 +0800
Subject: [PATCH 2/2] [X86][ArgPromotion] Do not assume large vectors or
aggregates ABI compatible
---
.../lib/Target/X86/X86TargetTransformInfo.cpp | 14 +--
.../X86/min-legal-vector-width.ll | 22 ++--
.../X86/min-legal-vector-width.ll | 102 +++++++++---------
3 files changed, 67 insertions(+), 71 deletions(-)
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index d336ab9d309c4e..572e38f25ea9e3 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -6133,15 +6133,17 @@ bool X86TTIImpl::areTypesABICompatible(const Function *Caller,
// incompatible.
const TargetMachine &TM = getTLI()->getTargetMachine();
- if (TM.getSubtarget<X86Subtarget>(*Caller).useAVX512Regs() ==
+ // AVX512 supports the largest vector length, so no ABI compatible issue.
+ if (TM.getSubtarget<X86Subtarget>(*Caller).useAVX512Regs() &&
TM.getSubtarget<X86Subtarget>(*Callee).useAVX512Regs())
return true;
- // Consider the arguments compatible if they aren't vectors or aggregates.
- // FIXME: Look at the size of vectors.
- // FIXME: Look at the element types of aggregates to see if there are vectors.
- return llvm::none_of(Types,
- [](Type *T) { return T->isVectorTy() || T->isAggregateType(); });
+ // Consider the arguments compatible iff they aren't large vectors or
+ // aggregates.
+ return llvm::none_of(Types, [this](Type *T) {
+ return (isa<FixedVectorType>(T) || T->isAggregateType()) &&
+ T->getPrimitiveSizeInBits() > getLoadStoreVecRegBitWidth(0);
+ });
}
X86TTIImpl::TTI::MemCmpExpansionOptions
diff --git a/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll
index 3fabfebdb6b601..a345204f402bed 100644
--- a/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll
+++ b/llvm/test/Transforms/ArgumentPromotion/X86/min-legal-vector-width.ll
@@ -227,12 +227,13 @@ bb:
ret void
}
-; This should promote
+; This should not promote
define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg, ptr readonly %arg1) #3 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]]) #[[ATTR3:[0-9]+]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
+; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
+; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -243,13 +244,12 @@ bb:
define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg) #4 {
; CHECK-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR3]] {
+; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR4:[0-9]+]] {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; CHECK-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr [[TMP2]], ptr [[TMP]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CHECK-NEXT: ret void
@@ -264,12 +264,13 @@ bb:
ret void
}
-; This should promote
+; This should not promote
define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg, ptr readonly %arg1) #4 {
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
-; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL:%.*]]) #[[ATTR3]] {
+; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]]) #[[ATTR4]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL]], ptr [[ARG]], align 64
+; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
+; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
bb:
@@ -285,8 +286,7 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg) #3 {
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
-; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; CHECK-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
+; CHECK-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr [[TMP2]], ptr [[TMP]])
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
index f0bcf68b6444b6..631a35b91fbf03 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
@@ -33,9 +33,9 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg)
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5:[0-9]+]]
+; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6:[0-9]+]]
; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6:[0-9]+]]
+; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7:[0-9]+]]
; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; TUNIT-NEXT: ret void
@@ -46,9 +46,9 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg)
; CGSCC-NEXT: bb:
; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5:[0-9]+]]
+; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6:[0-9]+]]
; CGSCC-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6:[0-9]+]]
+; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7:[0-9]+]]
; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CGSCC-NEXT: ret void
@@ -90,9 +90,9 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg)
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
+; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
+; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]]
; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; TUNIT-NEXT: ret void
@@ -103,9 +103,9 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg)
; CGSCC-NEXT: bb:
; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
+; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
; CGSCC-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
+; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]]
; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CGSCC-NEXT: ret void
@@ -147,9 +147,9 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg)
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
+; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
+; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]]
; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; TUNIT-NEXT: ret void
@@ -160,9 +160,9 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg)
; CGSCC-NEXT: bb:
; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
+; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
; CGSCC-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
+; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]]
; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CGSCC-NEXT: ret void
@@ -204,9 +204,9 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg)
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
+; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
+; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]]
; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; TUNIT-NEXT: ret void
@@ -217,9 +217,9 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg)
; CGSCC-NEXT: bb:
; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
+; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
; CGSCC-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
+; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]]
; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CGSCC-NEXT: ret void
@@ -259,8 +259,8 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg)
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
-; TUNIT-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]]
+; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
+; TUNIT-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR7]]
; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; TUNIT-NEXT: ret void
@@ -271,8 +271,8 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg)
; CGSCC-NEXT: bb:
; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
-; CGSCC-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]]
+; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
+; CGSCC-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR7]]
; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CGSCC-NEXT: ret void
@@ -312,8 +312,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg)
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
-; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]]
+; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
+; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR7]]
; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; TUNIT-NEXT: ret void
@@ -324,8 +324,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg)
; CGSCC-NEXT: bb:
; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
-; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR6]]
+; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
+; CGSCC-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR7]]
; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CGSCC-NEXT: ret void
@@ -340,16 +340,14 @@ bb:
ret void
}
-; This should promote
+; This should not promote
define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg, ptr readonly %arg1) #3 {
;
; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; CHECK-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) #[[ATTR3:[0-9]+]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
-; CHECK-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64
+; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
@@ -363,26 +361,24 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg) #4 {
;
; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
; TUNIT-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR3]] {
+; TUNIT-SAME: (ptr nocapture nofree writeonly [[ARG:%.*]]) #[[ATTR4:[0-9]+]] {
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
-; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; TUNIT-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
+; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
+; TUNIT-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR7]]
; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; TUNIT-NEXT: ret void
;
; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
; CGSCC-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR3]] {
+; CGSCC-SAME: (ptr nocapture nofree noundef nonnull writeonly align 2 dereferenceable(64) [[ARG:%.*]]) #[[ATTR4:[0-9]+]] {
; CGSCC-NEXT: bb:
; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
-; CGSCC-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; CGSCC-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
+; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
+; CGSCC-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR7]]
; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CGSCC-NEXT: ret void
@@ -397,16 +393,14 @@ bb:
ret void
}
-; This should promote
+; This should not promote
define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg, ptr readonly %arg1) #4 {
;
; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
-; CHECK-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3]] {
+; CHECK-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) #[[ATTR4:[0-9]+]] {
; CHECK-NEXT: bb:
-; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
-; CHECK-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64
+; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
; CHECK-NEXT: ret void
;
@@ -424,9 +418,8 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg) #3 {
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
-; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; TUNIT-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
+; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
+; TUNIT-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR7]]
; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; TUNIT-NEXT: ret void
@@ -437,9 +430,8 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg) #3 {
; CGSCC-NEXT: bb:
; CGSCC-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; CGSCC-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
-; CGSCC-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
-; CGSCC-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
+; CGSCC-NEXT: call void @llvm.memset.p0.i64(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]]
+; CGSCC-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], ptr noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[TMP]]) #[[ATTR7]]
; CGSCC-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
; CGSCC-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
; CGSCC-NEXT: ret void
@@ -468,15 +460,17 @@ attributes #5 = { argmemonly nounwind }
; TUNIT: attributes #[[ATTR1]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" }
; TUNIT: attributes #[[ATTR2]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" }
; TUNIT: attributes #[[ATTR3]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" }
-; TUNIT: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
-; TUNIT: attributes #[[ATTR5]] = { nofree willreturn memory(write) }
-; TUNIT: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn }
+; TUNIT: attributes #[[ATTR4]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx2" }
+; TUNIT: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
+; TUNIT: attributes #[[ATTR6]] = { nofree willreturn memory(write) }
+; TUNIT: attributes #[[ATTR7]] = { nofree nosync nounwind willreturn }
;.
; CGSCC: attributes #[[ATTR0]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" }
; CGSCC: attributes #[[ATTR1]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" }
; CGSCC: attributes #[[ATTR2]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" }
; CGSCC: attributes #[[ATTR3]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" }
-; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
-; CGSCC: attributes #[[ATTR5]] = { nofree willreturn memory(write) }
-; CGSCC: attributes #[[ATTR6]] = { nofree nounwind willreturn }
+; CGSCC: attributes #[[ATTR4]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx2" }
+; CGSCC: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
+; CGSCC: attributes #[[ATTR6]] = { nofree willreturn memory(write) }
+; CGSCC: attributes #[[ATTR7]] = { nofree nounwind willreturn }
;.
More information about the llvm-commits
mailing list