[clang] c9de04e - [ARM] Fixing ABI mismatch for packed structs passed as function arguments

Jirui Wu via cfe-commits cfe-commits at lists.llvm.org
Wed Jul 26 09:33:12 PDT 2023


Author: Jirui Wu
Date: 2023-07-26T17:33:06+01:00
New Revision: c9de04ea64ed4e30d5bad1e6cdc2b3eeccdcd701

URL: https://github.com/llvm/llvm-project/commit/c9de04ea64ed4e30d5bad1e6cdc2b3eeccdcd701
DIFF: https://github.com/llvm/llvm-project/commit/c9de04ea64ed4e30d5bad1e6cdc2b3eeccdcd701.diff

LOG: [ARM] Fixing ABI mismatch for packed structs passed as function arguments

Previously when a packed struct, containing vector data types such as
uint16x8_t, is passed as a function argument, the alignment of the
struct used by the function caller and the alignment used by the callee
to load the argument from stack does not match.

This patch implements section 6.8.2, stage C.4 of the Procedure Call
Standard for the Arm 64-bit Architecture (AAPCS64): "If the argument is
an HFA, an HVA, a Quad-precision Floating-point or short vector type
then the NSAA is rounded up to the next multiple of 8 if its natural
alignment is ≤ 8 or the next multiple of 16 if its natural alignment
is ≥ 16." This ensures the alignments of the packed structs used as
function arguments are the same as described in the AAPCS64 for both
the caller and callee.

Reference:
AAPCS64 (https://github.com/ARM-software/abi-aa/blob/latest-release/aapcs64/aapcs64.rst)

Reviewed By: olista01, rjmccall, tmatheson

Differential Revision: https://reviews.llvm.org/D146242

Added: 
    

Modified: 
    clang/lib/CodeGen/Targets/AArch64.cpp
    clang/test/CodeGen/aarch64-ABI-align-packed-assembly.c
    clang/test/CodeGen/aarch64-ABI-align-packed.c
    clang/test/CodeGen/aarch64-args-hfa.c
    clang/test/CodeGen/aarch64-neon-ldst-one.c
    clang/test/CodeGen/aarch64-neon-tbl.c
    clang/test/CodeGen/aarch64-poly64.c
    clang/test/CodeGen/arm-aapcs-vfp.c
    clang/test/CodeGen/arm64-aapcs-arguments.c
    clang/test/CodeGen/complex-math.c
    clang/test/CodeGenCXX/homogeneous-aggregates.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index 561110ff8c0d8d..ee0968bce1aafb 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -323,12 +323,11 @@ AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic,
       return ABIArgInfo::getDirect(
           llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
 
-    // For alignment adjusted HFAs, cap the argument alignment to 16, leave it
-    // default otherwise.
+    // For HFAs/HVAs, cap the argument alignment to 16, otherwise
+    // set it to 8 according to the AAPCS64 document.
     unsigned Align =
         getContext().getTypeUnadjustedAlignInChars(Ty).getQuantity();
-    unsigned BaseAlign = getContext().getTypeAlignInChars(Base).getQuantity();
-    Align = (Align > BaseAlign && Align >= 16) ? 16 : 0;
+    Align = (Align >= 16) ? 16 : 8;
     return ABIArgInfo::getDirect(
         llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members), 0,
         nullptr, true, Align);

diff  --git a/clang/test/CodeGen/aarch64-ABI-align-packed-assembly.c b/clang/test/CodeGen/aarch64-ABI-align-packed-assembly.c
index 197189a691bd0c..f461eadf652e17 100644
--- a/clang/test/CodeGen/aarch64-ABI-align-packed-assembly.c
+++ b/clang/test/CodeGen/aarch64-ABI-align-packed-assembly.c
@@ -86,7 +86,7 @@ struct packed_struct gs_packed_struct;
 __attribute__((noinline)) void named_arg_packed_struct(double d0, double d1, double d2, double d3,
                                  double d4, double d5, double d6, double d7,
                                  double d8, struct packed_struct s_packed_struct) {
-// CHECK: ldr q0, [sp, #16]
+// CHECK: ldur q0, [sp, #8]
     gd = d8;
     gs_packed_struct = s_packed_struct;
 }
@@ -105,10 +105,10 @@ void test_packed_struct() {
     init(1, &s_packed_struct);
 
 // CHECK: mov x8, #4611686018427387904        // =0x4000000000000000
-// CHECK: str q0, [sp, #16]
+// CHECK: stur q0, [sp, #8]
 // CHECK: str x8, [sp]
     named_arg_packed_struct(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, s_packed_struct);
-// CHECK: str q0, [sp, #16]
+// CHECK: stur q0, [sp, #8]
     variadic_packed_struct(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, s_packed_struct);
 }
 
@@ -117,7 +117,7 @@ struct packed_member gs_packed_member;
 __attribute__((noinline)) void named_arg_packed_member(double d0, double d1, double d2, double d3,
                                  double d4, double d5, double d6, double d7,
                                  double d8, struct packed_member s_packed_member) {
-// CHECK: ldr q0, [sp, #16]
+// CHECK: ldur q0, [sp, #8]
     gd = d8;
     gs_packed_member = s_packed_member;
 }
@@ -136,10 +136,10 @@ void test_packed_member() {
     init(1, &s_packed_member);
 
 // CHECK: mov x8, #4611686018427387904        // =0x4000000000000000
-// CHECK: str q0, [sp, #16]
+// CHECK: stur q0, [sp, #8]
 // CHECK: str x8, [sp]
     named_arg_packed_member(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, s_packed_member);
-// CHECK: str q0, [sp, #16]
+// CHECK: stur q0, [sp, #8]
     variadic_packed_member(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, s_packed_member);
 }
 
@@ -210,7 +210,7 @@ struct pragma_packed_struct_8 gs_pragma_packed_struct_8;
 __attribute__((noinline)) void named_arg_pragma_packed_struct_8(double d0, double d1, double d2, double d3,
                                  double d4, double d5, double d6, double d7,
                                  double d8, struct pragma_packed_struct_8 s_pragma_packed_struct_8) {
-// CHECK: ldr q0, [sp, #16]
+// CHECK: ldur q0, [sp, #8]
     gd = d8;
     gs_pragma_packed_struct_8 = s_pragma_packed_struct_8;
 }
@@ -229,10 +229,10 @@ void test_pragma_packed_struct_8() {
     init(1, &s_pragma_packed_struct_8);
 
 // CHECK: mov x8, #4611686018427387904        // =0x4000000000000000
-// CHECK: str q0, [sp, #16]
+// CHECK: stur q0, [sp, #8]
 // CHECK: str x8, [sp]
     named_arg_pragma_packed_struct_8(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, s_pragma_packed_struct_8);
-// CHECK: str q0, [sp, #16]
+// CHECK: stur q0, [sp, #8]
     variadic_pragma_packed_struct_8(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, s_pragma_packed_struct_8);
 }
 
@@ -241,7 +241,7 @@ struct pragma_packed_struct_4 gs_pragma_packed_struct_4;
 __attribute__((noinline)) void named_arg_pragma_packed_struct_4(double d0, double d1, double d2, double d3,
                                  double d4, double d5, double d6, double d7,
                                  double d8, struct pragma_packed_struct_4 s_pragma_packed_struct_4) {
-// CHECK: ldr q0, [sp, #16]
+// CHECK: ldur q0, [sp, #8]
     gd = d8;
     gs_pragma_packed_struct_4 = s_pragma_packed_struct_4;
 }
@@ -260,9 +260,9 @@ void test_pragma_packed_struct_4() {
     init(1, &s_pragma_packed_struct_4);
 
 // CHECK: mov x8, #4611686018427387904        // =0x4000000000000000
-// CHECK: str q0, [sp, #16]
+// CHECK: stur q0, [sp, #8]
 // CHECK: str x8, [sp]
     named_arg_pragma_packed_struct_4(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, s_pragma_packed_struct_4);
-// CHECK: str q0, [sp, #16]
+// CHECK: stur q0, [sp, #8]
     variadic_pragma_packed_struct_4(1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, s_pragma_packed_struct_4);
 }

diff  --git a/clang/test/CodeGen/aarch64-ABI-align-packed.c b/clang/test/CodeGen/aarch64-ABI-align-packed.c
index a1c7d8b2cc6a9b..03f4834ccd0bc5 100644
--- a/clang/test/CodeGen/aarch64-ABI-align-packed.c
+++ b/clang/test/CodeGen/aarch64-ABI-align-packed.c
@@ -21,7 +21,8 @@ struct packed_member {
   uint16x8_t M0 __attribute((packed)); // member alignment 1
 };
 
-// natural alignment 16, adjusted alignment 16 since __attribute((aligned (n))) sets the minimum alignment
+// natural alignment 16, adjusted alignment 16, despite the 8-byte alignment specified by the attribute, because the natural alignment
+// for the vector type is 16 and the attribute cannot decrease the minimum required alignment to be less.
 // expected alignment of copy on callee stack: 16
 struct __attribute((aligned (8))) aligned_struct_8 {
   uint16x8_t M0; // member alignment 16
@@ -30,7 +31,8 @@ struct __attribute((aligned (8))) aligned_struct_8 {
 // natural alignment 16, adjusted alignment 16
 // expected alignment of copy on callee stack: 16
 struct aligned_member_8 {
-  uint16x8_t M0 __attribute((aligned (8))); // member alignment 16 since __attribute((aligned (n))) sets the minimum alignment
+  uint16x8_t M0 __attribute((aligned (8))); // member alignment 16, despite the 8-byte alignment specified by the attribute,
+  // because the natural alignment for the vector type is 16 and the attribute cannot decrease the minimum required alignment to be less.
 };
 
 // natural alignment 8, adjusted alignment 8
@@ -53,7 +55,7 @@ void init(int, ...);
 struct non_packed_struct gs_non_packed_struct;
 
 // CHECK-LABEL: define dso_local void @named_arg_non_packed_struct
-// CHECK-SAME: (double [[D0:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[D4:%.*]], double [[D5:%.*]], double [[D6:%.*]], double [[D7:%.*]], double noundef [[D8:%.*]], [1 x <8 x i16>] [[S_NON_PACKED_STRUCT_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-SAME: (double [[D0:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[D4:%.*]], double [[D5:%.*]], double [[D6:%.*]], double [[D7:%.*]], double noundef [[D8:%.*]], [1 x <8 x i16>] alignstack(16) [[S_NON_PACKED_STRUCT_COERCE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[S_NON_PACKED_STRUCT_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_NON_PACKED_STRUCT_COERCE]], 0
 // CHECK-NEXT:    store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2:![0-9]+]]
@@ -91,10 +93,10 @@ void variadic_non_packed_struct(double d0, double d1, double d2, double d3,
 // CHECK-NEXT:    call void (i32, ...) @init(i32 noundef 1, ptr noundef nonnull [[S_NON_PACKED_STRUCT]]) #[[ATTR6]]
 // CHECK-NEXT:    [[DOTFCA_0_LOAD:%.*]] = load <8 x i16>, ptr [[S_NON_PACKED_STRUCT]], align 16
 // CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x <8 x i16>] poison, <8 x i16> [[DOTFCA_0_LOAD]], 0
-// CHECK-NEXT:    call void @named_arg_non_packed_struct(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double noundef 2.000000e+00, [1 x <8 x i16>] [[DOTFCA_0_INSERT]])
+// CHECK-NEXT:    call void @named_arg_non_packed_struct(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double noundef 2.000000e+00, [1 x <8 x i16>] alignstack(16) [[DOTFCA_0_INSERT]])
 // CHECK-NEXT:    [[DOTFCA_0_LOAD3:%.*]] = load <8 x i16>, ptr [[S_NON_PACKED_STRUCT]], align 16
 // CHECK-NEXT:    [[DOTFCA_0_INSERT4:%.*]] = insertvalue [1 x <8 x i16>] poison, <8 x i16> [[DOTFCA_0_LOAD3]], 0
-// CHECK-NEXT:    call void (double, double, double, double, double, double, double, double, double, ...) @variadic_non_packed_struct(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, [1 x <8 x i16>] [[DOTFCA_0_INSERT4]])
+// CHECK-NEXT:    call void (double, double, double, double, double, double, double, double, double, ...) @variadic_non_packed_struct(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, [1 x <8 x i16>] alignstack(16) [[DOTFCA_0_INSERT4]])
 // CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[S_NON_PACKED_STRUCT]]) #[[ATTR6]]
 // CHECK-NEXT:    ret void
 void test_non_packed_struct() {
@@ -108,7 +110,7 @@ void test_non_packed_struct() {
 struct packed_struct gs_packed_struct;
 
 // CHECK-LABEL: define dso_local void @named_arg_packed_struct
-// CHECK-SAME: (double [[D0:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[D4:%.*]], double [[D5:%.*]], double [[D6:%.*]], double [[D7:%.*]], double noundef [[D8:%.*]], [1 x <8 x i16>] [[S_PACKED_STRUCT_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: (double [[D0:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[D4:%.*]], double [[D5:%.*]], double [[D6:%.*]], double [[D7:%.*]], double noundef [[D8:%.*]], [1 x <8 x i16>] alignstack(8) [[S_PACKED_STRUCT_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[S_PACKED_STRUCT_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_PACKED_STRUCT_COERCE]], 0
 // CHECK-NEXT:    store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]]
@@ -146,10 +148,10 @@ void variadic_packed_struct(double d0, double d1, double d2, double d3,
 // CHECK-NEXT:    call void (i32, ...) @init(i32 noundef 1, ptr noundef nonnull [[S_PACKED_STRUCT]]) #[[ATTR6]]
 // CHECK-NEXT:    [[DOTFCA_0_LOAD:%.*]] = load <8 x i16>, ptr [[S_PACKED_STRUCT]], align 16
 // CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x <8 x i16>] poison, <8 x i16> [[DOTFCA_0_LOAD]], 0
-// CHECK-NEXT:    call void @named_arg_packed_struct(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double noundef 2.000000e+00, [1 x <8 x i16>] [[DOTFCA_0_INSERT]])
+// CHECK-NEXT:    call void @named_arg_packed_struct(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double noundef 2.000000e+00, [1 x <8 x i16>] alignstack(8) [[DOTFCA_0_INSERT]])
 // CHECK-NEXT:    [[DOTFCA_0_LOAD3:%.*]] = load <8 x i16>, ptr [[S_PACKED_STRUCT]], align 16
 // CHECK-NEXT:    [[DOTFCA_0_INSERT4:%.*]] = insertvalue [1 x <8 x i16>] poison, <8 x i16> [[DOTFCA_0_LOAD3]], 0
-// CHECK-NEXT:    call void (double, double, double, double, double, double, double, double, double, ...) @variadic_packed_struct(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, [1 x <8 x i16>] [[DOTFCA_0_INSERT4]])
+// CHECK-NEXT:    call void (double, double, double, double, double, double, double, double, double, ...) @variadic_packed_struct(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, [1 x <8 x i16>] alignstack(8) [[DOTFCA_0_INSERT4]])
 // CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[S_PACKED_STRUCT]]) #[[ATTR6]]
 // CHECK-NEXT:    ret void
 void test_packed_struct() {
@@ -163,7 +165,7 @@ void test_packed_struct() {
 struct packed_member gs_packed_member;
 
 // CHECK-LABEL: define dso_local void @named_arg_packed_member
-// CHECK-SAME: (double [[D0:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[D4:%.*]], double [[D5:%.*]], double [[D6:%.*]], double [[D7:%.*]], double noundef [[D8:%.*]], [1 x <8 x i16>] [[S_PACKED_MEMBER_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: (double [[D0:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[D4:%.*]], double [[D5:%.*]], double [[D6:%.*]], double [[D7:%.*]], double noundef [[D8:%.*]], [1 x <8 x i16>] alignstack(8) [[S_PACKED_MEMBER_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[S_PACKED_MEMBER_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_PACKED_MEMBER_COERCE]], 0
 // CHECK-NEXT:    store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]]
@@ -201,10 +203,10 @@ void variadic_packed_member(double d0, double d1, double d2, double d3,
 // CHECK-NEXT:    call void (i32, ...) @init(i32 noundef 1, ptr noundef nonnull [[S_PACKED_MEMBER]]) #[[ATTR6]]
 // CHECK-NEXT:    [[DOTFCA_0_LOAD:%.*]] = load <8 x i16>, ptr [[S_PACKED_MEMBER]], align 16
 // CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x <8 x i16>] poison, <8 x i16> [[DOTFCA_0_LOAD]], 0
-// CHECK-NEXT:    call void @named_arg_packed_member(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double noundef 2.000000e+00, [1 x <8 x i16>] [[DOTFCA_0_INSERT]])
+// CHECK-NEXT:    call void @named_arg_packed_member(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double noundef 2.000000e+00, [1 x <8 x i16>] alignstack(8) [[DOTFCA_0_INSERT]])
 // CHECK-NEXT:    [[DOTFCA_0_LOAD3:%.*]] = load <8 x i16>, ptr [[S_PACKED_MEMBER]], align 16
 // CHECK-NEXT:    [[DOTFCA_0_INSERT4:%.*]] = insertvalue [1 x <8 x i16>] poison, <8 x i16> [[DOTFCA_0_LOAD3]], 0
-// CHECK-NEXT:    call void (double, double, double, double, double, double, double, double, double, ...) @variadic_packed_member(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, [1 x <8 x i16>] [[DOTFCA_0_INSERT4]])
+// CHECK-NEXT:    call void (double, double, double, double, double, double, double, double, double, ...) @variadic_packed_member(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, [1 x <8 x i16>] alignstack(8) [[DOTFCA_0_INSERT4]])
 // CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[S_PACKED_MEMBER]]) #[[ATTR6]]
 // CHECK-NEXT:    ret void
 void test_packed_member() {
@@ -218,7 +220,7 @@ void test_packed_member() {
 struct aligned_struct_8 gs_aligned_struct_8;
 
 // CHECK-LABEL: define dso_local void @named_arg_aligned_struct_8
-// CHECK-SAME: (double [[D0:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[D4:%.*]], double [[D5:%.*]], double [[D6:%.*]], double [[D7:%.*]], double noundef [[D8:%.*]], [1 x <8 x i16>] [[S_ALIGNED_STRUCT_8_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: (double [[D0:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[D4:%.*]], double [[D5:%.*]], double [[D6:%.*]], double [[D7:%.*]], double noundef [[D8:%.*]], [1 x <8 x i16>] alignstack(16) [[S_ALIGNED_STRUCT_8_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[S_ALIGNED_STRUCT_8_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_ALIGNED_STRUCT_8_COERCE]], 0
 // CHECK-NEXT:    store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]]
@@ -256,10 +258,10 @@ void variadic_aligned_struct_8(double d0, double d1, double d2, double d3,
 // CHECK-NEXT:    call void (i32, ...) @init(i32 noundef 1, ptr noundef nonnull [[S_ALIGNED_STRUCT_8]]) #[[ATTR6]]
 // CHECK-NEXT:    [[DOTFCA_0_LOAD:%.*]] = load <8 x i16>, ptr [[S_ALIGNED_STRUCT_8]], align 16
 // CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x <8 x i16>] poison, <8 x i16> [[DOTFCA_0_LOAD]], 0
-// CHECK-NEXT:    call void @named_arg_aligned_struct_8(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double noundef 2.000000e+00, [1 x <8 x i16>] [[DOTFCA_0_INSERT]])
+// CHECK-NEXT:    call void @named_arg_aligned_struct_8(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double noundef 2.000000e+00, [1 x <8 x i16>] alignstack(16) [[DOTFCA_0_INSERT]])
 // CHECK-NEXT:    [[DOTFCA_0_LOAD3:%.*]] = load <8 x i16>, ptr [[S_ALIGNED_STRUCT_8]], align 16
 // CHECK-NEXT:    [[DOTFCA_0_INSERT4:%.*]] = insertvalue [1 x <8 x i16>] poison, <8 x i16> [[DOTFCA_0_LOAD3]], 0
-// CHECK-NEXT:    call void (double, double, double, double, double, double, double, double, double, ...) @variadic_aligned_struct_8(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, [1 x <8 x i16>] [[DOTFCA_0_INSERT4]])
+// CHECK-NEXT:    call void (double, double, double, double, double, double, double, double, double, ...) @variadic_aligned_struct_8(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, [1 x <8 x i16>] alignstack(16) [[DOTFCA_0_INSERT4]])
 // CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[S_ALIGNED_STRUCT_8]]) #[[ATTR6]]
 // CHECK-NEXT:    ret void
 void test_aligned_struct_8() {
@@ -273,7 +275,7 @@ void test_aligned_struct_8() {
 struct aligned_member_8 gs_aligned_member_8;
 
 // CHECK-LABEL: define dso_local void @named_arg_aligned_member_8
-// CHECK-SAME: (double [[D0:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[D4:%.*]], double [[D5:%.*]], double [[D6:%.*]], double [[D7:%.*]], double noundef [[D8:%.*]], [1 x <8 x i16>] [[S_ALIGNED_MEMBER_8_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: (double [[D0:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[D4:%.*]], double [[D5:%.*]], double [[D6:%.*]], double [[D7:%.*]], double noundef [[D8:%.*]], [1 x <8 x i16>] alignstack(16) [[S_ALIGNED_MEMBER_8_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[S_ALIGNED_MEMBER_8_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_ALIGNED_MEMBER_8_COERCE]], 0
 // CHECK-NEXT:    store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]]
@@ -311,10 +313,10 @@ void variadic_aligned_member_8(double d0, double d1, double d2, double d3,
 // CHECK-NEXT:    call void (i32, ...) @init(i32 noundef 1, ptr noundef nonnull [[S_ALIGNED_MEMBER_8]]) #[[ATTR6]]
 // CHECK-NEXT:    [[DOTFCA_0_LOAD:%.*]] = load <8 x i16>, ptr [[S_ALIGNED_MEMBER_8]], align 16
 // CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x <8 x i16>] poison, <8 x i16> [[DOTFCA_0_LOAD]], 0
-// CHECK-NEXT:    call void @named_arg_aligned_member_8(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double noundef 2.000000e+00, [1 x <8 x i16>] [[DOTFCA_0_INSERT]])
+// CHECK-NEXT:    call void @named_arg_aligned_member_8(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double noundef 2.000000e+00, [1 x <8 x i16>] alignstack(16) [[DOTFCA_0_INSERT]])
 // CHECK-NEXT:    [[DOTFCA_0_LOAD3:%.*]] = load <8 x i16>, ptr [[S_ALIGNED_MEMBER_8]], align 16
 // CHECK-NEXT:    [[DOTFCA_0_INSERT4:%.*]] = insertvalue [1 x <8 x i16>] poison, <8 x i16> [[DOTFCA_0_LOAD3]], 0
-// CHECK-NEXT:    call void (double, double, double, double, double, double, double, double, double, ...) @variadic_aligned_member_8(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, [1 x <8 x i16>] [[DOTFCA_0_INSERT4]])
+// CHECK-NEXT:    call void (double, double, double, double, double, double, double, double, double, ...) @variadic_aligned_member_8(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, [1 x <8 x i16>] alignstack(16) [[DOTFCA_0_INSERT4]])
 // CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[S_ALIGNED_MEMBER_8]]) #[[ATTR6]]
 // CHECK-NEXT:    ret void
 void test_aligned_member_8() {
@@ -328,7 +330,7 @@ void test_aligned_member_8() {
 struct pragma_packed_struct_8 gs_pragma_packed_struct_8;
 
 // CHECK-LABEL: define dso_local void @named_arg_pragma_packed_struct_8
-// CHECK-SAME: (double [[D0:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[D4:%.*]], double [[D5:%.*]], double [[D6:%.*]], double [[D7:%.*]], double noundef [[D8:%.*]], [1 x <8 x i16>] [[S_PRAGMA_PACKED_STRUCT_8_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: (double [[D0:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[D4:%.*]], double [[D5:%.*]], double [[D6:%.*]], double [[D7:%.*]], double noundef [[D8:%.*]], [1 x <8 x i16>] alignstack(8) [[S_PRAGMA_PACKED_STRUCT_8_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[S_PRAGMA_PACKED_STRUCT_8_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_PRAGMA_PACKED_STRUCT_8_COERCE]], 0
 // CHECK-NEXT:    store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]]
@@ -366,10 +368,10 @@ void variadic_pragma_packed_struct_8(double d0, double d1, double d2, double d3,
 // CHECK-NEXT:    call void (i32, ...) @init(i32 noundef 1, ptr noundef nonnull [[S_PRAGMA_PACKED_STRUCT_8]]) #[[ATTR6]]
 // CHECK-NEXT:    [[DOTFCA_0_LOAD:%.*]] = load <8 x i16>, ptr [[S_PRAGMA_PACKED_STRUCT_8]], align 16
 // CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x <8 x i16>] poison, <8 x i16> [[DOTFCA_0_LOAD]], 0
-// CHECK-NEXT:    call void @named_arg_pragma_packed_struct_8(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double noundef 2.000000e+00, [1 x <8 x i16>] [[DOTFCA_0_INSERT]])
+// CHECK-NEXT:    call void @named_arg_pragma_packed_struct_8(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double noundef 2.000000e+00, [1 x <8 x i16>] alignstack(8) [[DOTFCA_0_INSERT]])
 // CHECK-NEXT:    [[DOTFCA_0_LOAD3:%.*]] = load <8 x i16>, ptr [[S_PRAGMA_PACKED_STRUCT_8]], align 16
 // CHECK-NEXT:    [[DOTFCA_0_INSERT4:%.*]] = insertvalue [1 x <8 x i16>] poison, <8 x i16> [[DOTFCA_0_LOAD3]], 0
-// CHECK-NEXT:    call void (double, double, double, double, double, double, double, double, double, ...) @variadic_pragma_packed_struct_8(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, [1 x <8 x i16>] [[DOTFCA_0_INSERT4]])
+// CHECK-NEXT:    call void (double, double, double, double, double, double, double, double, double, ...) @variadic_pragma_packed_struct_8(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, [1 x <8 x i16>] alignstack(8) [[DOTFCA_0_INSERT4]])
 // CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[S_PRAGMA_PACKED_STRUCT_8]]) #[[ATTR6]]
 // CHECK-NEXT:    ret void
 void test_pragma_packed_struct_8() {
@@ -383,7 +385,7 @@ void test_pragma_packed_struct_8() {
 struct pragma_packed_struct_4 gs_pragma_packed_struct_4;
 
 // CHECK-LABEL: define dso_local void @named_arg_pragma_packed_struct_4
-// CHECK-SAME: (double [[D0:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[D4:%.*]], double [[D5:%.*]], double [[D6:%.*]], double [[D7:%.*]], double noundef [[D8:%.*]], [1 x <8 x i16>] [[S_PRAGMA_PACKED_STRUCT_4_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-SAME: (double [[D0:%.*]], double [[D1:%.*]], double [[D2:%.*]], double [[D3:%.*]], double [[D4:%.*]], double [[D5:%.*]], double [[D6:%.*]], double [[D7:%.*]], double noundef [[D8:%.*]], [1 x <8 x i16>] alignstack(8) [[S_PRAGMA_PACKED_STRUCT_4_COERCE:%.*]]) local_unnamed_addr #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[S_PRAGMA_PACKED_STRUCT_4_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [1 x <8 x i16>] [[S_PRAGMA_PACKED_STRUCT_4_COERCE]], 0
 // CHECK-NEXT:    store double [[D8]], ptr @gd, align 8, !tbaa [[TBAA2]]
@@ -421,10 +423,10 @@ void variadic_pragma_packed_struct_4(double d0, double d1, double d2, double d3,
 // CHECK-NEXT:    call void (i32, ...) @init(i32 noundef 1, ptr noundef nonnull [[S_PRAGMA_PACKED_STRUCT_4]]) #[[ATTR6]]
 // CHECK-NEXT:    [[DOTFCA_0_LOAD:%.*]] = load <8 x i16>, ptr [[S_PRAGMA_PACKED_STRUCT_4]], align 16
 // CHECK-NEXT:    [[DOTFCA_0_INSERT:%.*]] = insertvalue [1 x <8 x i16>] poison, <8 x i16> [[DOTFCA_0_LOAD]], 0
-// CHECK-NEXT:    call void @named_arg_pragma_packed_struct_4(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double noundef 2.000000e+00, [1 x <8 x i16>] [[DOTFCA_0_INSERT]])
+// CHECK-NEXT:    call void @named_arg_pragma_packed_struct_4(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double noundef 2.000000e+00, [1 x <8 x i16>] alignstack(8) [[DOTFCA_0_INSERT]])
 // CHECK-NEXT:    [[DOTFCA_0_LOAD3:%.*]] = load <8 x i16>, ptr [[S_PRAGMA_PACKED_STRUCT_4]], align 16
 // CHECK-NEXT:    [[DOTFCA_0_INSERT4:%.*]] = insertvalue [1 x <8 x i16>] poison, <8 x i16> [[DOTFCA_0_LOAD3]], 0
-// CHECK-NEXT:    call void (double, double, double, double, double, double, double, double, double, ...) @variadic_pragma_packed_struct_4(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, [1 x <8 x i16>] [[DOTFCA_0_INSERT4]])
+// CHECK-NEXT:    call void (double, double, double, double, double, double, double, double, double, ...) @variadic_pragma_packed_struct_4(double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, double poison, [1 x <8 x i16>] alignstack(8) [[DOTFCA_0_INSERT4]])
 // CHECK-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[S_PRAGMA_PACKED_STRUCT_4]]) #[[ATTR6]]
 // CHECK-NEXT:    ret void
 void test_pragma_packed_struct_4() {

diff  --git a/clang/test/CodeGen/aarch64-args-hfa.c b/clang/test/CodeGen/aarch64-args-hfa.c
index fbfedc7db7913f..75dfd880516645 100644
--- a/clang/test/CodeGen/aarch64-args-hfa.c
+++ b/clang/test/CodeGen/aarch64-args-hfa.c
@@ -6,13 +6,15 @@ typedef struct {
   float v[2];
 } S0;
 
-// CHECK: define{{.*}} float @f0([2 x float] %h.coerce)
+// CHECK-AAPCS:  define{{.*}} float @f0([2 x float] alignstack(8) %h.coerce)
+// CHECK-DARWIN: define{{.*}} float @f0([2 x float] %h.coerce)
 float f0(S0 h) {
   return h.v[0];
 }
 
 // CHECK: define{{.*}} float @f0_call()
-// CHECK: %call = call float @f0([2 x float] %0)
+// CHECK-AAPCS: %call = call float @f0([2 x float] alignstack(8) %0)
+// CHECK-DARWIN: %call = call float @f0([2 x float] %0)
 float f0_call(void) {
   S0 h = {1.0f, 2.0f};
   return f0(h);
@@ -20,14 +22,15 @@ float f0_call(void) {
 typedef struct {
   double v[2];
 } S1;
-
-// CHECK: define{{.*}} double @f1([2 x double] %h.coerce)
+// CHECK-AAPCS: define{{.*}} double @f1([2 x double] alignstack(8) %h.coerce)
+// CHECK-DARWIN: define{{.*}} double @f1([2 x double] %h.coerce)
 double f1(S1 h) {
   return h.v[0];
 }
 
 // CHECK: define{{.*}} double @f1_call()
-// CHECK: %call = call double @f1([2 x double] %0
+// CHECK-AAPCS: %call = call double @f1([2 x double] alignstack(8) %0
+// CHECK-DARWIN: %call = call double @f1([2 x double] %0
 double f1_call(void) {
   S1 h = {1.0, 2.0};
   return f1(h);

diff  --git a/clang/test/CodeGen/aarch64-neon-ldst-one.c b/clang/test/CodeGen/aarch64-neon-ldst-one.c
index ac741c4ebbb178..6cfdfe0e53e702 100644
--- a/clang/test/CodeGen/aarch64-neon-ldst-one.c
+++ b/clang/test/CodeGen/aarch64-neon-ldst-one.c
@@ -748,7 +748,7 @@ poly64x1_t test_vld1_lane_p64(poly64_t  *a, poly64x1_t b) {
   return vld1_lane_p64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int8x16x2_t @test_vld2q_lane_s8(ptr noundef %ptr, [2 x <16 x i8>] %src.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int8x16x2_t @test_vld2q_lane_s8(ptr noundef %ptr, [2 x <16 x i8>] alignstack(16) %src.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK:   [[SRC:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
@@ -771,7 +771,7 @@ int8x16x2_t test_vld2q_lane_s8(int8_t const * ptr, int8x16x2_t src) {
   return vld2q_lane_s8(ptr, src, 15);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint8x16x2_t @test_vld2q_lane_u8(ptr noundef %ptr, [2 x <16 x i8>] %src.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint8x16x2_t @test_vld2q_lane_u8(ptr noundef %ptr, [2 x <16 x i8>] alignstack(16) %src.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK:   [[SRC:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
@@ -794,7 +794,7 @@ uint8x16x2_t test_vld2q_lane_u8(uint8_t const * ptr, uint8x16x2_t src) {
   return vld2q_lane_u8(ptr, src, 15);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.poly8x16x2_t @test_vld2q_lane_p8(ptr noundef %ptr, [2 x <16 x i8>] %src.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.poly8x16x2_t @test_vld2q_lane_p8(ptr noundef %ptr, [2 x <16 x i8>] alignstack(16) %src.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK:   [[SRC:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
@@ -817,7 +817,7 @@ poly8x16x2_t test_vld2q_lane_p8(poly8_t const * ptr, poly8x16x2_t src) {
   return vld2q_lane_p8(ptr, src, 15);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int8x16x3_t @test_vld3q_lane_s8(ptr noundef %ptr, [3 x <16 x i8>] %src.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int8x16x3_t @test_vld3q_lane_s8(ptr noundef %ptr, [3 x <16 x i8>] alignstack(16) %src.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK:   [[SRC:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
@@ -843,7 +843,7 @@ int8x16x3_t test_vld3q_lane_s8(int8_t const * ptr, int8x16x3_t src) {
   return vld3q_lane_s8(ptr, src, 15);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint8x16x3_t @test_vld3q_lane_u8(ptr noundef %ptr, [3 x <16 x i8>] %src.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint8x16x3_t @test_vld3q_lane_u8(ptr noundef %ptr, [3 x <16 x i8>] alignstack(16) %src.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
 // CHECK:   [[SRC:%.*]] = alloca %struct.uint8x16x3_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
@@ -869,7 +869,7 @@ uint8x16x3_t test_vld3q_lane_u8(uint8_t const * ptr, uint8x16x3_t src) {
   return vld3q_lane_u8(ptr, src, 15);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint16x8x2_t @test_vld2q_lane_u16(ptr noundef %a, [2 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint16x8x2_t @test_vld2q_lane_u16(ptr noundef %a, [2 x <8 x i16>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
@@ -896,7 +896,7 @@ uint16x8x2_t test_vld2q_lane_u16(uint16_t  *a, uint16x8x2_t b) {
   return vld2q_lane_u16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint32x4x2_t @test_vld2q_lane_u32(ptr noundef %a, [2 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint32x4x2_t @test_vld2q_lane_u32(ptr noundef %a, [2 x <4 x i32>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
@@ -923,7 +923,7 @@ uint32x4x2_t test_vld2q_lane_u32(uint32_t  *a, uint32x4x2_t b) {
   return vld2q_lane_u32(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint64x2x2_t @test_vld2q_lane_u64(ptr noundef %a, [2 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint64x2x2_t @test_vld2q_lane_u64(ptr noundef %a, [2 x <2 x i64>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align 16
@@ -950,7 +950,7 @@ uint64x2x2_t test_vld2q_lane_u64(uint64_t  *a, uint64x2x2_t b) {
   return vld2q_lane_u64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int16x8x2_t @test_vld2q_lane_s16(ptr noundef %a, [2 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int16x8x2_t @test_vld2q_lane_s16(ptr noundef %a, [2 x <8 x i16>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
@@ -977,7 +977,7 @@ int16x8x2_t test_vld2q_lane_s16(int16_t  *a, int16x8x2_t b) {
   return vld2q_lane_s16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int32x4x2_t @test_vld2q_lane_s32(ptr noundef %a, [2 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int32x4x2_t @test_vld2q_lane_s32(ptr noundef %a, [2 x <4 x i32>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
@@ -1004,7 +1004,7 @@ int32x4x2_t test_vld2q_lane_s32(int32_t  *a, int32x4x2_t b) {
   return vld2q_lane_s32(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int64x2x2_t @test_vld2q_lane_s64(ptr noundef %a, [2 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int64x2x2_t @test_vld2q_lane_s64(ptr noundef %a, [2 x <2 x i64>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x2_t, align 16
@@ -1031,7 +1031,7 @@ int64x2x2_t test_vld2q_lane_s64(int64_t  *a, int64x2x2_t b) {
   return vld2q_lane_s64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.float16x8x2_t @test_vld2q_lane_f16(ptr noundef %a, [2 x <8 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.float16x8x2_t @test_vld2q_lane_f16(ptr noundef %a, [2 x <8 x half>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
@@ -1058,7 +1058,7 @@ float16x8x2_t test_vld2q_lane_f16(float16_t  *a, float16x8x2_t b) {
   return vld2q_lane_f16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.float32x4x2_t @test_vld2q_lane_f32(ptr noundef %a, [2 x <4 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.float32x4x2_t @test_vld2q_lane_f32(ptr noundef %a, [2 x <4 x float>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
@@ -1085,7 +1085,7 @@ float32x4x2_t test_vld2q_lane_f32(float32_t  *a, float32x4x2_t b) {
   return vld2q_lane_f32(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.float64x2x2_t @test_vld2q_lane_f64(ptr noundef %a, [2 x <2 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.float64x2x2_t @test_vld2q_lane_f64(ptr noundef %a, [2 x <2 x double>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x2_t, align 16
@@ -1112,7 +1112,7 @@ float64x2x2_t test_vld2q_lane_f64(float64_t  *a, float64x2x2_t b) {
   return vld2q_lane_f64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.poly16x8x2_t @test_vld2q_lane_p16(ptr noundef %a, [2 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.poly16x8x2_t @test_vld2q_lane_p16(ptr noundef %a, [2 x <8 x i16>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
@@ -1139,7 +1139,7 @@ poly16x8x2_t test_vld2q_lane_p16(poly16_t  *a, poly16x8x2_t b) {
   return vld2q_lane_p16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.poly64x2x2_t @test_vld2q_lane_p64(ptr noundef %a, [2 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.poly64x2x2_t @test_vld2q_lane_p64(ptr noundef %a, [2 x <2 x i64>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
@@ -1166,7 +1166,7 @@ poly64x2x2_t test_vld2q_lane_p64(poly64_t  *a, poly64x2x2_t b) {
   return vld2q_lane_p64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint8x8x2_t @test_vld2_lane_u8(ptr noundef %a, [2 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint8x8x2_t @test_vld2_lane_u8(ptr noundef %a, [2 x <8 x i8>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
@@ -1189,7 +1189,7 @@ uint8x8x2_t test_vld2_lane_u8(uint8_t  *a, uint8x8x2_t b) {
   return vld2_lane_u8(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint16x4x2_t @test_vld2_lane_u16(ptr noundef %a, [2 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint16x4x2_t @test_vld2_lane_u16(ptr noundef %a, [2 x <4 x i16>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
@@ -1216,7 +1216,7 @@ uint16x4x2_t test_vld2_lane_u16(uint16_t  *a, uint16x4x2_t b) {
   return vld2_lane_u16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint32x2x2_t @test_vld2_lane_u32(ptr noundef %a, [2 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint32x2x2_t @test_vld2_lane_u32(ptr noundef %a, [2 x <2 x i32>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
@@ -1243,7 +1243,7 @@ uint32x2x2_t test_vld2_lane_u32(uint32_t  *a, uint32x2x2_t b) {
   return vld2_lane_u32(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint64x1x2_t @test_vld2_lane_u64(ptr noundef %a, [2 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint64x1x2_t @test_vld2_lane_u64(ptr noundef %a, [2 x <1 x i64>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
@@ -1270,7 +1270,7 @@ uint64x1x2_t test_vld2_lane_u64(uint64_t  *a, uint64x1x2_t b) {
   return vld2_lane_u64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int8x8x2_t @test_vld2_lane_s8(ptr noundef %a, [2 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int8x8x2_t @test_vld2_lane_s8(ptr noundef %a, [2 x <8 x i8>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
@@ -1293,7 +1293,7 @@ int8x8x2_t test_vld2_lane_s8(int8_t  *a, int8x8x2_t b) {
   return vld2_lane_s8(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int16x4x2_t @test_vld2_lane_s16(ptr noundef %a, [2 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int16x4x2_t @test_vld2_lane_s16(ptr noundef %a, [2 x <4 x i16>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
@@ -1320,7 +1320,7 @@ int16x4x2_t test_vld2_lane_s16(int16_t  *a, int16x4x2_t b) {
   return vld2_lane_s16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int32x2x2_t @test_vld2_lane_s32(ptr noundef %a, [2 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int32x2x2_t @test_vld2_lane_s32(ptr noundef %a, [2 x <2 x i32>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
@@ -1347,7 +1347,7 @@ int32x2x2_t test_vld2_lane_s32(int32_t  *a, int32x2x2_t b) {
   return vld2_lane_s32(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int64x1x2_t @test_vld2_lane_s64(ptr noundef %a, [2 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int64x1x2_t @test_vld2_lane_s64(ptr noundef %a, [2 x <1 x i64>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
@@ -1374,7 +1374,7 @@ int64x1x2_t test_vld2_lane_s64(int64_t  *a, int64x1x2_t b) {
   return vld2_lane_s64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.float16x4x2_t @test_vld2_lane_f16(ptr noundef %a, [2 x <4 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.float16x4x2_t @test_vld2_lane_f16(ptr noundef %a, [2 x <4 x half>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
@@ -1401,7 +1401,7 @@ float16x4x2_t test_vld2_lane_f16(float16_t  *a, float16x4x2_t b) {
   return vld2_lane_f16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.float32x2x2_t @test_vld2_lane_f32(ptr noundef %a, [2 x <2 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.float32x2x2_t @test_vld2_lane_f32(ptr noundef %a, [2 x <2 x float>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
@@ -1428,7 +1428,7 @@ float32x2x2_t test_vld2_lane_f32(float32_t  *a, float32x2x2_t b) {
   return vld2_lane_f32(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.float64x1x2_t @test_vld2_lane_f64(ptr noundef %a, [2 x <1 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.float64x1x2_t @test_vld2_lane_f64(ptr noundef %a, [2 x <1 x double>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x2_t, align 8
@@ -1455,7 +1455,7 @@ float64x1x2_t test_vld2_lane_f64(float64_t  *a, float64x1x2_t b) {
   return vld2_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.poly8x8x2_t @test_vld2_lane_p8(ptr noundef %a, [2 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.poly8x8x2_t @test_vld2_lane_p8(ptr noundef %a, [2 x <8 x i8>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
@@ -1478,7 +1478,7 @@ poly8x8x2_t test_vld2_lane_p8(poly8_t  *a, poly8x8x2_t b) {
   return vld2_lane_p8(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.poly16x4x2_t @test_vld2_lane_p16(ptr noundef %a, [2 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.poly16x4x2_t @test_vld2_lane_p16(ptr noundef %a, [2 x <4 x i16>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
@@ -1505,7 +1505,7 @@ poly16x4x2_t test_vld2_lane_p16(poly16_t  *a, poly16x4x2_t b) {
   return vld2_lane_p16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.poly64x1x2_t @test_vld2_lane_p64(ptr noundef %a, [2 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.poly64x1x2_t @test_vld2_lane_p64(ptr noundef %a, [2 x <1 x i64>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
@@ -1532,7 +1532,7 @@ poly64x1x2_t test_vld2_lane_p64(poly64_t  *a, poly64x1x2_t b) {
   return vld2_lane_p64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint16x8x3_t @test_vld3q_lane_u16(ptr noundef %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint16x8x3_t @test_vld3q_lane_u16(ptr noundef %a, [3 x <8 x i16>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
@@ -1564,7 +1564,7 @@ uint16x8x3_t test_vld3q_lane_u16(uint16_t  *a, uint16x8x3_t b) {
   return vld3q_lane_u16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint32x4x3_t @test_vld3q_lane_u32(ptr noundef %a, [3 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint32x4x3_t @test_vld3q_lane_u32(ptr noundef %a, [3 x <4 x i32>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
@@ -1596,7 +1596,7 @@ uint32x4x3_t test_vld3q_lane_u32(uint32_t  *a, uint32x4x3_t b) {
   return vld3q_lane_u32(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint64x2x3_t @test_vld3q_lane_u64(ptr noundef %a, [3 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint64x2x3_t @test_vld3q_lane_u64(ptr noundef %a, [3 x <2 x i64>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align 16
@@ -1628,7 +1628,7 @@ uint64x2x3_t test_vld3q_lane_u64(uint64_t  *a, uint64x2x3_t b) {
   return vld3q_lane_u64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int16x8x3_t @test_vld3q_lane_s16(ptr noundef %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int16x8x3_t @test_vld3q_lane_s16(ptr noundef %a, [3 x <8 x i16>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
@@ -1660,7 +1660,7 @@ int16x8x3_t test_vld3q_lane_s16(int16_t  *a, int16x8x3_t b) {
   return vld3q_lane_s16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int32x4x3_t @test_vld3q_lane_s32(ptr noundef %a, [3 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int32x4x3_t @test_vld3q_lane_s32(ptr noundef %a, [3 x <4 x i32>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
@@ -1692,7 +1692,7 @@ int32x4x3_t test_vld3q_lane_s32(int32_t  *a, int32x4x3_t b) {
   return vld3q_lane_s32(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int64x2x3_t @test_vld3q_lane_s64(ptr noundef %a, [3 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int64x2x3_t @test_vld3q_lane_s64(ptr noundef %a, [3 x <2 x i64>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x3_t, align 16
@@ -1724,7 +1724,7 @@ int64x2x3_t test_vld3q_lane_s64(int64_t  *a, int64x2x3_t b) {
   return vld3q_lane_s64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.float16x8x3_t @test_vld3q_lane_f16(ptr noundef %a, [3 x <8 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.float16x8x3_t @test_vld3q_lane_f16(ptr noundef %a, [3 x <8 x half>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
@@ -1756,7 +1756,7 @@ float16x8x3_t test_vld3q_lane_f16(float16_t  *a, float16x8x3_t b) {
   return vld3q_lane_f16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.float32x4x3_t @test_vld3q_lane_f32(ptr noundef %a, [3 x <4 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.float32x4x3_t @test_vld3q_lane_f32(ptr noundef %a, [3 x <4 x float>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
@@ -1788,7 +1788,7 @@ float32x4x3_t test_vld3q_lane_f32(float32_t  *a, float32x4x3_t b) {
   return vld3q_lane_f32(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.float64x2x3_t @test_vld3q_lane_f64(ptr noundef %a, [3 x <2 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.float64x2x3_t @test_vld3q_lane_f64(ptr noundef %a, [3 x <2 x double>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x3_t, align 16
@@ -1820,7 +1820,7 @@ float64x2x3_t test_vld3q_lane_f64(float64_t  *a, float64x2x3_t b) {
   return vld3q_lane_f64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.poly8x16x3_t @test_vld3q_lane_p8(ptr noundef %a, [3 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.poly8x16x3_t @test_vld3q_lane_p8(ptr noundef %a, [3 x <16 x i8>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
@@ -1846,7 +1846,7 @@ poly8x16x3_t test_vld3q_lane_p8(poly8_t  *a, poly8x16x3_t b) {
   return vld3q_lane_p8(a, b, 15);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.poly16x8x3_t @test_vld3q_lane_p16(ptr noundef %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.poly16x8x3_t @test_vld3q_lane_p16(ptr noundef %a, [3 x <8 x i16>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
@@ -1878,7 +1878,7 @@ poly16x8x3_t test_vld3q_lane_p16(poly16_t  *a, poly16x8x3_t b) {
   return vld3q_lane_p16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.poly64x2x3_t @test_vld3q_lane_p64(ptr noundef %a, [3 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.poly64x2x3_t @test_vld3q_lane_p64(ptr noundef %a, [3 x <2 x i64>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
@@ -1910,7 +1910,7 @@ poly64x2x3_t test_vld3q_lane_p64(poly64_t  *a, poly64x2x3_t b) {
   return vld3q_lane_p64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint8x8x3_t @test_vld3_lane_u8(ptr noundef %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint8x8x3_t @test_vld3_lane_u8(ptr noundef %a, [3 x <8 x i8>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
@@ -1936,7 +1936,7 @@ uint8x8x3_t test_vld3_lane_u8(uint8_t  *a, uint8x8x3_t b) {
   return vld3_lane_u8(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint16x4x3_t @test_vld3_lane_u16(ptr noundef %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint16x4x3_t @test_vld3_lane_u16(ptr noundef %a,  [3 x <4 x i16>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
@@ -1968,7 +1968,7 @@ uint16x4x3_t test_vld3_lane_u16(uint16_t  *a, uint16x4x3_t b) {
   return vld3_lane_u16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint32x2x3_t @test_vld3_lane_u32(ptr noundef %a, [3 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint32x2x3_t @test_vld3_lane_u32(ptr noundef %a, [3 x <2 x i32>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
@@ -2000,7 +2000,7 @@ uint32x2x3_t test_vld3_lane_u32(uint32_t  *a, uint32x2x3_t b) {
   return vld3_lane_u32(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint64x1x3_t @test_vld3_lane_u64(ptr noundef %a, [3 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint64x1x3_t @test_vld3_lane_u64(ptr noundef %a, [3 x <1 x i64>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
@@ -2032,7 +2032,7 @@ uint64x1x3_t test_vld3_lane_u64(uint64_t  *a, uint64x1x3_t b) {
   return vld3_lane_u64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int8x8x3_t @test_vld3_lane_s8(ptr noundef %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int8x8x3_t @test_vld3_lane_s8(ptr noundef %a, [3 x <8 x i8>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
@@ -2058,7 +2058,7 @@ int8x8x3_t test_vld3_lane_s8(int8_t  *a, int8x8x3_t b) {
   return vld3_lane_s8(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int16x4x3_t @test_vld3_lane_s16(ptr noundef %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int16x4x3_t @test_vld3_lane_s16(ptr noundef %a, [3 x <4 x i16>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
@@ -2090,7 +2090,7 @@ int16x4x3_t test_vld3_lane_s16(int16_t  *a, int16x4x3_t b) {
   return vld3_lane_s16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int32x2x3_t @test_vld3_lane_s32(ptr noundef %a, [3 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int32x2x3_t @test_vld3_lane_s32(ptr noundef %a, [3 x <2 x i32>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
@@ -2122,7 +2122,7 @@ int32x2x3_t test_vld3_lane_s32(int32_t  *a, int32x2x3_t b) {
   return vld3_lane_s32(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int64x1x3_t @test_vld3_lane_s64(ptr noundef %a, [3 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int64x1x3_t @test_vld3_lane_s64(ptr noundef %a, [3 x <1 x i64>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
@@ -2154,7 +2154,7 @@ int64x1x3_t test_vld3_lane_s64(int64_t  *a, int64x1x3_t b) {
   return vld3_lane_s64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.float16x4x3_t @test_vld3_lane_f16(ptr noundef %a, [3 x <4 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.float16x4x3_t @test_vld3_lane_f16(ptr noundef %a, [3 x <4 x half>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
@@ -2186,7 +2186,7 @@ float16x4x3_t test_vld3_lane_f16(float16_t  *a, float16x4x3_t b) {
   return vld3_lane_f16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.float32x2x3_t @test_vld3_lane_f32(ptr noundef %a, [3 x <2 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.float32x2x3_t @test_vld3_lane_f32(ptr noundef %a, [3 x <2 x float>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
@@ -2218,7 +2218,7 @@ float32x2x3_t test_vld3_lane_f32(float32_t  *a, float32x2x3_t b) {
   return vld3_lane_f32(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.float64x1x3_t @test_vld3_lane_f64(ptr noundef %a, [3 x <1 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.float64x1x3_t @test_vld3_lane_f64(ptr noundef %a, [3 x <1 x double>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x3_t, align 8
@@ -2250,7 +2250,7 @@ float64x1x3_t test_vld3_lane_f64(float64_t  *a, float64x1x3_t b) {
   return vld3_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.poly8x8x3_t @test_vld3_lane_p8(ptr noundef %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.poly8x8x3_t @test_vld3_lane_p8(ptr noundef %a, [3 x <8 x i8>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
@@ -2276,7 +2276,7 @@ poly8x8x3_t test_vld3_lane_p8(poly8_t  *a, poly8x8x3_t b) {
   return vld3_lane_p8(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.poly16x4x3_t @test_vld3_lane_p16(ptr noundef %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.poly16x4x3_t @test_vld3_lane_p16(ptr noundef %a, [3 x <4 x i16>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
@@ -2308,7 +2308,7 @@ poly16x4x3_t test_vld3_lane_p16(poly16_t  *a, poly16x4x3_t b) {
   return vld3_lane_p16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.poly64x1x3_t @test_vld3_lane_p64(ptr noundef %a, [3 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.poly64x1x3_t @test_vld3_lane_p64(ptr noundef %a, [3 x <1 x i64>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
@@ -2340,7 +2340,7 @@ poly64x1x3_t test_vld3_lane_p64(poly64_t  *a, poly64x1x3_t b) {
   return vld3_lane_p64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint8x16x4_t @test_vld4q_lane_u8(ptr noundef %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint8x16x4_t @test_vld4q_lane_u8(ptr noundef %a, [4 x <16 x i8>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
@@ -2369,7 +2369,7 @@ uint8x16x4_t test_vld4q_lane_u8(uint8_t  *a, uint8x16x4_t b) {
   return vld4q_lane_u8(a, b, 15);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint16x8x4_t @test_vld4q_lane_u16(ptr noundef %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint16x8x4_t @test_vld4q_lane_u16(ptr noundef %a, [4 x <8 x i16>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
@@ -2406,7 +2406,7 @@ uint16x8x4_t test_vld4q_lane_u16(uint16_t  *a, uint16x8x4_t b) {
   return vld4q_lane_u16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint32x4x4_t @test_vld4q_lane_u32(ptr noundef %a, [4 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint32x4x4_t @test_vld4q_lane_u32(ptr noundef %a, [4 x <4 x i32>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
@@ -2443,7 +2443,7 @@ uint32x4x4_t test_vld4q_lane_u32(uint32_t  *a, uint32x4x4_t b) {
   return vld4q_lane_u32(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint64x2x4_t @test_vld4q_lane_u64(ptr noundef %a, [4 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint64x2x4_t @test_vld4q_lane_u64(ptr noundef %a, [4 x <2 x i64>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align 16
@@ -2480,7 +2480,7 @@ uint64x2x4_t test_vld4q_lane_u64(uint64_t  *a, uint64x2x4_t b) {
   return vld4q_lane_u64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int8x16x4_t @test_vld4q_lane_s8(ptr noundef %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int8x16x4_t @test_vld4q_lane_s8(ptr noundef %a, [4 x <16 x i8>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
@@ -2509,7 +2509,7 @@ int8x16x4_t test_vld4q_lane_s8(int8_t  *a, int8x16x4_t b) {
   return vld4q_lane_s8(a, b, 15);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int16x8x4_t @test_vld4q_lane_s16(ptr noundef %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int16x8x4_t @test_vld4q_lane_s16(ptr noundef %a, [4 x <8 x i16>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
@@ -2546,7 +2546,7 @@ int16x8x4_t test_vld4q_lane_s16(int16_t  *a, int16x8x4_t b) {
   return vld4q_lane_s16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int32x4x4_t @test_vld4q_lane_s32(ptr noundef %a, [4 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int32x4x4_t @test_vld4q_lane_s32(ptr noundef %a, [4 x <4 x i32>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
@@ -2583,7 +2583,7 @@ int32x4x4_t test_vld4q_lane_s32(int32_t  *a, int32x4x4_t b) {
   return vld4q_lane_s32(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int64x2x4_t @test_vld4q_lane_s64(ptr noundef %a, [4 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int64x2x4_t @test_vld4q_lane_s64(ptr noundef %a, [4 x <2 x i64>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.int64x2x4_t, align 16
@@ -2620,7 +2620,7 @@ int64x2x4_t test_vld4q_lane_s64(int64_t  *a, int64x2x4_t b) {
   return vld4q_lane_s64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.float16x8x4_t @test_vld4q_lane_f16(ptr noundef %a, [4 x <8 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.float16x8x4_t @test_vld4q_lane_f16(ptr noundef %a, [4 x <8 x half>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
@@ -2657,7 +2657,7 @@ float16x8x4_t test_vld4q_lane_f16(float16_t  *a, float16x8x4_t b) {
   return vld4q_lane_f16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.float32x4x4_t @test_vld4q_lane_f32(ptr noundef %a, [4 x <4 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.float32x4x4_t @test_vld4q_lane_f32(ptr noundef %a, [4 x <4 x float>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
@@ -2694,7 +2694,7 @@ float32x4x4_t test_vld4q_lane_f32(float32_t  *a, float32x4x4_t b) {
   return vld4q_lane_f32(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.float64x2x4_t @test_vld4q_lane_f64(ptr noundef %a, [4 x <2 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.float64x2x4_t @test_vld4q_lane_f64(ptr noundef %a, [4 x <2 x double>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x2x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x2x4_t, align 16
@@ -2731,7 +2731,7 @@ float64x2x4_t test_vld4q_lane_f64(float64_t  *a, float64x2x4_t b) {
   return vld4q_lane_f64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.poly8x16x4_t @test_vld4q_lane_p8(ptr noundef %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.poly8x16x4_t @test_vld4q_lane_p8(ptr noundef %a, [4 x <16 x i8>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
@@ -2760,7 +2760,7 @@ poly8x16x4_t test_vld4q_lane_p8(poly8_t  *a, poly8x16x4_t b) {
   return vld4q_lane_p8(a, b, 15);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.poly16x8x4_t @test_vld4q_lane_p16(ptr noundef %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.poly16x8x4_t @test_vld4q_lane_p16(ptr noundef %a, [4 x <8 x i16>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
@@ -2797,7 +2797,7 @@ poly16x8x4_t test_vld4q_lane_p16(poly16_t  *a, poly16x8x4_t b) {
   return vld4q_lane_p16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.poly64x2x4_t @test_vld4q_lane_p64(ptr noundef %a, [4 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.poly64x2x4_t @test_vld4q_lane_p64(ptr noundef %a, [4 x <2 x i64>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
@@ -2834,7 +2834,7 @@ poly64x2x4_t test_vld4q_lane_p64(poly64_t  *a, poly64x2x4_t b) {
   return vld4q_lane_p64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint8x8x4_t @test_vld4_lane_u8(ptr noundef %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint8x8x4_t @test_vld4_lane_u8(ptr noundef %a, [4 x <8 x i8>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
@@ -2863,7 +2863,7 @@ uint8x8x4_t test_vld4_lane_u8(uint8_t  *a, uint8x8x4_t b) {
   return vld4_lane_u8(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint16x4x4_t @test_vld4_lane_u16(ptr noundef %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint16x4x4_t @test_vld4_lane_u16(ptr noundef %a, [4 x <4 x i16>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
@@ -2900,7 +2900,7 @@ uint16x4x4_t test_vld4_lane_u16(uint16_t  *a, uint16x4x4_t b) {
   return vld4_lane_u16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint32x2x4_t @test_vld4_lane_u32(ptr noundef %a, [4 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint32x2x4_t @test_vld4_lane_u32(ptr noundef %a, [4 x <2 x i32>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
@@ -2937,7 +2937,7 @@ uint32x2x4_t test_vld4_lane_u32(uint32_t  *a, uint32x2x4_t b) {
   return vld4_lane_u32(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.uint64x1x4_t @test_vld4_lane_u64(ptr noundef %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.uint64x1x4_t @test_vld4_lane_u64(ptr noundef %a, [4 x <1 x i64>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
@@ -2974,7 +2974,7 @@ uint64x1x4_t test_vld4_lane_u64(uint64_t  *a, uint64x1x4_t b) {
   return vld4_lane_u64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int8x8x4_t @test_vld4_lane_s8(ptr noundef %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int8x8x4_t @test_vld4_lane_s8(ptr noundef %a, [4 x <8 x i8>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
@@ -3003,7 +3003,7 @@ int8x8x4_t test_vld4_lane_s8(int8_t  *a, int8x8x4_t b) {
   return vld4_lane_s8(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int16x4x4_t @test_vld4_lane_s16(ptr noundef %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int16x4x4_t @test_vld4_lane_s16(ptr noundef %a, [4 x <4 x i16>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
@@ -3040,7 +3040,7 @@ int16x4x4_t test_vld4_lane_s16(int16_t  *a, int16x4x4_t b) {
   return vld4_lane_s16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int32x2x4_t @test_vld4_lane_s32(ptr noundef %a, [4 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int32x2x4_t @test_vld4_lane_s32(ptr noundef %a, [4 x <2 x i32>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
@@ -3077,7 +3077,7 @@ int32x2x4_t test_vld4_lane_s32(int32_t  *a, int32x2x4_t b) {
   return vld4_lane_s32(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.int64x1x4_t @test_vld4_lane_s64(ptr noundef %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.int64x1x4_t @test_vld4_lane_s64(ptr noundef %a, [4 x <1 x i64>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
@@ -3114,7 +3114,7 @@ int64x1x4_t test_vld4_lane_s64(int64_t  *a, int64x1x4_t b) {
   return vld4_lane_s64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.float16x4x4_t @test_vld4_lane_f16(ptr noundef %a, [4 x <4 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.float16x4x4_t @test_vld4_lane_f16(ptr noundef %a, [4 x <4 x half>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
@@ -3151,7 +3151,7 @@ float16x4x4_t test_vld4_lane_f16(float16_t  *a, float16x4x4_t b) {
   return vld4_lane_f16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.float32x2x4_t @test_vld4_lane_f32(ptr noundef %a, [4 x <2 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.float32x2x4_t @test_vld4_lane_f32(ptr noundef %a, [4 x <2 x float>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
@@ -3188,7 +3188,7 @@ float32x2x4_t test_vld4_lane_f32(float32_t  *a, float32x2x4_t b) {
   return vld4_lane_f32(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.float64x1x4_t @test_vld4_lane_f64(ptr noundef %a, [4 x <1 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.float64x1x4_t @test_vld4_lane_f64(ptr noundef %a, [4 x <1 x double>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float64x1x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.float64x1x4_t, align 8
@@ -3225,7 +3225,7 @@ float64x1x4_t test_vld4_lane_f64(float64_t  *a, float64x1x4_t b) {
   return vld4_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.poly8x8x4_t @test_vld4_lane_p8(ptr noundef %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.poly8x8x4_t @test_vld4_lane_p8(ptr noundef %a, [4 x <8 x i8>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
@@ -3254,7 +3254,7 @@ poly8x8x4_t test_vld4_lane_p8(poly8_t  *a, poly8x8x4_t b) {
   return vld4_lane_p8(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.poly16x4x4_t @test_vld4_lane_p16(ptr noundef %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.poly16x4x4_t @test_vld4_lane_p16(ptr noundef %a, [4 x <4 x i16>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
@@ -3291,7 +3291,7 @@ poly16x4x4_t test_vld4_lane_p16(poly16_t  *a, poly16x4x4_t b) {
   return vld4_lane_p16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} %struct.poly64x1x4_t @test_vld4_lane_p64(ptr noundef %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} %struct.poly64x1x4_t @test_vld4_lane_p64(ptr noundef %a, [4 x <1 x i64>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK:   [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
@@ -3596,7 +3596,7 @@ void test_vst1_lane_p64(poly64_t  *a, poly64x1_t b) {
   vst1_lane_p64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_u8(ptr noundef %a, [2 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_u8(ptr noundef %a, [2 x <16 x i8>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, ptr [[B]], i32 0, i32 0
@@ -3614,7 +3614,7 @@ void test_vst2q_lane_u8(uint8_t  *a, uint8x16x2_t b) {
   vst2q_lane_u8(a, b, 15);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_u16(ptr noundef %a, [2 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_u16(ptr noundef %a, [2 x <8 x i16>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, ptr [[B]], i32 0, i32 0
@@ -3636,7 +3636,7 @@ void test_vst2q_lane_u16(uint16_t  *a, uint16x8x2_t b) {
   vst2q_lane_u16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_u32(ptr noundef %a, [2 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_u32(ptr noundef %a, [2 x <4 x i32>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, ptr [[B]], i32 0, i32 0
@@ -3658,7 +3658,7 @@ void test_vst2q_lane_u32(uint32_t  *a, uint32x4x2_t b) {
   vst2q_lane_u32(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_u64(ptr noundef %a, [2 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_u64(ptr noundef %a, [2 x <2 x i64>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x2x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x2_t, ptr [[B]], i32 0, i32 0
@@ -3680,7 +3680,7 @@ void test_vst2q_lane_u64(uint64_t  *a, uint64x2x2_t b) {
   vst2q_lane_u64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_s8(ptr noundef %a, [2 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_s8(ptr noundef %a, [2 x <16 x i8>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, ptr [[B]], i32 0, i32 0
@@ -3698,7 +3698,7 @@ void test_vst2q_lane_s8(int8_t  *a, int8x16x2_t b) {
   vst2q_lane_s8(a, b, 15);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_s16(ptr noundef %a, [2 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_s16(ptr noundef %a, [2 x <8 x i16>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, ptr [[B]], i32 0, i32 0
@@ -3720,7 +3720,7 @@ void test_vst2q_lane_s16(int16_t  *a, int16x8x2_t b) {
   vst2q_lane_s16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_s32(ptr noundef %a, [2 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_s32(ptr noundef %a, [2 x <4 x i32>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, ptr [[B]], i32 0, i32 0
@@ -3742,7 +3742,7 @@ void test_vst2q_lane_s32(int32_t  *a, int32x4x2_t b) {
   vst2q_lane_s32(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_s64(ptr noundef %a, [2 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_s64(ptr noundef %a, [2 x <2 x i64>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int64x2x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x2_t, ptr [[B]], i32 0, i32 0
@@ -3764,7 +3764,7 @@ void test_vst2q_lane_s64(int64_t  *a, int64x2x2_t b) {
   vst2q_lane_s64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_f16(ptr noundef %a, [2 x <8 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_f16(ptr noundef %a, [2 x <8 x half>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, ptr [[B]], i32 0, i32 0
@@ -3786,7 +3786,7 @@ void test_vst2q_lane_f16(float16_t  *a, float16x8x2_t b) {
   vst2q_lane_f16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_f32(ptr noundef %a, [2 x <4 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_f32(ptr noundef %a, [2 x <4 x float>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, ptr [[B]], i32 0, i32 0
@@ -3808,7 +3808,7 @@ void test_vst2q_lane_f32(float32_t  *a, float32x4x2_t b) {
   vst2q_lane_f32(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_f64(ptr noundef %a, [2 x <2 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_f64(ptr noundef %a, [2 x <2 x double>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x2_t, ptr [[B]], i32 0, i32 0
@@ -3830,7 +3830,7 @@ void test_vst2q_lane_f64(float64_t  *a, float64x2x2_t b) {
   vst2q_lane_f64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_p8(ptr noundef %a, [2 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_p8(ptr noundef %a, [2 x <16 x i8>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, ptr [[B]], i32 0, i32 0
@@ -3848,7 +3848,7 @@ void test_vst2q_lane_p8(poly8_t  *a, poly8x16x2_t b) {
   vst2q_lane_p8(a, b, 15);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_p16(ptr noundef %a, [2 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_p16(ptr noundef %a, [2 x <8 x i16>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, ptr [[B]], i32 0, i32 0
@@ -3870,7 +3870,7 @@ void test_vst2q_lane_p16(poly16_t  *a, poly16x8x2_t b) {
   vst2q_lane_p16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_p64(ptr noundef %a, [2 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2q_lane_p64(ptr noundef %a, [2 x <2 x i64>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x2x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, ptr [[B]], i32 0, i32 0
@@ -3892,7 +3892,7 @@ void test_vst2q_lane_p64(poly64_t  *a, poly64x2x2_t b) {
   vst2q_lane_p64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_u8(ptr noundef %a, [2 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2_lane_u8(ptr noundef %a, [2 x <8 x i8>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, ptr [[B]], i32 0, i32 0
@@ -3910,7 +3910,7 @@ void test_vst2_lane_u8(uint8_t  *a, uint8x8x2_t b) {
   vst2_lane_u8(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_u16(ptr noundef %a, [2 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2_lane_u16(ptr noundef %a, [2 x <4 x i16>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, ptr [[B]], i32 0, i32 0
@@ -3932,7 +3932,7 @@ void test_vst2_lane_u16(uint16_t  *a, uint16x4x2_t b) {
   vst2_lane_u16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_u32(ptr noundef %a, [2 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2_lane_u32(ptr noundef %a, [2 x <2 x i32>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, ptr [[B]], i32 0, i32 0
@@ -3954,7 +3954,7 @@ void test_vst2_lane_u32(uint32_t  *a, uint32x2x2_t b) {
   vst2_lane_u32(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_u64(ptr noundef %a, [2 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2_lane_u64(ptr noundef %a, [2 x <1 x i64>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, ptr [[B]], i32 0, i32 0
@@ -3976,7 +3976,7 @@ void test_vst2_lane_u64(uint64_t  *a, uint64x1x2_t b) {
   vst2_lane_u64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_s8(ptr noundef %a, [2 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2_lane_s8(ptr noundef %a, [2 x <8 x i8>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, ptr [[B]], i32 0, i32 0
@@ -3994,7 +3994,7 @@ void test_vst2_lane_s8(int8_t  *a, int8x8x2_t b) {
   vst2_lane_s8(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_s16(ptr noundef %a, [2 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2_lane_s16(ptr noundef %a, [2 x <4 x i16>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, ptr [[B]], i32 0, i32 0
@@ -4016,7 +4016,7 @@ void test_vst2_lane_s16(int16_t  *a, int16x4x2_t b) {
   vst2_lane_s16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_s32(ptr noundef %a, [2 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2_lane_s32(ptr noundef %a, [2 x <2 x i32>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, ptr [[B]], i32 0, i32 0
@@ -4038,7 +4038,7 @@ void test_vst2_lane_s32(int32_t  *a, int32x2x2_t b) {
   vst2_lane_s32(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_s64(ptr noundef %a, [2 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2_lane_s64(ptr noundef %a, [2 x <1 x i64>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, ptr [[B]], i32 0, i32 0
@@ -4060,7 +4060,7 @@ void test_vst2_lane_s64(int64_t  *a, int64x1x2_t b) {
   vst2_lane_s64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_f16(ptr noundef %a, [2 x <4 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2_lane_f16(ptr noundef %a, [2 x <4 x half>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, ptr [[B]], i32 0, i32 0
@@ -4082,7 +4082,7 @@ void test_vst2_lane_f16(float16_t  *a, float16x4x2_t b) {
   vst2_lane_f16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_f32(ptr noundef %a, [2 x <2 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2_lane_f32(ptr noundef %a, [2 x <2 x float>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, ptr [[B]], i32 0, i32 0
@@ -4104,7 +4104,7 @@ void test_vst2_lane_f32(float32_t  *a, float32x2x2_t b) {
   vst2_lane_f32(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_f64(ptr noundef %a, [2 x <1 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2_lane_f64(ptr noundef %a, [2 x <1 x double>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.float64x1x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x2_t, ptr [[B]], i32 0, i32 0
@@ -4126,7 +4126,7 @@ void test_vst2_lane_f64(float64_t  *a, float64x1x2_t b) {
   vst2_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_p8(ptr noundef %a, [2 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2_lane_p8(ptr noundef %a, [2 x <8 x i8>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, ptr [[B]], i32 0, i32 0
@@ -4144,7 +4144,7 @@ void test_vst2_lane_p8(poly8_t  *a, poly8x8x2_t b) {
   vst2_lane_p8(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_p16(ptr noundef %a, [2 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2_lane_p16(ptr noundef %a, [2 x <4 x i16>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, ptr [[B]], i32 0, i32 0
@@ -4166,7 +4166,7 @@ void test_vst2_lane_p16(poly16_t  *a, poly16x4x2_t b) {
   vst2_lane_p16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2_lane_p64(ptr noundef %a, [2 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2_lane_p64(ptr noundef %a, [2 x <1 x i64>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x1x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, ptr [[B]], i32 0, i32 0
@@ -4188,7 +4188,7 @@ void test_vst2_lane_p64(poly64_t  *a, poly64x1x2_t b) {
   vst2_lane_p64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_u8(ptr noundef %a, [3 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_u8(ptr noundef %a, [3 x <16 x i8>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, ptr [[B]], i32 0, i32 0
@@ -4209,7 +4209,7 @@ void test_vst3q_lane_u8(uint8_t  *a, uint8x16x3_t b) {
   vst3q_lane_u8(a, b, 15);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_u16(ptr noundef %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_u16(ptr noundef %a, [3 x <8 x i16>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, ptr [[B]], i32 0, i32 0
@@ -4236,7 +4236,7 @@ void test_vst3q_lane_u16(uint16_t  *a, uint16x8x3_t b) {
   vst3q_lane_u16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_u32(ptr noundef %a, [3 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_u32(ptr noundef %a, [3 x <4 x i32>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, ptr [[B]], i32 0, i32 0
@@ -4263,7 +4263,7 @@ void test_vst3q_lane_u32(uint32_t  *a, uint32x4x3_t b) {
   vst3q_lane_u32(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_u64(ptr noundef %a, [3 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_u64(ptr noundef %a, [3 x <2 x i64>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x2x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x3_t, ptr [[B]], i32 0, i32 0
@@ -4290,7 +4290,7 @@ void test_vst3q_lane_u64(uint64_t  *a, uint64x2x3_t b) {
   vst3q_lane_u64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_s8(ptr noundef %a, [3 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_s8(ptr noundef %a, [3 x <16 x i8>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, ptr [[B]], i32 0, i32 0
@@ -4311,7 +4311,7 @@ void test_vst3q_lane_s8(int8_t  *a, int8x16x3_t b) {
   vst3q_lane_s8(a, b, 15);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_s16(ptr noundef %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_s16(ptr noundef %a, [3 x <8 x i16>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, ptr [[B]], i32 0, i32 0
@@ -4338,7 +4338,7 @@ void test_vst3q_lane_s16(int16_t  *a, int16x8x3_t b) {
   vst3q_lane_s16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_s32(ptr noundef %a, [3 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_s32(ptr noundef %a, [3 x <4 x i32>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, ptr [[B]], i32 0, i32 0
@@ -4365,7 +4365,7 @@ void test_vst3q_lane_s32(int32_t  *a, int32x4x3_t b) {
   vst3q_lane_s32(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_s64(ptr noundef %a, [3 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_s64(ptr noundef %a, [3 x <2 x i64>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int64x2x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x3_t, ptr [[B]], i32 0, i32 0
@@ -4392,7 +4392,7 @@ void test_vst3q_lane_s64(int64_t  *a, int64x2x3_t b) {
   vst3q_lane_s64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_f16(ptr noundef %a, [3 x <8 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_f16(ptr noundef %a, [3 x <8 x half>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, ptr [[B]], i32 0, i32 0
@@ -4419,7 +4419,7 @@ void test_vst3q_lane_f16(float16_t  *a, float16x8x3_t b) {
   vst3q_lane_f16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_f32(ptr noundef %a, [3 x <4 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_f32(ptr noundef %a, [3 x <4 x float>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, ptr [[B]], i32 0, i32 0
@@ -4446,7 +4446,7 @@ void test_vst3q_lane_f32(float32_t  *a, float32x4x3_t b) {
   vst3q_lane_f32(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_f64(ptr noundef %a, [3 x <2 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_f64(ptr noundef %a, [3 x <2 x double>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x3_t, ptr [[B]], i32 0, i32 0
@@ -4473,7 +4473,7 @@ void test_vst3q_lane_f64(float64_t  *a, float64x2x3_t b) {
   vst3q_lane_f64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_p8(ptr noundef %a, [3 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_p8(ptr noundef %a, [3 x <16 x i8>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, ptr [[B]], i32 0, i32 0
@@ -4494,7 +4494,7 @@ void test_vst3q_lane_p8(poly8_t  *a, poly8x16x3_t b) {
   vst3q_lane_p8(a, b, 15);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_p16(ptr noundef %a, [3 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_p16(ptr noundef %a, [3 x <8 x i16>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, ptr [[B]], i32 0, i32 0
@@ -4521,7 +4521,7 @@ void test_vst3q_lane_p16(poly16_t  *a, poly16x8x3_t b) {
   vst3q_lane_p16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_p64(ptr noundef %a, [3 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3q_lane_p64(ptr noundef %a, [3 x <2 x i64>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, ptr [[B]], i32 0, i32 0
@@ -4548,7 +4548,7 @@ void test_vst3q_lane_p64(poly64_t  *a, poly64x2x3_t b) {
   vst3q_lane_p64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_u8(ptr noundef %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3_lane_u8(ptr noundef %a, [3 x <8 x i8>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, ptr [[B]], i32 0, i32 0
@@ -4569,7 +4569,7 @@ void test_vst3_lane_u8(uint8_t  *a, uint8x8x3_t b) {
   vst3_lane_u8(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_u16(ptr noundef %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3_lane_u16(ptr noundef %a, [3 x <4 x i16>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, ptr [[B]], i32 0, i32 0
@@ -4596,7 +4596,7 @@ void test_vst3_lane_u16(uint16_t  *a, uint16x4x3_t b) {
   vst3_lane_u16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_u32(ptr noundef %a, [3 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3_lane_u32(ptr noundef %a, [3 x <2 x i32>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, ptr [[B]], i32 0, i32 0
@@ -4623,7 +4623,7 @@ void test_vst3_lane_u32(uint32_t  *a, uint32x2x3_t b) {
   vst3_lane_u32(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_u64(ptr noundef %a, [3 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3_lane_u64(ptr noundef %a, [3 x <1 x i64>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, ptr [[B]], i32 0, i32 0
@@ -4650,7 +4650,7 @@ void test_vst3_lane_u64(uint64_t  *a, uint64x1x3_t b) {
   vst3_lane_u64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_s8(ptr noundef %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3_lane_s8(ptr noundef %a, [3 x <8 x i8>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, ptr [[B]], i32 0, i32 0
@@ -4671,7 +4671,7 @@ void test_vst3_lane_s8(int8_t  *a, int8x8x3_t b) {
   vst3_lane_s8(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_s16(ptr noundef %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3_lane_s16(ptr noundef %a, [3 x <4 x i16>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, ptr [[B]], i32 0, i32 0
@@ -4698,7 +4698,7 @@ void test_vst3_lane_s16(int16_t  *a, int16x4x3_t b) {
   vst3_lane_s16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_s32(ptr noundef %a, [3 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3_lane_s32(ptr noundef %a, [3 x <2 x i32>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, ptr [[B]], i32 0, i32 0
@@ -4725,7 +4725,7 @@ void test_vst3_lane_s32(int32_t  *a, int32x2x3_t b) {
   vst3_lane_s32(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_s64(ptr noundef %a, [3 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3_lane_s64(ptr noundef %a, [3 x <1 x i64>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, ptr [[B]], i32 0, i32 0
@@ -4752,7 +4752,7 @@ void test_vst3_lane_s64(int64_t  *a, int64x1x3_t b) {
   vst3_lane_s64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_f16(ptr noundef %a, [3 x <4 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3_lane_f16(ptr noundef %a, [3 x <4 x half>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, ptr [[B]], i32 0, i32 0
@@ -4779,7 +4779,7 @@ void test_vst3_lane_f16(float16_t  *a, float16x4x3_t b) {
   vst3_lane_f16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_f32(ptr noundef %a, [3 x <2 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3_lane_f32(ptr noundef %a, [3 x <2 x float>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, ptr [[B]], i32 0, i32 0
@@ -4806,7 +4806,7 @@ void test_vst3_lane_f32(float32_t  *a, float32x2x3_t b) {
   vst3_lane_f32(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_f64(ptr noundef %a, [3 x <1 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3_lane_f64(ptr noundef %a, [3 x <1 x double>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.float64x1x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x3_t, ptr [[B]], i32 0, i32 0
@@ -4833,7 +4833,7 @@ void test_vst3_lane_f64(float64_t  *a, float64x1x3_t b) {
   vst3_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_p8(ptr noundef %a, [3 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3_lane_p8(ptr noundef %a, [3 x <8 x i8>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, ptr [[B]], i32 0, i32 0
@@ -4854,7 +4854,7 @@ void test_vst3_lane_p8(poly8_t  *a, poly8x8x3_t b) {
   vst3_lane_p8(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_p16(ptr noundef %a, [3 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3_lane_p16(ptr noundef %a, [3 x <4 x i16>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, ptr [[B]], i32 0, i32 0
@@ -4881,7 +4881,7 @@ void test_vst3_lane_p16(poly16_t  *a, poly16x4x3_t b) {
   vst3_lane_p16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3_lane_p64(ptr noundef %a, [3 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3_lane_p64(ptr noundef %a, [3 x <1 x i64>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, ptr [[B]], i32 0, i32 0
@@ -4908,7 +4908,7 @@ void test_vst3_lane_p64(poly64_t  *a, poly64x1x3_t b) {
   vst3_lane_p64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_u8(ptr noundef %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_u8(ptr noundef %a, [4 x <16 x i8>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, ptr [[B]], i32 0, i32 0
@@ -4932,7 +4932,7 @@ void test_vst4q_lane_u8(uint8_t  *a, uint8x16x4_t b) {
   vst4q_lane_u8(a, b, 15);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_u16(ptr noundef %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_u16(ptr noundef %a, [4 x <8 x i16>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, ptr [[B]], i32 0, i32 0
@@ -4964,7 +4964,7 @@ void test_vst4q_lane_u16(uint16_t  *a, uint16x8x4_t b) {
   vst4q_lane_u16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_u32(ptr noundef %a, [4 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_u32(ptr noundef %a, [4 x <4 x i32>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, ptr [[B]], i32 0, i32 0
@@ -4996,7 +4996,7 @@ void test_vst4q_lane_u32(uint32_t  *a, uint32x4x4_t b) {
   vst4q_lane_u32(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_u64(ptr noundef %a, [4 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_u64(ptr noundef %a, [4 x <2 x i64>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x2x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x2x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x2x4_t, ptr [[B]], i32 0, i32 0
@@ -5028,7 +5028,7 @@ void test_vst4q_lane_u64(uint64_t  *a, uint64x2x4_t b) {
   vst4q_lane_u64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_s8(ptr noundef %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_s8(ptr noundef %a, [4 x <16 x i8>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, ptr [[B]], i32 0, i32 0
@@ -5052,7 +5052,7 @@ void test_vst4q_lane_s8(int8_t  *a, int8x16x4_t b) {
   vst4q_lane_s8(a, b, 15);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_s16(ptr noundef %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_s16(ptr noundef %a, [4 x <8 x i16>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, ptr [[B]], i32 0, i32 0
@@ -5084,7 +5084,7 @@ void test_vst4q_lane_s16(int16_t  *a, int16x8x4_t b) {
   vst4q_lane_s16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_s32(ptr noundef %a, [4 x <4 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_s32(ptr noundef %a, [4 x <4 x i32>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, ptr [[B]], i32 0, i32 0
@@ -5116,7 +5116,7 @@ void test_vst4q_lane_s32(int32_t  *a, int32x4x4_t b) {
   vst4q_lane_s32(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_s64(ptr noundef %a, [4 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_s64(ptr noundef %a, [4 x <2 x i64>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int64x2x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x2x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x2x4_t, ptr [[B]], i32 0, i32 0
@@ -5148,7 +5148,7 @@ void test_vst4q_lane_s64(int64_t  *a, int64x2x4_t b) {
   vst4q_lane_s64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_f16(ptr noundef %a, [4 x <8 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_f16(ptr noundef %a, [4 x <8 x half>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, ptr [[B]], i32 0, i32 0
@@ -5180,7 +5180,7 @@ void test_vst4q_lane_f16(float16_t  *a, float16x8x4_t b) {
   vst4q_lane_f16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_f32(ptr noundef %a, [4 x <4 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_f32(ptr noundef %a, [4 x <4 x float>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, ptr [[B]], i32 0, i32 0
@@ -5212,7 +5212,7 @@ void test_vst4q_lane_f32(float32_t  *a, float32x4x4_t b) {
   vst4q_lane_f32(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_f64(ptr noundef %a, [4 x <2 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_f64(ptr noundef %a, [4 x <2 x double>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.float64x2x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x2x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x2x4_t, ptr [[B]], i32 0, i32 0
@@ -5244,7 +5244,7 @@ void test_vst4q_lane_f64(float64_t  *a, float64x2x4_t b) {
   vst4q_lane_f64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_p8(ptr noundef %a, [4 x <16 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_p8(ptr noundef %a, [4 x <16 x i8>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, ptr [[B]], i32 0, i32 0
@@ -5268,7 +5268,7 @@ void test_vst4q_lane_p8(poly8_t  *a, poly8x16x4_t b) {
   vst4q_lane_p8(a, b, 15);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_p16(ptr noundef %a, [4 x <8 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_p16(ptr noundef %a, [4 x <8 x i16>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, ptr [[B]], i32 0, i32 0
@@ -5300,7 +5300,7 @@ void test_vst4q_lane_p16(poly16_t  *a, poly16x8x4_t b) {
   vst4q_lane_p16(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_p64(ptr noundef %a, [4 x <2 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4q_lane_p64(ptr noundef %a, [4 x <2 x i64>] alignstack(16) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, ptr [[B]], i32 0, i32 0
@@ -5332,7 +5332,7 @@ void test_vst4q_lane_p64(poly64_t  *a, poly64x2x4_t b) {
   vst4q_lane_p64(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_u8(ptr noundef %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4_lane_u8(ptr noundef %a, [4 x <8 x i8>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, ptr [[B]], i32 0, i32 0
@@ -5356,7 +5356,7 @@ void test_vst4_lane_u8(uint8_t  *a, uint8x8x4_t b) {
   vst4_lane_u8(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_u16(ptr noundef %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4_lane_u16(ptr noundef %a, [4 x <4 x i16>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, ptr [[B]], i32 0, i32 0
@@ -5388,7 +5388,7 @@ void test_vst4_lane_u16(uint16_t  *a, uint16x4x4_t b) {
   vst4_lane_u16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_u32(ptr noundef %a, [4 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4_lane_u32(ptr noundef %a, [4 x <2 x i32>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, ptr [[B]], i32 0, i32 0
@@ -5420,7 +5420,7 @@ void test_vst4_lane_u32(uint32_t  *a, uint32x2x4_t b) {
   vst4_lane_u32(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_u64(ptr noundef %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4_lane_u64(ptr noundef %a, [4 x <1 x i64>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, ptr [[B]], i32 0, i32 0
@@ -5452,7 +5452,7 @@ void test_vst4_lane_u64(uint64_t  *a, uint64x1x4_t b) {
   vst4_lane_u64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_s8(ptr noundef %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4_lane_s8(ptr noundef %a, [4 x <8 x i8>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, ptr [[B]], i32 0, i32 0
@@ -5476,7 +5476,7 @@ void test_vst4_lane_s8(int8_t  *a, int8x8x4_t b) {
   vst4_lane_s8(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_s16(ptr noundef %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4_lane_s16(ptr noundef %a, [4 x <4 x i16>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, ptr [[B]], i32 0, i32 0
@@ -5508,7 +5508,7 @@ void test_vst4_lane_s16(int16_t  *a, int16x4x4_t b) {
   vst4_lane_s16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_s32(ptr noundef %a, [4 x <2 x i32>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4_lane_s32(ptr noundef %a, [4 x <2 x i32>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, ptr [[B]], i32 0, i32 0
@@ -5540,7 +5540,7 @@ void test_vst4_lane_s32(int32_t  *a, int32x2x4_t b) {
   vst4_lane_s32(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_s64(ptr noundef %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4_lane_s64(ptr noundef %a, [4 x <1 x i64>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, ptr [[B]], i32 0, i32 0
@@ -5572,7 +5572,7 @@ void test_vst4_lane_s64(int64_t  *a, int64x1x4_t b) {
   vst4_lane_s64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_f16(ptr noundef %a, [4 x <4 x half>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4_lane_f16(ptr noundef %a, [4 x <4 x half>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, ptr [[B]], i32 0, i32 0
@@ -5604,7 +5604,7 @@ void test_vst4_lane_f16(float16_t  *a, float16x4x4_t b) {
   vst4_lane_f16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_f32(ptr noundef %a, [4 x <2 x float>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4_lane_f32(ptr noundef %a, [4 x <2 x float>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, ptr [[B]], i32 0, i32 0
@@ -5636,7 +5636,7 @@ void test_vst4_lane_f32(float32_t  *a, float32x2x4_t b) {
   vst4_lane_f32(a, b, 1);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_f64(ptr noundef %a, [4 x <1 x double>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4_lane_f64(ptr noundef %a, [4 x <1 x double>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.float64x1x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.float64x1x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float64x1x4_t, ptr [[B]], i32 0, i32 0
@@ -5668,7 +5668,7 @@ void test_vst4_lane_f64(float64_t  *a, float64x1x4_t b) {
   vst4_lane_f64(a, b, 0);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_p8(ptr noundef %a, [4 x <8 x i8>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4_lane_p8(ptr noundef %a, [4 x <8 x i8>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, ptr [[B]], i32 0, i32 0
@@ -5692,7 +5692,7 @@ void test_vst4_lane_p8(poly8_t  *a, poly8x8x4_t b) {
   vst4_lane_p8(a, b, 7);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_p16(ptr noundef %a, [4 x <4 x i16>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4_lane_p16(ptr noundef %a, [4 x <4 x i16>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, ptr [[B]], i32 0, i32 0
@@ -5724,7 +5724,7 @@ void test_vst4_lane_p16(poly16_t  *a, poly16x4x4_t b) {
   vst4_lane_p16(a, b, 3);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4_lane_p64(ptr noundef %a, [4 x <1 x i64>] %b.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4_lane_p64(ptr noundef %a, [4 x <1 x i64>] alignstack(8) %b.coerce) #0 {
 // CHECK:   [[B:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, ptr [[B]], i32 0, i32 0

diff  --git a/clang/test/CodeGen/aarch64-neon-tbl.c b/clang/test/CodeGen/aarch64-neon-tbl.c
index 264e7d731b4c60..415f3f28a96673 100644
--- a/clang/test/CodeGen/aarch64-neon-tbl.c
+++ b/clang/test/CodeGen/aarch64-neon-tbl.c
@@ -28,7 +28,7 @@ int8x8_t test_vqtbl1_s8(int8x16_t a, uint8x8_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vtbl2_s8
-// CHECK-SAME: ([2 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ([2 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_INT8X8X2_T:%.*]], align 8
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_INT8X8X2_T]], align 8
@@ -49,7 +49,7 @@ int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2_s8
-// CHECK-SAME: ([2 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
@@ -69,7 +69,7 @@ int8x8_t test_vqtbl2_s8(int8x16x2_t a, uint8x8_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vtbl3_s8
-// CHECK-SAME: ([3 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ([3 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_INT8X8X3_T:%.*]], align 8
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_INT8X8X3_T]], align 8
@@ -93,7 +93,7 @@ int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3_s8
-// CHECK-SAME: ([3 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
@@ -115,7 +115,7 @@ int8x8_t test_vqtbl3_s8(int8x16x3_t a, uint8x8_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vtbl4_s8
-// CHECK-SAME: ([4 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ([4 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_INT8X8X4_T:%.*]], align 8
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_INT8X8X4_T]], align 8
@@ -141,7 +141,7 @@ int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4_s8
-// CHECK-SAME: ([4 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
@@ -175,7 +175,7 @@ int8x16_t test_vqtbl1q_s8(int8x16_t a, int8x16_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2q_s8
-// CHECK-SAME: ([2 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
@@ -195,7 +195,7 @@ int8x16_t test_vqtbl2q_s8(int8x16x2_t a, int8x16_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3q_s8
-// CHECK-SAME: ([3 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
@@ -217,7 +217,7 @@ int8x16_t test_vqtbl3q_s8(int8x16x3_t a, int8x16_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4q_s8
-// CHECK-SAME: ([4 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
@@ -258,7 +258,7 @@ int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vtbx2_s8
-// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_INT8X8X2_T:%.*]], align 8
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_INT8X8X2_T]], align 8
@@ -279,7 +279,7 @@ int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vtbx3_s8
-// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_INT8X8X3_T:%.*]], align 8
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_INT8X8X3_T]], align 8
@@ -309,7 +309,7 @@ int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vtbx4_s8
-// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_INT8X8X4_T:%.*]], align 8
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_INT8X8X4_T]], align 8
@@ -345,7 +345,7 @@ int8x8_t test_vqtbx1_s8(int8x8_t a, int8x16_t b, uint8x8_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx2_s8
-// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
@@ -365,7 +365,7 @@ int8x8_t test_vqtbx2_s8(int8x8_t a, int8x16x2_t b, uint8x8_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx3_s8
-// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
@@ -387,7 +387,7 @@ int8x8_t test_vqtbx3_s8(int8x8_t a, int8x16x3_t b, uint8x8_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx4_s8
-// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
@@ -421,7 +421,7 @@ int8x16_t test_vqtbx1q_s8(int8x16_t a, int8x16_t b, uint8x16_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx2q_s8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [2 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X2_T:%.*]], align 16
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_INT8X16X2_T]], align 16
@@ -441,7 +441,7 @@ int8x16_t test_vqtbx2q_s8(int8x16_t a, int8x16x2_t b, int8x16_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx3q_s8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [3 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X3_T:%.*]], align 16
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_INT8X16X3_T]], align 16
@@ -463,7 +463,7 @@ int8x16_t test_vqtbx3q_s8(int8x16_t a, int8x16x3_t b, int8x16_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx4q_s8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [4 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_INT8X16X4_T:%.*]], align 16
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_INT8X16X4_T]], align 16
@@ -508,7 +508,7 @@ uint8x8_t test_vqtbl1_u8(uint8x16_t a, uint8x8_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vtbl2_u8
-// CHECK-SAME: ([2 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ([2 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X8X2_T:%.*]], align 8
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X8X2_T]], align 8
@@ -529,7 +529,7 @@ uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2_u8
-// CHECK-SAME: ([2 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
@@ -549,7 +549,7 @@ uint8x8_t test_vqtbl2_u8(uint8x16x2_t a, uint8x8_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vtbl3_u8
-// CHECK-SAME: ([3 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ([3 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X8X3_T:%.*]], align 8
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X8X3_T]], align 8
@@ -573,7 +573,7 @@ uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3_u8
-// CHECK-SAME: ([3 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
@@ -595,7 +595,7 @@ uint8x8_t test_vqtbl3_u8(uint8x16x3_t a, uint8x8_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vtbl4_u8
-// CHECK-SAME: ([4 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ([4 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X8X4_T:%.*]], align 8
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X8X4_T]], align 8
@@ -621,7 +621,7 @@ uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4_u8
-// CHECK-SAME: ([4 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
@@ -655,7 +655,7 @@ uint8x16_t test_vqtbl1q_u8(uint8x16_t a, uint8x16_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2q_u8
-// CHECK-SAME: ([2 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
@@ -675,7 +675,7 @@ uint8x16_t test_vqtbl2q_u8(uint8x16x2_t a, uint8x16_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3q_u8
-// CHECK-SAME: ([3 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
@@ -697,7 +697,7 @@ uint8x16_t test_vqtbl3q_u8(uint8x16x3_t a, uint8x16_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4q_u8
-// CHECK-SAME: ([4 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
@@ -738,7 +738,7 @@ uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vtbx2_u8
-// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X8X2_T:%.*]], align 8
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_UINT8X8X2_T]], align 8
@@ -759,7 +759,7 @@ uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vtbx3_u8
-// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X8X3_T:%.*]], align 8
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_UINT8X8X3_T]], align 8
@@ -789,7 +789,7 @@ uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vtbx4_u8
-// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X8X4_T:%.*]], align 8
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_UINT8X8X4_T]], align 8
@@ -825,7 +825,7 @@ uint8x8_t test_vqtbx1_u8(uint8x8_t a, uint8x16_t b, uint8x8_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx2_u8
-// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
@@ -845,7 +845,7 @@ uint8x8_t test_vqtbx2_u8(uint8x8_t a, uint8x16x2_t b, uint8x8_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx3_u8
-// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
@@ -867,7 +867,7 @@ uint8x8_t test_vqtbx3_u8(uint8x8_t a, uint8x16x3_t b, uint8x8_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx4_u8
-// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
@@ -901,7 +901,7 @@ uint8x16_t test_vqtbx1q_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx2q_u8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [2 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X2_T:%.*]], align 16
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_UINT8X16X2_T]], align 16
@@ -921,7 +921,7 @@ uint8x16_t test_vqtbx2q_u8(uint8x16_t a, uint8x16x2_t b, uint8x16_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx3q_u8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [3 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X3_T:%.*]], align 16
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_UINT8X16X3_T]], align 16
@@ -943,7 +943,7 @@ uint8x16_t test_vqtbx3q_u8(uint8x16_t a, uint8x16x3_t b, uint8x16_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx4q_u8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [4 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_UINT8X16X4_T:%.*]], align 16
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_UINT8X16X4_T]], align 16
@@ -988,7 +988,7 @@ poly8x8_t test_vqtbl1_p8(poly8x16_t a, uint8x8_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vtbl2_p8
-// CHECK-SAME: ([2 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ([2 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X8X2_T:%.*]], align 8
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_POLY8X8X2_T]], align 8
@@ -1009,7 +1009,7 @@ poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2_p8
-// CHECK-SAME: ([2 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
@@ -1029,7 +1029,7 @@ poly8x8_t test_vqtbl2_p8(poly8x16x2_t a, uint8x8_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vtbl3_p8
-// CHECK-SAME: ([3 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ([3 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X8X3_T:%.*]], align 8
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_POLY8X8X3_T]], align 8
@@ -1053,7 +1053,7 @@ poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3_p8
-// CHECK-SAME: ([3 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
@@ -1075,7 +1075,7 @@ poly8x8_t test_vqtbl3_p8(poly8x16x3_t a, uint8x8_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vtbl4_p8
-// CHECK-SAME: ([4 x <8 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: ([4 x <8 x i8>] alignstack(8) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X8X4_T:%.*]], align 8
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_POLY8X8X4_T]], align 8
@@ -1101,7 +1101,7 @@ poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4_p8
-// CHECK-SAME: ([4 x <16 x i8>] [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16
@@ -1135,7 +1135,7 @@ poly8x16_t test_vqtbl1q_p8(poly8x16_t a, uint8x16_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl2q_p8
-// CHECK-SAME: ([2 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ([2 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
@@ -1155,7 +1155,7 @@ poly8x16_t test_vqtbl2q_p8(poly8x16x2_t a, uint8x16_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl3q_p8
-// CHECK-SAME: ([3 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ([3 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
@@ -1177,7 +1177,7 @@ poly8x16_t test_vqtbl3q_p8(poly8x16x3_t a, uint8x16_t b) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbl4q_p8
-// CHECK-SAME: ([4 x <16 x i8>] [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: ([4 x <16 x i8>] alignstack(16) [[A_COERCE:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P0_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
 // CHECK-NEXT:    [[A:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16
@@ -1218,7 +1218,7 @@ poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vtbx2_p8
-// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X8X2_T:%.*]], align 8
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_POLY8X8X2_T]], align 8
@@ -1239,7 +1239,7 @@ poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vtbx3_p8
-// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X8X3_T:%.*]], align 8
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_POLY8X8X3_T]], align 8
@@ -1269,7 +1269,7 @@ poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vtbx4_p8
-// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <8 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X8X4_T:%.*]], align 8
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_POLY8X8X4_T]], align 8
@@ -1305,7 +1305,7 @@ poly8x8_t test_vqtbx1_p8(poly8x8_t a, uint8x16_t b, uint8x8_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx2_p8
-// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
@@ -1325,7 +1325,7 @@ poly8x8_t test_vqtbx2_p8(poly8x8_t a, poly8x16x2_t b, uint8x8_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx3_p8
-// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
@@ -1347,7 +1347,7 @@ poly8x8_t test_vqtbx3_p8(poly8x8_t a, poly8x16x3_t b, uint8x8_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx4_p8
-// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <16 x i8>] [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<8 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <8 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16
@@ -1381,7 +1381,7 @@ poly8x16_t test_vqtbx1q_p8(poly8x16_t a, uint8x16_t b, uint8x16_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx2q_p8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [2 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X2_T:%.*]], align 16
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_POLY8X16X2_T]], align 16
@@ -1401,7 +1401,7 @@ poly8x16_t test_vqtbx2q_p8(poly8x16_t a, poly8x16x2_t b, uint8x16_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx3q_p8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [3 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X3_T:%.*]], align 16
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_POLY8X16X3_T]], align 16
@@ -1423,7 +1423,7 @@ poly8x16_t test_vqtbx3q_p8(poly8x16_t a, poly8x16x3_t b, uint8x16_t c) {
 }
 
 // CHECK-LABEL: define {{[^@]+}}@test_vqtbx4q_p8
-// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [4 x <16 x i8>] [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
+// CHECK-SAME: (<16 x i8> noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]], <16 x i8> noundef [[C:%.*]]) #[[ATTR1]] {
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[__P1_I:%.*]] = alloca [[STRUCT_POLY8X16X4_T:%.*]], align 16
 // CHECK-NEXT:    [[B:%.*]] = alloca [[STRUCT_POLY8X16X4_T]], align 16

diff  --git a/clang/test/CodeGen/aarch64-poly64.c b/clang/test/CodeGen/aarch64-poly64.c
index ed28bbb2a621e1..09d977c64cbca9 100644
--- a/clang/test/CodeGen/aarch64-poly64.c
+++ b/clang/test/CodeGen/aarch64-poly64.c
@@ -287,7 +287,7 @@ poly64x2x4_t test_vld4q_p64(poly64_t const * ptr) {
   return vld4q_p64(ptr);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2_p64(ptr noundef %ptr, [2 x <1 x i64>] %val.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2_p64(ptr noundef %ptr, [2 x <1 x i64>] alignstack(8) %val.coerce) #0 {
 // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, ptr [[VAL]], i32 0, i32 0
@@ -309,7 +309,7 @@ void test_vst2_p64(poly64_t * ptr, poly64x1x2_t val) {
   return vst2_p64(ptr, val);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst2q_p64(ptr noundef %ptr, [2 x <2 x i64>] %val.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst2q_p64(ptr noundef %ptr, [2 x <2 x i64>] alignstack(16) %val.coerce) #0 {
 // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, ptr [[VAL]], i32 0, i32 0
@@ -331,7 +331,7 @@ void test_vst2q_p64(poly64_t * ptr, poly64x2x2_t val) {
   return vst2q_p64(ptr, val);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3_p64(ptr noundef %ptr, [3 x <1 x i64>] %val.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3_p64(ptr noundef %ptr, [3 x <1 x i64>] alignstack(8) %val.coerce) #0 {
 // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, ptr [[VAL]], i32 0, i32 0
@@ -358,7 +358,7 @@ void test_vst3_p64(poly64_t * ptr, poly64x1x3_t val) {
   return vst3_p64(ptr, val);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst3q_p64(ptr noundef %ptr, [3 x <2 x i64>] %val.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst3q_p64(ptr noundef %ptr, [3 x <2 x i64>] alignstack(16) %val.coerce) #0 {
 // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, ptr [[VAL]], i32 0, i32 0
@@ -385,7 +385,7 @@ void test_vst3q_p64(poly64_t * ptr, poly64x2x3_t val) {
   return vst3q_p64(ptr, val);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4_p64(ptr noundef %ptr, [4 x <1 x i64>] %val.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4_p64(ptr noundef %ptr, [4 x <1 x i64>] alignstack(8) %val.coerce) #0 {
 // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, ptr [[VAL]], i32 0, i32 0
@@ -417,7 +417,7 @@ void test_vst4_p64(poly64_t * ptr, poly64x1x4_t val) {
   return vst4_p64(ptr, val);
 }
 
-// CHECK-LABEL: define{{.*}} void @test_vst4q_p64(ptr noundef %ptr, [4 x <2 x i64>] %val.coerce) #0 {
+// CHECK-LABEL: define{{.*}} void @test_vst4q_p64(ptr noundef %ptr, [4 x <2 x i64>] alignstack(16) %val.coerce) #0 {
 // CHECK:   [[VAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK:   [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, ptr [[VAL]], i32 0, i32 0

diff  --git a/clang/test/CodeGen/arm-aapcs-vfp.c b/clang/test/CodeGen/arm-aapcs-vfp.c
index c18170c67d5e6d..5caf93016cd2ec 100644
--- a/clang/test/CodeGen/arm-aapcs-vfp.c
+++ b/clang/test/CodeGen/arm-aapcs-vfp.c
@@ -26,7 +26,7 @@ struct homogeneous_struct {
   float f4;
 };
 // CHECK: define{{.*}} arm_aapcs_vfpcc %struct.homogeneous_struct @test_struct(%struct.homogeneous_struct %{{.*}})
-// CHECK64: define{{.*}} %struct.homogeneous_struct @test_struct([4 x float] %{{.*}})
+// CHECK64: define{{.*}} %struct.homogeneous_struct @test_struct([4 x float] alignstack(8) %{{.*}})
 extern struct homogeneous_struct struct_callee(struct homogeneous_struct);
 struct homogeneous_struct test_struct(struct homogeneous_struct arg) {
   return struct_callee(arg);
@@ -41,7 +41,7 @@ struct nested_array {
   double d[4];
 };
 // CHECK: define{{.*}} arm_aapcs_vfpcc void @test_array(%struct.nested_array %{{.*}})
-// CHECK64: define{{.*}} void @test_array([4 x double] %{{.*}})
+// CHECK64: define{{.*}} void @test_array([4 x double] alignstack(8) %{{.*}})
 extern void array_callee(struct nested_array);
 void test_array(struct nested_array arg) {
   array_callee(arg);
@@ -49,7 +49,7 @@ void test_array(struct nested_array arg) {
 
 extern void complex_callee(__complex__ double);
 // CHECK: define{{.*}} arm_aapcs_vfpcc void @test_complex({ double, double } noundef %{{.*}})
-// CHECK64: define{{.*}} void @test_complex([2 x double] noundef %cd.coerce)
+// CHECK64: define{{.*}} void @test_complex([2 x double] noundef alignstack(8) %cd.coerce)
 void test_complex(__complex__ double cd) {
   complex_callee(cd);
 }
@@ -95,7 +95,7 @@ void test_hetero(struct heterogeneous_struct arg) {
 
 // Neon multi-vector types are homogeneous aggregates.
 // CHECK: define{{.*}} arm_aapcs_vfpcc <16 x i8> @f0(%struct.int8x16x4_t %{{.*}})
-// CHECK64: define{{.*}} <16 x i8> @f0([4 x <16 x i8>] %{{.*}})
+// CHECK64: define{{.*}} <16 x i8> @f0([4 x <16 x i8>] alignstack(16) %{{.*}})
 int8x16_t f0(int8x16x4_t v4) {
   return vaddq_s8(v4.val[0], v4.val[3]);
 }
@@ -109,7 +109,7 @@ struct neon_struct {
   int16x4_t v4;
 };
 // CHECK: define{{.*}} arm_aapcs_vfpcc void @test_neon(%struct.neon_struct %{{.*}})
-// CHECK64: define{{.*}} void @test_neon([4 x <8 x i8>] %{{.*}})
+// CHECK64: define{{.*}} void @test_neon([4 x <8 x i8>] alignstack(8) %{{.*}})
 extern void neon_callee(struct neon_struct);
 void test_neon(struct neon_struct arg) {
   neon_callee(arg);

diff  --git a/clang/test/CodeGen/arm64-aapcs-arguments.c b/clang/test/CodeGen/arm64-aapcs-arguments.c
index 1f323fe0841d06..dfe63c166e3343 100644
--- a/clang/test/CodeGen/arm64-aapcs-arguments.c
+++ b/clang/test/CodeGen/arm64-aapcs-arguments.c
@@ -17,7 +17,7 @@ void test2(int x0, Small x2_x3, int x4, Small x6_x7, int sp, Small sp16) {
 // stack in order to avoid holes. Make sure we get all of them, and not just the
 // first:
 
-// CHECK: void @test3([4 x float] %s0_s3.coerce, float noundef %s4, [4 x float] %sp.coerce, [4 x float] %sp16.coerce)
+// CHECK: void @test3([4 x float] alignstack(8) %s0_s3.coerce, float noundef %s4, [4 x float] alignstack(8) %sp.coerce, [4 x float] alignstack(8) %sp16.coerce)
 typedef struct { float arr[4]; } HFA;
 void test3(HFA s0_s3, float s4, HFA sp, HFA sp16) {
 }
@@ -28,7 +28,7 @@ void test3(HFA s0_s3, float s4, HFA sp, HFA sp16) {
 // fp128] or something, but leaving them as-is retains more information for
 // users to debug.
 
-//  CHECK: void @test4([3 x <16 x i8>] %v0_v2.coerce, [3 x <16 x i8>] %v3_v5.coerce, [3 x <16 x i8>] %sp.coerce, double noundef %sp48, [3 x <16 x i8>] %sp64.coerce)
+//  CHECK: void @test4([3 x <16 x i8>] alignstack(16) %v0_v2.coerce, [3 x <16 x i8>] alignstack(16) %v3_v5.coerce, [3 x <16 x i8>] alignstack(16) %sp.coerce, double noundef %sp48, [3 x <16 x i8>] alignstack(16) %sp64.coerce)
 typedef __attribute__((neon_vector_type(16))) signed char int8x16_t;
 typedef struct { int8x16_t arr[3]; } BigHFA;
 void test4(BigHFA v0_v2, BigHFA v3_v5, BigHFA sp, double sp48, BigHFA sp64) {
@@ -46,6 +46,6 @@ unsigned char test5(unsigned char a, signed short b) {
 __fp16 test_half(__fp16 A) { }
 
 // __fp16 is a base type for homogeneous floating-point aggregates for AArch64 (but not 32-bit ARM).
-// CHECK: define{{.*}} %struct.HFA_half @test_half_hfa([4 x half] %{{.*}})
+// CHECK: define{{.*}} %struct.HFA_half @test_half_hfa([4 x half] alignstack(8) %{{.*}})
 struct HFA_half { __fp16 a[4]; };
 struct HFA_half test_half_hfa(struct HFA_half A) { }

diff  --git a/clang/test/CodeGen/complex-math.c b/clang/test/CodeGen/complex-math.c
index d5af6db8095342..c59baaa452369d 100644
--- a/clang/test/CodeGen/complex-math.c
+++ b/clang/test/CodeGen/complex-math.c
@@ -136,7 +136,7 @@ float _Complex div_float_rc(float a, float _Complex b) {
   // SPIR: call spir_func {{.*}} @__divsc3(
 
   // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
-  // AARCH64-FASTMATH-LABEL: @div_float_rc(float noundef nofpclass(nan inf) %a, [2 x float] noundef nofpclass(nan inf) %b.coerce)
+  // AARCH64-FASTMATH-LABEL: @div_float_rc(float noundef nofpclass(nan inf) %a, [2 x float] noundef nofpclass(nan inf) alignstack(8) %b.coerce)
   // A = a
   // B = 0
   //
@@ -165,7 +165,7 @@ float _Complex div_float_cc(float _Complex a, float _Complex b) {
   // SPIR: call spir_func {{.*}} @__divsc3(
 
   // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
-  // AARCH64-FASTMATH-LABEL: @div_float_cc([2 x float] noundef nofpclass(nan inf) %a.coerce, [2 x float] noundef nofpclass(nan inf) %b.coerce)
+  // AARCH64-FASTMATH-LABEL: @div_float_cc([2 x float] noundef nofpclass(nan inf) alignstack(8) %a.coerce, [2 x float] noundef nofpclass(nan inf) alignstack(8) %b.coerce)
   //
   // AARCH64-FASTMATH: [[AC:%.*]] = fmul fast float
   // AARCH64-FASTMATH: [[BD:%.*]] = fmul fast float
@@ -313,7 +313,7 @@ double _Complex div_double_rc(double a, double _Complex b) {
   // SPIR: call spir_func {{.*}} @__divdc3(
 
   // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
-  // AARCH64-FASTMATH-LABEL: @div_double_rc(double noundef nofpclass(nan inf) %a, [2 x double] noundef nofpclass(nan inf) %b.coerce)
+  // AARCH64-FASTMATH-LABEL: @div_double_rc(double noundef nofpclass(nan inf) %a, [2 x double] noundef nofpclass(nan inf) alignstack(8) %b.coerce)
   // A = a
   // B = 0
   //
@@ -342,7 +342,7 @@ double _Complex div_double_cc(double _Complex a, double _Complex b) {
   // SPIR: call spir_func {{.*}} @__divdc3(
 
   // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
-  // AARCH64-FASTMATH-LABEL: @div_double_cc([2 x double] noundef nofpclass(nan inf) %a.coerce, [2 x double] noundef nofpclass(nan inf) %b.coerce)
+  // AARCH64-FASTMATH-LABEL: @div_double_cc([2 x double] noundef nofpclass(nan inf) alignstack(8) %a.coerce, [2 x double] noundef nofpclass(nan inf) alignstack(8) %b.coerce)
   //
   // AARCH64-FASTMATH: [[AC:%.*]] = fmul fast double
   // AARCH64-FASTMATH: [[BD:%.*]] = fmul fast double
@@ -506,7 +506,7 @@ long double _Complex div_long_double_rc(long double a, long double _Complex b) {
   // SPIR: call spir_func {{.*}} @__divdc3(
 
   // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
-  // AARCH64-FASTMATH-LABEL: @div_long_double_rc(fp128 noundef nofpclass(nan inf) %a, [2 x fp128] noundef nofpclass(nan inf) %b.coerce)
+  // AARCH64-FASTMATH-LABEL: @div_long_double_rc(fp128 noundef nofpclass(nan inf) %a, [2 x fp128] noundef nofpclass(nan inf) alignstack(16) %b.coerce)
   // A = a
   // B = 0
   //
@@ -538,7 +538,7 @@ long double _Complex div_long_double_cc(long double _Complex a, long double _Com
   // SPIR: call spir_func {{.*}} @__divdc3(
 
   // a / b = (A+iB) / (C+iD) = ((AC+BD)/(CC+DD)) + i((BC-AD)/(CC+DD))
-  // AARCH64-FASTMATH-LABEL: @div_long_double_cc([2 x fp128] noundef nofpclass(nan inf) %a.coerce, [2 x fp128] noundef nofpclass(nan inf) %b.coerce)
+  // AARCH64-FASTMATH-LABEL: @div_long_double_cc([2 x fp128] noundef nofpclass(nan inf) alignstack(16) %a.coerce, [2 x fp128] noundef nofpclass(nan inf) alignstack(16) %b.coerce)
   //
   // AARCH64-FASTMATH: [[AC:%.*]] = fmul fast fp128
   // AARCH64-FASTMATH: [[BD:%.*]] = fmul fast fp128

diff  --git a/clang/test/CodeGenCXX/homogeneous-aggregates.cpp b/clang/test/CodeGenCXX/homogeneous-aggregates.cpp
index 2fcfcf43909451..bd83b9f932aad4 100644
--- a/clang/test/CodeGenCXX/homogeneous-aggregates.cpp
+++ b/clang/test/CodeGenCXX/homogeneous-aggregates.cpp
@@ -47,7 +47,7 @@ D1 CC func_D1(D1 x) { return x; }
 
 // PPC: define{{.*}} [3 x double] @_Z7func_D22D2([3 x double] %x.coerce)
 // ARM32: define{{.*}} arm_aapcs_vfpcc %struct.D2 @_Z7func_D22D2(%struct.D2 %x.coerce)
-// ARM64: define{{.*}} %struct.D2 @_Z7func_D22D2([3 x double] %x.coerce)
+// ARM64: define{{.*}} %struct.D2 @_Z7func_D22D2([3 x double] alignstack(8) %x.coerce)
 // X64: define dso_local x86_vectorcallcc %struct.D2 @"\01_Z7func_D22D2@@24"(%struct.D2 inreg %x.coerce)
 D2 CC func_D2(D2 x) { return x; }
 
@@ -58,7 +58,7 @@ D3 CC func_D3(D3 x) { return x; }
 
 // PPC: define{{.*}} [4 x double] @_Z7func_D42D4([4 x double] %x.coerce)
 // ARM32: define{{.*}} arm_aapcs_vfpcc %struct.D4 @_Z7func_D42D4(%struct.D4 %x.coerce)
-// ARM64: define{{.*}} %struct.D4 @_Z7func_D42D4([4 x double] %x.coerce)
+// ARM64: define{{.*}} %struct.D4 @_Z7func_D42D4([4 x double] alignstack(8) %x.coerce)
 D4 CC func_D4(D4 x) { return x; }
 
 D5 CC func_D5(D5 x) { return x; }
@@ -68,7 +68,7 @@ D5 CC func_D5(D5 x) { return x; }
 // The C++ multiple inheritance expansion case is a little more complicated, so
 // do some extra checking.
 //
-// ARM64-LABEL: define{{.*}} %struct.D5 @_Z7func_D52D5([3 x double] %x.coerce)
+// ARM64-LABEL: define{{.*}} %struct.D5 @_Z7func_D52D5([3 x double] alignstack(8) %x.coerce)
 // ARM64: store [3 x double] %x.coerce, ptr
 
 void call_D5(D5 *p) {
@@ -79,7 +79,7 @@ void call_D5(D5 *p) {
 //
 // ARM64-LABEL: define{{.*}} void @_Z7call_D5P2D5(ptr noundef %p)
 // ARM64: load [3 x double], ptr
-// ARM64: call %struct.D5 @_Z7func_D52D5([3 x double] %{{.*}})
+// ARM64: call %struct.D5 @_Z7func_D52D5([3 x double] alignstack(8) %{{.*}})
 
 struct Empty { };
 struct Float1 { float x; };
@@ -87,7 +87,7 @@ struct Float2 { float y; };
 struct HVAWithEmptyBase : Float1, Empty, Float2 { float z; };
 
 // PPC: define{{.*}} void @_Z15with_empty_base16HVAWithEmptyBase([3 x float] %a.coerce)
-// ARM64: define{{.*}} void @_Z15with_empty_base16HVAWithEmptyBase([3 x float] %a.coerce)
+// ARM64: define{{.*}} void @_Z15with_empty_base16HVAWithEmptyBase([3 x float] alignstack(8) %a.coerce)
 // ARM32: define{{.*}} arm_aapcs_vfpcc void @_Z15with_empty_base16HVAWithEmptyBase(%struct.HVAWithEmptyBase %a.coerce)
 void CC with_empty_base(HVAWithEmptyBase a) {}
 
@@ -100,7 +100,7 @@ struct HVAWithEmptyBitField : Float1, Float2 {
 };
 
 // PPC: define{{.*}} void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] %a.coerce)
-// ARM64: define{{.*}} void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] %a.coerce)
+// ARM64: define{{.*}} void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] alignstack(8) %a.coerce)
 // ARM32: define{{.*}} arm_aapcs_vfpcc void @_Z19with_empty_bitfield20HVAWithEmptyBitField(%struct.HVAWithEmptyBitField %a.coerce)
 // X64: define dso_local x86_vectorcallcc void @"\01_Z19with_empty_bitfield20HVAWithEmptyBitField@@16"(%struct.HVAWithEmptyBitField inreg %a.coerce)
 void CC with_empty_bitfield(HVAWithEmptyBitField a) {}


        


More information about the cfe-commits mailing list