[clang] de98cf9 - [CodeGen] Add an alignment attribute to all sret parameters

Tue Mar 24 12:32:23 PDT 2020

Author: Erik Pilkington
Date: 2020-03-24T15:31:57-04:00
New Revision: de98cf92e301ab559a7417f1eca5cfa53624c9e1

URL: https://github.com/llvm/llvm-project/commit/de98cf92e301ab559a7417f1eca5cfa53624c9e1
DIFF: https://github.com/llvm/llvm-project/commit/de98cf92e301ab559a7417f1eca5cfa53624c9e1.diff

LOG: [CodeGen] Add an alignment attribute to all sret parameters

This fixes a miscompile when the parameter is actually underaligned.
rdar://58316406

Differential revision: https://reviews.llvm.org/D74183

Added: 
    clang/test/CodeGen/aligned-sret.c

Modified: 
    clang/lib/CodeGen/CGCall.cpp
    clang/test/CodeGen/2006-05-19-SingleEltReturn.c
    clang/test/CodeGen/aarch64-varargs.c
    clang/test/CodeGen/aggregate-assign-call.c
    clang/test/CodeGen/arc/arguments.c
    clang/test/CodeGen/arm-aapcs-vfp.c
    clang/test/CodeGen/arm-homogenous.c
    clang/test/CodeGen/arm-neon-vld.c
    clang/test/CodeGen/arm-varargs.c
    clang/test/CodeGen/arm-vector-arguments.c
    clang/test/CodeGen/arm-vfp16-arguments.c
    clang/test/CodeGen/arm-vfp16-arguments2.cpp
    clang/test/CodeGen/arm64-arguments.c
    clang/test/CodeGen/arm64-microsoft-arguments.cpp
    clang/test/CodeGen/arm64_32.c
    clang/test/CodeGen/arm_neon_intrinsics.c
    clang/test/CodeGen/blocks.c
    clang/test/CodeGen/c11atomics-ios.c
    clang/test/CodeGen/c11atomics.c
    clang/test/CodeGen/lanai-arguments.c
    clang/test/CodeGen/le32-arguments.c
    clang/test/CodeGen/mcu-struct-return.c
    clang/test/CodeGen/mingw-long-double.c
    clang/test/CodeGen/mips-zero-sized-struct.c
    clang/test/CodeGen/mips64-padding-arg.c
    clang/test/CodeGen/ms_abi.c
    clang/test/CodeGen/ppc64-align-struct.c
    clang/test/CodeGen/ppc64-elf-abi.c
    clang/test/CodeGen/ppc64-qpx-vector.c
    clang/test/CodeGen/ppc64-soft-float.c
    clang/test/CodeGen/ppc64-vector.c
    clang/test/CodeGen/ppc64le-aggregates.c
    clang/test/CodeGen/ppc64le-f128Aggregates.c
    clang/test/CodeGen/regparm-struct.c
    clang/test/CodeGen/renderscript.c
    clang/test/CodeGen/riscv32-ilp32-abi.c
    clang/test/CodeGen/riscv32-ilp32-ilp32f-abi.c
    clang/test/CodeGen/riscv32-ilp32-ilp32f-ilp32d-abi.c
    clang/test/CodeGen/riscv32-ilp32d-abi.c
    clang/test/CodeGen/riscv32-ilp32f-abi.c
    clang/test/CodeGen/riscv32-ilp32f-ilp32d-abi.c
    clang/test/CodeGen/riscv64-lp64-abi.c
    clang/test/CodeGen/riscv64-lp64-lp64f-abi.c
    clang/test/CodeGen/riscv64-lp64-lp64f-lp64d-abi.c
    clang/test/CodeGen/riscv64-lp64d-abi.c
    clang/test/CodeGen/sparcv9-abi.c
    clang/test/CodeGen/struct-passing.c
    clang/test/CodeGen/systemz-abi-vector.c
    clang/test/CodeGen/systemz-abi.c
    clang/test/CodeGen/systemz-abi.cpp
    clang/test/CodeGen/systemz-inline-asm.c
    clang/test/CodeGen/vectorcall.c
    clang/test/CodeGen/wasm-arguments.c
    clang/test/CodeGen/wasm-varargs.c
    clang/test/CodeGen/windows-struct-abi.c
    clang/test/CodeGen/x86_32-arguments-darwin.c
    clang/test/CodeGen/x86_32-arguments-iamcu.c
    clang/test/CodeGen/x86_64-arguments-nacl.c
    clang/test/CodeGen/x86_64-arguments-win32.c
    clang/test/CodeGen/x86_64-arguments.c
    clang/test/CodeGenCXX/arm-cc.cpp
    clang/test/CodeGenCXX/builtin-source-location.cpp
    clang/test/CodeGenCXX/call-with-static-chain.cpp
    clang/test/CodeGenCXX/conditional-gnu-ext.cpp
    clang/test/CodeGenCXX/cxx1z-copy-omission.cpp
    clang/test/CodeGenCXX/cxx1z-lambda-star-this.cpp
    clang/test/CodeGenCXX/exceptions.cpp
    clang/test/CodeGenCXX/homogeneous-aggregates.cpp
    clang/test/CodeGenCXX/lambda-expressions.cpp
    clang/test/CodeGenCXX/microsoft-abi-byval-sret.cpp
    clang/test/CodeGenCXX/microsoft-abi-byval-thunks.cpp
    clang/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp
    clang/test/CodeGenCXX/microsoft-abi-eh-cleanups.cpp
    clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp
    clang/test/CodeGenCXX/microsoft-abi-vmemptr-conflicts.cpp
    clang/test/CodeGenCXX/regcall.cpp
    clang/test/CodeGenCXX/stack-reuse-miscompile.cpp
    clang/test/CodeGenCXX/stack-reuse.cpp
    clang/test/CodeGenCXX/temporaries.cpp
    clang/test/CodeGenCXX/thiscall-struct-return.cpp
    clang/test/CodeGenCXX/thunk-returning-memptr.cpp
    clang/test/CodeGenCXX/thunks.cpp
    clang/test/CodeGenCXX/trivial_abi.cpp
    clang/test/CodeGenCXX/unknown-anytype.cpp
    clang/test/CodeGenCXX/wasm-args-returns.cpp
    clang/test/CodeGenCXX/x86_32-arguments.cpp
    clang/test/CodeGenCXX/x86_64-arguments.cpp
    clang/test/CodeGenCoroutines/coro-await.cpp
    clang/test/CodeGenCoroutines/coro-gro-nrvo.cpp
    clang/test/CodeGenObjC/arc.m
    clang/test/CodeGenObjC/direct-method.m
    clang/test/CodeGenObjC/nontrivial-c-struct-exception.m
    clang/test/CodeGenObjC/objc-non-trivial-struct-nrvo.m
    clang/test/CodeGenObjC/stret-1.m
    clang/test/CodeGenObjC/weak-in-c-struct.m
    clang/test/CodeGenObjCXX/objc-struct-cxx-abi.mm
    clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
    clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
    clang/test/CodeGenOpenCLCXX/addrspace-of-this.cl
    clang/test/Modules/templates.mm

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 20da9f24a3b5..76520ba17541 100644

--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2084,6 +2084,7 @@ void CodeGenModule::ConstructAttributeList(
     hasUsedSRet = true;
     if (RetAI.getInReg())
       SRETAttrs.addAttribute(llvm::Attribute::InReg);
+    SRETAttrs.addAlignmentAttr(RetAI.getIndirectAlign().getQuantity());
     ArgAttrs[IRFunctionArgs.getSRetArgNo()] =
         llvm::AttributeSet::get(getLLVMContext(), SRETAttrs);
   }

diff  --git a/clang/test/CodeGen/2006-05-19-SingleEltReturn.c b/clang/test/CodeGen/2006-05-19-SingleEltReturn.c
index dfc23f84ab42..d3f9e4e00acd 100644
--- a/clang/test/CodeGen/2006-05-19-SingleEltReturn.c
+++ b/clang/test/CodeGen/2006-05-19-SingleEltReturn.c
@@ -24,7 +24,7 @@ struct Y bar() {
 
 
 // X86_32: define void @foo(%struct.Y* %P)
-// X86_32:   call void @bar(%struct.Y* sret %{{[^),]*}})
+// X86_32:   call void @bar(%struct.Y* sret align 4 %{{[^),]*}})
 
-// X86_32: define void @bar(%struct.Y* noalias sret %{{[^,)]*}})
+// X86_32: define void @bar(%struct.Y* noalias sret align 4 %{{[^,)]*}})
 // X86_32:   ret void

diff  --git a/clang/test/CodeGen/aarch64-varargs.c b/clang/test/CodeGen/aarch64-varargs.c
index c213f5b9375b..27bb602e75de 100644
--- a/clang/test/CodeGen/aarch64-varargs.c
+++ b/clang/test/CodeGen/aarch64-varargs.c
@@ -639,7 +639,7 @@ typedef struct __attribute__((aligned(32))) {
   __int128 val;
 } overaligned_int128_struct;
 overaligned_int128_struct overaligned_int128_struct_test() {
-// CHECK-LABEL: define void @overaligned_int128_struct_test(%struct.overaligned_int128_struct* noalias sret %agg.result)
+// CHECK-LABEL: define void @overaligned_int128_struct_test(%struct.overaligned_int128_struct* noalias sret align 32 %agg.result)
   return va_arg(the_list, overaligned_int128_struct);
 // CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
 // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
@@ -853,7 +853,7 @@ typedef struct {
   __int128 val __attribute__((aligned(32)));
 } overaligned_int128_struct_member;
 overaligned_int128_struct_member overaligned_int128_struct_member_test() {
-// CHECK-LABEL: define void @overaligned_int128_struct_member_test(%struct.overaligned_int128_struct_member* noalias sret %agg.result)
+// CHECK-LABEL: define void @overaligned_int128_struct_member_test(%struct.overaligned_int128_struct_member* noalias sret align 32 %agg.result)
   return va_arg(the_list, overaligned_int128_struct_member);
 // CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
 // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0

diff  --git a/clang/test/CodeGen/aggregate-assign-call.c b/clang/test/CodeGen/aggregate-assign-call.c
index d00cb90c0940..9616e6d22562 100644
--- a/clang/test/CodeGen/aggregate-assign-call.c
+++ b/clang/test/CodeGen/aggregate-assign-call.c
@@ -62,8 +62,8 @@ struct S baz(int i, volatile int *j) {
     // O1-NEWPM: %[[TMP3:.*]] = bitcast %struct.S* %[[TMP2_ALLOCA]] to i8*
     // O1-NEWPM: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* nonnull %[[P]])
     //
-    // O1-LEGACY: call void @foo_int(%struct.S* sret %[[TMP1_ALLOCA]],
-    // O1-NEWPM: call void @foo_int(%struct.S* nonnull sret %[[TMP1_ALLOCA]],
+    // O1-LEGACY: call void @foo_int(%struct.S* sret align 4 %[[TMP1_ALLOCA]],
+    // O1-NEWPM: call void @foo_int(%struct.S* nonnull sret align 4 %[[TMP1_ALLOCA]],
     // O1: call void @llvm.memcpy
     // O1-LEGACY: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP1_ALLOCA]] to i8*
     // O1-LEGACY: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* %[[P]])
@@ -71,8 +71,8 @@ struct S baz(int i, volatile int *j) {
     // O1-LEGACY: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP2_ALLOCA]] to i8*
     // O1-LEGACY: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* %[[P]])
     // O1-NEWPM: call void @llvm.lifetime.start.p0i8({{[^,]*}}, i8* nonnull %[[TMP3]])
-    // O1-LEGACY: call void @foo_int(%struct.S* sret %[[TMP2_ALLOCA]],
-    // O1-NEWPM: call void @foo_int(%struct.S* nonnull sret %[[TMP2_ALLOCA]],
+    // O1-LEGACY: call void @foo_int(%struct.S* sret align 4 %[[TMP2_ALLOCA]],
+    // O1-NEWPM: call void @foo_int(%struct.S* nonnull sret align 4 %[[TMP2_ALLOCA]],
     // O1: call void @llvm.memcpy
     // O1-LEGACY: %[[P:[^ ]+]] = bitcast %struct.S* %[[TMP2_ALLOCA]] to i8*
     // O1-LEGACY: call void @llvm.lifetime.end.p0i8({{[^,]*}}, i8* %[[P]])

diff  --git a/clang/test/CodeGen/aligned-sret.c b/clang/test/CodeGen/aligned-sret.c
new file mode 100644
index 000000000000..c459fe730163
--- /dev/null
+++ b/clang/test/CodeGen/aligned-sret.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -triple x86_64-apple-macos %s -S -emit-llvm -o- | FileCheck %s
+
+typedef __attribute__((__ext_vector_type__(4),__aligned__(16))) double simd_double4;
+typedef struct { simd_double4 columns[4]; } simd_double4x4;
+typedef simd_double4x4 matrix_double4x4;
+
+// CHECK: define void @ident(%struct.simd_double4x4* noalias sret align 16 %agg.result
+matrix_double4x4 ident(matrix_double4x4 x) {
+  return x;
+}

diff  --git a/clang/test/CodeGen/arc/arguments.c b/clang/test/CodeGen/arc/arguments.c
index fdc6037da730..9c9b553b1be4 100644
--- a/clang/test/CodeGen/arc/arguments.c
+++ b/clang/test/CodeGen/arc/arguments.c
@@ -22,7 +22,7 @@ void cf1(cs1 i) {}
 typedef struct {
   int cc;
 } s2;
-// CHECK: define void @f2(%struct.s2* noalias sret %agg.result)
+// CHECK: define void @f2(%struct.s2* noalias sret align 4 %agg.result)
 s2 f2() {
   s2 foo;
   return foo;
@@ -32,7 +32,7 @@ typedef struct {
   int cc;
   int dd;
 } s3;
-// CHECK: define void @f3(%struct.s3* noalias sret %agg.result)
+// CHECK: define void @f3(%struct.s3* noalias sret align 4 %agg.result)
 s3 f3() {
   s3 foo;
   return foo;
@@ -128,8 +128,8 @@ void st3(s16 a, s16 b, s16 c) {}
 
 // 1 sret + 1 i32 + 2*(i32 coerce) + 4*(i32 coerce) + 1 byval
 s16 st4(int x, s8 a, s16 b, s16 c) { return b; }
-// CHECK: define void @st4(%struct.s16* noalias sret %agg.result, i32 inreg %x, i32 inreg %a.coerce0, i32 inreg %a.coerce1, i32 inreg %b.coerce0, i32 inreg %b.coerce1, i32 inreg %b.coerce2, i32 inreg %b.coerce3, { i32, i32, i32, i32 } %c.coerce)
+// CHECK: define void @st4(%struct.s16* noalias sret align 4 %agg.result, i32 inreg %x, i32 inreg %a.coerce0, i32 inreg %a.coerce1, i32 inreg %b.coerce0, i32 inreg %b.coerce1, i32 inreg %b.coerce2, i32 inreg %b.coerce3, { i32, i32, i32, i32 } %c.coerce)
 
 // 1 sret + 2*(i32 coerce) + 4*(i32 coerce) + 4*(i32 coerce)
 s16 st5(s8 a, s16 b, s16 c) { return b; }
-// CHECK: define void @st5(%struct.s16* noalias sret %agg.result, i32 inreg %a.coerce0, i32 inreg %a.coerce1, i32 inreg %b.coerce0, i32 inreg %b.coerce1, i32 inreg %b.coerce2, i32 inreg %b.coerce3, { i32, i32, i32, i32 } %c.coerce)
+// CHECK: define void @st5(%struct.s16* noalias sret align 4 %agg.result, i32 inreg %a.coerce0, i32 inreg %a.coerce1, i32 inreg %b.coerce0, i32 inreg %b.coerce1, i32 inreg %b.coerce2, i32 inreg %b.coerce3, { i32, i32, i32, i32 } %c.coerce)

diff  --git a/clang/test/CodeGen/arm-aapcs-vfp.c b/clang/test/CodeGen/arm-aapcs-vfp.c
index 69581fcab247..486ed6ab94fd 100644
--- a/clang/test/CodeGen/arm-aapcs-vfp.c
+++ b/clang/test/CodeGen/arm-aapcs-vfp.c
@@ -125,7 +125,7 @@ void test_vfp_stack_gpr_split_1(double a, double b, double c, double d, double e
 // CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_2(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, i32 %j, [2 x i64] %k.coerce)
 void test_vfp_stack_gpr_split_2(double a, double b, double c, double d, double e, double f, double g, double h, double i, int j, struct_long_long_int k) {}
 
-// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_3(%struct.struct_long_long_int* noalias sret %agg.result, double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, [2 x i64] %k.coerce)
+// CHECK: define arm_aapcs_vfpcc void @test_vfp_stack_gpr_split_3(%struct.struct_long_long_int* noalias sret align 8 %agg.result, double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i, [2 x i64] %k.coerce)
 struct_long_long_int test_vfp_stack_gpr_split_3(double a, double b, double c, double d, double e, double f, double g, double h, double i, struct_long_long_int k) {}
 
 typedef struct { int a; int b:4; int c; } struct_int_bitfield_int;

diff  --git a/clang/test/CodeGen/arm-homogenous.c b/clang/test/CodeGen/arm-homogenous.c
index 42a9bc1c1643..d321fc974c52 100644
--- a/clang/test/CodeGen/arm-homogenous.c
+++ b/clang/test/CodeGen/arm-homogenous.c
@@ -27,7 +27,7 @@ void test_union_with_first_floats(void) {
 void test_return_union_with_first_floats(void) {
   g_u_f = returns_union_with_first_floats();
 }
-// CHECK: declare arm_aapcs_vfpcc void @returns_union_with_first_floats(%union.union_with_first_floats* sret)
+// CHECK: declare arm_aapcs_vfpcc void @returns_union_with_first_floats(%union.union_with_first_floats* sret align 4)
 
 /* This is not a homogenous aggregate - fundamental types are 
diff erent */
 typedef union {
@@ -47,7 +47,7 @@ void test_union_with_non_first_floats(void) {
 void test_return_union_with_non_first_floats(void) {
   g_u_nf_f = returns_union_with_non_first_floats();
 }
-// CHECK: declare arm_aapcs_vfpcc void @returns_union_with_non_first_floats(%union.union_with_non_first_floats* sret)
+// CHECK: declare arm_aapcs_vfpcc void @returns_union_with_non_first_floats(%union.union_with_non_first_floats* sret align 4)
 
 /* This is not a homogenous aggregate - fundamental types are 
diff erent */
 typedef struct {
@@ -67,7 +67,7 @@ void test_struct_with_union_with_first_floats(void) {
 void test_return_struct_with_union_with_first_floats(void) {
   g_s_f = returns_struct_with_union_with_first_floats();
 }
-// CHECK: declare arm_aapcs_vfpcc void @returns_struct_with_union_with_first_floats(%struct.struct_with_union_with_first_floats* sret)
+// CHECK: declare arm_aapcs_vfpcc void @returns_struct_with_union_with_first_floats(%struct.struct_with_union_with_first_floats* sret align 4)
 
 /* This is not a homogenous aggregate - fundamental types are 
diff erent */
 typedef struct {
@@ -87,7 +87,7 @@ void test_struct_with_union_with_non_first_floats(void) {
 void test_return_struct_with_union_with_non_first_floats(void) {
   g_s_nf_f = returns_struct_with_union_with_non_first_floats();
 }
-// CHECK: declare arm_aapcs_vfpcc void @returns_struct_with_union_with_non_first_floats(%struct.struct_with_union_with_non_first_floats* sret)
+// CHECK: declare arm_aapcs_vfpcc void @returns_struct_with_union_with_non_first_floats(%struct.struct_with_union_with_non_first_floats* sret align 4)
 
 /* Plain array is not a homogenous aggregate */
 extern void takes_array_of_floats(float a[4]);

diff  --git a/clang/test/CodeGen/arm-neon-vld.c b/clang/test/CodeGen/arm-neon-vld.c
index 2c7af92f4796..8d3d61c250a9 100644
--- a/clang/test/CodeGen/arm-neon-vld.c
+++ b/clang/test/CodeGen/arm-neon-vld.c
@@ -9,7 +9,7 @@
 
 // CHECK-LABEL: @test_vld1_f16_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x4x2_t, align 8
-// CHECK-A32: %struct.float16x4x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.float16x4x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
@@ -29,7 +29,7 @@ float16x4x2_t test_vld1_f16_x2(float16_t const *a) {
 
 // CHECK-LABEL: @test_vld1_f16_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x4x3_t, align 8
-// CHECK-A32: %struct.float16x4x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.float16x4x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
@@ -49,7 +49,7 @@ float16x4x3_t test_vld1_f16_x3(float16_t const *a) {
 
 // CHECK-LABEL: @test_vld1_f16_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x4x4_t, align 8
-// CHECK-A32: %struct.float16x4x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.float16x4x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
@@ -69,7 +69,7 @@ float16x4x4_t test_vld1_f16_x4(float16_t const *a) {
 
 // CHECK-LABEL: @test_vld1_f32_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
-// CHECK-A32: %struct.float32x2x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.float32x2x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
@@ -89,7 +89,7 @@ float32x2x2_t test_vld1_f32_x2(float32_t const *a) {
 
 // CHECK-LABEL: @test_vld1_f32_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x2x3_t, align 8
-// CHECK-A32: %struct.float32x2x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.float32x2x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
@@ -108,7 +108,7 @@ float32x2x3_t test_vld1_f32_x3(float32_t const *a) {
 
 // CHECK-LABEL: @test_vld1_f32_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x2x4_t, align 8
-// CHECK-A32: %struct.float32x2x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.float32x2x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
@@ -128,7 +128,7 @@ float32x2x4_t test_vld1_f32_x4(float32_t const *a) {
 
 // CHECK-LABEL: @test_vld1_p16_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
-// CHECK-A32: %struct.poly16x4x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.poly16x4x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
@@ -148,7 +148,7 @@ poly16x4x2_t test_vld1_p16_x2(poly16_t const *a) {
 
 // CHECK-LABEL: @test_vld1_p16_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x4x3_t, align 8
-// CHECK-A32: %struct.poly16x4x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.poly16x4x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
@@ -168,7 +168,7 @@ poly16x4x3_t test_vld1_p16_x3(poly16_t const *a) {
 
 // CHECK-LABEL: @test_vld1_p16_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x4x4_t, align 8
-// CHECK-A32: %struct.poly16x4x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.poly16x4x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
@@ -188,7 +188,7 @@ poly16x4x4_t test_vld1_p16_x4(poly16_t const *a) {
 
 // CHECK-LABEL: @test_vld1_p8_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
-// CHECK-A32: %struct.poly8x8x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.poly8x8x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
 // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v8i8.p0i8(i8* %a)
@@ -206,7 +206,7 @@ poly8x8x2_t test_vld1_p8_x2(poly8_t const *a) {
 
 // CHECK-LABEL: @test_vld1_p8_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x8x3_t, align 8
-// CHECK-A32: %struct.poly8x8x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.poly8x8x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
 // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v8i8.p0i8(i8* %a)
@@ -224,7 +224,7 @@ poly8x8x3_t test_vld1_p8_x3(poly8_t const *a) {
 
 // CHECK-LABEL: @test_vld1_p8_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x8x4_t, align 8
-// CHECK-A32: %struct.poly8x8x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.poly8x8x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
 // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v8i8.p0i8(i8* %a)
@@ -242,7 +242,7 @@ poly8x8x4_t test_vld1_p8_x4(poly8_t const *a) {
 
 // CHECK-LABEL: @test_vld1_s16_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
-// CHECK-A32: %struct.int16x4x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int16x4x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
@@ -262,7 +262,7 @@ int16x4x2_t test_vld1_s16_x2(int16_t const *a) {
 
 // CHECK-LABEL: @test_vld1_s16_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x4x3_t, align 8
-// CHECK-A32: %struct.int16x4x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int16x4x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
@@ -282,7 +282,7 @@ int16x4x3_t test_vld1_s16_x3(int16_t const *a) {
 
 // CHECK-LABEL: @test_vld1_s16_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x4x4_t, align 8
-// CHECK-A32: %struct.int16x4x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int16x4x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
@@ -302,7 +302,7 @@ int16x4x4_t test_vld1_s16_x4(int16_t const *a) {
 
 // CHECK-LABEL: @test_vld1_s32_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
-// CHECK-A32: %struct.int32x2x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int32x2x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
@@ -322,7 +322,7 @@ int32x2x2_t test_vld1_s32_x2(int32_t const *a) {
 
 // CHECK-LABEL: @test_vld1_s32_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x2x3_t, align 8
-// CHECK-A32: %struct.int32x2x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int32x2x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
@@ -342,7 +342,7 @@ int32x2x3_t test_vld1_s32_x3(int32_t const *a) {
 
 // CHECK-LABEL: @test_vld1_s32_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x2x4_t, align 8
-// CHECK-A32: %struct.int32x2x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int32x2x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
@@ -362,7 +362,7 @@ int32x2x4_t test_vld1_s32_x4(int32_t const *a) {
 
 // CHECK-LABEL: @test_vld1_s64_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x1x2_t, align 8
-// CHECK-A32: %struct.int64x1x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int64x1x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
@@ -382,7 +382,7 @@ int64x1x2_t test_vld1_s64_x2(int64_t const *a) {
 
 // CHECK-LABEL: @test_vld1_s64_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x1x3_t, align 8
-// CHECK-A32: %struct.int64x1x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int64x1x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
@@ -402,7 +402,7 @@ int64x1x3_t test_vld1_s64_x3(int64_t const *a) {
 
 // CHECK-LABEL: @test_vld1_s64_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x1x4_t, align 8
-// CHECK-A32: %struct.int64x1x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int64x1x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
@@ -422,7 +422,7 @@ int64x1x4_t test_vld1_s64_x4(int64_t const *a) {
 
 // CHECK-LABEL: @test_vld1_s8_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
-// CHECK-A32: %struct.int8x8x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int8x8x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
 // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v8i8.p0i8(i8* %a)
@@ -440,7 +440,7 @@ int8x8x2_t test_vld1_s8_x2(int8_t const *a) {
 
 // CHECK-LABEL: @test_vld1_s8_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x8x3_t, align 8
-// CHECK-A32: %struct.int8x8x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int8x8x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
 // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v8i8.p0i8(i8* %a)
@@ -458,7 +458,7 @@ int8x8x3_t test_vld1_s8_x3(int8_t const *a) {
 
 // CHECK-LABEL: @test_vld1_s8_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x8x4_t, align 8
-// CHECK-A32: %struct.int8x8x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int8x8x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
 // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v8i8.p0i8(i8* %a)
@@ -476,7 +476,7 @@ int8x8x4_t test_vld1_s8_x4(int8_t const *a) {
 
 // CHECK-LABEL: @test_vld1_u16_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
-// CHECK-A32: %struct.uint16x4x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint16x4x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
@@ -496,7 +496,7 @@ uint16x4x2_t test_vld1_u16_x2(uint16_t const *a) {
 
 // CHECK-LABEL: @test_vld1_u16_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x4x3_t, align 8
-// CHECK-A32: %struct.uint16x4x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint16x4x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
@@ -516,7 +516,7 @@ uint16x4x3_t test_vld1_u16_x3(uint16_t const *a) {
 
 // CHECK-LABEL: @test_vld1_u16_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x4x4_t, align 8
-// CHECK-A32: %struct.uint16x4x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint16x4x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
@@ -536,7 +536,7 @@ uint16x4x4_t test_vld1_u16_x4(uint16_t const *a) {
 
 // CHECK-LABEL: @test_vld1_u32_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
-// CHECK-A32: %struct.uint32x2x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint32x2x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
@@ -556,7 +556,7 @@ uint32x2x2_t test_vld1_u32_x2(uint32_t const *a) {
 
 // CHECK-LABEL: @test_vld1_u32_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x2x3_t, align 8
-// CHECK-A32: %struct.uint32x2x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint32x2x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
@@ -576,7 +576,7 @@ uint32x2x3_t test_vld1_u32_x3(uint32_t const *a) {
 
 // CHECK-LABEL: @test_vld1_u32_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x2x4_t, align 8
-// CHECK-A32: %struct.uint32x2x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint32x2x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
@@ -596,7 +596,7 @@ uint32x2x4_t test_vld1_u32_x4(uint32_t const *a) {
 
 // CHECK-LABEL: @test_vld1_u64_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x1x2_t, align 8
-// CHECK-A32: %struct.uint64x1x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint64x1x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
@@ -616,7 +616,7 @@ uint64x1x2_t test_vld1_u64_x2(uint64_t const *a) {
 
 // CHECK-LABEL: @test_vld1_u64_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x1x3_t, align 8
-// CHECK-A32: %struct.uint64x1x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint64x1x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
@@ -636,7 +636,7 @@ uint64x1x3_t test_vld1_u64_x3(uint64_t const *a) {
 
 // CHECK-LABEL: @test_vld1_u64_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x1x4_t, align 8
-// CHECK-A32: %struct.uint64x1x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint64x1x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
@@ -656,7 +656,7 @@ uint64x1x4_t test_vld1_u64_x4(uint64_t const *a) {
 
 // CHECK-LABEL: @test_vld1_u8_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
-// CHECK-A32: %struct.uint8x8x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint8x8x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
 // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v8i8.p0i8(i8* %a)
@@ -674,7 +674,7 @@ uint8x8x2_t test_vld1_u8_x2(uint8_t const *a) {
 
 // CHECK-LABEL: @test_vld1_u8_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x8x3_t, align 8
-// CHECK-A32: %struct.uint8x8x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint8x8x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
 // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v8i8.p0i8(i8* %a)
@@ -692,7 +692,7 @@ uint8x8x3_t test_vld1_u8_x3(uint8_t const *a) {
 
 // CHECK-LABEL: @test_vld1_u8_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x8x4_t, align 8
-// CHECK-A32: %struct.uint8x8x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint8x8x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
 // CHECK: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v8i8.p0i8(i8* %a)
@@ -710,7 +710,7 @@ uint8x8x4_t test_vld1_u8_x4(uint8_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_f16_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x8x2_t, align 16
-// CHECK-A32: %struct.float16x8x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.float16x8x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x2_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
@@ -730,7 +730,7 @@ float16x8x2_t test_vld1q_f16_x2(float16_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_f16_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x8x3_t, align 16
-// CHECK-A32: %struct.float16x8x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.float16x8x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x3_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
@@ -750,7 +750,7 @@ float16x8x3_t test_vld1q_f16_x3(float16_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_f16_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float16x8x4_t, align 16
-// CHECK-A32: %struct.float16x8x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.float16x8x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.float16x8x4_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast half* %a to i8*
@@ -770,7 +770,7 @@ float16x8x4_t test_vld1q_f16_x4(float16_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_f32_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
-// CHECK-A32: %struct.float32x4x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.float32x4x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x2_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
@@ -790,7 +790,7 @@ float32x4x2_t test_vld1q_f32_x2(float32_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_f32_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x4x3_t, align 16
-// CHECK-A32: %struct.float32x4x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.float32x4x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x3_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
@@ -810,7 +810,7 @@ float32x4x3_t test_vld1q_f32_x3(float32_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_f32_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.float32x4x4_t, align 16
-// CHECK-A32: %struct.float32x4x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.float32x4x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.float32x4x4_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast float* %a to i8*
@@ -830,7 +830,7 @@ float32x4x4_t test_vld1q_f32_x4(float32_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_p16_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
-// CHECK-A32: %struct.poly16x8x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.poly16x8x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
@@ -850,7 +850,7 @@ poly16x8x2_t test_vld1q_p16_x2(poly16_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_p16_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x8x3_t, align 16
-// CHECK-A32: %struct.poly16x8x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.poly16x8x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
@@ -870,7 +870,7 @@ poly16x8x3_t test_vld1q_p16_x3(poly16_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_p16_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly16x8x4_t, align 16
-// CHECK-A32: %struct.poly16x8x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.poly16x8x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
@@ -890,7 +890,7 @@ poly16x8x4_t test_vld1q_p16_x4(poly16_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_p8_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
-// CHECK-A32: %struct.poly8x16x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.poly8x16x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v16i8.p0i8(i8* %a)
@@ -908,7 +908,7 @@ poly8x16x2_t test_vld1q_p8_x2(poly8_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_p8_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x16x3_t, align 16
-// CHECK-A32: %struct.poly8x16x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.poly8x16x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v16i8.p0i8(i8* %a)
@@ -926,7 +926,7 @@ poly8x16x3_t test_vld1q_p8_x3(poly8_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_p8_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.poly8x16x4_t, align 16
-// CHECK-A32: %struct.poly8x16x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.poly8x16x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v16i8.p0i8(i8* %a)
@@ -944,7 +944,7 @@ poly8x16x4_t test_vld1q_p8_x4(poly8_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_s16_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
-// CHECK-A32: %struct.int16x8x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int16x8x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x2_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
@@ -964,7 +964,7 @@ int16x8x2_t test_vld1q_s16_x2(int16_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_s16_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x8x3_t, align 16
-// CHECK-A32: %struct.int16x8x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int16x8x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x3_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
@@ -984,7 +984,7 @@ int16x8x3_t test_vld1q_s16_x3(int16_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_s16_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int16x8x4_t, align 16
-// CHECK-A32: %struct.int16x8x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int16x8x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int16x8x4_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
@@ -1004,7 +1004,7 @@ int16x8x4_t test_vld1q_s16_x4(int16_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_s32_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
-// CHECK-A32: %struct.int32x4x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int32x4x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x2_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
@@ -1024,7 +1024,7 @@ int32x4x2_t test_vld1q_s32_x2(int32_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_s32_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x4x3_t, align 16
-// CHECK-A32: %struct.int32x4x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int32x4x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x3_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
@@ -1044,7 +1044,7 @@ int32x4x3_t test_vld1q_s32_x3(int32_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_s32_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int32x4x4_t, align 16
-// CHECK-A32: %struct.int32x4x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int32x4x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int32x4x4_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
@@ -1064,7 +1064,7 @@ int32x4x4_t test_vld1q_s32_x4(int32_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_s64_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x2x2_t, align 16
-// CHECK-A32: %struct.int64x2x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int64x2x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x2_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x2_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
@@ -1084,7 +1084,7 @@ int64x2x2_t test_vld1q_s64_x2(int64_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_s64_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x2x3_t, align 16
-// CHECK-A32: %struct.int64x2x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int64x2x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x3_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x3_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
@@ -1104,7 +1104,7 @@ int64x2x3_t test_vld1q_s64_x3(int64_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_s64_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int64x2x4_t, align 16
-// CHECK-A32: %struct.int64x2x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int64x2x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int64x2x4_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int64x2x4_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
@@ -1124,7 +1124,7 @@ int64x2x4_t test_vld1q_s64_x4(int64_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_s8_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
-// CHECK-A32: %struct.int8x16x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int8x16x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x2_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v16i8.p0i8(i8* %a)
@@ -1142,7 +1142,7 @@ int8x16x2_t test_vld1q_s8_x2(int8_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_s8_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x16x3_t, align 16
-// CHECK-A32: %struct.int8x16x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int8x16x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x3_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v16i8.p0i8(i8* %a)
@@ -1160,7 +1160,7 @@ int8x16x3_t test_vld1q_s8_x3(int8_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_s8_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.int8x16x4_t, align 16
-// CHECK-A32: %struct.int8x16x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.int8x16x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.int8x16x4_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v16i8.p0i8(i8* %a)
@@ -1178,7 +1178,7 @@ int8x16x4_t test_vld1q_s8_x4(int8_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_u16_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
-// CHECK-A32: %struct.uint16x8x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint16x8x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
@@ -1198,7 +1198,7 @@ uint16x8x2_t test_vld1q_u16_x2(uint16_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_u16_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x8x3_t, align 16
-// CHECK-A32: %struct.uint16x8x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint16x8x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
@@ -1218,7 +1218,7 @@ uint16x8x3_t test_vld1q_u16_x3(uint16_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_u16_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint16x8x4_t, align 16
-// CHECK-A32: %struct.uint16x8x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint16x8x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i16* %a to i8*
@@ -1238,7 +1238,7 @@ uint16x8x4_t test_vld1q_u16_x4(uint16_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_u32_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
-// CHECK-A32: %struct.uint32x4x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint32x4x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
@@ -1258,7 +1258,7 @@ uint32x4x2_t test_vld1q_u32_x2(uint32_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_u32_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x4x3_t, align 16
-// CHECK-A32: %struct.uint32x4x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint32x4x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
@@ -1278,7 +1278,7 @@ uint32x4x3_t test_vld1q_u32_x3(uint32_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_u32_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint32x4x4_t, align 16
-// CHECK-A32: %struct.uint32x4x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint32x4x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i32* %a to i8*
@@ -1298,7 +1298,7 @@ uint32x4x4_t test_vld1q_u32_x4(uint32_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_u64_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x2x2_t, align 16
-// CHECK-A32: %struct.uint64x2x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint64x2x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x2_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x2_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
@@ -1318,7 +1318,7 @@ uint64x2x2_t test_vld1q_u64_x2(uint64_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_u64_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x2x3_t, align 16
-// CHECK-A32: %struct.uint64x2x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint64x2x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x3_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x3_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
@@ -1338,7 +1338,7 @@ uint64x2x3_t test_vld1q_u64_x3(uint64_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_u64_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint64x2x4_t, align 16
-// CHECK-A32: %struct.uint64x2x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint64x2x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint64x2x4_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint64x2x4_t* [[__RET]] to i8*
 // CHECK: [[TMP1:%.*]] = bitcast i64* %a to i8*
@@ -1358,7 +1358,7 @@ uint64x2x4_t test_vld1q_u64_x4(uint64_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_u8_x2(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
-// CHECK-A32: %struct.uint8x16x2_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint8x16x2_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x2|arm.neon.vld1x2}}.v16i8.p0i8(i8* %a)
@@ -1376,7 +1376,7 @@ uint8x16x2_t test_vld1q_u8_x2(uint8_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_u8_x3(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x16x3_t, align 16
-// CHECK-A32: %struct.uint8x16x3_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint8x16x3_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x3|arm.neon.vld1x3}}.v16i8.p0i8(i8* %a)
@@ -1394,7 +1394,7 @@ uint8x16x3_t test_vld1q_u8_x3(uint8_t const *a) {
 
 // CHECK-LABEL: @test_vld1q_u8_x4(
 // CHECK-A64: [[RETVAL:%.*]] = alloca %struct.uint8x16x4_t, align 16
-// CHECK-A32: %struct.uint8x16x4_t* noalias sret [[RETVAL:%.*]],
+// CHECK-A32: %struct.uint8x16x4_t* noalias sret align 8 [[RETVAL:%.*]],
 // CHECK: [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align {{16|8}}
 // CHECK: [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
 // CHECK: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.{{aarch64.neon.ld1x4|arm.neon.vld1x4}}.v16i8.p0i8(i8* %a)

diff  --git a/clang/test/CodeGen/arm-varargs.c b/clang/test/CodeGen/arm-varargs.c
index 1f5c07ef57da..dff62568b6ca 100644
--- a/clang/test/CodeGen/arm-varargs.c
+++ b/clang/test/CodeGen/arm-varargs.c
@@ -24,7 +24,7 @@ struct bigstruct {
 };
 
 struct bigstruct simple_struct(void) {
-// CHECK-LABEL: define void @simple_struct(%struct.bigstruct* noalias sret %agg.result)
+// CHECK-LABEL: define void @simple_struct(%struct.bigstruct* noalias sret align 4 %agg.result)
   return va_arg(the_list, struct bigstruct);
 // CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
 // CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR]], i32 40
@@ -42,7 +42,7 @@ struct aligned_bigstruct {
 };
 
 struct aligned_bigstruct simple_aligned_struct(void) {
-// CHECK-LABEL: define void @simple_aligned_struct(%struct.aligned_bigstruct* noalias sret %agg.result)
+// CHECK-LABEL: define void @simple_aligned_struct(%struct.aligned_bigstruct* noalias sret align 8 %agg.result)
   return va_arg(the_list, struct aligned_bigstruct);
 // CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
 // CHECK: [[CUR_INT:%[a-z0-9._]+]] = ptrtoint i8* [[CUR]] to i32
@@ -78,7 +78,7 @@ struct hfa {
 };
 
 struct hfa simple_hfa(void) {
-// CHECK-LABEL: define void @simple_hfa(%struct.hfa* noalias sret %agg.result)
+// CHECK-LABEL: define void @simple_hfa(%struct.hfa* noalias sret align 4 %agg.result)
   return va_arg(the_list, struct hfa);
 // CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
 // CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR]], i32 8
@@ -185,7 +185,7 @@ typedef struct __attribute__((aligned(16))) {
   int val;
 } overaligned_int_struct;
 overaligned_int_struct overaligned_int_struct_test() {
-// CHECK-LABEL: define void @overaligned_int_struct_test(%struct.overaligned_int_struct* noalias sret %agg.result)
+// CHECK-LABEL: define void @overaligned_int_struct_test(%struct.overaligned_int_struct* noalias sret align 16 %agg.result)
   return va_arg(the_list, overaligned_int_struct);
 // CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
 // CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR]], i32 16
@@ -201,7 +201,7 @@ typedef struct __attribute__((packed,aligned(2))) {
   long long val;
 } underaligned_long_long_struct;
 underaligned_long_long_struct underaligned_long_long_struct_test() {
-// CHECK-LABEL: define void @underaligned_long_long_struct_test(%struct.underaligned_long_long_struct* noalias sret %agg.result)
+// CHECK-LABEL: define void @underaligned_long_long_struct_test(%struct.underaligned_long_long_struct* noalias sret align 2 %agg.result)
   return va_arg(the_list, underaligned_long_long_struct);
 // CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
 // CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR]], i32 8
@@ -217,7 +217,7 @@ typedef struct __attribute__((aligned(16))) {
   long long val;
 } overaligned_long_long_struct;
 overaligned_long_long_struct overaligned_long_long_struct_test() {
-// CHECK-LABEL: define void @overaligned_long_long_struct_test(%struct.overaligned_long_long_struct* noalias sret %agg.result)
+// CHECK-LABEL: define void @overaligned_long_long_struct_test(%struct.overaligned_long_long_struct* noalias sret align 16 %agg.result)
   return va_arg(the_list, overaligned_long_long_struct);
 // CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
 // CHECK: [[CUR_INT:%[a-z0-9._]+]] = ptrtoint i8* [[CUR]] to i32
@@ -259,7 +259,7 @@ typedef struct {
   int val __attribute__((aligned(16)));
 } overaligned_int_struct_member;
 overaligned_int_struct_member overaligned_int_struct_member_test() {
-// CHECK-LABEL: define void @overaligned_int_struct_member_test(%struct.overaligned_int_struct_member* noalias sret %agg.result)
+// CHECK-LABEL: define void @overaligned_int_struct_member_test(%struct.overaligned_int_struct_member* noalias sret align 16 %agg.result)
   return va_arg(the_list, overaligned_int_struct_member);
 // CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
 // CHECK: [[CUR_INT:%[a-z0-9._]+]] = ptrtoint i8* [[CUR]] to i32
@@ -279,7 +279,7 @@ typedef struct {
   long long val __attribute__((packed,aligned(2)));
 } underaligned_long_long_struct_member;
 underaligned_long_long_struct_member underaligned_long_long_struct_member_test() {
-// CHECK-LABEL: define void @underaligned_long_long_struct_member_test(%struct.underaligned_long_long_struct_member* noalias sret %agg.result)
+// CHECK-LABEL: define void @underaligned_long_long_struct_member_test(%struct.underaligned_long_long_struct_member* noalias sret align 2 %agg.result)
   return va_arg(the_list, underaligned_long_long_struct_member);
 // CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
 // CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR]], i32 8
@@ -295,7 +295,7 @@ typedef struct {
   long long val __attribute__((aligned(16)));
 } overaligned_long_long_struct_member;
 overaligned_long_long_struct_member overaligned_long_long_struct_member_test() {
-// CHECK-LABEL: define void @overaligned_long_long_struct_member_test(%struct.overaligned_long_long_struct_member* noalias sret %agg.result)
+// CHECK-LABEL: define void @overaligned_long_long_struct_member_test(%struct.overaligned_long_long_struct_member* noalias sret align 16 %agg.result)
   return va_arg(the_list, overaligned_long_long_struct_member);
 // CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
 // CHECK: [[CUR_INT:%[a-z0-9._]+]] = ptrtoint i8* [[CUR]] to i32

diff  --git a/clang/test/CodeGen/arm-vector-arguments.c b/clang/test/CodeGen/arm-vector-arguments.c
index 9bdddb72696e..aa8e65ba366f 100644
--- a/clang/test/CodeGen/arm-vector-arguments.c
+++ b/clang/test/CodeGen/arm-vector-arguments.c
@@ -9,7 +9,7 @@
 
 #include <arm_neon.h>
 
-// CHECK: define void @f0(%struct.int8x16x2_t* noalias sret %agg.result, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+// CHECK: define void @f0(%struct.int8x16x2_t* noalias sret align 16 %agg.result, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
 int8x16x2_t f0(int8x16_t a0, int8x16_t a1) {
   return vzipq_s8(a0, a1);
 }
@@ -25,7 +25,7 @@ typedef float T_float32x16 __attribute__ ((__vector_size__ (64)));
 T_float32x2 f1_0(T_float32x2 a0) { return a0; }
 // CHECK: define <4 x float> @f1_1(<4 x float> %{{.*}})
 T_float32x4 f1_1(T_float32x4 a0) { return a0; }
-// CHECK: define void @f1_2(<8 x float>* noalias sret %{{.*}}, <8 x float> %{{.*}})
+// CHECK: define void @f1_2(<8 x float>* noalias sret align 32 %{{.*}}, <8 x float> %{{.*}})
 T_float32x8 f1_2(T_float32x8 a0) { return a0; }
-// CHECK: define void @f1_3(<16 x float>* noalias sret %{{.*}}, <16 x float> %{{.*}})
+// CHECK: define void @f1_3(<16 x float>* noalias sret align 64 %{{.*}}, <16 x float> %{{.*}})
 T_float32x16 f1_3(T_float32x16 a0) { return a0; }

diff  --git a/clang/test/CodeGen/arm-vfp16-arguments.c b/clang/test/CodeGen/arm-vfp16-arguments.c
index 32f1bb7b2a77..42d990d97086 100644
--- a/clang/test/CodeGen/arm-vfp16-arguments.c
+++ b/clang/test/CodeGen/arm-vfp16-arguments.c
@@ -71,6 +71,6 @@ void test_hfa(hfa_t a) {}
 
 hfa_t ghfa;
 hfa_t test_ret_hfa(void) { return ghfa; }
-// CHECK-SOFT: define void @test_ret_hfa(%struct.hfa_t* noalias nocapture sret %agg.result)
+// CHECK-SOFT: define void @test_ret_hfa(%struct.hfa_t* noalias nocapture sret align 8 %agg.result)
 // CHECK-HARD: define arm_aapcs_vfpcc [2 x <2 x i32>] @test_ret_hfa()
 // CHECK-FULL: define arm_aapcs_vfpcc %struct.hfa_t @test_ret_hfa()

diff  --git a/clang/test/CodeGen/arm-vfp16-arguments2.cpp b/clang/test/CodeGen/arm-vfp16-arguments2.cpp
index e436a5ecd6ab..ccc81a3bfdd9 100644
--- a/clang/test/CodeGen/arm-vfp16-arguments2.cpp
+++ b/clang/test/CodeGen/arm-vfp16-arguments2.cpp
@@ -37,27 +37,27 @@ struct S5 : B1 {
   B1 M[1];
 };
 
-// CHECK-SOFT: define void @_Z2f12S1(%struct.S1* noalias nocapture sret %agg.result, [2 x i64] %s1.coerce)
+// CHECK-SOFT: define void @_Z2f12S1(%struct.S1* noalias nocapture sret align 8 %agg.result, [2 x i64] %s1.coerce)
 // CHECK-HARD: define arm_aapcs_vfpcc [2 x <2 x i32>] @_Z2f12S1([2 x <2 x i32>] returned %s1.coerce)
 // CHECK-FULL: define arm_aapcs_vfpcc %struct.S1 @_Z2f12S1(%struct.S1 returned %s1.coerce)
 struct S1 f1(struct S1 s1) { return s1; }
 
-// CHECK-SOFT: define void @_Z2f22S2(%struct.S2* noalias nocapture sret %agg.result, [4 x i32] %s2.coerce)
+// CHECK-SOFT: define void @_Z2f22S2(%struct.S2* noalias nocapture sret align 8 %agg.result, [4 x i32] %s2.coerce)
 // CHECK-HARD: define arm_aapcs_vfpcc [2 x <2 x i32>] @_Z2f22S2([2 x <2 x i32>] returned %s2.coerce)
 // CHECK-FULL: define arm_aapcs_vfpcc %struct.S2 @_Z2f22S2(%struct.S2 returned %s2.coerce)
 struct S2 f2(struct S2 s2) { return s2; }
 
-// CHECK-SOFT: define void @_Z2f32S3(%struct.S3* noalias nocapture sret %agg.result, [2 x i64] %s3.coerce)
+// CHECK-SOFT: define void @_Z2f32S3(%struct.S3* noalias nocapture sret align 8 %agg.result, [2 x i64] %s3.coerce)
 // CHECK-HARD: define arm_aapcs_vfpcc [2 x <2 x i32>] @_Z2f32S3([2 x <2 x i32>] returned %s3.coerce)
 // CHECK-FULL: define arm_aapcs_vfpcc %struct.S3 @_Z2f32S3(%struct.S3 returned %s3.coerce)
 struct S3 f3(struct S3 s3) { return s3; }
 
-// CHECK-SOFT: define void @_Z2f42S4(%struct.S4* noalias nocapture sret %agg.result, [2 x i64] %s4.coerce)
+// CHECK-SOFT: define void @_Z2f42S4(%struct.S4* noalias nocapture sret align 8 %agg.result, [2 x i64] %s4.coerce)
 // CHECK-HARD: define arm_aapcs_vfpcc [2 x <2 x i32>] @_Z2f42S4([2 x <2 x i32>] returned %s4.coerce)
 // CHECK-FULL: define arm_aapcs_vfpcc %struct.S4 @_Z2f42S4(%struct.S4 returned %s4.coerce)
 struct S4 f4(struct S4 s4) { return s4; }
 
-// CHECK-SOFT: define void @_Z2f52S5(%struct.S5* noalias nocapture sret %agg.result, [2 x i64] %s5.coerce)
+// CHECK-SOFT: define void @_Z2f52S5(%struct.S5* noalias nocapture sret align 8 %agg.result, [2 x i64] %s5.coerce)
 // CHECK-HARD: define arm_aapcs_vfpcc %struct.S5 @_Z2f52S5(%struct.S5 returned %s5.coerce)
 // CHECK-FULL: define arm_aapcs_vfpcc %struct.S5 @_Z2f52S5(%struct.S5 returned %s5.coerce)
 struct S5 f5(struct S5 s5) { return s5; }

diff  --git a/clang/test/CodeGen/arm64-arguments.c b/clang/test/CodeGen/arm64-arguments.c
index 5c8474cae7be..97332deb7c80 100644
--- a/clang/test/CodeGen/arm64-arguments.c
+++ b/clang/test/CodeGen/arm64-arguments.c
@@ -181,9 +181,9 @@ T_float32x2 f1_0(T_float32x2 a0) { return a0; }
 // CHECK: define <4 x float> @f1_1(<4 x float> %{{.*}})
 T_float32x4 f1_1(T_float32x4 a0) { return a0; }
 // Vector with length bigger than 16-byte is illegal and is passed indirectly.
-// CHECK: define void @f1_2(<8 x float>* noalias sret  %{{.*}}, <8 x float>* %0)
+// CHECK: define void @f1_2(<8 x float>* noalias sret align 16 %{{.*}}, <8 x float>* %0)
 T_float32x8 f1_2(T_float32x8 a0) { return a0; }
-// CHECK: define void @f1_3(<16 x float>* noalias sret %{{.*}}, <16 x float>* %0)
+// CHECK: define void @f1_3(<16 x float>* noalias sret align 16 %{{.*}}, <16 x float>* %0)
 T_float32x16 f1_3(T_float32x16 a0) { return a0; }
 
 // Testing alignment with aggregates: HFA, aggregates with size <= 16 bytes and

diff  --git a/clang/test/CodeGen/arm64-microsoft-arguments.cpp b/clang/test/CodeGen/arm64-microsoft-arguments.cpp
index bca7cc94b393..f5bcda756b06 100644
--- a/clang/test/CodeGen/arm64-microsoft-arguments.cpp
+++ b/clang/test/CodeGen/arm64-microsoft-arguments.cpp
@@ -28,8 +28,8 @@ S2 f2() {
 }
 
 // Pass and return for type size > 16 bytes.
-// CHECK: define {{.*}} void @{{.*}}f3{{.*}}(%struct.S3* noalias sret %agg.result)
-// CHECK: call void {{.*}}func3{{.*}}(%struct.S3* sret %agg.result, %struct.S3* %agg.tmp)
+// CHECK: define {{.*}} void @{{.*}}f3{{.*}}(%struct.S3* noalias sret align 4 %agg.result)
+// CHECK: call void {{.*}}func3{{.*}}(%struct.S3* sret align 4 %agg.result, %struct.S3* %agg.tmp)
 struct S3 {
   int a[5];
 };
@@ -42,8 +42,8 @@ S3 f3() {
 
 // Pass and return aggregate (of size < 16 bytes) with non-trivial destructor.
 // Passed directly but returned indirectly.
-// CHECK: define {{.*}} void {{.*}}f4{{.*}}(%struct.S4* inreg noalias sret %agg.result)
-// CHECK: call void {{.*}}func4{{.*}}(%struct.S4* inreg sret %agg.result, [2 x i64] %5)
+// CHECK: define {{.*}} void {{.*}}f4{{.*}}(%struct.S4* inreg noalias sret align 4 %agg.result)
+// CHECK: call void {{.*}}func4{{.*}}(%struct.S4* inreg sret align 4 %agg.result, [2 x i64] %5)
 struct S4 {
   int a[3];
   ~S4();
@@ -56,8 +56,8 @@ S4 f4() {
 }
 
 // Pass and return from instance method called from instance method.
-// CHECK: define {{.*}} void @{{.*}}bar at Q1{{.*}}(%class.Q1* %this, %class.P1* inreg noalias sret %agg.result)
-// CHECK: call void {{.*}}foo at P1{{.*}}(%class.P1* %ref.tmp, %class.P1* inreg sret %agg.result, i8 %1)
+// CHECK: define {{.*}} void @{{.*}}bar at Q1{{.*}}(%class.Q1* %this, %class.P1* inreg noalias sret align 1 %agg.result)
+// CHECK: call void {{.*}}foo at P1{{.*}}(%class.P1* %ref.tmp, %class.P1* inreg sret align 1 %agg.result, i8 %1)
 
 class P1 {
 public:
@@ -76,7 +76,7 @@ P1 Q1::bar() {
 
 // Pass and return from instance method called from free function.
 // CHECK: define {{.*}} void {{.*}}bar{{.*}}()
-// CHECK: call void {{.*}}foo at P2{{.*}}(%class.P2* %ref.tmp, %class.P2* inreg sret %retval, i8 %0)
+// CHECK: call void {{.*}}foo at P2{{.*}}(%class.P2* %ref.tmp, %class.P2* inreg sret align 1 %retval, i8 %0)
 class P2 {
 public:
   P2 foo(P2 x);
@@ -89,8 +89,8 @@ P2 bar() {
 
 // Pass and return an object with a user-provided constructor (passed directly,
 // returned indirectly)
-// CHECK: define {{.*}} void @{{.*}}f5{{.*}}(%struct.S5* inreg noalias sret %agg.result)
-// CHECK: call void {{.*}}func5{{.*}}(%struct.S5* inreg sret %agg.result, i64 {{.*}})
+// CHECK: define {{.*}} void @{{.*}}f5{{.*}}(%struct.S5* inreg noalias sret align 4 %agg.result)
+// CHECK: call void {{.*}}func5{{.*}}(%struct.S5* inreg sret align 4 %agg.result, i64 {{.*}})
 struct S5 {
   S5();
   int x;
@@ -146,8 +146,8 @@ struct S8 {
   int y;
 };
 
-// CHECK: define {{.*}} void {{.*}}?f8{{.*}}(%struct.S8* inreg noalias sret {{.*}})
-// CHECK: call void {{.*}}func8{{.*}}(%struct.S8* inreg sret {{.*}}, i64 {{.*}})
+// CHECK: define {{.*}} void {{.*}}?f8{{.*}}(%struct.S8* inreg noalias sret align 4 {{.*}})
+// CHECK: call void {{.*}}func8{{.*}}(%struct.S8* inreg sret align 4 {{.*}}, i64 {{.*}})
 S8 func8(S8 x);
 S8 f8() {
   S8 x;
@@ -157,8 +157,8 @@ S8 f8() {
 
 // Pass and return an object with a non-trivial copy-assignment operator and
 // a trivial copy constructor (passed directly, returned indirectly)
-// CHECK: define {{.*}} void @"?f9@@YA?AUS9@@XZ"(%struct.S9* inreg noalias sret {{.*}})
-// CHECK: call void {{.*}}func9{{.*}}(%struct.S9* inreg sret {{.*}}, i64 {{.*}})
+// CHECK: define {{.*}} void @"?f9@@YA?AUS9@@XZ"(%struct.S9* inreg noalias sret align 4 {{.*}})
+// CHECK: call void {{.*}}func9{{.*}}(%struct.S9* inreg sret align 4 {{.*}}, i64 {{.*}})
 struct S9 {
   S9& operator=(const S9&);
   int x;
@@ -174,8 +174,8 @@ S9 f9() {
 
 // Pass and return an object with a base class (passed directly, returned
 // indirectly).
-// CHECK: define dso_local void {{.*}}f10{{.*}}(%struct.S10* inreg noalias sret {{.*}})
-// CHECK: call void {{.*}}func10{{.*}}(%struct.S10* inreg sret {{.*}}, [2 x i64] {{.*}})
+// CHECK: define dso_local void {{.*}}f10{{.*}}(%struct.S10* inreg noalias sret align 4 {{.*}})
+// CHECK: call void {{.*}}func10{{.*}}(%struct.S10* inreg sret align 4 {{.*}}, [2 x i64] {{.*}})
 struct S10 : public S1 {
   int x;
 };
@@ -189,8 +189,8 @@ S10 f10() {
 
 // Pass and return a non aggregate object exceeding > 128 bits (passed
 // indirectly, returned indirectly)
-// CHECK: define dso_local void {{.*}}f11{{.*}}(%struct.S11* inreg noalias sret {{.*}})
-// CHECK: call void {{.*}}func11{{.*}}(%struct.S11* inreg sret {{.*}}, %struct.S11* {{.*}})
+// CHECK: define dso_local void {{.*}}f11{{.*}}(%struct.S11* inreg noalias sret align 8 {{.*}})
+// CHECK: call void {{.*}}func11{{.*}}(%struct.S11* inreg sret align 8 {{.*}}, %struct.S11* {{.*}})
 struct S11 {
   virtual void f();
   int a[5];

diff  --git a/clang/test/CodeGen/arm64_32.c b/clang/test/CodeGen/arm64_32.c
index 245dfefc99e3..1fb121cfcfb1 100644
--- a/clang/test/CodeGen/arm64_32.c
+++ b/clang/test/CodeGen/arm64_32.c
@@ -27,4 +27,4 @@ long double LongDoubleVar = 0.0;
 
 typedef float __attribute__((ext_vector_type(16))) v16f32;
 v16f32 func(v16f32 in) { return in; }
-// CHECK: define void @func(<16 x float>* noalias sret {{%.*}}, <16 x float> {{%.*}})
+// CHECK: define void @func(<16 x float>* noalias sret align 16 {{%.*}}, <16 x float> {{%.*}})

diff  --git a/clang/test/CodeGen/arm_neon_intrinsics.c b/clang/test/CodeGen/arm_neon_intrinsics.c
index 2cdbfb9ba26b..bff9f6a2e30f 100644
--- a/clang/test/CodeGen/arm_neon_intrinsics.c
+++ b/clang/test/CodeGen/arm_neon_intrinsics.c
@@ -20223,7 +20223,7 @@ poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) {
   return vtbx4_p8(a, b, c);
 }
 
-// CHECK: @test_vtrn_s8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vtrn_s8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -20236,7 +20236,7 @@ int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) {
   return vtrn_s8(a, b);
 }
 
-// CHECK: @test_vtrn_s16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vtrn_s16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
@@ -20251,7 +20251,7 @@ int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
   return vtrn_s16(a, b);
 }
 
-// CHECK: @test_vtrn_s32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vtrn_s32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
@@ -20266,7 +20266,7 @@ int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
   return vtrn_s32(a, b);
 }
 
-// CHECK: @test_vtrn_u8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vtrn_u8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -20279,7 +20279,7 @@ uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
   return vtrn_u8(a, b);
 }
 
-// CHECK: @test_vtrn_u16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vtrn_u16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
@@ -20294,7 +20294,7 @@ uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
   return vtrn_u16(a, b);
 }
 
-// CHECK: @test_vtrn_u32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vtrn_u32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
@@ -20309,7 +20309,7 @@ uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
   return vtrn_u32(a, b);
 }
 
-// CHECK: @test_vtrn_f32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vtrn_f32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
@@ -20324,7 +20324,7 @@ float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
   return vtrn_f32(a, b);
 }
 
-// CHECK: @test_vtrn_p8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vtrn_p8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
@@ -20337,7 +20337,7 @@ poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
   return vtrn_p8(a, b);
 }
 
-// CHECK: @test_vtrn_p16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vtrn_p16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
@@ -20352,7 +20352,7 @@ poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
   return vtrn_p16(a, b);
 }
 
-// CHECK: @test_vtrnq_s8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vtrnq_s8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
 // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
@@ -20365,7 +20365,7 @@ int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
   return vtrnq_s8(a, b);
 }
 
-// CHECK: @test_vtrnq_s16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vtrnq_s16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
@@ -20380,7 +20380,7 @@ int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
   return vtrnq_s16(a, b);
 }
 
-// CHECK: @test_vtrnq_s32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vtrnq_s32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
@@ -20395,7 +20395,7 @@ int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
   return vtrnq_s32(a, b);
 }
 
-// CHECK: @test_vtrnq_u8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vtrnq_u8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
 // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
@@ -20408,7 +20408,7 @@ uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
   return vtrnq_u8(a, b);
 }
 
-// CHECK: @test_vtrnq_u16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vtrnq_u16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
@@ -20423,7 +20423,7 @@ uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
   return vtrnq_u16(a, b);
 }
 
-// CHECK: @test_vtrnq_u32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vtrnq_u32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
@@ -20438,7 +20438,7 @@ uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
   return vtrnq_u32(a, b);
 }
 
-// CHECK: @test_vtrnq_f32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vtrnq_f32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
@@ -20453,7 +20453,7 @@ float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) {
   return vtrnq_f32(a, b);
 }
 
-// CHECK: @test_vtrnq_p8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vtrnq_p8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
 // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
@@ -20466,7 +20466,7 @@ poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) {
   return vtrnq_p8(a, b);
 }
 
-// CHECK: @test_vtrnq_p16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vtrnq_p16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
@@ -20645,7 +20645,7 @@ uint16x8_t test_vtstq_p16(poly16x8_t a, poly16x8_t b) {
   return vtstq_p16(a, b);
 }
 
-// CHECK: @test_vuzp_s8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vuzp_s8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -20658,7 +20658,7 @@ int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) {
   return vuzp_s8(a, b);
 }
 
-// CHECK: @test_vuzp_s16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vuzp_s16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
@@ -20673,7 +20673,7 @@ int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) {
   return vuzp_s16(a, b);
 }
 
-// CHECK: @test_vuzp_s32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vuzp_s32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
@@ -20688,7 +20688,7 @@ int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) {
   return vuzp_s32(a, b);
 }
 
-// CHECK: @test_vuzp_u8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vuzp_u8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -20701,7 +20701,7 @@ uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) {
   return vuzp_u8(a, b);
 }
 
-// CHECK: @test_vuzp_u16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vuzp_u16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
@@ -20716,7 +20716,7 @@ uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) {
   return vuzp_u16(a, b);
 }
 
-// CHECK: @test_vuzp_u32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vuzp_u32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
@@ -20731,7 +20731,7 @@ uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) {
   return vuzp_u32(a, b);
 }
 
-// CHECK: @test_vuzp_f32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vuzp_f32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
@@ -20746,7 +20746,7 @@ float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) {
   return vuzp_f32(a, b);
 }
 
-// CHECK: @test_vuzp_p8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vuzp_p8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -20759,7 +20759,7 @@ poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) {
   return vuzp_p8(a, b);
 }
 
-// CHECK: @test_vuzp_p16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vuzp_p16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
@@ -20774,7 +20774,7 @@ poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) {
   return vuzp_p16(a, b);
 }
 
-// CHECK: @test_vuzpq_s8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vuzpq_s8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
 // CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
@@ -20787,7 +20787,7 @@ int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) {
   return vuzpq_s8(a, b);
 }
 
-// CHECK: @test_vuzpq_s16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vuzpq_s16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
@@ -20802,7 +20802,7 @@ int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {
   return vuzpq_s16(a, b);
 }
 
-// CHECK: @test_vuzpq_s32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vuzpq_s32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
@@ -20817,7 +20817,7 @@ int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) {
   return vuzpq_s32(a, b);
 }
 
-// CHECK: @test_vuzpq_u8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vuzpq_u8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
 // CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
@@ -20830,7 +20830,7 @@ uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {
   return vuzpq_u8(a, b);
 }
 
-// CHECK: @test_vuzpq_u16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vuzpq_u16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
@@ -20845,7 +20845,7 @@ uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) {
   return vuzpq_u16(a, b);
 }
 
-// CHECK: @test_vuzpq_u32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vuzpq_u32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
@@ -20860,7 +20860,7 @@ uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {
   return vuzpq_u32(a, b);
 }
 
-// CHECK: @test_vuzpq_f32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vuzpq_f32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
@@ -20875,7 +20875,7 @@ float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {
   return vuzpq_f32(a, b);
 }
 
-// CHECK: @test_vuzpq_p8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vuzpq_p8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
 // CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
@@ -20888,7 +20888,7 @@ poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) {
   return vuzpq_p8(a, b);
 }
 
-// CHECK: @test_vuzpq_p16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vuzpq_p16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
@@ -20903,7 +20903,7 @@ poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) {
   return vuzpq_p16(a, b);
 }
 
-// CHECK: @test_vzip_s8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vzip_s8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -20916,7 +20916,7 @@ int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) {
   return vzip_s8(a, b);
 }
 
-// CHECK: @test_vzip_s16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vzip_s16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
@@ -20931,7 +20931,7 @@ int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) {
   return vzip_s16(a, b);
 }
 
-// CHECK: @test_vzip_s32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vzip_s32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
@@ -20946,7 +20946,7 @@ int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {
   return vzip_s32(a, b);
 }
 
-// CHECK: @test_vzip_u8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vzip_u8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -20959,7 +20959,7 @@ uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
   return vzip_u8(a, b);
 }
 
-// CHECK: @test_vzip_u16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vzip_u16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
@@ -20974,7 +20974,7 @@ uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
   return vzip_u16(a, b);
 }
 
-// CHECK: @test_vzip_u32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vzip_u32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
@@ -20989,7 +20989,7 @@ uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
   return vzip_u32(a, b);
 }
 
-// CHECK: @test_vzip_f32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vzip_f32({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
@@ -21004,7 +21004,7 @@ float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
   return vzip_f32(a, b);
 }
 
-// CHECK: @test_vzip_p8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vzip_p8({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -21017,7 +21017,7 @@ poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) {
   return vzip_p8(a, b);
 }
 
-// CHECK: @test_vzip_p16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vzip_p16({{.*}} sret align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
@@ -21032,7 +21032,7 @@ poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) {
   return vzip_p16(a, b);
 }
 
-// CHECK: @test_vzipq_s8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vzipq_s8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
 // CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
@@ -21045,7 +21045,7 @@ int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
   return vzipq_s8(a, b);
 }
 
-// CHECK: @test_vzipq_s16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vzipq_s16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
@@ -21060,7 +21060,7 @@ int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
   return vzipq_s16(a, b);
 }
 
-// CHECK: @test_vzipq_s32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vzipq_s32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
@@ -21075,7 +21075,7 @@ int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
   return vzipq_s32(a, b);
 }
 
-// CHECK: @test_vzipq_u8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vzipq_u8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
 // CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
@@ -21088,7 +21088,7 @@ uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
   return vzipq_u8(a, b);
 }
 
-// CHECK: @test_vzipq_u16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vzipq_u16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
@@ -21103,7 +21103,7 @@ uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
   return vzipq_u16(a, b);
 }
 
-// CHECK: @test_vzipq_u32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vzipq_u32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
@@ -21118,7 +21118,7 @@ uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
   return vzipq_u32(a, b);
 }
 
-// CHECK: @test_vzipq_f32({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vzipq_f32({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
@@ -21133,7 +21133,7 @@ float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
   return vzipq_f32(a, b);
 }
 
-// CHECK: @test_vzipq_p8({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vzipq_p8({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
 // CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
@@ -21146,7 +21146,7 @@ poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
   return vzipq_p8(a, b);
 }
 
-// CHECK: @test_vzipq_p16({{.*}} sret [[AGG_RESULT:%[0-9a-zA-Z.]+]],
+// CHECK: @test_vzipq_p16({{.*}} sret align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[AGG_RESULT]] to i8*
 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>

diff  --git a/clang/test/CodeGen/blocks.c b/clang/test/CodeGen/blocks.c
index fd348c98f65f..3f1f2502652c 100644
--- a/clang/test/CodeGen/blocks.c
+++ b/clang/test/CodeGen/blocks.c
@@ -18,7 +18,7 @@ struct s0 {
   int a[64];
 };
 
-// CHECK: define internal void @__f2_block_invoke(%struct.s0* noalias sret {{%.*}}, i8* {{%.*}}, %struct.s0* byval(%struct.s0) align 4 {{.*}})
+// CHECK: define internal void @__f2_block_invoke(%struct.s0* noalias sret align 4 {{%.*}}, i8* {{%.*}}, %struct.s0* byval(%struct.s0) align 4 {{.*}})
 struct s0 f2(struct s0 a0) {
   return ^(struct s0 a1){ return a1; }(a0);
 }

diff  --git a/clang/test/CodeGen/c11atomics-ios.c b/clang/test/CodeGen/c11atomics-ios.c
index f48e10e4aa43..92d318dac1c3 100644
--- a/clang/test/CodeGen/c11atomics-ios.c
+++ b/clang/test/CodeGen/c11atomics-ios.c
@@ -203,7 +203,7 @@ void testPromotedStruct(_Atomic(PS) *fp) {
 }
 
 PS test_promoted_load(_Atomic(PS) *addr) {
-  // CHECK-LABEL: @test_promoted_load(%struct.PS* noalias sret %agg.result, { %struct.PS, [2 x i8] }* %addr)
+  // CHECK-LABEL: @test_promoted_load(%struct.PS* noalias sret align 2 %agg.result, { %struct.PS, [2 x i8] }* %addr)
   // CHECK:   [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
   // CHECK:   [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
   // CHECK:   store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
@@ -245,7 +245,7 @@ void test_promoted_store(_Atomic(PS) *addr, PS *val) {
 }
 
 PS test_promoted_exchange(_Atomic(PS) *addr, PS *val) {
-  // CHECK-LABEL: @test_promoted_exchange(%struct.PS* noalias sret %agg.result, { %struct.PS, [2 x i8] }* %addr, %struct.PS* %val)
+  // CHECK-LABEL: @test_promoted_exchange(%struct.PS* noalias sret align 2 %agg.result, { %struct.PS, [2 x i8] }* %addr, %struct.PS* %val)
   // CHECK:   [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
   // CHECK:   [[VAL_ARG:%.*]] = alloca %struct.PS*, align 4
   // CHECK:   [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2

diff  --git a/clang/test/CodeGen/c11atomics.c b/clang/test/CodeGen/c11atomics.c
index 0a32ebcc724e..8697b798566d 100644
--- a/clang/test/CodeGen/c11atomics.c
+++ b/clang/test/CodeGen/c11atomics.c
@@ -368,7 +368,7 @@ void testPromotedStruct(_Atomic(PS) *fp) {
 }
 
 PS test_promoted_load(_Atomic(PS) *addr) {
-  // CHECK-LABEL: @test_promoted_load(%struct.PS* noalias sret %agg.result, { %struct.PS, [2 x i8] }* %addr)
+  // CHECK-LABEL: @test_promoted_load(%struct.PS* noalias sret align 2 %agg.result, { %struct.PS, [2 x i8] }* %addr)
   // CHECK:   [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
   // CHECK:   [[ATOMIC_RES:%.*]] = alloca { %struct.PS, [2 x i8] }, align 8
   // CHECK:   store { %struct.PS, [2 x i8] }* %addr, { %struct.PS, [2 x i8] }** [[ADDR_ARG]], align 4
@@ -411,7 +411,7 @@ void test_promoted_store(_Atomic(PS) *addr, PS *val) {
 }
 
 PS test_promoted_exchange(_Atomic(PS) *addr, PS *val) {
-  // CHECK-LABEL: @test_promoted_exchange(%struct.PS* noalias sret %agg.result, { %struct.PS, [2 x i8] }* %addr, %struct.PS* %val)
+  // CHECK-LABEL: @test_promoted_exchange(%struct.PS* noalias sret align 2 %agg.result, { %struct.PS, [2 x i8] }* %addr, %struct.PS* %val)
   // CHECK:   [[ADDR_ARG:%.*]] = alloca { %struct.PS, [2 x i8] }*, align 4
   // CHECK:   [[VAL_ARG:%.*]] = alloca %struct.PS*, align 4
   // CHECK:   [[NONATOMIC_TMP:%.*]] = alloca %struct.PS, align 2

diff  --git a/clang/test/CodeGen/lanai-arguments.c b/clang/test/CodeGen/lanai-arguments.c
index 9ce4ed98a78c..ef06b3221bc5 100644
--- a/clang/test/CodeGen/lanai-arguments.c
+++ b/clang/test/CodeGen/lanai-arguments.c
@@ -16,7 +16,7 @@ void f1(s1 i) {}
 typedef struct {
   int cc;
 } s2;
-// CHECK: define void @f2(%struct.s2* noalias sret %agg.result)
+// CHECK: define void @f2(%struct.s2* noalias sret align 4 %agg.result)
 s2 f2() {
   s2 foo;
   return foo;
@@ -26,7 +26,7 @@ typedef struct {
   int cc;
   int dd;
 } s3;
-// CHECK: define void @f3(%struct.s3* noalias sret %agg.result)
+// CHECK: define void @f3(%struct.s3* noalias sret align 4 %agg.result)
 s3 f3() {
   s3 foo;
   return foo;

diff  --git a/clang/test/CodeGen/le32-arguments.c b/clang/test/CodeGen/le32-arguments.c
index 9e6908d7fc41..ad368e1a3941 100644
--- a/clang/test/CodeGen/le32-arguments.c
+++ b/clang/test/CodeGen/le32-arguments.c
@@ -17,7 +17,7 @@ typedef struct {
   int cc;
 } s2;
 // Structs should be returned sret and not simplified by the frontend
-// CHECK-LABEL: define void @f2(%struct.s2* noalias sret %agg.result)
+// CHECK-LABEL: define void @f2(%struct.s2* noalias sret align 4 %agg.result)
 s2 f2() {
   s2 foo;
   return foo;

diff  --git a/clang/test/CodeGen/mcu-struct-return.c b/clang/test/CodeGen/mcu-struct-return.c
index 353c963dadb0..93325254bc8d 100644
--- a/clang/test/CodeGen/mcu-struct-return.c
+++ b/clang/test/CodeGen/mcu-struct-return.c
@@ -42,7 +42,7 @@ struct S1 bar1() { return s1; }
 struct S2 bar2() { return s2; }
 struct S1 bar3(union U1 u) { return s1; }
 // CHECK: define void @foo1()
-// CHECK: define void @foo2([[UNION2_TYPE]]* noalias sret %{{.+}})
+// CHECK: define void @foo2([[UNION2_TYPE]]* noalias sret align 4 %{{.+}})
 // CHECK: define i32 @foo3()
 // CHECK: define void @bar1()
 // CHECK: define i32 @bar2()
@@ -62,7 +62,7 @@ void run() {
   // CHECK: [[Y1:%.+]] = alloca [[STRUCT1_TYPE]]
   // CHECK: [[Y2:%.+]] = alloca [[STRUCT2_TYPE]]
   // CHECK: call void @foo1()
-  // CHECK: call void @foo2([[UNION2_TYPE]]* sret [[X2]])
+  // CHECK: call void @foo2([[UNION2_TYPE]]* sret align 4 [[X2]])
   // CHECK: {{.+}} = call i32 @foo3()
   // CHECK: call void @bar1()
   // CHECK: {{.+}} = call i32 @bar2()

diff  --git a/clang/test/CodeGen/mingw-long-double.c b/clang/test/CodeGen/mingw-long-double.c
index 57e4adaa5fab..08e3ac754d6b 100644
--- a/clang/test/CodeGen/mingw-long-double.c
+++ b/clang/test/CodeGen/mingw-long-double.c
@@ -32,15 +32,15 @@ long double TestLD(long double x) {
   return x * x;
 }
 // GNU32: define dso_local x86_fp80 @TestLD(x86_fp80 %x)
-// GNU64: define dso_local void @TestLD(x86_fp80* noalias sret %agg.result, x86_fp80* %0)
+// GNU64: define dso_local void @TestLD(x86_fp80* noalias sret align 16 %agg.result, x86_fp80* %0)
 // MSC64: define dso_local double @TestLD(double %x)
 
 long double _Complex TestLDC(long double _Complex x) {
   return x * x;
 }
-// GNU32: define dso_local void @TestLDC({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 4 %x)
-// GNU64: define dso_local void @TestLDC({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* %x)
-// MSC64: define dso_local void @TestLDC({ double, double }* noalias sret %agg.result, { double, double }* %x)
+// GNU32: define dso_local void @TestLDC({ x86_fp80, x86_fp80 }* noalias sret align 4 %agg.result, { x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 4 %x)
+// GNU64: define dso_local void @TestLDC({ x86_fp80, x86_fp80 }* noalias sret align 16 %agg.result, { x86_fp80, x86_fp80 }* %x)
+// MSC64: define dso_local void @TestLDC({ double, double }* noalias sret align 8 %agg.result, { double, double }* %x)
 
 // GNU32: declare dso_local void @__mulxc3
 // GNU64: declare dso_local void @__mulxc3

diff  --git a/clang/test/CodeGen/mips-zero-sized-struct.c b/clang/test/CodeGen/mips-zero-sized-struct.c
index 08ebf9df3e93..5f0e660cf395 100644
--- a/clang/test/CodeGen/mips-zero-sized-struct.c
+++ b/clang/test/CodeGen/mips-zero-sized-struct.c
@@ -19,7 +19,7 @@
 // RUN: %clang_cc1 -triple mipsisa64r6-unknown-linux-gnuabi64 -S -emit-llvm -o - %s | FileCheck -check-prefix=N64 %s
 // RUN: %clang_cc1 -triple mipsisa64r6el-unknown-linux-gnuabi64 -S -emit-llvm -o - %s | FileCheck -check-prefix=N64 %s
 
-// O32: define void @fn28(%struct.T2* noalias sret %agg.result, i8 signext %arg0)
+// O32: define void @fn28(%struct.T2* noalias sret align 1 %agg.result, i8 signext %arg0)
 // N32: define void @fn28(i8 signext %arg0)
 // N64: define void @fn28(i8 signext %arg0)
 

diff  --git a/clang/test/CodeGen/mips64-padding-arg.c b/clang/test/CodeGen/mips64-padding-arg.c
index a7c8f0ff6fdc..d440743fd723 100644
--- a/clang/test/CodeGen/mips64-padding-arg.c
+++ b/clang/test/CodeGen/mips64-padding-arg.c
@@ -33,9 +33,9 @@ void foo3(int a0, long double a1) {
 
 // Insert padding after hidden argument.
 //
-// N64-LABEL: define void @foo5(%struct.S0* noalias sret %agg.result, i64 %0, fp128 %a0)
-// N64: call void @foo6(%struct.S0* sret %agg.result, i32 signext 1, i32 signext 2, i64 undef, fp128 %a0)
-// N64: declare void @foo6(%struct.S0* sret, i32 signext, i32 signext, i64, fp128)
+// N64-LABEL: define void @foo5(%struct.S0* noalias sret align 16 %agg.result, i64 %0, fp128 %a0)
+// N64: call void @foo6(%struct.S0* sret align 16 %agg.result, i32 signext 1, i32 signext 2, i64 undef, fp128 %a0)
+// N64: declare void @foo6(%struct.S0* sret align 16, i32 signext, i32 signext, i64, fp128)
 
 extern S0 foo6(int, int, long double);
 

diff  --git a/clang/test/CodeGen/ms_abi.c b/clang/test/CodeGen/ms_abi.c
index 75e1caf922df..8c66c5dc4361 100644
--- a/clang/test/CodeGen/ms_abi.c
+++ b/clang/test/CodeGen/ms_abi.c
@@ -155,7 +155,7 @@ struct i128 {
 };
 
 __attribute__((ms_abi)) struct i128 f7(struct i128 a) {
-  // WIN64: define dso_local void @f7(%struct.i128* noalias sret %agg.result, %struct.i128* %a)
-  // FREEBSD: define win64cc void @f7(%struct.i128* noalias sret %agg.result, %struct.i128* %a)
+  // WIN64: define dso_local void @f7(%struct.i128* noalias sret align 8 %agg.result, %struct.i128* %a)
+  // FREEBSD: define win64cc void @f7(%struct.i128* noalias sret align 8 %agg.result, %struct.i128* %a)
   return a;
 }

diff  --git a/clang/test/CodeGen/ppc64-align-struct.c b/clang/test/CodeGen/ppc64-align-struct.c
index bcff4920d0c4..3435a6e42939 100644
--- a/clang/test/CodeGen/ppc64-align-struct.c
+++ b/clang/test/CodeGen/ppc64-align-struct.c
@@ -48,7 +48,7 @@ void test7 (int x, struct test7 y)
 {
 }
 
-// CHECK: define void @test1va(%struct.test1* noalias sret %[[AGG_RESULT:.*]], i32 signext %x, ...)
+// CHECK: define void @test1va(%struct.test1* noalias sret align 4 %[[AGG_RESULT:.*]], i32 signext %x, ...)
 // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
 // CHECK: %[[NEXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 8
 // CHECK: store i8* %[[NEXT]], i8** %ap
@@ -66,7 +66,7 @@ struct test1 test1va (int x, ...)
   return y;
 }
 
-// CHECK: define void @test2va(%struct.test2* noalias sret %[[AGG_RESULT:.*]], i32 signext %x, ...)
+// CHECK: define void @test2va(%struct.test2* noalias sret align 16 %[[AGG_RESULT:.*]], i32 signext %x, ...)
 // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
 // CHECK: %[[TMP0:[^ ]+]] = ptrtoint i8* %[[CUR]] to i64
 // CHECK: %[[TMP1:[^ ]+]] = add i64 %[[TMP0]], 15
@@ -88,7 +88,7 @@ struct test2 test2va (int x, ...)
   return y;
 }
 
-// CHECK: define void @test3va(%struct.test3* noalias sret %[[AGG_RESULT:.*]], i32 signext %x, ...)
+// CHECK: define void @test3va(%struct.test3* noalias sret align 32 %[[AGG_RESULT:.*]], i32 signext %x, ...)
 // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
 // CHECK: %[[TMP0:[^ ]+]] = ptrtoint i8* %[[CUR]] to i64
 // CHECK: %[[TMP1:[^ ]+]] = add i64 %[[TMP0]], 15
@@ -110,7 +110,7 @@ struct test3 test3va (int x, ...)
   return y;
 }
 
-// CHECK: define void @test4va(%struct.test4* noalias sret %[[AGG_RESULT:.*]], i32 signext %x, ...)
+// CHECK: define void @test4va(%struct.test4* noalias sret align 4 %[[AGG_RESULT:.*]], i32 signext %x, ...)
 // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
 // CHECK: %[[NEXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 16
 // CHECK: store i8* %[[NEXT]], i8** %ap
@@ -128,7 +128,7 @@ struct test4 test4va (int x, ...)
   return y;
 }
 
-// CHECK: define void @testva_longdouble(%struct.test_longdouble* noalias sret %[[AGG_RESULT:.*]], i32 signext %x, ...)
+// CHECK: define void @testva_longdouble(%struct.test_longdouble* noalias sret align 16 %[[AGG_RESULT:.*]], i32 signext %x, ...)
 // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
 // CHECK: %[[NEXT:[^ ]+]] = getelementptr inbounds i8, i8* %[[CUR]], i64 16
 // CHECK: store i8* %[[NEXT]], i8** %ap
@@ -147,7 +147,7 @@ struct test_longdouble testva_longdouble (int x, ...)
   return y;
 }
 
-// CHECK: define void @testva_vector(%struct.test_vector* noalias sret %[[AGG_RESULT:.*]], i32 signext %x, ...)
+// CHECK: define void @testva_vector(%struct.test_vector* noalias sret align 16 %[[AGG_RESULT:.*]], i32 signext %x, ...)
 // CHECK: %[[CUR:[^ ]+]] = load i8*, i8** %ap
 // CHECK: %[[TMP0:[^ ]+]] = ptrtoint i8* %[[CUR]] to i64
 // CHECK: %[[TMP1:[^ ]+]] = add i64 %[[TMP0]], 15

diff  --git a/clang/test/CodeGen/ppc64-elf-abi.c b/clang/test/CodeGen/ppc64-elf-abi.c
index 59112a0baf4a..4270ba2c799b 100644
--- a/clang/test/CodeGen/ppc64-elf-abi.c
+++ b/clang/test/CodeGen/ppc64-elf-abi.c
@@ -17,7 +17,7 @@
 // RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm -o - %s \
 // RUN:   -target-abi elfv2 | FileCheck %s --check-prefix=CHECK-ELFv2
 
-// CHECK-ELFv1: define void @func_fab(%struct.fab* noalias sret %agg.result, i64 %x.coerce)
+// CHECK-ELFv1: define void @func_fab(%struct.fab* noalias sret align 4 %agg.result, i64 %x.coerce)
 // CHECK-ELFv2: define [2 x float] @func_fab([2 x float] %x.coerce)
 struct fab { float a; float b; };
 struct fab func_fab(struct fab x) { return x; }

diff  --git a/clang/test/CodeGen/ppc64-qpx-vector.c b/clang/test/CodeGen/ppc64-qpx-vector.c
index e7c009328b23..0e55851b9f33 100644
--- a/clang/test/CodeGen/ppc64-qpx-vector.c
+++ b/clang/test/CodeGen/ppc64-qpx-vector.c
@@ -24,6 +24,6 @@ v4df foo2(struct sdf a, v4df b, struct sdf2 c) {
 // QPX-LABEL: define <4 x double> @foo2(<4 x double> inreg %a.coerce, <4 x double> %b, [2 x i256] %c.coerce)
 // QPX: ret <4 x double>
 
-// NORMAL-LABEL: define void @foo2(<4 x double>* noalias sret %agg.result, [2 x i128] %a.coerce, <4 x double>* %0, [4 x i128] %c.coerce)
+// NORMAL-LABEL: define void @foo2(<4 x double>* noalias sret align 32 %agg.result, [2 x i128] %a.coerce, <4 x double>* %0, [4 x i128] %c.coerce)
 // NORMAL: ret void
 

diff  --git a/clang/test/CodeGen/ppc64-soft-float.c b/clang/test/CodeGen/ppc64-soft-float.c
index 84ac2d55b636..b033dea68fe2 100644
--- a/clang/test/CodeGen/ppc64-soft-float.c
+++ b/clang/test/CodeGen/ppc64-soft-float.c
@@ -30,53 +30,53 @@ struct fabc { float a; float b; float c; };
 struct f2a2b { float a[2]; float b[2]; };
 
 // CHECK-LE: define i32 @func_f1(float inreg %x.coerce)
-// CHECK-BE: define void @func_f1(%struct.f1* noalias sret %agg.result, float inreg %x.coerce)
+// CHECK-BE: define void @func_f1(%struct.f1* noalias sret align 4 %agg.result, float inreg %x.coerce)
 struct f1 func_f1(struct f1 x) { return x; }
 
 // CHECK-LE: define i64 @func_f2(i64 %x.coerce)
-// CHECK-BE: define void @func_f2(%struct.f2* noalias sret %agg.result, i64 %x.coerce)
+// CHECK-BE: define void @func_f2(%struct.f2* noalias sret align 4 %agg.result, i64 %x.coerce)
 struct f2 func_f2(struct f2 x) { return x; }
 
 // CHECK-LE: define { i64, i64 } @func_f3([2 x i64] %x.coerce)
-// CHECK-BE: define void @func_f3(%struct.f3* noalias sret %agg.result, [2 x i64] %x.coerce)
+// CHECK-BE: define void @func_f3(%struct.f3* noalias sret align 4 %agg.result, [2 x i64] %x.coerce)
 struct f3 func_f3(struct f3 x) { return x; }
 
 // CHECK-LE: define { i64, i64 } @func_f4([2 x i64] %x.coerce)
-// CHECK-BE: define void @func_f4(%struct.f4* noalias sret %agg.result, [2 x i64] %x.coerce)
+// CHECK-BE: define void @func_f4(%struct.f4* noalias sret align 4 %agg.result, [2 x i64] %x.coerce)
 struct f4 func_f4(struct f4 x) { return x; }
 
-// CHECK: define void @func_f5(%struct.f5* noalias sret %agg.result, [3 x i64] %x.coerce)
+// CHECK: define void @func_f5(%struct.f5* noalias sret align 4 %agg.result, [3 x i64] %x.coerce)
 struct f5 func_f5(struct f5 x) { return x; }
 
-// CHECK: define void @func_f6(%struct.f6* noalias sret %agg.result, [3 x i64] %x.coerce)
+// CHECK: define void @func_f6(%struct.f6* noalias sret align 4 %agg.result, [3 x i64] %x.coerce)
 struct f6 func_f6(struct f6 x) { return x; }
 
-// CHECK: define void @func_f7(%struct.f7* noalias sret %agg.result, [4 x i64] %x.coerce)
+// CHECK: define void @func_f7(%struct.f7* noalias sret align 4 %agg.result, [4 x i64] %x.coerce)
 struct f7 func_f7(struct f7 x) { return x; }
 
-// CHECK: define void @func_f8(%struct.f8* noalias sret %agg.result, [4 x i64] %x.coerce)
+// CHECK: define void @func_f8(%struct.f8* noalias sret align 4 %agg.result, [4 x i64] %x.coerce)
 struct f8 func_f8(struct f8 x) { return x; }
 
-// CHECK: define void @func_f9(%struct.f9* noalias sret %agg.result, [5 x i64] %x.coerce)
+// CHECK: define void @func_f9(%struct.f9* noalias sret align 4 %agg.result, [5 x i64] %x.coerce)
 struct f9 func_f9(struct f9 x) { return x; }
 
 // CHECK-LE: define i64 @func_fab(i64 %x.coerce)
-// CHECK-BE: define void @func_fab(%struct.fab* noalias sret %agg.result, i64 %x.coerce)
+// CHECK-BE: define void @func_fab(%struct.fab* noalias sret align 4 %agg.result, i64 %x.coerce)
 struct fab func_fab(struct fab x) { return x; }
 
 // CHECK-LE: define { i64, i64 } @func_fabc([2 x i64] %x.coerce)
-// CHECK-BE: define void @func_fabc(%struct.fabc* noalias sret %agg.result, [2 x i64] %x.coerce)
+// CHECK-BE: define void @func_fabc(%struct.fabc* noalias sret align 4 %agg.result, [2 x i64] %x.coerce)
 struct fabc func_fabc(struct fabc x) { return x; }
 
 // CHECK-LE: define { i64, i64 } @func_f2a2b([2 x i64] %x.coerce)
-// CHECK-BE: define void @func_f2a2b(%struct.f2a2b* noalias sret %agg.result, [2 x i64] %x.coerce)
+// CHECK-BE: define void @func_f2a2b(%struct.f2a2b* noalias sret align 4 %agg.result, [2 x i64] %x.coerce)
 struct f2a2b func_f2a2b(struct f2a2b x) { return x; }
 
 // CHECK-LABEL: @call_f1
 // CHECK-BE: %[[TMP0:[^ ]+]] = alloca %struct.f1, align 4
 // CHECK: %[[TMP:[^ ]+]] = load float, float* getelementptr inbounds (%struct.f1, %struct.f1* @global_f1, i32 0, i32 0, i32 0), align 4
 // CHECK-LE: call i32 @func_f1(float inreg %[[TMP]])
-// CHECK-BE: call void @func_f1(%struct.f1* sret %[[TMP0]], float inreg %[[TMP]])
+// CHECK-BE: call void @func_f1(%struct.f1* sret align 4 %[[TMP0]], float inreg %[[TMP]])
 struct f1 global_f1;
 void call_f1(void) { global_f1 = func_f1(global_f1); }
 
@@ -84,7 +84,7 @@ void call_f1(void) { global_f1 = func_f1(global_f1); }
 // CHECK-BE: %[[TMP0:[^ ]+]] = alloca %struct.f2, align 4
 // CHECK: %[[TMP:[^ ]+]] = load i64, i64* bitcast (%struct.f2* @global_f2 to i64*), align 4
 // CHECK-LE: call i64 @func_f2(i64 %[[TMP]])
-// CHECK-BE: call void @func_f2(%struct.f2* sret %[[TMP0]], i64 %[[TMP]])
+// CHECK-BE: call void @func_f2(%struct.f2* sret align 4 %[[TMP0]], i64 %[[TMP]])
 struct f2 global_f2;
 void call_f2(void) { global_f2 = func_f2(global_f2); }
 
@@ -95,7 +95,7 @@ void call_f2(void) { global_f2 = func_f2(global_f2); }
 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[TMP2]], i8* align 4 bitcast (%struct.f3* @global_f3 to i8*), i64 12, i1 false)
 // CHECK: %[[TMP3:[^ ]+]] = load [2 x i64], [2 x i64]* %[[TMP1]]
 // CHECK-LE: call { i64, i64 } @func_f3([2 x i64] %[[TMP3]])
-// CHECK-BE: call void @func_f3(%struct.f3* sret %[[TMP0]], [2 x i64] %[[TMP3]])
+// CHECK-BE: call void @func_f3(%struct.f3* sret align 4 %[[TMP0]], [2 x i64] %[[TMP3]])
 struct f3 global_f3;
 void call_f3(void) { global_f3 = func_f3(global_f3); }
 
@@ -103,7 +103,7 @@ void call_f3(void) { global_f3 = func_f3(global_f3); }
 // CHECK-BE: %[[TMP0:[^ ]+]] = alloca %struct.f4, align 4
 // CHECK: %[[TMP:[^ ]+]] = load [2 x i64], [2 x i64]* bitcast (%struct.f4* @global_f4 to [2 x i64]*), align 4
 // CHECK-LE: call { i64, i64 } @func_f4([2 x i64] %[[TMP]])
-// CHECK-BE: call void @func_f4(%struct.f4* sret %[[TMP0]], [2 x i64] %[[TMP]])
+// CHECK-BE: call void @func_f4(%struct.f4* sret align 4 %[[TMP0]], [2 x i64] %[[TMP]])
 struct f4 global_f4;
 void call_f4(void) { global_f4 = func_f4(global_f4); }
 
@@ -113,14 +113,14 @@ void call_f4(void) { global_f4 = func_f4(global_f4); }
 // CHECK: %[[TMP2:[^ ]+]] = bitcast [3 x i64]* %[[TMP1]] to i8*
 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[TMP2]], i8* align 4 bitcast (%struct.f5* @global_f5 to i8*), i64 20, i1 false)
 // CHECK: %[[TMP3:[^ ]+]] = load [3 x i64], [3 x i64]* %[[TMP1]]
-// CHECK: call void @func_f5(%struct.f5* sret %[[TMP0]], [3 x i64] %[[TMP3]])
+// CHECK: call void @func_f5(%struct.f5* sret align 4 %[[TMP0]], [3 x i64] %[[TMP3]])
 struct f5 global_f5;
 void call_f5(void) { global_f5 = func_f5(global_f5); }
 
 // CHECK-LABEL: @call_f6
 // CHECK: %[[TMP0:[^ ]+]] = alloca %struct.f6, align 4
 // CHECK: %[[TMP:[^ ]+]] = load [3 x i64], [3 x i64]* bitcast (%struct.f6* @global_f6 to [3 x i64]*), align 4
-// CHECK: call void @func_f6(%struct.f6* sret %[[TMP0]], [3 x i64] %[[TMP]])
+// CHECK: call void @func_f6(%struct.f6* sret align 4 %[[TMP0]], [3 x i64] %[[TMP]])
 struct f6 global_f6;
 void call_f6(void) { global_f6 = func_f6(global_f6); }
 
@@ -130,14 +130,14 @@ void call_f6(void) { global_f6 = func_f6(global_f6); }
 // CHECK: %[[TMP2:[^ ]+]] = bitcast [4 x i64]* %[[TMP1]] to i8*
 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[TMP2]], i8* align 4 bitcast (%struct.f7* @global_f7 to i8*), i64 28, i1 false)
 // CHECK: %[[TMP3:[^ ]+]] = load [4 x i64], [4 x i64]* %[[TMP1]], align 8
-// CHECK: call void @func_f7(%struct.f7* sret %[[TMP0]], [4 x i64] %[[TMP3]])
+// CHECK: call void @func_f7(%struct.f7* sret align 4 %[[TMP0]], [4 x i64] %[[TMP3]])
 struct f7 global_f7;
 void call_f7(void) { global_f7 = func_f7(global_f7); }
 
 // CHECK-LABEL: @call_f8
 // CHECK: %[[TMP0:[^ ]+]] = alloca %struct.f8, align 4
 // CHECK: %[[TMP:[^ ]+]] = load [4 x i64], [4 x i64]* bitcast (%struct.f8* @global_f8 to [4 x i64]*), align 4
-// CHECK: call void @func_f8(%struct.f8* sret %[[TMP0]], [4 x i64] %[[TMP]])
+// CHECK: call void @func_f8(%struct.f8* sret align 4 %[[TMP0]], [4 x i64] %[[TMP]])
 struct f8 global_f8;
 void call_f8(void) { global_f8 = func_f8(global_f8); }
 
@@ -146,7 +146,7 @@ void call_f8(void) { global_f8 = func_f8(global_f8); }
 // CHECK: %[[TMP2:[^ ]+]] = bitcast [5 x i64]* %[[TMP1]] to i8*
 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[TMP2]], i8* align 4 bitcast (%struct.f9* @global_f9 to i8*), i64 36, i1 false)
 // CHECK: %[[TMP3:[^ ]+]] = load [5 x i64], [5 x i64]* %[[TMP1]]
-// CHECK: call void @func_f9(%struct.f9* sret %{{[^ ]+}}, [5 x i64] %[[TMP3]])
+// CHECK: call void @func_f9(%struct.f9* sret align 4 %{{[^ ]+}}, [5 x i64] %[[TMP3]])
 struct f9 global_f9;
 void call_f9(void) { global_f9 = func_f9(global_f9); }
 
@@ -154,7 +154,7 @@ void call_f9(void) { global_f9 = func_f9(global_f9); }
 // CHECK: %[[TMP0:[^ ]+]] = alloca %struct.fab, align 4
 // CHECK: %[[TMP:[^ ]+]] = load i64, i64* bitcast (%struct.fab* @global_fab to i64*), align 4
 // CHECK-LE: %call = call i64 @func_fab(i64 %[[TMP]])
-// CHECK-BE: call void @func_fab(%struct.fab* sret %[[TMP0]], i64 %[[TMP]])
+// CHECK-BE: call void @func_fab(%struct.fab* sret align 4 %[[TMP0]], i64 %[[TMP]])
 struct fab global_fab;
 void call_fab(void) { global_fab = func_fab(global_fab); }
 
@@ -165,7 +165,7 @@ void call_fab(void) { global_fab = func_fab(global_fab); }
 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[TMP2]], i8* align 4 bitcast (%struct.fabc* @global_fabc to i8*), i64 12, i1 false)
 // CHECK: %[[TMP3:[^ ]+]] = load [2 x i64], [2 x i64]* %[[TMP0]], align 8
 // CHECK-LE: %call = call { i64, i64 } @func_fabc([2 x i64] %[[TMP3]])
-// CHECK-BE: call void @func_fabc(%struct.fabc* sret %[[TMPX]], [2 x i64] %[[TMP3]])
+// CHECK-BE: call void @func_fabc(%struct.fabc* sret align 4 %[[TMPX]], [2 x i64] %[[TMP3]])
 struct fabc global_fabc;
 void call_fabc(void) { global_fabc = func_fabc(global_fabc); }
 

diff  --git a/clang/test/CodeGen/ppc64-vector.c b/clang/test/CodeGen/ppc64-vector.c
index 7ed0beade4cd..7ea5b007d5bf 100644
--- a/clang/test/CodeGen/ppc64-vector.c
+++ b/clang/test/CodeGen/ppc64-vector.c
@@ -39,13 +39,13 @@ v8i16 test_v8i16(v8i16 x)
   return x;
 }
 
-// CHECK: define void @test_v16i16(<16 x i16>* noalias sret %agg.result, <16 x i16>* %0)
+// CHECK: define void @test_v16i16(<16 x i16>* noalias sret align 32 %agg.result, <16 x i16>* %0)
 v16i16 test_v16i16(v16i16 x)
 {
   return x;
 }
 
-// CHECK: define void @test_struct_v16i16(%struct.v16i16* noalias sret %agg.result, [2 x i128] %x.coerce)
+// CHECK: define void @test_struct_v16i16(%struct.v16i16* noalias sret align 32 %agg.result, [2 x i128] %x.coerce)
 struct v16i16 test_struct_v16i16(struct v16i16 x)
 {
   return x;

diff  --git a/clang/test/CodeGen/ppc64le-aggregates.c b/clang/test/CodeGen/ppc64le-aggregates.c
index e36faa2b8025..ea32d69b7cf9 100644
--- a/clang/test/CodeGen/ppc64le-aggregates.c
+++ b/clang/test/CodeGen/ppc64le-aggregates.c
@@ -41,7 +41,7 @@ struct f7 func_f7(struct f7 x) { return x; }
 // CHECK: define [8 x float] @func_f8([8 x float] %x.coerce)
 struct f8 func_f8(struct f8 x) { return x; }
 
-// CHECK: define void @func_f9(%struct.f9* noalias sret %agg.result, [5 x i64] %x.coerce)
+// CHECK: define void @func_f9(%struct.f9* noalias sret align 4 %agg.result, [5 x i64] %x.coerce)
 struct f9 func_f9(struct f9 x) { return x; }
 
 // CHECK: define [2 x float] @func_fab([2 x float] %x.coerce)
@@ -106,7 +106,7 @@ void call_f8(void) { global_f8 = func_f8(global_f8); }
 // CHECK: %[[TMP2:[^ ]+]] = bitcast [5 x i64]* %[[TMP1]] to i8*
 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %[[TMP2]], i8* align 4 bitcast (%struct.f9* @global_f9 to i8*), i64 36, i1 false)
 // CHECK: %[[TMP3:[^ ]+]] = load [5 x i64], [5 x i64]* %[[TMP1]]
-// CHECK: call void @func_f9(%struct.f9* sret %{{[^ ]+}}, [5 x i64] %[[TMP3]])
+// CHECK: call void @func_f9(%struct.f9* sret align 4 %{{[^ ]+}}, [5 x i64] %[[TMP3]])
 struct f9 global_f9;
 void call_f9(void) { global_f9 = func_f9(global_f9); }
 
@@ -162,7 +162,7 @@ struct v7 func_v7(struct v7 x) { return x; }
 // CHECK: define [8 x <4 x i32>] @func_v8([8 x <4 x i32>] %x.coerce)
 struct v8 func_v8(struct v8 x) { return x; }
 
-// CHECK: define void @func_v9(%struct.v9* noalias sret %agg.result, %struct.v9* byval(%struct.v9) align 16 %x)
+// CHECK: define void @func_v9(%struct.v9* noalias sret align 16 %agg.result, %struct.v9* byval(%struct.v9) align 16 %x)
 struct v9 func_v9(struct v9 x) { return x; }
 
 // CHECK: define [2 x <4 x i32>] @func_vab([2 x <4 x i32>] %x.coerce)
@@ -220,7 +220,7 @@ struct v8 global_v8;
 void call_v8(void) { global_v8 = func_v8(global_v8); }
 
 // CHECK-LABEL: @call_v9
-// CHECK: call void @func_v9(%struct.v9* sret %{{[^ ]+}}, %struct.v9* byval(%struct.v9) align 16 @global_v9)
+// CHECK: call void @func_v9(%struct.v9* sret align 16 %{{[^ ]+}}, %struct.v9* byval(%struct.v9) align 16 @global_v9)
 struct v9 global_v9;
 void call_v9(void) { global_v9 = func_v9(global_v9); }
 
@@ -279,7 +279,7 @@ struct v3f7 func_v3f7(struct v3f7 x) { return x; }
 // CHECK: define [8 x <4 x float>] @func_v3f8([8 x <4 x float>] %x.coerce)
 struct v3f8 func_v3f8(struct v3f8 x) { return x; }
 
-// CHECK: define void @func_v3f9(%struct.v3f9* noalias sret %agg.result, %struct.v3f9* byval(%struct.v3f9) align 16 %x)
+// CHECK: define void @func_v3f9(%struct.v3f9* noalias sret align 16 %agg.result, %struct.v3f9* byval(%struct.v3f9) align 16 %x)
 struct v3f9 func_v3f9(struct v3f9 x) { return x; }
 
 // CHECK: define [2 x <4 x float>] @func_v3fab([2 x <4 x float>] %x.coerce)
@@ -337,7 +337,7 @@ struct v3f8 global_v3f8;
 void call_v3f8(void) { global_v3f8 = func_v3f8(global_v3f8); }
 
 // CHECK-LABEL: @call_v3f9
-// CHECK: call void @func_v3f9(%struct.v3f9* sret %{{[^ ]+}}, %struct.v3f9* byval(%struct.v3f9) align 16 @global_v3f9)
+// CHECK: call void @func_v3f9(%struct.v3f9* sret align 16 %{{[^ ]+}}, %struct.v3f9* byval(%struct.v3f9) align 16 @global_v3f9)
 struct v3f9 global_v3f9;
 void call_v3f9(void) { global_v3f9 = func_v3f9(global_v3f9); }
 

diff  --git a/clang/test/CodeGen/ppc64le-f128Aggregates.c b/clang/test/CodeGen/ppc64le-f128Aggregates.c
index 3b363bf0f2ea..acebea69b31d 100644
--- a/clang/test/CodeGen/ppc64le-f128Aggregates.c
+++ b/clang/test/CodeGen/ppc64le-f128Aggregates.c
@@ -42,7 +42,7 @@ struct fp7 func_f7(struct fp7 x) { return x; }
 // CHECK: define [8 x fp128] @func_f8([8 x fp128] %x.coerce)
 struct fp8 func_f8(struct fp8 x) { return x; }
 
-// CHECK: define void @func_f9(%struct.fp9* noalias sret %agg.result, %struct.fp9* byval(%struct.fp9) align 16 %x)
+// CHECK: define void @func_f9(%struct.fp9* noalias sret align 16 %agg.result, %struct.fp9* byval(%struct.fp9) align 16 %x)
 struct fp9 func_f9(struct fp9 x) { return x; }
 
 // CHECK: define [2 x fp128] @func_fab([2 x fp128] %x.coerce)
@@ -104,7 +104,7 @@ void call_fp8(void) { global_f8 = func_f8(global_f8); }
 
 // CHECK-LABEL: @call_fp9
 // CHECK: %[[TMP1:[^ ]+]] = alloca %struct.fp9, align 16
-// CHECK: call void @func_f9(%struct.fp9* sret %[[TMP2:[^ ]+]], %struct.fp9* byval(%struct.fp9) align 16 @global_f9
+// CHECK: call void @func_f9(%struct.fp9* sret align 16 %[[TMP2:[^ ]+]], %struct.fp9* byval(%struct.fp9) align 16 @global_f9
 // CHECK: %[[TMP3:[^ ]+]] = bitcast %struct.fp9* %[[TMP2]] to i8*
 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 bitcast (%struct.fp9* @global_f9 to i8*), i8* align 16 %[[TMP3]], i64 144, i1 false
 // CHECK: ret void

diff  --git a/clang/test/CodeGen/regparm-struct.c b/clang/test/CodeGen/regparm-struct.c
index 7f56ae094a69..8c74c8b1f058 100644
--- a/clang/test/CodeGen/regparm-struct.c
+++ b/clang/test/CodeGen/regparm-struct.c
@@ -159,7 +159,7 @@ void g16(void) {
 }
 
 __attribute__((regparm(3))) struct s12 f17(int a, int b, int c);
-// CHECK: declare void @f17(%struct.s12* inreg sret, i32 inreg, i32 inreg, i32)
+// CHECK: declare void @f17(%struct.s12* inreg sret align 4, i32 inreg, i32 inreg, i32)
 void g17(void) {
   f17(41, 42, 43);
 }

diff  --git a/clang/test/CodeGen/renderscript.c b/clang/test/CodeGen/renderscript.c
index a85dc35c6149..fee97a154344 100644
--- a/clang/test/CodeGen/renderscript.c
+++ b/clang/test/CodeGen/renderscript.c
@@ -83,15 +83,15 @@ void argLongInt(sLongInt s) {}
 // and coerced to [a x iNN] for 64-bit RenderScript
 // =============================================================================
 
-// CHECK-RS32: void @retShortCharShort(%struct.sShortCharShort* noalias sret %agg.result)
+// CHECK-RS32: void @retShortCharShort(%struct.sShortCharShort* noalias sret align 2 %agg.result)
 // CHECK-RS64: [3 x i16] @retShortCharShort()
 sShortCharShort retShortCharShort() { sShortCharShort r; return r; }
 
-// CHECK-RS32: void @retIntShortChar(%struct.sIntShortChar* noalias sret %agg.result)
+// CHECK-RS32: void @retIntShortChar(%struct.sIntShortChar* noalias sret align 4 %agg.result)
 // CHECK-RS64: [2 x i32] @retIntShortChar()
 sIntShortChar retIntShortChar() { sIntShortChar r; return r; }
 
-// CHECK-RS32: void @retLongInt(%struct.sLongInt* noalias sret %agg.result)
+// CHECK-RS32: void @retLongInt(%struct.sLongInt* noalias sret align 8 %agg.result)
 // CHECK-RS64: [2 x i64] @retLongInt()
 sLongInt retLongInt() { sLongInt r; return r; }
 
@@ -116,12 +116,12 @@ void argLong2Char(sLong2Char s) {}
 // 64-bit RenderScript
 // =============================================================================
 
-// CHECK-RS32: void @retInt5(%struct.sInt5* noalias sret %agg.result)
-// CHECK-RS64: void @retInt5(%struct.sInt5* noalias sret %agg.result)
+// CHECK-RS32: void @retInt5(%struct.sInt5* noalias sret align 4 %agg.result)
+// CHECK-RS64: void @retInt5(%struct.sInt5* noalias sret align 4 %agg.result)
 sInt5 retInt5() { sInt5 r; return r;}
 
-// CHECK-RS32: void @retLong2Char(%struct.sLong2Char* noalias sret %agg.result)
-// CHECK-RS64: void @retLong2Char(%struct.sLong2Char* noalias sret %agg.result)
+// CHECK-RS32: void @retLong2Char(%struct.sLong2Char* noalias sret align 8 %agg.result)
+// CHECK-RS64: void @retLong2Char(%struct.sLong2Char* noalias sret align 8 %agg.result)
 sLong2Char retLong2Char() { sLong2Char r; return r;}
 
 // =============================================================================
@@ -135,6 +135,6 @@ typedef struct {long l1, l2, l3, l4, l5, l6, l7, l8, l9; } sLong9;
 // CHECK-RS64: void @argLong9(%struct.sLong9* %s)
 void argLong9(sLong9 s) {}
 
-// CHECK-RS32: void @retLong9(%struct.sLong9* noalias sret %agg.result)
-// CHECK-RS64: void @retLong9(%struct.sLong9* noalias sret %agg.result)
+// CHECK-RS32: void @retLong9(%struct.sLong9* noalias sret align 8 %agg.result)
+// CHECK-RS64: void @retLong9(%struct.sLong9* noalias sret align 8 %agg.result)
 sLong9 retLong9() { sLong9 r; return r; }

diff  --git a/clang/test/CodeGen/riscv32-ilp32-abi.c b/clang/test/CodeGen/riscv32-ilp32-abi.c
index 59f0bb568372..1b32024f5158 100644
--- a/clang/test/CodeGen/riscv32-ilp32-abi.c
+++ b/clang/test/CodeGen/riscv32-ilp32-abi.c
@@ -35,7 +35,7 @@ int f_scalar_stack_1(int32_t a, int64_t b, float c, double d, long double e,
 // the presence of large return values that consume a register due to the need
 // to pass a pointer.
 
-// CHECK-LABEL: define void @f_scalar_stack_2(%struct.large* noalias sret %agg.result, float %a, i64 %b, double %c, fp128 %d, i8 zeroext %e, i8 %f, i8 %g)
+// CHECK-LABEL: define void @f_scalar_stack_2(%struct.large* noalias sret align 4 %agg.result, float %a, i64 %b, double %c, fp128 %d, i8 zeroext %e, i8 %f, i8 %g)
 struct large f_scalar_stack_2(float a, int64_t b, double c, long double d,
                               uint8_t e, int8_t f, uint8_t g) {
   return (struct large){a, e, f, g};

diff  --git a/clang/test/CodeGen/riscv32-ilp32-ilp32f-abi.c b/clang/test/CodeGen/riscv32-ilp32-ilp32f-abi.c
index 677040626f57..225b12358a0e 100644
--- a/clang/test/CodeGen/riscv32-ilp32-ilp32f-abi.c
+++ b/clang/test/CodeGen/riscv32-ilp32-ilp32f-abi.c
@@ -37,7 +37,7 @@ int f_scalar_stack_1(int32_t a, int64_t b, int32_t c, double d, long double e,
 // the presence of large return values that consume a register due to the need
 // to pass a pointer.
 
-// CHECK-LABEL: define void @f_scalar_stack_2(%struct.large* noalias sret %agg.result, i32 %a, i64 %b, double %c, fp128 %d, i8 zeroext %e, i8 %f, i8 %g)
+// CHECK-LABEL: define void @f_scalar_stack_2(%struct.large* noalias sret align 4 %agg.result, i32 %a, i64 %b, double %c, fp128 %d, i8 zeroext %e, i8 %f, i8 %g)
 struct large f_scalar_stack_2(int32_t a, int64_t b, double c, long double d,
                               uint8_t e, int8_t f, uint8_t g) {
   return (struct large){a, e, f, g};

diff  --git a/clang/test/CodeGen/riscv32-ilp32-ilp32f-ilp32d-abi.c b/clang/test/CodeGen/riscv32-ilp32-ilp32f-ilp32d-abi.c
index 86ad8fd370bc..740079d28d3b 100644
--- a/clang/test/CodeGen/riscv32-ilp32-ilp32f-ilp32d-abi.c
+++ b/clang/test/CodeGen/riscv32-ilp32-ilp32f-ilp32d-abi.c
@@ -177,7 +177,7 @@ void f_agg_large(struct large x) {
 
 // The address where the struct should be written to will be the first
 // argument
-// CHECK-LABEL: define void @f_agg_large_ret(%struct.large* noalias sret %agg.result, i32 %i, i8 signext %j)
+// CHECK-LABEL: define void @f_agg_large_ret(%struct.large* noalias sret align 4 %agg.result, i32 %i, i8 signext %j)
 struct large f_agg_large_ret(int32_t i, int8_t j) {
   return (struct large){1, 2, 3, 4};
 }
@@ -189,7 +189,7 @@ void f_vec_large_v16i8(v16i8 x) {
   x[0] = x[7];
 }
 
-// CHECK-LABEL: define void @f_vec_large_v16i8_ret(<16 x i8>* noalias sret %agg.result)
+// CHECK-LABEL: define void @f_vec_large_v16i8_ret(<16 x i8>* noalias sret align 16 %agg.result)
 v16i8 f_vec_large_v16i8_ret() {
   return (v16i8){1, 2, 3, 4, 5, 6, 7, 8};
 }
@@ -207,7 +207,7 @@ int f_scalar_stack_1(struct tiny a, struct small b, struct small_aligned c,
 // the presence of large return values that consume a register due to the need
 // to pass a pointer.
 
-// CHECK-LABEL: define void @f_scalar_stack_2(%struct.large* noalias sret %agg.result, i32 %a, i64 %b, i64 %c, fp128 %d, i8 zeroext %e, i8 %f, i8 %g)
+// CHECK-LABEL: define void @f_scalar_stack_2(%struct.large* noalias sret align 4 %agg.result, i32 %a, i64 %b, i64 %c, fp128 %d, i8 zeroext %e, i8 %f, i8 %g)
 struct large f_scalar_stack_2(int32_t a, int64_t b, int64_t c, long double d,
                               uint8_t e, int8_t f, uint8_t g) {
   return (struct large){a, e, f, g};

diff  --git a/clang/test/CodeGen/riscv32-ilp32d-abi.c b/clang/test/CodeGen/riscv32-ilp32d-abi.c
index b5b451cee151..02398e66766f 100644
--- a/clang/test/CodeGen/riscv32-ilp32d-abi.c
+++ b/clang/test/CodeGen/riscv32-ilp32d-abi.c
@@ -119,7 +119,7 @@ struct double_int32_s f_ret_double_int32_s() {
 // CHECK: define void @f_double_int64_s_arg(%struct.double_int64_s* %a)
 void f_double_int64_s_arg(struct double_int64_s a) {}
 
-// CHECK: define void @f_ret_double_int64_s(%struct.double_int64_s* noalias sret %agg.result)
+// CHECK: define void @f_ret_double_int64_s(%struct.double_int64_s* noalias sret align 8 %agg.result)
 struct double_int64_s f_ret_double_int64_s() {
   return (struct double_int64_s){1.0, 2};
 }
@@ -243,7 +243,7 @@ struct int_double_int_s { int a; double b; int c; };
 // CHECK: define void @f_int_double_int_s_arg(%struct.int_double_int_s* %a)
 void f_int_double_int_s_arg(struct int_double_int_s a) {}
 
-// CHECK: define void @f_ret_int_double_int_s(%struct.int_double_int_s* noalias sret %agg.result)
+// CHECK: define void @f_ret_int_double_int_s(%struct.int_double_int_s* noalias sret align 8 %agg.result)
 struct int_double_int_s f_ret_int_double_int_s() {
   return (struct int_double_int_s){1, 2.0, 3};
 }
@@ -253,7 +253,7 @@ struct int64_double_s { int64_t a; double b; };
 // CHECK: define void @f_int64_double_s_arg(%struct.int64_double_s* %a)
 void f_int64_double_s_arg(struct int64_double_s a) {}
 
-// CHECK: define void @f_ret_int64_double_s(%struct.int64_double_s* noalias sret %agg.result)
+// CHECK: define void @f_ret_int64_double_s(%struct.int64_double_s* noalias sret align 8 %agg.result)
 struct int64_double_s f_ret_int64_double_s() {
   return (struct int64_double_s){1, 2.0};
 }
@@ -263,7 +263,7 @@ struct char_char_double_s { char a; char b; double c; };
 // CHECK-LABEL: define void @f_char_char_double_s_arg(%struct.char_char_double_s* %a)
 void f_char_char_double_s_arg(struct char_char_double_s a) {}
 
-// CHECK: define void @f_ret_char_char_double_s(%struct.char_char_double_s* noalias sret %agg.result)
+// CHECK: define void @f_ret_char_char_double_s(%struct.char_char_double_s* noalias sret align 8 %agg.result)
 struct char_char_double_s f_ret_char_char_double_s() {
   return (struct char_char_double_s){1, 2, 3.0};
 }

diff  --git a/clang/test/CodeGen/riscv32-ilp32f-abi.c b/clang/test/CodeGen/riscv32-ilp32f-abi.c
index 76092958aedd..c8e6418b9daa 100644
--- a/clang/test/CodeGen/riscv32-ilp32f-abi.c
+++ b/clang/test/CodeGen/riscv32-ilp32f-abi.c
@@ -26,7 +26,7 @@ struct double_double_s { double d; double e; };
 // CHECK: define void @f_double_double_s_arg(%struct.double_double_s* %a)
 void f_double_double_s_arg(struct double_double_s a) {}
 
-// CHECK: define void @f_ret_double_double_s(%struct.double_double_s* noalias sret %agg.result)
+// CHECK: define void @f_ret_double_double_s(%struct.double_double_s* noalias sret align 8 %agg.result)
 struct double_double_s f_ret_double_double_s() {
   return (struct double_double_s){1.0, 2.0};
 }
@@ -38,7 +38,7 @@ struct int_double_s { int a; double b; };
 // CHECK: define void @f_int_double_s_arg(%struct.int_double_s* %a)
 void f_int_double_s_arg(struct int_double_s a) {}
 
-// CHECK: define void @f_ret_int_double_s(%struct.int_double_s* noalias sret %agg.result)
+// CHECK: define void @f_ret_int_double_s(%struct.int_double_s* noalias sret align 8 %agg.result)
 struct int_double_s f_ret_int_double_s() {
   return (struct int_double_s){1, 2.0};
 }

diff  --git a/clang/test/CodeGen/riscv32-ilp32f-ilp32d-abi.c b/clang/test/CodeGen/riscv32-ilp32f-ilp32d-abi.c
index e9705ca3d62b..419bd87fdecf 100644
--- a/clang/test/CodeGen/riscv32-ilp32f-ilp32d-abi.c
+++ b/clang/test/CodeGen/riscv32-ilp32f-ilp32d-abi.c
@@ -112,7 +112,7 @@ struct float_int32_s f_ret_float_int32_s() {
 // CHECK: define void @f_float_int64_s_arg(%struct.float_int64_s* %a)
 void f_float_int64_s_arg(struct float_int64_s a) {}
 
-// CHECK: define void @f_ret_float_int64_s(%struct.float_int64_s* noalias sret %agg.result)
+// CHECK: define void @f_ret_float_int64_s(%struct.float_int64_s* noalias sret align 8 %agg.result)
 struct float_int64_s f_ret_float_int64_s() {
   return (struct float_int64_s){1.0, 2};
 }
@@ -236,7 +236,7 @@ struct int_float_int_s { int a; float b; int c; };
 // CHECK: define void @f_int_float_int_s_arg(%struct.int_float_int_s* %a)
 void f_int_float_int_s_arg(struct int_float_int_s a) {}
 
-// CHECK: define void @f_ret_int_float_int_s(%struct.int_float_int_s* noalias sret %agg.result)
+// CHECK: define void @f_ret_int_float_int_s(%struct.int_float_int_s* noalias sret align 4 %agg.result)
 struct int_float_int_s f_ret_int_float_int_s() {
   return (struct int_float_int_s){1, 2.0, 3};
 }
@@ -246,7 +246,7 @@ struct int64_float_s { int64_t a; float b; };
 // CHECK: define void @f_int64_float_s_arg(%struct.int64_float_s* %a)
 void f_int64_float_s_arg(struct int64_float_s a) {}
 
-// CHECK: define void @f_ret_int64_float_s(%struct.int64_float_s* noalias sret %agg.result)
+// CHECK: define void @f_ret_int64_float_s(%struct.int64_float_s* noalias sret align 8 %agg.result)
 struct int64_float_s f_ret_int64_float_s() {
   return (struct int64_float_s){1, 2.0};
 }

diff  --git a/clang/test/CodeGen/riscv64-lp64-abi.c b/clang/test/CodeGen/riscv64-lp64-abi.c
index bae5470c377d..8347056c54d3 100644
--- a/clang/test/CodeGen/riscv64-lp64-abi.c
+++ b/clang/test/CodeGen/riscv64-lp64-abi.c
@@ -25,7 +25,7 @@ int f_scalar_stack_1(int32_t a, __int128_t b, float c, long double d, v32i8 e,
 // the presence of large return values that consume a register due to the need
 // to pass a pointer.
 
-// CHECK-LABEL: define void @f_scalar_stack_2(%struct.large* noalias sret %agg.result, double %a, i128 %b, fp128 %c, <32 x i8>* %0, i8 zeroext %e, i8 %f, i8 %g)
+// CHECK-LABEL: define void @f_scalar_stack_2(%struct.large* noalias sret align 8 %agg.result, double %a, i128 %b, fp128 %c, <32 x i8>* %0, i8 zeroext %e, i8 %f, i8 %g)
 struct large f_scalar_stack_2(double a, __int128_t b, long double c, v32i8 d,
                               uint8_t e, int8_t f, uint8_t g) {
   return (struct large){a, e, f, g};

diff  --git a/clang/test/CodeGen/riscv64-lp64-lp64f-abi.c b/clang/test/CodeGen/riscv64-lp64-lp64f-abi.c
index d9c909e88bd8..489d0e83dcbc 100644
--- a/clang/test/CodeGen/riscv64-lp64-lp64f-abi.c
+++ b/clang/test/CodeGen/riscv64-lp64-lp64f-abi.c
@@ -27,7 +27,7 @@ int f_scalar_stack_1(int32_t a, __int128_t b, double c, long double d, v32i8 e,
 // the presence of large return values that consume a register due to the need
 // to pass a pointer.
 
-// CHECK-LABEL: define void @f_scalar_stack_2(%struct.large* noalias sret %agg.result, double %a, i128 %b, fp128 %c, <32 x i8>* %0, i8 zeroext %e, i8 %f, i8 %g)
+// CHECK-LABEL: define void @f_scalar_stack_2(%struct.large* noalias sret align 8 %agg.result, double %a, i128 %b, fp128 %c, <32 x i8>* %0, i8 zeroext %e, i8 %f, i8 %g)
 struct large f_scalar_stack_2(double a, __int128_t b, long double c, v32i8 d,
                               uint8_t e, int8_t f, uint8_t g) {
   return (struct large){a, e, f, g};

diff  --git a/clang/test/CodeGen/riscv64-lp64-lp64f-lp64d-abi.c b/clang/test/CodeGen/riscv64-lp64-lp64f-lp64d-abi.c
index f50a8ca90575..8e263aeba25c 100644
--- a/clang/test/CodeGen/riscv64-lp64-lp64f-lp64d-abi.c
+++ b/clang/test/CodeGen/riscv64-lp64-lp64f-lp64d-abi.c
@@ -166,7 +166,7 @@ void f_agg_large(struct large x) {
 
 // The address where the struct should be written to will be the first
 // argument
-// CHECK-LABEL: define void @f_agg_large_ret(%struct.large* noalias sret %agg.result, i32 signext %i, i8 signext %j)
+// CHECK-LABEL: define void @f_agg_large_ret(%struct.large* noalias sret align 8 %agg.result, i32 signext %i, i8 signext %j)
 struct large f_agg_large_ret(int32_t i, int8_t j) {
   return (struct large){1, 2, 3, 4};
 }
@@ -178,7 +178,7 @@ void f_vec_large_v32i8(v32i8 x) {
   x[0] = x[7];
 }
 
-// CHECK-LABEL: define void @f_vec_large_v32i8_ret(<32 x i8>* noalias sret %agg.result)
+// CHECK-LABEL: define void @f_vec_large_v32i8_ret(<32 x i8>* noalias sret align 32 %agg.result)
 v32i8 f_vec_large_v32i8_ret() {
   return (v32i8){1, 2, 3, 4, 5, 6, 7, 8};
 }
@@ -202,7 +202,7 @@ int f_scalar_stack_2(int32_t a, __int128_t b, int64_t c, long double d, v32i8 e,
 // the presence of large return values that consume a register due to the need
 // to pass a pointer.
 
-// CHECK-LABEL: define void @f_scalar_stack_3(%struct.large* noalias sret %agg.result, i32 signext %a, i128 %b, fp128 %c, <32 x i8>* %0, i8 zeroext %e, i8 %f, i8 %g)
+// CHECK-LABEL: define void @f_scalar_stack_3(%struct.large* noalias sret align 8 %agg.result, i32 signext %a, i128 %b, fp128 %c, <32 x i8>* %0, i8 zeroext %e, i8 %f, i8 %g)
 struct large f_scalar_stack_3(uint32_t a, __int128_t b, long double c, v32i8 d,
                               uint8_t e, int8_t f, uint8_t g) {
   return (struct large){a, e, f, g};

diff  --git a/clang/test/CodeGen/riscv64-lp64d-abi.c b/clang/test/CodeGen/riscv64-lp64d-abi.c
index 83947def3085..ec47428e6cca 100644
--- a/clang/test/CodeGen/riscv64-lp64d-abi.c
+++ b/clang/test/CodeGen/riscv64-lp64d-abi.c
@@ -243,7 +243,7 @@ struct int_double_int_s { int a; double b; int c; };
 // CHECK: define void @f_int_double_int_s_arg(%struct.int_double_int_s* %a)
 void f_int_double_int_s_arg(struct int_double_int_s a) {}
 
-// CHECK: define void @f_ret_int_double_int_s(%struct.int_double_int_s* noalias sret %agg.result)
+// CHECK: define void @f_ret_int_double_int_s(%struct.int_double_int_s* noalias sret align 8 %agg.result)
 struct int_double_int_s f_ret_int_double_int_s() {
   return (struct int_double_int_s){1, 2.0, 3};
 }

diff  --git a/clang/test/CodeGen/sparcv9-abi.c b/clang/test/CodeGen/sparcv9-abi.c
index 5984fa558c83..2d97001ab1ae 100644
--- a/clang/test/CodeGen/sparcv9-abi.c
+++ b/clang/test/CodeGen/sparcv9-abi.c
@@ -53,7 +53,7 @@ struct large {
   int x;
 };
 
-// CHECK-LABEL: define void @f_large(%struct.large* noalias sret %agg.result, %struct.large* %x)
+// CHECK-LABEL: define void @f_large(%struct.large* noalias sret align 8 %agg.result, %struct.large* %x)
 struct large f_large(struct large x) {
   x.a += *x.b;
   x.b = 0;

diff  --git a/clang/test/CodeGen/struct-passing.c b/clang/test/CodeGen/struct-passing.c
index 80847b9fea64..e3108b964bd2 100644
--- a/clang/test/CodeGen/struct-passing.c
+++ b/clang/test/CodeGen/struct-passing.c
@@ -18,8 +18,8 @@ void *ps[] = { f0, f1, f2, f3, f4, f5 };
 
 // CHECK: declare i32 @f0() [[RN:#[0-9]+]]
 // CHECK: declare i32 @f1() [[RO:#[0-9]+]]
-// CHECK: declare void @f2({{.*}} sret)
-// CHECK: declare void @f3({{.*}} sret)
+// CHECK: declare void @f2({{.*}} sret align 4)
+// CHECK: declare void @f3({{.*}} sret align 4)
 // CHECK: declare void @f4({{.*}} byval({{.*}}) align 4)
 // CHECK: declare void @f5({{.*}} byval({{.*}}) align 4)
 

diff  --git a/clang/test/CodeGen/systemz-abi-vector.c b/clang/test/CodeGen/systemz-abi-vector.c
index f2e6c13c718f..896cc0994d6d 100644
--- a/clang/test/CodeGen/systemz-abi-vector.c
+++ b/clang/test/CodeGen/systemz-abi-vector.c
@@ -50,91 +50,91 @@ unsigned int align = __alignof__ (v16i8);
 // CHECK-VECTOR: @align = global i32 8
 
 v1i8 pass_v1i8(v1i8 arg) { return arg; }
-// CHECK-LABEL: define void @pass_v1i8(<1 x i8>* noalias sret %{{.*}}, <1 x i8>* %0)
+// CHECK-LABEL: define void @pass_v1i8(<1 x i8>* noalias sret align 1 %{{.*}}, <1 x i8>* %0)
 // CHECK-VECTOR-LABEL: define <1 x i8> @pass_v1i8(<1 x i8> %{{.*}})
 
 v2i8 pass_v2i8(v2i8 arg) { return arg; }
-// CHECK-LABEL: define void @pass_v2i8(<2 x i8>* noalias sret %{{.*}}, <2 x i8>* %0)
+// CHECK-LABEL: define void @pass_v2i8(<2 x i8>* noalias sret align 2 %{{.*}}, <2 x i8>* %0)
 // CHECK-VECTOR-LABEL: define <2 x i8> @pass_v2i8(<2 x i8> %{{.*}})
 
 v4i8 pass_v4i8(v4i8 arg) { return arg; }
-// CHECK-LABEL: define void @pass_v4i8(<4 x i8>* noalias sret %{{.*}}, <4 x i8>* %0)
+// CHECK-LABEL: define void @pass_v4i8(<4 x i8>* noalias sret align 4 %{{.*}}, <4 x i8>* %0)
 // CHECK-VECTOR-LABEL: define <4 x i8> @pass_v4i8(<4 x i8> %{{.*}})
 
 v8i8 pass_v8i8(v8i8 arg) { return arg; }
-// CHECK-LABEL: define void @pass_v8i8(<8 x i8>* noalias sret %{{.*}}, <8 x i8>* %0)
+// CHECK-LABEL: define void @pass_v8i8(<8 x i8>* noalias sret align 8 %{{.*}}, <8 x i8>* %0)
 // CHECK-VECTOR-LABEL: define <8 x i8> @pass_v8i8(<8 x i8> %{{.*}})
 
 v16i8 pass_v16i8(v16i8 arg) { return arg; }
-// CHECK-LABEL: define void @pass_v16i8(<16 x i8>* noalias sret %{{.*}}, <16 x i8>* %0)
+// CHECK-LABEL: define void @pass_v16i8(<16 x i8>* noalias sret align 16 %{{.*}}, <16 x i8>* %0)
 // CHECK-VECTOR-LABEL: define <16 x i8> @pass_v16i8(<16 x i8> %{{.*}})
 
 v32i8 pass_v32i8(v32i8 arg) { return arg; }
-// CHECK-LABEL: define void @pass_v32i8(<32 x i8>* noalias sret %{{.*}}, <32 x i8>* %0)
-// CHECK-VECTOR-LABEL: define void @pass_v32i8(<32 x i8>* noalias sret %{{.*}}, <32 x i8>* %0)
+// CHECK-LABEL: define void @pass_v32i8(<32 x i8>* noalias sret align 32 %{{.*}}, <32 x i8>* %0)
+// CHECK-VECTOR-LABEL: define void @pass_v32i8(<32 x i8>* noalias sret align 8 %{{.*}}, <32 x i8>* %0)
 
 v1i16 pass_v1i16(v1i16 arg) { return arg; }
-// CHECK-LABEL: define void @pass_v1i16(<1 x i16>* noalias sret %{{.*}}, <1 x i16>* %0)
+// CHECK-LABEL: define void @pass_v1i16(<1 x i16>* noalias sret align 2 %{{.*}}, <1 x i16>* %0)
 // CHECK-VECTOR-LABEL: define <1 x i16> @pass_v1i16(<1 x i16> %{{.*}})
 
 v2i16 pass_v2i16(v2i16 arg) { return arg; }
-// CHECK-LABEL: define void @pass_v2i16(<2 x i16>* noalias sret %{{.*}}, <2 x i16>* %0)
+// CHECK-LABEL: define void @pass_v2i16(<2 x i16>* noalias sret align 4 %{{.*}}, <2 x i16>* %0)
 // CHECK-VECTOR-LABEL: define <2 x i16> @pass_v2i16(<2 x i16> %{{.*}})
 
 v4i16 pass_v4i16(v4i16 arg) { return arg; }
-// CHECK-LABEL: define void @pass_v4i16(<4 x i16>* noalias sret %{{.*}}, <4 x i16>* %0)
+// CHECK-LABEL: define void @pass_v4i16(<4 x i16>* noalias sret align 8 %{{.*}}, <4 x i16>* %0)
 // CHECK-VECTOR-LABEL: define <4 x i16> @pass_v4i16(<4 x i16> %{{.*}})
 
 v8i16 pass_v8i16(v8i16 arg) { return arg; }
-// CHECK-LABEL: define void @pass_v8i16(<8 x i16>* noalias sret %{{.*}}, <8 x i16>* %0)
+// CHECK-LABEL: define void @pass_v8i16(<8 x i16>* noalias sret align 16 %{{.*}}, <8 x i16>* %0)
 // CHECK-VECTOR-LABEL: define <8 x i16> @pass_v8i16(<8 x i16> %{{.*}})
 
 v1i32 pass_v1i32(v1i32 arg) { return arg; }
-// CHECK-LABEL: define void @pass_v1i32(<1 x i32>* noalias sret %{{.*}}, <1 x i32>* %0)
+// CHECK-LABEL: define void @pass_v1i32(<1 x i32>* noalias sret align 4 %{{.*}}, <1 x i32>* %0)
 // CHECK-VECTOR-LABEL: define <1 x i32> @pass_v1i32(<1 x i32> %{{.*}})
 
 v2i32 pass_v2i32(v2i32 arg) { return arg; }
-// CHECK-LABEL: define void @pass_v2i32(<2 x i32>* noalias sret %{{.*}}, <2 x i32>* %0)
+// CHECK-LABEL: define void @pass_v2i32(<2 x i32>* noalias sret align 8 %{{.*}}, <2 x i32>* %0)
 // CHECK-VECTOR-LABEL: define <2 x i32> @pass_v2i32(<2 x i32> %{{.*}})
 
 v4i32 pass_v4i32(v4i32 arg) { return arg; }
-// CHECK-LABEL: define void @pass_v4i32(<4 x i32>* noalias sret %{{.*}}, <4 x i32>* %0)
+// CHECK-LABEL: define void @pass_v4i32(<4 x i32>* noalias sret align 16 %{{.*}}, <4 x i32>* %0)
 // CHECK-VECTOR-LABEL: define <4 x i32> @pass_v4i32(<4 x i32> %{{.*}})
 
 v1i64 pass_v1i64(v1i64 arg) { return arg; }
-// CHECK-LABEL: define void @pass_v1i64(<1 x i64>* noalias sret %{{.*}}, <1 x i64>* %0)
+// CHECK-LABEL: define void @pass_v1i64(<1 x i64>* noalias sret align 8 %{{.*}}, <1 x i64>* %0)
 // CHECK-VECTOR-LABEL: define <1 x i64> @pass_v1i64(<1 x i64> %{{.*}})
 
 v2i64 pass_v2i64(v2i64 arg) { return arg; }
-// CHECK-LABEL: define void @pass_v2i64(<2 x i64>* noalias sret %{{.*}}, <2 x i64>* %0)
+// CHECK-LABEL: define void @pass_v2i64(<2 x i64>* noalias sret align 16 %{{.*}}, <2 x i64>* %0)
 // CHECK-VECTOR-LABEL: define <2 x i64> @pass_v2i64(<2 x i64> %{{.*}})
 
 v1i128 pass_v1i128(v1i128 arg) { return arg; }
-// CHECK-LABEL: define void @pass_v1i128(<1 x i128>* noalias sret %{{.*}}, <1 x i128>* %0)
+// CHECK-LABEL: define void @pass_v1i128(<1 x i128>* noalias sret align 16 %{{.*}}, <1 x i128>* %0)
 // CHECK-VECTOR-LABEL: define <1 x i128> @pass_v1i128(<1 x i128> %{{.*}})
 
 v1f32 pass_v1f32(v1f32 arg) { return arg; }
-// CHECK-LABEL: define void @pass_v1f32(<1 x float>* noalias sret %{{.*}}, <1 x float>* %0)
+// CHECK-LABEL: define void @pass_v1f32(<1 x float>* noalias sret align 4 %{{.*}}, <1 x float>* %0)
 // CHECK-VECTOR-LABEL: define <1 x float> @pass_v1f32(<1 x float> %{{.*}})
 
 v2f32 pass_v2f32(v2f32 arg) { return arg; }
-// CHECK-LABEL: define void @pass_v2f32(<2 x float>* noalias sret %{{.*}}, <2 x float>* %0)
+// CHECK-LABEL: define void @pass_v2f32(<2 x float>* noalias sret align 8 %{{.*}}, <2 x float>* %0)
 // CHECK-VECTOR-LABEL: define <2 x float> @pass_v2f32(<2 x float> %{{.*}})
 
 v4f32 pass_v4f32(v4f32 arg) { return arg; }
-// CHECK-LABEL: define void @pass_v4f32(<4 x float>* noalias sret %{{.*}}, <4 x float>* %0)
+// CHECK-LABEL: define void @pass_v4f32(<4 x float>* noalias sret align 16 %{{.*}}, <4 x float>* %0)
 // CHECK-VECTOR-LABEL: define <4 x float> @pass_v4f32(<4 x float> %{{.*}})
 
 v1f64 pass_v1f64(v1f64 arg) { return arg; }
-// CHECK-LABEL: define void @pass_v1f64(<1 x double>* noalias sret %{{.*}}, <1 x double>* %0)
+// CHECK-LABEL: define void @pass_v1f64(<1 x double>* noalias sret align 8 %{{.*}}, <1 x double>* %0)
 // CHECK-VECTOR-LABEL: define <1 x double> @pass_v1f64(<1 x double> %{{.*}})
 
 v2f64 pass_v2f64(v2f64 arg) { return arg; }
-// CHECK-LABEL: define void @pass_v2f64(<2 x double>* noalias sret %{{.*}}, <2 x double>* %0)
+// CHECK-LABEL: define void @pass_v2f64(<2 x double>* noalias sret align 16 %{{.*}}, <2 x double>* %0)
 // CHECK-VECTOR-LABEL: define <2 x double> @pass_v2f64(<2 x double> %{{.*}})
 
 v1f128 pass_v1f128(v1f128 arg) { return arg; }
-// CHECK-LABEL: define void @pass_v1f128(<1 x fp128>* noalias sret %{{.*}}, <1 x fp128>* %0)
+// CHECK-LABEL: define void @pass_v1f128(<1 x fp128>* noalias sret align 16 %{{.*}}, <1 x fp128>* %0)
 // CHECK-VECTOR-LABEL: define <1 x fp128> @pass_v1f128(<1 x fp128> %{{.*}})
 
 
@@ -142,62 +142,62 @@ v1f128 pass_v1f128(v1f128 arg) { return arg; }
 
 struct agg_v1i8 { v1i8 a; };
 struct agg_v1i8 pass_agg_v1i8(struct agg_v1i8 arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_v1i8(%struct.agg_v1i8* noalias sret %{{.*}}, i8 %{{.*}})
-// CHECK-VECTOR-LABEL: define void @pass_agg_v1i8(%struct.agg_v1i8* noalias sret %{{.*}}, <1 x i8> %{{.*}})
+// CHECK-LABEL: define void @pass_agg_v1i8(%struct.agg_v1i8* noalias sret align 1 %{{.*}}, i8 %{{.*}})
+// CHECK-VECTOR-LABEL: define void @pass_agg_v1i8(%struct.agg_v1i8* noalias sret align 1 %{{.*}}, <1 x i8> %{{.*}})
 
 struct agg_v2i8 { v2i8 a; };
 struct agg_v2i8 pass_agg_v2i8(struct agg_v2i8 arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_v2i8(%struct.agg_v2i8* noalias sret %{{.*}}, i16 %{{.*}})
-// CHECK-VECTOR-LABEL: define void @pass_agg_v2i8(%struct.agg_v2i8* noalias sret %{{.*}}, <2 x i8> %{{.*}})
+// CHECK-LABEL: define void @pass_agg_v2i8(%struct.agg_v2i8* noalias sret align 2 %{{.*}}, i16 %{{.*}})
+// CHECK-VECTOR-LABEL: define void @pass_agg_v2i8(%struct.agg_v2i8* noalias sret align 2 %{{.*}}, <2 x i8> %{{.*}})
 
 struct agg_v4i8 { v4i8 a; };
 struct agg_v4i8 pass_agg_v4i8(struct agg_v4i8 arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_v4i8(%struct.agg_v4i8* noalias sret %{{.*}}, i32 %{{.*}})
-// CHECK-VECTOR-LABEL: define void @pass_agg_v4i8(%struct.agg_v4i8* noalias sret %{{.*}}, <4 x i8> %{{.*}})
+// CHECK-LABEL: define void @pass_agg_v4i8(%struct.agg_v4i8* noalias sret align 4 %{{.*}}, i32 %{{.*}})
+// CHECK-VECTOR-LABEL: define void @pass_agg_v4i8(%struct.agg_v4i8* noalias sret align 4 %{{.*}}, <4 x i8> %{{.*}})
 
 struct agg_v8i8 { v8i8 a; };
 struct agg_v8i8 pass_agg_v8i8(struct agg_v8i8 arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_v8i8(%struct.agg_v8i8* noalias sret %{{.*}}, i64 %{{.*}})
-// CHECK-VECTOR-LABEL: define void @pass_agg_v8i8(%struct.agg_v8i8* noalias sret %{{.*}}, <8 x i8> %{{.*}})
+// CHECK-LABEL: define void @pass_agg_v8i8(%struct.agg_v8i8* noalias sret align 8 %{{.*}}, i64 %{{.*}})
+// CHECK-VECTOR-LABEL: define void @pass_agg_v8i8(%struct.agg_v8i8* noalias sret align 8 %{{.*}}, <8 x i8> %{{.*}})
 
 struct agg_v16i8 { v16i8 a; };
 struct agg_v16i8 pass_agg_v16i8(struct agg_v16i8 arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_v16i8(%struct.agg_v16i8* noalias sret %{{.*}}, %struct.agg_v16i8* %{{.*}})
-// CHECK-VECTOR-LABEL: define void @pass_agg_v16i8(%struct.agg_v16i8* noalias sret %{{.*}}, <16 x i8> %{{.*}})
+// CHECK-LABEL: define void @pass_agg_v16i8(%struct.agg_v16i8* noalias sret align 16 %{{.*}}, %struct.agg_v16i8* %{{.*}})
+// CHECK-VECTOR-LABEL: define void @pass_agg_v16i8(%struct.agg_v16i8* noalias sret align 8 %{{.*}}, <16 x i8> %{{.*}})
 
 struct agg_v32i8 { v32i8 a; };
 struct agg_v32i8 pass_agg_v32i8(struct agg_v32i8 arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_v32i8(%struct.agg_v32i8* noalias sret %{{.*}}, %struct.agg_v32i8* %{{.*}})
-// CHECK-VECTOR-LABEL: define void @pass_agg_v32i8(%struct.agg_v32i8* noalias sret %{{.*}}, %struct.agg_v32i8* %{{.*}})
+// CHECK-LABEL: define void @pass_agg_v32i8(%struct.agg_v32i8* noalias sret align 32 %{{.*}}, %struct.agg_v32i8* %{{.*}})
+// CHECK-VECTOR-LABEL: define void @pass_agg_v32i8(%struct.agg_v32i8* noalias sret align 8 %{{.*}}, %struct.agg_v32i8* %{{.*}})
 
 
 // Verify that the following are *not* vector-like aggregate types
 
 struct agg_novector1 { v4i8 a; v4i8 b; };
 struct agg_novector1 pass_agg_novector1(struct agg_novector1 arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_novector1(%struct.agg_novector1* noalias sret %{{.*}}, i64 %{{.*}})
-// CHECK-VECTOR-LABEL: define void @pass_agg_novector1(%struct.agg_novector1* noalias sret %{{.*}}, i64 %{{.*}})
+// CHECK-LABEL: define void @pass_agg_novector1(%struct.agg_novector1* noalias sret align 4 %{{.*}}, i64 %{{.*}})
+// CHECK-VECTOR-LABEL: define void @pass_agg_novector1(%struct.agg_novector1* noalias sret align 4 %{{.*}}, i64 %{{.*}})
 
 struct agg_novector2 { v4i8 a; float b; };
 struct agg_novector2 pass_agg_novector2(struct agg_novector2 arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_novector2(%struct.agg_novector2* noalias sret %{{.*}}, i64 %{{.*}})
-// CHECK-VECTOR-LABEL: define void @pass_agg_novector2(%struct.agg_novector2* noalias sret %{{.*}}, i64 %{{.*}})
+// CHECK-LABEL: define void @pass_agg_novector2(%struct.agg_novector2* noalias sret align 4 %{{.*}}, i64 %{{.*}})
+// CHECK-VECTOR-LABEL: define void @pass_agg_novector2(%struct.agg_novector2* noalias sret align 4 %{{.*}}, i64 %{{.*}})
 
 struct agg_novector3 { v4i8 a; int : 0; };
 struct agg_novector3 pass_agg_novector3(struct agg_novector3 arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_novector3(%struct.agg_novector3* noalias sret %{{.*}}, i32 %{{.*}})
-// CHECK-VECTOR-LABEL: define void @pass_agg_novector3(%struct.agg_novector3* noalias sret %{{.*}}, i32 %{{.*}})
+// CHECK-LABEL: define void @pass_agg_novector3(%struct.agg_novector3* noalias sret align 4 %{{.*}}, i32 %{{.*}})
+// CHECK-VECTOR-LABEL: define void @pass_agg_novector3(%struct.agg_novector3* noalias sret align 4 %{{.*}}, i32 %{{.*}})
 
 struct agg_novector4 { v4i8 a __attribute__((aligned (8))); };
 struct agg_novector4 pass_agg_novector4(struct agg_novector4 arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_novector4(%struct.agg_novector4* noalias sret %{{.*}}, i64 %{{.*}})
-// CHECK-VECTOR-LABEL: define void @pass_agg_novector4(%struct.agg_novector4* noalias sret %{{.*}}, i64 %{{.*}})
+// CHECK-LABEL: define void @pass_agg_novector4(%struct.agg_novector4* noalias sret align 8 %{{.*}}, i64 %{{.*}})
+// CHECK-VECTOR-LABEL: define void @pass_agg_novector4(%struct.agg_novector4* noalias sret align 8 %{{.*}}, i64 %{{.*}})
 
 
 // Accessing variable argument lists
 
 v1i8 va_v1i8(__builtin_va_list l) { return __builtin_va_arg(l, v1i8); }
-// CHECK-LABEL: define void @va_v1i8(<1 x i8>* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}})
+// CHECK-LABEL: define void @va_v1i8(<1 x i8>* noalias sret align 1 %{{.*}}, %struct.__va_list_tag* %{{.*}})
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -229,7 +229,7 @@ v1i8 va_v1i8(__builtin_va_list l) { return __builtin_va_arg(l, v1i8); }
 // CHECK-VECTOR: ret <1 x i8> [[RET]]
 
 v2i8 va_v2i8(__builtin_va_list l) { return __builtin_va_arg(l, v2i8); }
-// CHECK-LABEL: define void @va_v2i8(<2 x i8>* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}})
+// CHECK-LABEL: define void @va_v2i8(<2 x i8>* noalias sret align 2 %{{.*}}, %struct.__va_list_tag* %{{.*}})
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -261,7 +261,7 @@ v2i8 va_v2i8(__builtin_va_list l) { return __builtin_va_arg(l, v2i8); }
 // CHECK-VECTOR: ret <2 x i8> [[RET]]
 
 v4i8 va_v4i8(__builtin_va_list l) { return __builtin_va_arg(l, v4i8); }
-// CHECK-LABEL: define void @va_v4i8(<4 x i8>* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}})
+// CHECK-LABEL: define void @va_v4i8(<4 x i8>* noalias sret align 4 %{{.*}}, %struct.__va_list_tag* %{{.*}})
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -293,7 +293,7 @@ v4i8 va_v4i8(__builtin_va_list l) { return __builtin_va_arg(l, v4i8); }
 // CHECK-VECTOR: ret <4 x i8> [[RET]]
 
 v8i8 va_v8i8(__builtin_va_list l) { return __builtin_va_arg(l, v8i8); }
-// CHECK-LABEL: define void @va_v8i8(<8 x i8>* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}})
+// CHECK-LABEL: define void @va_v8i8(<8 x i8>* noalias sret align 8 %{{.*}}, %struct.__va_list_tag* %{{.*}})
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -325,7 +325,7 @@ v8i8 va_v8i8(__builtin_va_list l) { return __builtin_va_arg(l, v8i8); }
 // CHECK-VECTOR: ret <8 x i8> [[RET]]
 
 v16i8 va_v16i8(__builtin_va_list l) { return __builtin_va_arg(l, v16i8); }
-// CHECK-LABEL: define void @va_v16i8(<16 x i8>* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}})
+// CHECK-LABEL: define void @va_v16i8(<16 x i8>* noalias sret align 16 %{{.*}}, %struct.__va_list_tag* %{{.*}})
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -357,7 +357,7 @@ v16i8 va_v16i8(__builtin_va_list l) { return __builtin_va_arg(l, v16i8); }
 // CHECK-VECTOR: ret <16 x i8> [[RET]]
 
 v32i8 va_v32i8(__builtin_va_list l) { return __builtin_va_arg(l, v32i8); }
-// CHECK-LABEL: define void @va_v32i8(<32 x i8>* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}})
+// CHECK-LABEL: define void @va_v32i8(<32 x i8>* noalias sret align 32 %{{.*}}, %struct.__va_list_tag* %{{.*}})
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -379,7 +379,7 @@ v32i8 va_v32i8(__builtin_va_list l) { return __builtin_va_arg(l, v32i8); }
 // CHECK: [[VA_ARG_ADDR:%[^ ]+]] = phi <32 x i8>** [ [[REG_ADDR]], %{{.*}} ], [ [[MEM_ADDR]], %{{.*}} ]
 // CHECK: [[INDIRECT_ARG:%[^ ]+]] = load <32 x i8>*, <32 x i8>** [[VA_ARG_ADDR]]
 // CHECK: ret void
-// CHECK-VECTOR-LABEL: define void @va_v32i8(<32 x i8>* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}})
+// CHECK-VECTOR-LABEL: define void @va_v32i8(<32 x i8>* noalias sret align 8 %{{.*}}, %struct.__va_list_tag* %{{.*}})
 // CHECK-VECTOR: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK-VECTOR: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK-VECTOR: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -403,7 +403,7 @@ v32i8 va_v32i8(__builtin_va_list l) { return __builtin_va_arg(l, v32i8); }
 // CHECK-VECTOR: ret void
 
 struct agg_v1i8 va_agg_v1i8(__builtin_va_list l) { return __builtin_va_arg(l, struct agg_v1i8); }
-// CHECK-LABEL: define void @va_agg_v1i8(%struct.agg_v1i8* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}})
+// CHECK-LABEL: define void @va_agg_v1i8(%struct.agg_v1i8* noalias sret align 1 %{{.*}}, %struct.__va_list_tag* %{{.*}})
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -424,7 +424,7 @@ struct agg_v1i8 va_agg_v1i8(__builtin_va_list l) { return __builtin_va_arg(l, st
 // CHECK: store i8* [[OVERFLOW_ARG_AREA2]], i8** [[OVERFLOW_ARG_AREA_PTR]]
 // CHECK: [[VA_ARG_ADDR:%[^ ]+]] = phi %struct.agg_v1i8* [ [[REG_ADDR]], %{{.*}} ], [ [[MEM_ADDR]], %{{.*}} ]
 // CHECK: ret void
-// CHECK-VECTOR-LABEL: define void @va_agg_v1i8(%struct.agg_v1i8* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}})
+// CHECK-VECTOR-LABEL: define void @va_agg_v1i8(%struct.agg_v1i8* noalias sret align 1 %{{.*}}, %struct.__va_list_tag* %{{.*}})
 // CHECK-VECTOR: [[OVERFLOW_ARG_AREA_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 2
 // CHECK-VECTOR: [[OVERFLOW_ARG_AREA:%[^ ]+]] = load i8*, i8** [[OVERFLOW_ARG_AREA_PTR]]
 // CHECK-VECTOR: [[MEM_ADDR:%[^ ]+]] = bitcast i8* [[OVERFLOW_ARG_AREA]] to %struct.agg_v1i8*
@@ -433,7 +433,7 @@ struct agg_v1i8 va_agg_v1i8(__builtin_va_list l) { return __builtin_va_arg(l, st
 // CHECK-VECTOR: ret void
 
 struct agg_v2i8 va_agg_v2i8(__builtin_va_list l) { return __builtin_va_arg(l, struct agg_v2i8); }
-// CHECK-LABEL: define void @va_agg_v2i8(%struct.agg_v2i8* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}})
+// CHECK-LABEL: define void @va_agg_v2i8(%struct.agg_v2i8* noalias sret align 2 %{{.*}}, %struct.__va_list_tag* %{{.*}})
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -454,7 +454,7 @@ struct agg_v2i8 va_agg_v2i8(__builtin_va_list l) { return __builtin_va_arg(l, st
 // CHECK: store i8* [[OVERFLOW_ARG_AREA2]], i8** [[OVERFLOW_ARG_AREA_PTR]]
 // CHECK: [[VA_ARG_ADDR:%[^ ]+]] = phi %struct.agg_v2i8* [ [[REG_ADDR]], %{{.*}} ], [ [[MEM_ADDR]], %{{.*}} ]
 // CHECK: ret void
-// CHECK-VECTOR-LABEL: define void @va_agg_v2i8(%struct.agg_v2i8* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}})
+// CHECK-VECTOR-LABEL: define void @va_agg_v2i8(%struct.agg_v2i8* noalias sret align 2 %{{.*}}, %struct.__va_list_tag* %{{.*}})
 // CHECK-VECTOR: [[OVERFLOW_ARG_AREA_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 2
 // CHECK-VECTOR: [[OVERFLOW_ARG_AREA:%[^ ]+]] = load i8*, i8** [[OVERFLOW_ARG_AREA_PTR]]
 // CHECK-VECTOR: [[MEM_ADDR:%[^ ]+]] = bitcast i8* [[OVERFLOW_ARG_AREA]] to %struct.agg_v2i8*
@@ -463,7 +463,7 @@ struct agg_v2i8 va_agg_v2i8(__builtin_va_list l) { return __builtin_va_arg(l, st
 // CHECK-VECTOR: ret void
 
 struct agg_v4i8 va_agg_v4i8(__builtin_va_list l) { return __builtin_va_arg(l, struct agg_v4i8); }
-// CHECK-LABEL: define void @va_agg_v4i8(%struct.agg_v4i8* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}})
+// CHECK-LABEL: define void @va_agg_v4i8(%struct.agg_v4i8* noalias sret align 4 %{{.*}}, %struct.__va_list_tag* %{{.*}})
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -484,7 +484,7 @@ struct agg_v4i8 va_agg_v4i8(__builtin_va_list l) { return __builtin_va_arg(l, st
 // CHECK: store i8* [[OVERFLOW_ARG_AREA2]], i8** [[OVERFLOW_ARG_AREA_PTR]]
 // CHECK: [[VA_ARG_ADDR:%[^ ]+]] = phi %struct.agg_v4i8* [ [[REG_ADDR]], %{{.*}} ], [ [[MEM_ADDR]], %{{.*}} ]
 // CHECK: ret void
-// CHECK-VECTOR-LABEL: define void @va_agg_v4i8(%struct.agg_v4i8* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}})
+// CHECK-VECTOR-LABEL: define void @va_agg_v4i8(%struct.agg_v4i8* noalias sret align 4 %{{.*}}, %struct.__va_list_tag* %{{.*}})
 // CHECK-VECTOR: [[OVERFLOW_ARG_AREA_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 2
 // CHECK-VECTOR: [[OVERFLOW_ARG_AREA:%[^ ]+]] = load i8*, i8** [[OVERFLOW_ARG_AREA_PTR]]
 // CHECK-VECTOR: [[MEM_ADDR:%[^ ]+]] = bitcast i8* [[OVERFLOW_ARG_AREA]] to %struct.agg_v4i8*
@@ -493,7 +493,7 @@ struct agg_v4i8 va_agg_v4i8(__builtin_va_list l) { return __builtin_va_arg(l, st
 // CHECK-VECTOR: ret void
 
 struct agg_v8i8 va_agg_v8i8(__builtin_va_list l) { return __builtin_va_arg(l, struct agg_v8i8); }
-// CHECK-LABEL: define void @va_agg_v8i8(%struct.agg_v8i8* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}})
+// CHECK-LABEL: define void @va_agg_v8i8(%struct.agg_v8i8* noalias sret align 8 %{{.*}}, %struct.__va_list_tag* %{{.*}})
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -514,7 +514,7 @@ struct agg_v8i8 va_agg_v8i8(__builtin_va_list l) { return __builtin_va_arg(l, st
 // CHECK: store i8* [[OVERFLOW_ARG_AREA2]], i8** [[OVERFLOW_ARG_AREA_PTR]]
 // CHECK: [[VA_ARG_ADDR:%[^ ]+]] = phi %struct.agg_v8i8* [ [[REG_ADDR]], %{{.*}} ], [ [[MEM_ADDR]], %{{.*}} ]
 // CHECK: ret void
-// CHECK-VECTOR-LABEL: define void @va_agg_v8i8(%struct.agg_v8i8* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}})
+// CHECK-VECTOR-LABEL: define void @va_agg_v8i8(%struct.agg_v8i8* noalias sret align 8 %{{.*}}, %struct.__va_list_tag* %{{.*}})
 // CHECK-VECTOR: [[OVERFLOW_ARG_AREA_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 2
 // CHECK-VECTOR: [[OVERFLOW_ARG_AREA:%[^ ]+]] = load i8*, i8** [[OVERFLOW_ARG_AREA_PTR]]
 // CHECK-VECTOR: [[MEM_ADDR:%[^ ]+]] = bitcast i8* [[OVERFLOW_ARG_AREA]] to %struct.agg_v8i8*
@@ -523,7 +523,7 @@ struct agg_v8i8 va_agg_v8i8(__builtin_va_list l) { return __builtin_va_arg(l, st
 // CHECK-VECTOR: ret void
 
 struct agg_v16i8 va_agg_v16i8(__builtin_va_list l) { return __builtin_va_arg(l, struct agg_v16i8); }
-// CHECK-LABEL: define void @va_agg_v16i8(%struct.agg_v16i8* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}})
+// CHECK-LABEL: define void @va_agg_v16i8(%struct.agg_v16i8* noalias sret align 16 %{{.*}}, %struct.__va_list_tag* %{{.*}})
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -545,7 +545,7 @@ struct agg_v16i8 va_agg_v16i8(__builtin_va_list l) { return __builtin_va_arg(l,
 // CHECK: [[VA_ARG_ADDR:%[^ ]+]] = phi %struct.agg_v16i8** [ [[REG_ADDR]], %{{.*}} ], [ [[MEM_ADDR]], %{{.*}} ]
 // CHECK: [[INDIRECT_ARG:%[^ ]+]] = load %struct.agg_v16i8*, %struct.agg_v16i8** [[VA_ARG_ADDR]]
 // CHECK: ret void
-// CHECK-VECTOR-LABEL: define void @va_agg_v16i8(%struct.agg_v16i8* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}})
+// CHECK-VECTOR-LABEL: define void @va_agg_v16i8(%struct.agg_v16i8* noalias sret align 8 %{{.*}}, %struct.__va_list_tag* %{{.*}})
 // CHECK-VECTOR: [[OVERFLOW_ARG_AREA_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 2
 // CHECK-VECTOR: [[OVERFLOW_ARG_AREA:%[^ ]+]] = load i8*, i8** [[OVERFLOW_ARG_AREA_PTR]]
 // CHECK-VECTOR: [[MEM_ADDR:%[^ ]+]] = bitcast i8* [[OVERFLOW_ARG_AREA]] to %struct.agg_v16i8*
@@ -554,7 +554,7 @@ struct agg_v16i8 va_agg_v16i8(__builtin_va_list l) { return __builtin_va_arg(l,
 // CHECK-VECTOR: ret void
 
 struct agg_v32i8 va_agg_v32i8(__builtin_va_list l) { return __builtin_va_arg(l, struct agg_v32i8); }
-// CHECK-LABEL: define void @va_agg_v32i8(%struct.agg_v32i8* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}})
+// CHECK-LABEL: define void @va_agg_v32i8(%struct.agg_v32i8* noalias sret align 32 %{{.*}}, %struct.__va_list_tag* %{{.*}})
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -576,7 +576,7 @@ struct agg_v32i8 va_agg_v32i8(__builtin_va_list l) { return __builtin_va_arg(l,
 // CHECK: [[VA_ARG_ADDR:%[^ ]+]] = phi %struct.agg_v32i8** [ [[REG_ADDR]], %{{.*}} ], [ [[MEM_ADDR]], %{{.*}} ]
 // CHECK: [[INDIRECT_ARG:%[^ ]+]] = load %struct.agg_v32i8*, %struct.agg_v32i8** [[VA_ARG_ADDR]]
 // CHECK: ret void
-// CHECK-VECTOR-LABEL: define void @va_agg_v32i8(%struct.agg_v32i8* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}})
+// CHECK-VECTOR-LABEL: define void @va_agg_v32i8(%struct.agg_v32i8* noalias sret align 8 %{{.*}}, %struct.__va_list_tag* %{{.*}})
 // CHECK-VECTOR: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK-VECTOR: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK-VECTOR: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5

diff  --git a/clang/test/CodeGen/systemz-abi.c b/clang/test/CodeGen/systemz-abi.c
index c46b9ec19626..35adbbe301c4 100644
--- a/clang/test/CodeGen/systemz-abi.c
+++ b/clang/test/CodeGen/systemz-abi.c
@@ -36,7 +36,7 @@ long long pass_longlong(long long arg) { return arg; }
 // CHECK-LABEL: define i64 @pass_longlong(i64 %{{.*}})
 
 __int128 pass_int128(__int128 arg) { return arg; }
-// CHECK-LABEL: define void @pass_int128(i128* noalias sret %{{.*}}, i128* %0)
+// CHECK-LABEL: define void @pass_int128(i128* noalias sret align 16 %{{.*}}, i128* %0)
 
 float pass_float(float arg) { return arg; }
 // CHECK-LABEL: define float @pass_float(float %{{.*}})
@@ -45,114 +45,114 @@ double pass_double(double arg) { return arg; }
 // CHECK-LABEL: define double @pass_double(double %{{.*}})
 
 long double pass_longdouble(long double arg) { return arg; }
-// CHECK-LABEL: define void @pass_longdouble(fp128* noalias sret %{{.*}}, fp128* %0)
+// CHECK-LABEL: define void @pass_longdouble(fp128* noalias sret align 8 %{{.*}}, fp128* %0)
 
 
 // Complex types
 
 _Complex char pass_complex_char(_Complex char arg) { return arg; }
-// CHECK-LABEL: define void @pass_complex_char({ i8, i8 }* noalias sret %{{.*}}, { i8, i8 }* %{{.*}}arg)
+// CHECK-LABEL: define void @pass_complex_char({ i8, i8 }* noalias sret align 1 %{{.*}}, { i8, i8 }* %{{.*}}arg)
 
 _Complex short pass_complex_short(_Complex short arg) { return arg; }
-// CHECK-LABEL: define void @pass_complex_short({ i16, i16 }* noalias sret %{{.*}}, { i16, i16 }* %{{.*}}arg)
+// CHECK-LABEL: define void @pass_complex_short({ i16, i16 }* noalias sret align 2 %{{.*}}, { i16, i16 }* %{{.*}}arg)
 
 _Complex int pass_complex_int(_Complex int arg) { return arg; }
-// CHECK-LABEL: define void @pass_complex_int({ i32, i32 }* noalias sret %{{.*}}, { i32, i32 }* %{{.*}}arg)
+// CHECK-LABEL: define void @pass_complex_int({ i32, i32 }* noalias sret align 4 %{{.*}}, { i32, i32 }* %{{.*}}arg)
 
 _Complex long pass_complex_long(_Complex long arg) { return arg; }
-// CHECK-LABEL: define void @pass_complex_long({ i64, i64 }* noalias sret %{{.*}}, { i64, i64 }* %{{.*}}arg)
+// CHECK-LABEL: define void @pass_complex_long({ i64, i64 }* noalias sret align 8 %{{.*}}, { i64, i64 }* %{{.*}}arg)
 
 _Complex long long pass_complex_longlong(_Complex long long arg) { return arg; }
-// CHECK-LABEL: define void @pass_complex_longlong({ i64, i64 }* noalias sret %{{.*}}, { i64, i64 }* %{{.*}}arg)
+// CHECK-LABEL: define void @pass_complex_longlong({ i64, i64 }* noalias sret align 8 %{{.*}}, { i64, i64 }* %{{.*}}arg)
 
 _Complex float pass_complex_float(_Complex float arg) { return arg; }
-// CHECK-LABEL: define void @pass_complex_float({ float, float }* noalias sret %{{.*}}, { float, float }* %{{.*}}arg)
+// CHECK-LABEL: define void @pass_complex_float({ float, float }* noalias sret align 4 %{{.*}}, { float, float }* %{{.*}}arg)
 
 _Complex double pass_complex_double(_Complex double arg) { return arg; }
-// CHECK-LABEL: define void @pass_complex_double({ double, double }* noalias sret %{{.*}}, { double, double }* %{{.*}}arg)
+// CHECK-LABEL: define void @pass_complex_double({ double, double }* noalias sret align 8 %{{.*}}, { double, double }* %{{.*}}arg)
 
 _Complex long double pass_complex_longdouble(_Complex long double arg) { return arg; }
-// CHECK-LABEL: define void @pass_complex_longdouble({ fp128, fp128 }* noalias sret %{{.*}}, { fp128, fp128 }* %{{.*}}arg)
+// CHECK-LABEL: define void @pass_complex_longdouble({ fp128, fp128 }* noalias sret align 8 %{{.*}}, { fp128, fp128 }* %{{.*}}arg)
 
 
 // Aggregate types
 
 struct agg_1byte { char a[1]; };
 struct agg_1byte pass_agg_1byte(struct agg_1byte arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_1byte(%struct.agg_1byte* noalias sret %{{.*}}, i8 %{{.*}})
+// CHECK-LABEL: define void @pass_agg_1byte(%struct.agg_1byte* noalias sret align 1 %{{.*}}, i8 %{{.*}})
 
 struct agg_2byte { char a[2]; };
 struct agg_2byte pass_agg_2byte(struct agg_2byte arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_2byte(%struct.agg_2byte* noalias sret %{{.*}}, i16 %{{.*}})
+// CHECK-LABEL: define void @pass_agg_2byte(%struct.agg_2byte* noalias sret align 1 %{{.*}}, i16 %{{.*}})
 
 struct agg_3byte { char a[3]; };
 struct agg_3byte pass_agg_3byte(struct agg_3byte arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_3byte(%struct.agg_3byte* noalias sret %{{.*}}, %struct.agg_3byte* %{{.*}})
+// CHECK-LABEL: define void @pass_agg_3byte(%struct.agg_3byte* noalias sret align 1 %{{.*}}, %struct.agg_3byte* %{{.*}})
 
 struct agg_4byte { char a[4]; };
 struct agg_4byte pass_agg_4byte(struct agg_4byte arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_4byte(%struct.agg_4byte* noalias sret %{{.*}}, i32 %{{.*}})
+// CHECK-LABEL: define void @pass_agg_4byte(%struct.agg_4byte* noalias sret align 1 %{{.*}}, i32 %{{.*}})
 
 struct agg_5byte { char a[5]; };
 struct agg_5byte pass_agg_5byte(struct agg_5byte arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_5byte(%struct.agg_5byte* noalias sret %{{.*}}, %struct.agg_5byte* %{{.*}})
+// CHECK-LABEL: define void @pass_agg_5byte(%struct.agg_5byte* noalias sret align 1 %{{.*}}, %struct.agg_5byte* %{{.*}})
 
 struct agg_6byte { char a[6]; };
 struct agg_6byte pass_agg_6byte(struct agg_6byte arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_6byte(%struct.agg_6byte* noalias sret %{{.*}}, %struct.agg_6byte* %{{.*}})
+// CHECK-LABEL: define void @pass_agg_6byte(%struct.agg_6byte* noalias sret align 1 %{{.*}}, %struct.agg_6byte* %{{.*}})
 
 struct agg_7byte { char a[7]; };
 struct agg_7byte pass_agg_7byte(struct agg_7byte arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_7byte(%struct.agg_7byte* noalias sret %{{.*}}, %struct.agg_7byte* %{{.*}})
+// CHECK-LABEL: define void @pass_agg_7byte(%struct.agg_7byte* noalias sret align 1 %{{.*}}, %struct.agg_7byte* %{{.*}})
 
 struct agg_8byte { char a[8]; };
 struct agg_8byte pass_agg_8byte(struct agg_8byte arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_8byte(%struct.agg_8byte* noalias sret %{{.*}}, i64 %{{.*}})
+// CHECK-LABEL: define void @pass_agg_8byte(%struct.agg_8byte* noalias sret align 1 %{{.*}}, i64 %{{.*}})
 
 struct agg_16byte { char a[16]; };
 struct agg_16byte pass_agg_16byte(struct agg_16byte arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_16byte(%struct.agg_16byte* noalias sret %{{.*}}, %struct.agg_16byte* %{{.*}})
+// CHECK-LABEL: define void @pass_agg_16byte(%struct.agg_16byte* noalias sret align 1 %{{.*}}, %struct.agg_16byte* %{{.*}})
 
 
 // Float-like aggregate types
 
 struct agg_float { float a; };
 struct agg_float pass_agg_float(struct agg_float arg) { return arg; }
-// HARD-FLOAT-LABEL: define void @pass_agg_float(%struct.agg_float* noalias sret %{{.*}}, float %{{.*}})
-// SOFT-FLOAT-LABEL: define void @pass_agg_float(%struct.agg_float* noalias sret %{{.*}}, i32 %{{.*}})
+// HARD-FLOAT-LABEL: define void @pass_agg_float(%struct.agg_float* noalias sret align 4 %{{.*}}, float %{{.*}})
+// SOFT-FLOAT-LABEL: define void @pass_agg_float(%struct.agg_float* noalias sret align 4 %{{.*}}, i32 %{{.*}})
 
 struct agg_double { double a; };
 struct agg_double pass_agg_double(struct agg_double arg) { return arg; }
-// HARD-FLOAT-LABEL: define void @pass_agg_double(%struct.agg_double* noalias sret %{{.*}}, double %{{.*}})
-// SOFT-FLOAT-LABEL: define void @pass_agg_double(%struct.agg_double* noalias sret %{{.*}}, i64 %{{.*}})
+// HARD-FLOAT-LABEL: define void @pass_agg_double(%struct.agg_double* noalias sret align 8 %{{.*}}, double %{{.*}})
+// SOFT-FLOAT-LABEL: define void @pass_agg_double(%struct.agg_double* noalias sret align 8 %{{.*}}, i64 %{{.*}})
 
 struct agg_longdouble { long double a; };
 struct agg_longdouble pass_agg_longdouble(struct agg_longdouble arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_longdouble(%struct.agg_longdouble* noalias sret %{{.*}}, %struct.agg_longdouble* %{{.*}})
+// CHECK-LABEL: define void @pass_agg_longdouble(%struct.agg_longdouble* noalias sret align 8 %{{.*}}, %struct.agg_longdouble* %{{.*}})
 
 struct agg_float_a8 { float a __attribute__((aligned (8))); };
 struct agg_float_a8 pass_agg_float_a8(struct agg_float_a8 arg) { return arg; }
-// HARD-FLOAT-LABEL: define void @pass_agg_float_a8(%struct.agg_float_a8* noalias sret %{{.*}}, double %{{.*}})
-// SOFT-FLOAT-LABEL: define void @pass_agg_float_a8(%struct.agg_float_a8* noalias sret %{{.*}}, i64 %{{.*}})
+// HARD-FLOAT-LABEL: define void @pass_agg_float_a8(%struct.agg_float_a8* noalias sret align 8 %{{.*}}, double %{{.*}})
+// SOFT-FLOAT-LABEL: define void @pass_agg_float_a8(%struct.agg_float_a8* noalias sret align 8 %{{.*}}, i64 %{{.*}})
 
 struct agg_float_a16 { float a __attribute__((aligned (16))); };
 struct agg_float_a16 pass_agg_float_a16(struct agg_float_a16 arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_float_a16(%struct.agg_float_a16* noalias sret %{{.*}}, %struct.agg_float_a16* %{{.*}})
+// CHECK-LABEL: define void @pass_agg_float_a16(%struct.agg_float_a16* noalias sret align 16 %{{.*}}, %struct.agg_float_a16* %{{.*}})
 
 
 // Verify that the following are *not* float-like aggregate types
 
 struct agg_nofloat1 { float a; float b; };
 struct agg_nofloat1 pass_agg_nofloat1(struct agg_nofloat1 arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_nofloat1(%struct.agg_nofloat1* noalias sret %{{.*}}, i64 %{{.*}})
+// CHECK-LABEL: define void @pass_agg_nofloat1(%struct.agg_nofloat1* noalias sret align 4 %{{.*}}, i64 %{{.*}})
 
 struct agg_nofloat2 { float a; int b; };
 struct agg_nofloat2 pass_agg_nofloat2(struct agg_nofloat2 arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_nofloat2(%struct.agg_nofloat2* noalias sret %{{.*}}, i64 %{{.*}})
+// CHECK-LABEL: define void @pass_agg_nofloat2(%struct.agg_nofloat2* noalias sret align 4 %{{.*}}, i64 %{{.*}})
 
 struct agg_nofloat3 { float a; int : 0; };
 struct agg_nofloat3 pass_agg_nofloat3(struct agg_nofloat3 arg) { return arg; }
-// CHECK-LABEL: define void @pass_agg_nofloat3(%struct.agg_nofloat3* noalias sret %{{.*}}, i32 %{{.*}})
+// CHECK-LABEL: define void @pass_agg_nofloat3(%struct.agg_nofloat3* noalias sret align 4 %{{.*}}, i32 %{{.*}})
 
 
 // Accessing variable argument lists
@@ -257,7 +257,7 @@ double va_double(__builtin_va_list l) { return __builtin_va_arg(l, double); }
 // CHECK: ret double [[RET]]
 
 long double va_longdouble(__builtin_va_list l) { return __builtin_va_arg(l, long double); }
-// CHECK-LABEL: define void @va_longdouble(fp128* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}})
+// CHECK-LABEL: define void @va_longdouble(fp128* noalias sret align 8 %{{.*}}, %struct.__va_list_tag* %{{.*}})
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -283,7 +283,7 @@ long double va_longdouble(__builtin_va_list l) { return __builtin_va_arg(l, long
 // CHECK: ret void
 
 _Complex char va_complex_char(__builtin_va_list l) { return __builtin_va_arg(l, _Complex char); }
-// CHECK-LABEL: define void @va_complex_char({ i8, i8 }* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}}
+// CHECK-LABEL: define void @va_complex_char({ i8, i8 }* noalias sret align 1 %{{.*}}, %struct.__va_list_tag* %{{.*}}
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -307,7 +307,7 @@ _Complex char va_complex_char(__builtin_va_list l) { return __builtin_va_arg(l,
 // CHECK: ret void
 
 struct agg_1byte va_agg_1byte(__builtin_va_list l) { return __builtin_va_arg(l, struct agg_1byte); }
-// CHECK-LABEL: define void @va_agg_1byte(%struct.agg_1byte* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}}
+// CHECK-LABEL: define void @va_agg_1byte(%struct.agg_1byte* noalias sret align 1 %{{.*}}, %struct.__va_list_tag* %{{.*}}
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -330,7 +330,7 @@ struct agg_1byte va_agg_1byte(__builtin_va_list l) { return __builtin_va_arg(l,
 // CHECK: ret void
 
 struct agg_2byte va_agg_2byte(__builtin_va_list l) { return __builtin_va_arg(l, struct agg_2byte); }
-// CHECK-LABEL: define void @va_agg_2byte(%struct.agg_2byte* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}}
+// CHECK-LABEL: define void @va_agg_2byte(%struct.agg_2byte* noalias sret align 1 %{{.*}}, %struct.__va_list_tag* %{{.*}}
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -353,7 +353,7 @@ struct agg_2byte va_agg_2byte(__builtin_va_list l) { return __builtin_va_arg(l,
 // CHECK: ret void
 
 struct agg_3byte va_agg_3byte(__builtin_va_list l) { return __builtin_va_arg(l, struct agg_3byte); }
-// CHECK-LABEL: define void @va_agg_3byte(%struct.agg_3byte* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}}
+// CHECK-LABEL: define void @va_agg_3byte(%struct.agg_3byte* noalias sret align 1 %{{.*}}, %struct.__va_list_tag* %{{.*}}
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -377,7 +377,7 @@ struct agg_3byte va_agg_3byte(__builtin_va_list l) { return __builtin_va_arg(l,
 // CHECK: ret void
 
 struct agg_4byte va_agg_4byte(__builtin_va_list l) { return __builtin_va_arg(l, struct agg_4byte); }
-// CHECK-LABEL: define void @va_agg_4byte(%struct.agg_4byte* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}}
+// CHECK-LABEL: define void @va_agg_4byte(%struct.agg_4byte* noalias sret align 1 %{{.*}}, %struct.__va_list_tag* %{{.*}}
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -400,7 +400,7 @@ struct agg_4byte va_agg_4byte(__builtin_va_list l) { return __builtin_va_arg(l,
 // CHECK: ret void
 
 struct agg_8byte va_agg_8byte(__builtin_va_list l) { return __builtin_va_arg(l, struct agg_8byte); }
-// CHECK-LABEL: define void @va_agg_8byte(%struct.agg_8byte* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}}
+// CHECK-LABEL: define void @va_agg_8byte(%struct.agg_8byte* noalias sret align 1 %{{.*}}, %struct.__va_list_tag* %{{.*}}
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -423,7 +423,7 @@ struct agg_8byte va_agg_8byte(__builtin_va_list l) { return __builtin_va_arg(l,
 // CHECK: ret void
 
 struct agg_float va_agg_float(__builtin_va_list l) { return __builtin_va_arg(l, struct agg_float); }
-// CHECK-LABEL: define void @va_agg_float(%struct.agg_float* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}}
+// CHECK-LABEL: define void @va_agg_float(%struct.agg_float* noalias sret align 4 %{{.*}}, %struct.__va_list_tag* %{{.*}}
 // HARD-FLOAT: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 1
 // SOFT-FLOAT: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
@@ -449,7 +449,7 @@ struct agg_float va_agg_float(__builtin_va_list l) { return __builtin_va_arg(l,
 // CHECK: ret void
 
 struct agg_double va_agg_double(__builtin_va_list l) { return __builtin_va_arg(l, struct agg_double); }
-// CHECK-LABEL: define void @va_agg_double(%struct.agg_double* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}}
+// CHECK-LABEL: define void @va_agg_double(%struct.agg_double* noalias sret align 8 %{{.*}}, %struct.__va_list_tag* %{{.*}}
 // HARD-FLOAT: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 1
 // SOFT-FLOAT: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
@@ -475,7 +475,7 @@ struct agg_double va_agg_double(__builtin_va_list l) { return __builtin_va_arg(l
 // CHECK: ret void
 
 struct agg_longdouble va_agg_longdouble(__builtin_va_list l) { return __builtin_va_arg(l, struct agg_longdouble); }
-// CHECK-LABEL: define void @va_agg_longdouble(%struct.agg_longdouble* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}}
+// CHECK-LABEL: define void @va_agg_longdouble(%struct.agg_longdouble* noalias sret align 8 %{{.*}}, %struct.__va_list_tag* %{{.*}}
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -499,7 +499,7 @@ struct agg_longdouble va_agg_longdouble(__builtin_va_list l) { return __builtin_
 // CHECK: ret void
 
 struct agg_float_a8 va_agg_float_a8(__builtin_va_list l) { return __builtin_va_arg(l, struct agg_float_a8); }
-// CHECK-LABEL: define void @va_agg_float_a8(%struct.agg_float_a8* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}}
+// CHECK-LABEL: define void @va_agg_float_a8(%struct.agg_float_a8* noalias sret align 8 %{{.*}}, %struct.__va_list_tag* %{{.*}}
 // HARD-FLOAT: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 1
 // SOFT-FLOAT: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
@@ -525,7 +525,7 @@ struct agg_float_a8 va_agg_float_a8(__builtin_va_list l) { return __builtin_va_a
 // CHECK: ret void
 
 struct agg_float_a16 va_agg_float_a16(__builtin_va_list l) { return __builtin_va_arg(l, struct agg_float_a16); }
-// CHECK-LABEL: define void @va_agg_float_a16(%struct.agg_float_a16* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}}
+// CHECK-LABEL: define void @va_agg_float_a16(%struct.agg_float_a16* noalias sret align 16 %{{.*}}, %struct.__va_list_tag* %{{.*}}
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -549,7 +549,7 @@ struct agg_float_a16 va_agg_float_a16(__builtin_va_list l) { return __builtin_va
 // CHECK: ret void
 
 struct agg_nofloat1 va_agg_nofloat1(__builtin_va_list l) { return __builtin_va_arg(l, struct agg_nofloat1); }
-// CHECK-LABEL: define void @va_agg_nofloat1(%struct.agg_nofloat1* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}}
+// CHECK-LABEL: define void @va_agg_nofloat1(%struct.agg_nofloat1* noalias sret align 4 %{{.*}}, %struct.__va_list_tag* %{{.*}}
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -572,7 +572,7 @@ struct agg_nofloat1 va_agg_nofloat1(__builtin_va_list l) { return __builtin_va_a
 // CHECK: ret void
 
 struct agg_nofloat2 va_agg_nofloat2(__builtin_va_list l) { return __builtin_va_arg(l, struct agg_nofloat2); }
-// CHECK-LABEL: define void @va_agg_nofloat2(%struct.agg_nofloat2* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}}
+// CHECK-LABEL: define void @va_agg_nofloat2(%struct.agg_nofloat2* noalias sret align 4 %{{.*}}, %struct.__va_list_tag* %{{.*}}
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
@@ -595,7 +595,7 @@ struct agg_nofloat2 va_agg_nofloat2(__builtin_va_list l) { return __builtin_va_a
 // CHECK: ret void
 
 struct agg_nofloat3 va_agg_nofloat3(__builtin_va_list l) { return __builtin_va_arg(l, struct agg_nofloat3); }
-// CHECK-LABEL: define void @va_agg_nofloat3(%struct.agg_nofloat3* noalias sret %{{.*}}, %struct.__va_list_tag* %{{.*}}
+// CHECK-LABEL: define void @va_agg_nofloat3(%struct.agg_nofloat3* noalias sret align 4 %{{.*}}, %struct.__va_list_tag* %{{.*}}
 // CHECK: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %{{.*}}, i32 0, i32 0
 // CHECK: [[REG_COUNT:%[^ ]+]] = load i64, i64* [[REG_COUNT_PTR]]
 // CHECK: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5

diff  --git a/clang/test/CodeGen/systemz-abi.cpp b/clang/test/CodeGen/systemz-abi.cpp
index d3088f5299c3..cb381e88dd8f 100644
--- a/clang/test/CodeGen/systemz-abi.cpp
+++ b/clang/test/CodeGen/systemz-abi.cpp
@@ -7,5 +7,5 @@
 
 struct agg_float_cpp { float a; int : 0; };
 struct agg_float_cpp pass_agg_float_cpp(struct agg_float_cpp arg) { return arg; }
-// CHECK-LABEL: define void @_Z18pass_agg_float_cpp13agg_float_cpp(%struct.agg_float_cpp* noalias sret %{{.*}}, float %{{.*}})
-// SOFT-FLOAT:  define void @_Z18pass_agg_float_cpp13agg_float_cpp(%struct.agg_float_cpp* noalias sret %{{.*}}, i32 %{{.*}})
+// CHECK-LABEL: define void @_Z18pass_agg_float_cpp13agg_float_cpp(%struct.agg_float_cpp* noalias sret align 4 %{{.*}}, float %{{.*}})
+// SOFT-FLOAT:  define void @_Z18pass_agg_float_cpp13agg_float_cpp(%struct.agg_float_cpp* noalias sret align 4 %{{.*}}, i32 %{{.*}})

diff  --git a/clang/test/CodeGen/systemz-inline-asm.c b/clang/test/CodeGen/systemz-inline-asm.c
index 7c273dac579e..2dc5023c55cb 100644
--- a/clang/test/CodeGen/systemz-inline-asm.c
+++ b/clang/test/CodeGen/systemz-inline-asm.c
@@ -123,7 +123,7 @@ double test_f64(double f, double g) {
 long double test_f128(long double f, long double g) {
   asm("axbr %0, %2" : "=f" (f) : "0" (f), "f" (g));
   return f;
-// CHECK: define void @test_f128(fp128* noalias nocapture sret [[DEST:%.*]], fp128* nocapture readonly %0, fp128* nocapture readonly %1)
+// CHECK: define void @test_f128(fp128* noalias nocapture sret align 8 [[DEST:%.*]], fp128* nocapture readonly %0, fp128* nocapture readonly %1)
 // CHECK: %f = load fp128, fp128* %0
 // CHECK: %g = load fp128, fp128* %1
 // CHECK: [[RESULT:%.*]] = tail call fp128 asm "axbr $0, $2", "=f,0,f"(fp128 %f, fp128 %g)

diff  --git a/clang/test/CodeGen/vectorcall.c b/clang/test/CodeGen/vectorcall.c
index 77db600a7f89..5e052990498f 100644
--- a/clang/test/CodeGen/vectorcall.c
+++ b/clang/test/CodeGen/vectorcall.c
@@ -86,8 +86,8 @@ struct HVA4 __vectorcall hva6(struct HVA4 a, struct HVA4 b) { return b;}
 // X64: define dso_local x86_vectorcallcc %struct.HVA4 @"\01hva6@@128"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* %b)
 
 struct HVA5 __vectorcall hva7() {struct HVA5 a = {}; return a;}
-// X32: define dso_local x86_vectorcallcc void @"\01hva7@@0"(%struct.HVA5* inreg noalias sret %agg.result)
-// X64: define dso_local x86_vectorcallcc void @"\01hva7@@0"(%struct.HVA5* noalias sret %agg.result)
+// X32: define dso_local x86_vectorcallcc void @"\01hva7@@0"(%struct.HVA5* inreg noalias sret align 16 %agg.result)
+// X64: define dso_local x86_vectorcallcc void @"\01hva7@@0"(%struct.HVA5* noalias sret align 16 %agg.result)
 
 v4f32 __vectorcall hva8(v4f32 a, v4f32 b, v4f32 c, v4f32 d, int e, v4f32 f) {return f;}
 // X32: define dso_local x86_vectorcallcc <4 x float> @"\01hva8@@84"(<4 x float> inreg %a, <4 x float> inreg %b, <4 x float> inreg %c, <4 x float> inreg %d, i32 inreg %e, <4 x float> inreg %f)

diff  --git a/clang/test/CodeGen/wasm-arguments.c b/clang/test/CodeGen/wasm-arguments.c
index 25978d8a0990..2f9d7e4b3eca 100644
--- a/clang/test/CodeGen/wasm-arguments.c
+++ b/clang/test/CodeGen/wasm-arguments.c
@@ -25,9 +25,9 @@ typedef struct {
 void struct_arg(s1 i) {}
 
 // Structs should be returned sret and not simplified by the frontend.
-// WEBASSEMBLY32: define void @struct_ret(%struct.s1* noalias sret %agg.result)
+// WEBASSEMBLY32: define void @struct_ret(%struct.s1* noalias sret align 4 %agg.result)
 // WEBASSEMBLY32: ret void
-// WEBASSEMBLY64: define void @struct_ret(%struct.s1* noalias sret %agg.result)
+// WEBASSEMBLY64: define void @struct_ret(%struct.s1* noalias sret align 4 %agg.result)
 // WEBASSEMBLY64: ret void
 
 // Except with the experimental multivalue ABI, which returns structs by value
@@ -103,9 +103,9 @@ union simple_union {
 void union_arg(union simple_union s) {}
 
 // Unions should be returned sret and not simplified by the frontend.
-// WEBASSEMBLY32: define void @union_ret(%union.simple_union* noalias sret %agg.result)
+// WEBASSEMBLY32: define void @union_ret(%union.simple_union* noalias sret align 4 %agg.result)
 // WEBASSEMBLY32: ret void
-// WEBASSEMBLY64: define void @union_ret(%union.simple_union* noalias sret %agg.result)
+// WEBASSEMBLY64: define void @union_ret(%union.simple_union* noalias sret align 4 %agg.result)
 // WEBASSEMBLY64: ret void
 
 // The experimental multivalue ABI returns them by value, though.
@@ -129,8 +129,8 @@ typedef struct {
 void bitfield_arg(bitfield1 bf1) {}
 
 // And returned via sret pointers.
-// WEBASSEMBLY32: define void @bitfield_ret(%struct.bitfield1* noalias sret %agg.result)
-// WEBASSEMBLY64: define void @bitfield_ret(%struct.bitfield1* noalias sret %agg.result)
+// WEBASSEMBLY32: define void @bitfield_ret(%struct.bitfield1* noalias sret align 4 %agg.result)
+// WEBASSEMBLY64: define void @bitfield_ret(%struct.bitfield1* noalias sret align 4 %agg.result)
 
 // Except, of course, in the experimental multivalue ABI
 // EXPERIMENTAL-MV: define %struct.bitfield1 @bitfield_ret()

diff  --git a/clang/test/CodeGen/wasm-varargs.c b/clang/test/CodeGen/wasm-varargs.c
index 23506875ac9d..ba1f2d632b4e 100644
--- a/clang/test/CodeGen/wasm-varargs.c
+++ b/clang/test/CodeGen/wasm-varargs.c
@@ -80,7 +80,7 @@ struct S test_struct(char *fmt, ...) {
   return v;
 }
 
-// CHECK:      define void @test_struct([[STRUCT_S:%[^,=]+]]*{{.*}} noalias sret [[AGG_RESULT:%.*]], i8*{{.*}} %fmt, ...) {{.*}} {
+// CHECK:      define void @test_struct([[STRUCT_S:%[^,=]+]]*{{.*}} noalias sret align 4 [[AGG_RESULT:%.*]], i8*{{.*}} %fmt, ...) {{.*}} {
 // CHECK:        [[FMT_ADDR:%[^,=]+]] = alloca i8*, align 4
 // CHECK-NEXT:   [[VA:%[^,=]+]] = alloca i8*, align 4
 // CHECK-NEXT:   store i8* %fmt, i8** [[FMT_ADDR]], align 4
@@ -112,7 +112,7 @@ struct S test_empty_struct(char *fmt, ...) {
   return v;
 }
 
-// CHECK:      define void @test_empty_struct([[STRUCT_S:%[^,=]+]]*{{.*}} noalias sret [[AGG_RESULT:%.*]], i8*{{.*}} %fmt, ...) {{.*}} {
+// CHECK:      define void @test_empty_struct([[STRUCT_S:%[^,=]+]]*{{.*}} noalias sret align 4 [[AGG_RESULT:%.*]], i8*{{.*}} %fmt, ...) {{.*}} {
 // CHECK:        [[FMT_ADDR:%[^,=]+]] = alloca i8*, align 4
 // CHECK-NEXT:   [[VA:%[^,=]+]] = alloca i8*, align 4
 // CHECK-NEXT:   [[U:%[^,=]+]] = alloca [[STRUCT_Z:%[^,=]+]], align 1

diff  --git a/clang/test/CodeGen/windows-struct-abi.c b/clang/test/CodeGen/windows-struct-abi.c
index 5ffc4fad6473..9fa175f13658 100644
--- a/clang/test/CodeGen/windows-struct-abi.c
+++ b/clang/test/CodeGen/windows-struct-abi.c
@@ -34,7 +34,7 @@ struct f4 {
 
 struct f4 return_f4(void) { while (1); }
 
-// CHECK: define dso_local void @return_f4(%struct.f4* noalias sret %agg.result)
+// CHECK: define dso_local void @return_f4(%struct.f4* noalias sret align 4 %agg.result)
 
 void receive_f4(struct f4 a0) { }
 

diff  --git a/clang/test/CodeGen/x86_32-arguments-darwin.c b/clang/test/CodeGen/x86_32-arguments-darwin.c
index 71b8a2b9fc84..c88c1b8603b6 100644
--- a/clang/test/CodeGen/x86_32-arguments-darwin.c
+++ b/clang/test/CodeGen/x86_32-arguments-darwin.c
@@ -71,7 +71,7 @@ struct s10 {
 // Small vectors and 1 x {i64,double} are returned in registers
 
 // CHECK: i32 @f11()
-// CHECK: void @f12(<2 x i32>* noalias sret %agg.result)
+// CHECK: void @f12(<2 x i32>* noalias sret align 8 %agg.result)
 // CHECK: i64 @f13()
 // CHECK: i64 @f14()
 // CHECK: <2 x i64> @f15()
@@ -93,11 +93,11 @@ T16 f16(void) { while (1) {} }
 // 128-bits).
 
 // CHECK: i32 @f17()
-// CHECK: void @f18(%{{.*}}* noalias sret %agg.result)
-// CHECK: void @f19(%{{.*}}* noalias sret %agg.result)
-// CHECK: void @f20(%{{.*}}* noalias sret %agg.result)
-// CHECK: void @f21(%{{.*}}* noalias sret %agg.result)
-// CHECK: void @f22(%{{.*}}* noalias sret %agg.result)
+// CHECK: void @f18(%{{.*}}* noalias sret align 8 %agg.result)
+// CHECK: void @f19(%{{.*}}* noalias sret align 8 %agg.result)
+// CHECK: void @f20(%{{.*}}* noalias sret align 8 %agg.result)
+// CHECK: void @f21(%{{.*}}* noalias sret align 16 %agg.result)
+// CHECK: void @f22(%{{.*}}* noalias sret align 16 %agg.result)
 struct { T11 a; } f17(void) { while (1) {} }
 struct { T12 a; } f18(void) { while (1) {} }
 struct { T13 a; } f19(void) { while (1) {} }
@@ -116,11 +116,11 @@ struct { struct {} a; struct { float a[1]; } b; } f25(void) { while (1) {} }
 
 // Small structures are handled recursively
 // CHECK: i32 @f26()
-// CHECK: void @f27(%struct.s27* noalias sret %agg.result)
+// CHECK: void @f27(%struct.s27* noalias sret align 1 %agg.result)
 struct s26 { struct { char a, b; } a; struct { char a, b; } b; } f26(void) { while (1) {} }
 struct s27 { struct { char a, b, c; } a; struct { char a; } b; } f27(void) { while (1) {} }
 
-// CHECK: void @f28(%struct.s28* noalias sret %agg.result)
+// CHECK: void @f28(%struct.s28* noalias sret align 4 %agg.result)
 struct s28 { int a; int b[]; } f28(void) { while (1) {} }
 
 // CHECK-LABEL: define i16 @f29()
@@ -150,7 +150,7 @@ struct s36 { struct { int : 0; } a[2][10]; char b; char c; } f36(void) { while (
 // CHECK-LABEL: define float @f37()
 struct s37 { float c[1][1]; } f37(void) { while (1) {} }
 
-// CHECK-LABEL: define void @f38(%struct.s38* noalias sret %agg.result)
+// CHECK-LABEL: define void @f38(%struct.s38* noalias sret align 2 %agg.result)
 struct s38 { char a[3]; short b; } f38(void) { while (1) {} }
 
 // CHECK-LABEL: define void @f39(%struct.s39* byval(%struct.s39) align 16 %x)

diff  --git a/clang/test/CodeGen/x86_32-arguments-iamcu.c b/clang/test/CodeGen/x86_32-arguments-iamcu.c
index e391c711ea10..a134f5d84a77 100644
--- a/clang/test/CodeGen/x86_32-arguments-iamcu.c
+++ b/clang/test/CodeGen/x86_32-arguments-iamcu.c
@@ -58,7 +58,7 @@ st4_t retSmallStruct(st4_t r) { return r; }
 // CHECK-LABEL: define i64 @retPaddedStruct(i32 %r.coerce0, i32 %r.coerce1)
 st5_t retPaddedStruct(st5_t r) { return r; }
 
-// CHECK-LABEL: define void @retLargeStruct(%struct.st12_t* noalias sret %agg.result, i32 %i1, %struct.st12_t* byval(%struct.st12_t) align 4 %r)
+// CHECK-LABEL: define void @retLargeStruct(%struct.st12_t* noalias sret align 4 %agg.result, i32 %i1, %struct.st12_t* byval(%struct.st12_t) align 4 %r)
 st12_t retLargeStruct(int i1, st12_t r) { return r; }
 
 // CHECK-LABEL: define i32 @varArgs(i32 %i1, ...)

diff  --git a/clang/test/CodeGen/x86_64-arguments-nacl.c b/clang/test/CodeGen/x86_64-arguments-nacl.c
index ea4483422dfe..e7287a90765b 100644
--- a/clang/test/CodeGen/x86_64-arguments-nacl.c
+++ b/clang/test/CodeGen/x86_64-arguments-nacl.c
@@ -61,7 +61,7 @@ void f12_1(struct s12 a0) {}
 
 // Check that sret parameter is accounted for when checking available integer
 // registers.
-// CHECK: define void @f13(%struct.s13_0* noalias sret %agg.result, i32 %a, i32 %b, i32 %c, i32 %d, {{.*}}* byval({{.*}}) align 8 %e, i32 %f)
+// CHECK: define void @f13(%struct.s13_0* noalias sret align 8 %agg.result, i32 %a, i32 %b, i32 %c, i32 %d, {{.*}}* byval({{.*}}) align 8 %e, i32 %f)
 
 struct s13_0 { long long f0[3]; };
 struct s13_1 { long long f0[2]; };

diff  --git a/clang/test/CodeGen/x86_64-arguments-win32.c b/clang/test/CodeGen/x86_64-arguments-win32.c
index b43107c65ef6..4f7c4ded4b16 100644
--- a/clang/test/CodeGen/x86_64-arguments-win32.c
+++ b/clang/test/CodeGen/x86_64-arguments-win32.c
@@ -27,5 +27,5 @@ void f6(_Complex double a) {}
 // CHECK-LABEL: define dso_local i64 @f7()
 _Complex float f7() { return 1.0; }
 
-// CHECK-LABEL: define dso_local void @f8({ double, double }* noalias sret %agg.result)
+// CHECK-LABEL: define dso_local void @f8({ double, double }* noalias sret align 8 %agg.result)
 _Complex double f8() { return 1.0; }

diff  --git a/clang/test/CodeGen/x86_64-arguments.c b/clang/test/CodeGen/x86_64-arguments.c
index 107571d8140b..273b2706f10a 100644
--- a/clang/test/CodeGen/x86_64-arguments.c
+++ b/clang/test/CodeGen/x86_64-arguments.c
@@ -47,7 +47,7 @@ void f7(e7 a0) {
 
 // Test merging/passing of upper eightbyte with X87 class.
 //
-// CHECK-LABEL: define void @f8_1(%union.u8* noalias sret %agg.result)
+// CHECK-LABEL: define void @f8_1(%union.u8* noalias sret align 16 %agg.result)
 // CHECK-LABEL: define void @f8_2(%union.u8* byval(%union.u8) align 16 %a0)
 union u8 {
   long double a;
@@ -63,7 +63,7 @@ struct s9 { int a; int b; int : 0; } f9(void) { while (1) {} }
 struct s10 { int a; int b; int : 0; };
 void f10(struct s10 a0) {}
 
-// CHECK-LABEL: define void @f11(%union.anon* noalias sret %agg.result)
+// CHECK-LABEL: define void @f11(%union.anon* noalias sret align 16 %agg.result)
 union { long double a; float b; } f11() { while (1) {} }
 
 // CHECK-LABEL: define i32 @f12_0()
@@ -74,7 +74,7 @@ void f12_1(struct s12 a0) {}
 
 // Check that sret parameter is accounted for when checking available integer
 // registers.
-// CHECK: define void @f13(%struct.s13_0* noalias sret %agg.result, i32 %a, i32 %b, i32 %c, i32 %d, {{.*}}* byval({{.*}}) align 8 %e, i32 %f)
+// CHECK: define void @f13(%struct.s13_0* noalias sret align 8 %agg.result, i32 %a, i32 %b, i32 %c, i32 %d, {{.*}}* byval({{.*}}) align 8 %e, i32 %f)
 
 struct s13_0 { long long f0[3]; };
 struct s13_1 { long long f0[2]; };

diff  --git a/clang/test/CodeGenCXX/arm-cc.cpp b/clang/test/CodeGenCXX/arm-cc.cpp
index 6027746b9ae8..e738cd31fb54 100644
--- a/clang/test/CodeGenCXX/arm-cc.cpp
+++ b/clang/test/CodeGenCXX/arm-cc.cpp
@@ -16,5 +16,5 @@ void baz() {
   zed(a);
 }
 
-// CHECK: declare void @_Z3fooPv(%class.SMLoc* sret, i8*)
+// CHECK: declare void @_Z3fooPv(%class.SMLoc* sret align 4, i8*)
 // CHECK: declare void @_Z3zed5SMLoc(%class.SMLoc*)

diff  --git a/clang/test/CodeGenCXX/builtin-source-location.cpp b/clang/test/CodeGenCXX/builtin-source-location.cpp
index f8bfd7d940b9..cdc896209c85 100644
--- a/clang/test/CodeGenCXX/builtin-source-location.cpp
+++ b/clang/test/CodeGenCXX/builtin-source-location.cpp
@@ -65,7 +65,7 @@ SL const_init_global = SL::current();
 //
 // CHECK-GLOBAL-TWO: define internal void @__cxx_global_var_init()
 // CHECK-GLOBAL-TWO-NOT: ret
-// CHECK-GLOBAL-TWO: call void @_ZN15source_location11bad_currentEjjPKcS1_(%struct.source_location* sret @runtime_init_global,
+// CHECK-GLOBAL-TWO: call void @_ZN15source_location11bad_currentEjjPKcS1_(%struct.source_location* sret align 8 @runtime_init_global,
 // CHECK-GLOBAL-TWO-SAME: i32 1100, i32 {{[0-9]+}}, {{[^@]*}}@[[FILE]], {{[^@]*}}@[[FUNC]],
 #line 1100 "test_runtime_init.cpp"
 SL runtime_init_global = SL::bad_current();
@@ -77,7 +77,7 @@ extern "C" void test_function() {
 // CHECK-LOCAL-ONE-DAG: @[[FILE:.*]] = {{.*}}c"test_current.cpp\00"
 // CHECK-LOCAL-ONE-DAG: @[[FUNC:.*]] = {{.*}}c"test_function\00"
 //
-// CHECK-LOCAL-ONE:  call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret %local,
+// CHECK-LOCAL-ONE:  call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret align 8 %local,
 // CHECK-LOCAL-ONE-SAME: i32 2100, i32 {{[0-9]+}},
 // CHECK-LOCAL-ONE-SAME: {{[^@]*}}@[[FILE]], {{[^@]*}}@[[FUNC]],
 #line 2100 "test_current.cpp"
@@ -102,7 +102,7 @@ struct TestInit {
 // CHECK-CTOR-GLOBAL: define internal void @__cxx_global_var_init.{{[0-9]+}}()
 // CHECK-CTOR-GLOBAL-NOT: ret
 //
-// CHECK-CTOR-GLOBAL: call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret %[[TMP_ONE:[^,]*]],
+// CHECK-CTOR-GLOBAL: call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret align 8 %[[TMP_ONE:[^,]*]],
 // CHECK-CTOR-GLOBAL-SAME: i32 3400, i32 {{[0-9]+}}, {{[^@]*}}@[[FILE]], {{[^@]*}}@[[FUNC]],
 // CHECK-CTOR-GLOBAL-NEXT: call void @_ZN8TestInitC1E15source_location(%struct.TestInit* @GlobalInitVal, %struct.source_location* {{.*}}%[[TMP_ONE]])
 #line 3400 "GlobalInitVal.cpp"
@@ -117,7 +117,7 @@ extern "C" void test_init_function() {
 // CHECK-CTOR-LOCAL: define void @test_init_function()
 // CHECK-CTOR-LOCAL-NOT: ret
 //
-// CHECK-CTOR-LOCAL: call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret %[[TMP:[^,]*]],
+// CHECK-CTOR-LOCAL: call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret align 8 %[[TMP:[^,]*]],
 // CHECK-CTOR-LOCAL-SAME: i32 3500, i32 {{[0-9]+}}, {{[^@]*}}@[[FILE]], {{[^@]*}}@[[FUNC]],
 // CHECK-CTOR-LOCAL-NEXT: call void @_ZN8TestInitC1E15source_location(%struct.TestInit* %init_local, %struct.source_location* {{.*}}%[[TMP]])
 #line 3500 "LocalInitVal.cpp"
@@ -153,7 +153,7 @@ extern "C" void test_init_function_constexpr() {
 // CHECK-CONSTEXPR-LOCAL-DAG: @[[FILE:.*]] = {{.*}}c"ConstexprLocal.cpp\00"
 //
 // CHECK-CONSTEXPR-LOCAL: define void @test_init_function_constexpr()
-// CHECK-CONSTEXPR-LOCAL: call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret %[[TMP:[^,]*]],
+// CHECK-CONSTEXPR-LOCAL: call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret align 8 %[[TMP:[^,]*]],
 // CHECK-CONSTEXPR-LOCAL-SAME: i32 4600, i32 {{[0-9]+}}, {{[^@]*}}@[[FILE]], {{[^@]*}}@[[FUNC]]
 // CHECK-CONSTEXPR-LOCAL: call void @_ZN17TestInitConstexprC1E15source_location(%struct.TestInitConstexpr* %local_val, {{.*}}%[[TMP]])
 #line 4600 "ConstexprLocal.cpp"
@@ -189,7 +189,7 @@ extern "C" void test_agg_init() {
 //
 // CHECK-AGG-BRACE: define void @test_agg_init()
 // CHECK-AGG-BRACE: %[[I2:.*]] = getelementptr inbounds %struct.TestInitAgg, %struct.TestInitAgg* %local_brace_init, i32 0, i32 1
-// CHECK-AGG-BRACE-NEXT: call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret %[[I2]],
+// CHECK-AGG-BRACE-NEXT: call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret align 8 %[[I2]],
 // CHECK-AGG-BRACE-SAME: i32 5700, i32 {{[0-9]+}}, {{[^@]*}}@[[FILE]], {{[^@]*}}@[[FUNC]]
 #line 5600 "BraceInitStart.cpp"
   TestInitAgg local_brace_init{
@@ -203,7 +203,7 @@ extern "C" void test_agg_init() {
 //
 // CHECK-AGG-EQUAL: define void @test_agg_init()
 // CHECK-AGG-EQUAL: %[[I2:.*]] = getelementptr inbounds %struct.TestInitAgg, %struct.TestInitAgg* %local_equal_init, i32 0, i32 1
-// CHECK-AGG-EQUAL-NEXT: call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret %[[I2]],
+// CHECK-AGG-EQUAL-NEXT: call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret align 8 %[[I2]],
 // CHECK-AGG-EQUAL-SAME: i32 5900, i32 {{[0-9]+}}, {{[^@]*}}@[[FILE]], {{[^@]*}}@[[FUNC]]
 #line 5800 "EqualInitStart.cpp"
   TestInitAgg local_equal_init =
@@ -220,11 +220,11 @@ extern "C" void test_agg_init() {
 // CHECK-AGG-LIST: define void @test_agg_init()
 //
 // CHECK-AGG-LIST: %[[I1:.*]] =  getelementptr inbounds %struct.TestInitAgg, %struct.TestInitAgg* %local_list_init, i32 0, i32 0
-// CHECK-AGG-LIST-NEXT: call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret %[[I1]],
+// CHECK-AGG-LIST-NEXT: call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret align 8 %[[I1]],
 // CHECK-AGG-LIST-SAME: i32 6100, i32 {{[0-9]+}}, {{[^@]*}}@[[FILE_ELEM]], {{[^@]*}}@[[FUNC]]
 //
 // CHECK-AGG-LIST: %[[I2:.*]] = getelementptr inbounds %struct.TestInitAgg, %struct.TestInitAgg* %local_list_init, i32 0, i32 1
-// CHECK-AGG-LIST-NEXT: call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret %[[I2]],
+// CHECK-AGG-LIST-NEXT: call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret align 8 %[[I2]],
 // CHECK-AGG-LIST-SAME: i32 6200, i32 {{[0-9]+}}, {{[^@]*}}@[[FILE_DEFAULT]], {{[^@]*}}@[[FUNC]]
 #line 6000 "InitListStart.cpp"
   TestInitAgg local_list_init =
@@ -258,7 +258,7 @@ void test_template() {
 // CHECK-TEMPL-NEXT: entry:
 // CHECK-TEMPL-NOT: ret
 //
-// CHECK-TEMPL:  call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret %[[TMP:[^,]*]],
+// CHECK-TEMPL:  call void @_ZN15source_location7currentEjjPKcS1_(%struct.source_location* sret align 8 %[[TMP:[^,]*]],
 // CHECK-TEMPL-SAME: i32 7300, i32 {{[0-9]+}}, {{[^@]*}}@[[FILE]], {{[^@]*}}@[[FUNC]]
 #line 7300 "local_templ.cpp"
   TestTemplate<T, V> local_templ;

diff  --git a/clang/test/CodeGenCXX/call-with-static-chain.cpp b/clang/test/CodeGenCXX/call-with-static-chain.cpp
index ac1149b52edd..17e676433e1a 100644
--- a/clang/test/CodeGenCXX/call-with-static-chain.cpp
+++ b/clang/test/CodeGenCXX/call-with-static-chain.cpp
@@ -25,8 +25,8 @@ void test() {
   // CHECK64: call i32 bitcast (i32 (i64, i64, i64, i64, i64, i64, %struct.A*)* @f1 to i32 (i8*, i64, i64, i64, i64, i64, i64, %struct.A*)*)(i8* nest bitcast (i32 (i64, i64, i64, i64, i64, i64, %struct.A*)* @f1 to i8*)
   __builtin_call_with_static_chain(f1(a, a, a, a), f1);
 
-  // CHECK32: call void bitcast (void (%struct.B*)* @f2 to void (%struct.B*, i8*)*)(%struct.B* sret %{{[0-9a-z]+}}, i8* nest bitcast (void (%struct.B*)* @f2 to i8*))
-  // CHECK64: call void bitcast (void (%struct.B*)* @f2 to void (%struct.B*, i8*)*)(%struct.B* sret %{{[0-9a-z]+}}, i8* nest bitcast (void (%struct.B*)* @f2 to i8*))
+  // CHECK32: call void bitcast (void (%struct.B*)* @f2 to void (%struct.B*, i8*)*)(%struct.B* sret align 4 %{{[0-9a-z]+}}, i8* nest bitcast (void (%struct.B*)* @f2 to i8*))
+  // CHECK64: call void bitcast (void (%struct.B*)* @f2 to void (%struct.B*, i8*)*)(%struct.B* sret align 8 %{{[0-9a-z]+}}, i8* nest bitcast (void (%struct.B*)* @f2 to i8*))
   __builtin_call_with_static_chain(f2(), f2);
 
   // CHECK32: call i64 bitcast (i64 ()* @f3 to i64 (i8*)*)(i8* nest bitcast (i64 ()* @f3 to i8*))

diff  --git a/clang/test/CodeGenCXX/conditional-gnu-ext.cpp b/clang/test/CodeGenCXX/conditional-gnu-ext.cpp
index 613dd65ee7c9..ec6d09799418 100644
--- a/clang/test/CodeGenCXX/conditional-gnu-ext.cpp
+++ b/clang/test/CodeGenCXX/conditional-gnu-ext.cpp
@@ -94,7 +94,7 @@ namespace test3 {
   B test1() {
     // CHECK-LABEL:    define void @_ZN5test35test1Ev(
     // CHECK:      [[TEMP:%.*]] = alloca [[B]],
-    // CHECK:      call  void @_ZN5test312test1_helperEv([[B]]* sret [[TEMP]])
+    // CHECK:      call  void @_ZN5test312test1_helperEv([[B]]* sret align 1 [[TEMP]])
     // CHECK-NEXT: [[BOOL:%.*]] = call zeroext i1 @_ZN5test31BcvbEv([[B]]* [[TEMP]])
     // CHECK-NEXT: br i1 [[BOOL]]
     // CHECK:      call void @_ZN5test31BC1ERKS0_([[B]]* [[RESULT:%.*]], [[B]]* dereferenceable({{[0-9]+}}) [[TEMP]])
@@ -115,7 +115,7 @@ namespace test3 {
     // CHECK-NEXT: [[T0:%.*]] = load [[B]]*, [[B]]** [[X]]
     // CHECK-NEXT: [[BOOL:%.*]] = call zeroext i1 @_ZN5test31BcvbEv([[B]]* [[T0]])
     // CHECK-NEXT: br i1 [[BOOL]]
-    // CHECK:      call void @_ZN5test31BcvNS_1AEEv([[A:%.*]]* sret [[RESULT:%.*]], [[B]]* [[T0]])
+    // CHECK:      call void @_ZN5test31BcvNS_1AEEv([[A:%.*]]* sret align 1 [[RESULT:%.*]], [[B]]* [[T0]])
     // CHECK-NEXT: br label
     // CHECK:      call void @_ZN5test31AC1Ev([[A]]* [[RESULT]])
     // CHECK-NEXT: br label
@@ -126,10 +126,10 @@ namespace test3 {
   A test3() {
     // CHECK-LABEL:    define void @_ZN5test35test3Ev(
     // CHECK:      [[TEMP:%.*]] = alloca [[B]],
-    // CHECK:      call  void @_ZN5test312test3_helperEv([[B]]* sret [[TEMP]])
+    // CHECK:      call  void @_ZN5test312test3_helperEv([[B]]* sret align 1 [[TEMP]])
     // CHECK-NEXT: [[BOOL:%.*]] = call zeroext i1 @_ZN5test31BcvbEv([[B]]* [[TEMP]])
     // CHECK-NEXT: br i1 [[BOOL]]
-    // CHECK:      call void @_ZN5test31BcvNS_1AEEv([[A]]* sret [[RESULT:%.*]], [[B]]* [[TEMP]])
+    // CHECK:      call void @_ZN5test31BcvNS_1AEEv([[A]]* sret align 1 [[RESULT:%.*]], [[B]]* [[TEMP]])
     // CHECK-NEXT: br label
     // CHECK:      call void @_ZN5test31AC1Ev([[A]]* [[RESULT]])
     // CHECK-NEXT: br label

diff  --git a/clang/test/CodeGenCXX/cxx1z-copy-omission.cpp b/clang/test/CodeGenCXX/cxx1z-copy-omission.cpp
index b33a21808175..dd821949772a 100644
--- a/clang/test/CodeGenCXX/cxx1z-copy-omission.cpp
+++ b/clang/test/CodeGenCXX/cxx1z-copy-omission.cpp
@@ -19,7 +19,7 @@ void g() {
   // CHECK: %[[A:.*]] = alloca
   // CHECK-NOT: alloca
   // CHECK-NOT: call
-  // CHECK: call {{.*}} @_Z1fv({{.*}}* sret %[[A]])
+  // CHECK: call {{.*}} @_Z1fv({{.*}}* sret align 4 %[[A]])
   A a = A( A{ f() } );
   // CHECK-NOT: call
 
@@ -40,7 +40,7 @@ void h() {
   // CHECK-NOT: alloca
   // CHECK-NOT: call
 
-  // CHECK: call {{.*}} @_Z1fv({{.*}}* sret %[[A]])
+  // CHECK: call {{.*}} @_Z1fv({{.*}}* sret align 4 %[[A]])
   // CHECK-NOT: call
   // CHECK: call {{.*}} @_Z1f1A({{.*}}* %[[A]])
   f(f());

diff  --git a/clang/test/CodeGenCXX/cxx1z-lambda-star-this.cpp b/clang/test/CodeGenCXX/cxx1z-lambda-star-this.cpp
index 114791c6558b..fc13c197076f 100644
--- a/clang/test/CodeGenCXX/cxx1z-lambda-star-this.cpp
+++ b/clang/test/CodeGenCXX/cxx1z-lambda-star-this.cpp
@@ -10,7 +10,7 @@ namespace ns1 {
 int X = A{}.foo()();
 } //end ns1
 
-//CHECK: @"?foo at A@@QAE?A?<auto>@@XZ"(%struct.A* %this, %class.anon* noalias sret %[[A_LAMBDA_RETVAL:.*]])
+//CHECK: @"?foo at A@@QAE?A?<auto>@@XZ"(%struct.A* %this, %class.anon* noalias sret align 8 %[[A_LAMBDA_RETVAL:.*]])
 // get the first object with the closure type, which is of type 'struct.A'
 //CHECK: %[[I0:.+]] = getelementptr inbounds %[[A_LAMBDA]], %[[A_LAMBDA]]* %[[A_LAMBDA_RETVAL]], i32 0, i32 0
 //CHECK: %[[I1:.+]] = bitcast %struct.A* %[[I0]] to i8*
@@ -26,6 +26,6 @@ struct B {
 namespace ns2 {
 int X = B{}.bar()();
 }
-//CHECK: @"?bar at B@@QAE?A?<auto>@@XZ"(%struct.B* %this, %class.anon.0* noalias sret %agg.result)
+//CHECK: @"?bar at B@@QAE?A?<auto>@@XZ"(%struct.B* %this, %class.anon.0* noalias sret align 4 %agg.result)
 //CHECK: %[[I20:.+]] = getelementptr inbounds %class.anon.0, %class.anon.0* %agg.result, i32 0, i32 0
 //CHECK: store %struct.B* %this1, %struct.B** %[[I20]], align 4

diff  --git a/clang/test/CodeGenCXX/exceptions.cpp b/clang/test/CodeGenCXX/exceptions.cpp
index 90db4023a9f4..97bc94d78c1a 100644
--- a/clang/test/CodeGenCXX/exceptions.cpp
+++ b/clang/test/CodeGenCXX/exceptions.cpp
@@ -146,12 +146,12 @@ namespace test1 {
     // CHECK:      [[NEW:%.*]] = call noalias nonnull i8* @_Znwm(i64 8)
     // CHECK-NEXT: store i1 true, i1* [[ACTIVE]]
     // CHECK-NEXT: [[CAST:%.*]] = bitcast i8* [[NEW]] to [[A]]*
-    // CHECK-NEXT: invoke void @_ZN5test15makeBEv([[B:%.*]]* sret [[T0:%.*]])
+    // CHECK-NEXT: invoke void @_ZN5test15makeBEv([[B:%.*]]* sret align 4 [[T0:%.*]])
     // CHECK:      [[T1:%.*]] = invoke i32 @_ZN5test11BcviEv([[B]]* [[T0]])
     // CHECK:      invoke void @_ZN5test11AC1Ei([[A]]* [[CAST]], i32 [[T1]])
     // CHECK:      store i1 false, i1* [[ACTIVE]]
     // CHECK-NEXT: store [[A]]* [[CAST]], [[A]]** [[X]], align 8
-    // CHECK:      invoke void @_ZN5test15makeBEv([[B:%.*]]* sret [[T2:%.*]])
+    // CHECK:      invoke void @_ZN5test15makeBEv([[B:%.*]]* sret align 4 [[T2:%.*]])
     // CHECK:      [[RET:%.*]] = load [[A]]*, [[A]]** [[X]], align 8
 
     // CHECK98:      invoke void @_ZN5test11BD1Ev([[B]]* [[T2]])
@@ -239,7 +239,7 @@ namespace test3 {
     // CHECK-NEXT: store i8* [[FOO]], i8** [[SAVED1]]
     // CHECK-NEXT: store i1 true, i1* [[CLEANUPACTIVE]]
     // CHECK-NEXT: [[CAST:%.*]] = bitcast i8* [[NEW]] to [[A]]*
-    // CHECK-NEXT: invoke void @_ZN5test35makeAEv([[A]]* sret [[CAST]])
+    // CHECK-NEXT: invoke void @_ZN5test35makeAEv([[A]]* sret align 8 [[CAST]])
     // CHECK: br label
     //   -> cond.end
             new(foo(),10.0) A(makeA()) :

diff  --git a/clang/test/CodeGenCXX/homogeneous-aggregates.cpp b/clang/test/CodeGenCXX/homogeneous-aggregates.cpp
index 05fb7f1d20a4..51a4549d38d7 100644
--- a/clang/test/CodeGenCXX/homogeneous-aggregates.cpp
+++ b/clang/test/CodeGenCXX/homogeneous-aggregates.cpp
@@ -38,10 +38,10 @@ struct I2 : Base2 {};
 struct I3 : Base2 {};
 struct D5 : I1, I2, I3 {}; // homogeneous aggregate
 
-// PPC: define void @_Z7func_D12D1(%struct.D1* noalias sret %agg.result, [3 x i64] %x.coerce)
-// ARM32: define arm_aapcs_vfpcc void @_Z7func_D12D1(%struct.D1* noalias sret %agg.result, [3 x i64] %x.coerce)
-// ARM64: define void @_Z7func_D12D1(%struct.D1* noalias sret %agg.result, %struct.D1* %x)
-// X64: define dso_local x86_vectorcallcc void @"\01_Z7func_D12D1@@24"(%struct.D1* noalias sret %agg.result, %struct.D1* %x)
+// PPC: define void @_Z7func_D12D1(%struct.D1* noalias sret align 8 %agg.result, [3 x i64] %x.coerce)
+// ARM32: define arm_aapcs_vfpcc void @_Z7func_D12D1(%struct.D1* noalias sret align 8 %agg.result, [3 x i64] %x.coerce)
+// ARM64: define void @_Z7func_D12D1(%struct.D1* noalias sret align 8 %agg.result, %struct.D1* %x)
+// X64: define dso_local x86_vectorcallcc void @"\01_Z7func_D12D1@@24"(%struct.D1* noalias sret align 8 %agg.result, %struct.D1* %x)
 D1 CC func_D1(D1 x) { return x; }
 
 // PPC: define [3 x double] @_Z7func_D22D2([3 x double] %x.coerce)
@@ -50,9 +50,9 @@ D1 CC func_D1(D1 x) { return x; }
 // X64: define dso_local x86_vectorcallcc %struct.D2 @"\01_Z7func_D22D2@@24"(%struct.D2 inreg %x.coerce)
 D2 CC func_D2(D2 x) { return x; }
 
-// PPC: define void @_Z7func_D32D3(%struct.D3* noalias sret %agg.result, [4 x i64] %x.coerce)
-// ARM32: define arm_aapcs_vfpcc void @_Z7func_D32D3(%struct.D3* noalias sret %agg.result, [4 x i64] %x.coerce)
-// ARM64: define void @_Z7func_D32D3(%struct.D3* noalias sret %agg.result, %struct.D3* %x)
+// PPC: define void @_Z7func_D32D3(%struct.D3* noalias sret align 8 %agg.result, [4 x i64] %x.coerce)
+// ARM32: define arm_aapcs_vfpcc void @_Z7func_D32D3(%struct.D3* noalias sret align 8 %agg.result, [4 x i64] %x.coerce)
+// ARM64: define void @_Z7func_D32D3(%struct.D3* noalias sret align 8 %agg.result, %struct.D3* %x)
 D3 CC func_D3(D3 x) { return x; }
 
 // PPC: define [4 x double] @_Z7func_D42D4([4 x double] %x.coerce)

diff  --git a/clang/test/CodeGenCXX/lambda-expressions.cpp b/clang/test/CodeGenCXX/lambda-expressions.cpp
index 566132ad64e3..c75f84f03871 100644
--- a/clang/test/CodeGenCXX/lambda-expressions.cpp
+++ b/clang/test/CodeGenCXX/lambda-expressions.cpp
@@ -194,8 +194,8 @@ namespace pr28595 {
 // CHECK-NEXT: call i32 @"_ZZ1fvENK3$_6clEii"
 // CHECK-NEXT: ret i32
 
-// CHECK-LABEL: define internal void @"_ZZ1hvEN4$_118__invokeEv"(%struct.A* noalias sret %agg.result) {{.*}} {
-// CHECK: call void @"_ZZ1hvENK4$_11clEv"(%struct.A* sret %agg.result,
+// CHECK-LABEL: define internal void @"_ZZ1hvEN4$_118__invokeEv"(%struct.A* noalias sret align 1 %agg.result) {{.*}} {
+// CHECK: call void @"_ZZ1hvENK4$_11clEv"(%struct.A* sret align 1 %agg.result,
 // CHECK-NEXT: ret void
 struct A { ~A(); };
 void h() {

diff  --git a/clang/test/CodeGenCXX/microsoft-abi-byval-sret.cpp b/clang/test/CodeGenCXX/microsoft-abi-byval-sret.cpp
index 2c940d22010b..a92049c3a799 100644
--- a/clang/test/CodeGenCXX/microsoft-abi-byval-sret.cpp
+++ b/clang/test/CodeGenCXX/microsoft-abi-byval-sret.cpp
@@ -49,7 +49,7 @@ A B::qux(A x) {
 }
 
 // CHECK-LABEL: define dso_local x86_fastcallcc void @"?qux at B@@QAI?AUA@@U2@@Z"
-// CHECK:       (%struct.B* inreg %this, %struct.A* inreg noalias sret %agg.result, <{ %struct.A }>* inalloca %0)
+// CHECK:       (%struct.B* inreg %this, %struct.A* inreg noalias sret align 4 %agg.result, <{ %struct.A }>* inalloca %0)
 // CHECK:   ret void
 
 int main() {
@@ -67,4 +67,4 @@ int main() {
 // CHECK: call x86_stdcallcc %struct.A* @"?baz at B@@QAG?AUA@@U2@@Z"
 // CHECK:       (<{ %struct.B*, %struct.A*, %struct.A }>* inalloca %{{[^,]*}})
 // CHECK: call x86_fastcallcc void @"?qux at B@@QAI?AUA@@U2@@Z"
-// CHECK:       (%struct.B* inreg %{{[^,]*}}, %struct.A* inreg sret %{{.*}}, <{ %struct.A }>* inalloca %{{[^,]*}})
+// CHECK:       (%struct.B* inreg %{{[^,]*}}, %struct.A* inreg sret align 4 %{{.*}}, <{ %struct.A }>* inalloca %{{[^,]*}})

diff  --git a/clang/test/CodeGenCXX/microsoft-abi-byval-thunks.cpp b/clang/test/CodeGenCXX/microsoft-abi-byval-thunks.cpp
index ed4e1fbb36fd..0ca68cccb790 100644
--- a/clang/test/CodeGenCXX/microsoft-abi-byval-thunks.cpp
+++ b/clang/test/CodeGenCXX/microsoft-abi-byval-thunks.cpp
@@ -86,10 +86,10 @@ C::C() {} // force emission
 // CHECK32-NEXT: ret %"struct.sret_thunk::Agg"* %[[rv]]
 
 // CHECK64-LABEL: define linkonce_odr dso_local void @"?foo at C@sret_thunk@@W7EAA?AUAgg at 2@U32@@Z"
-// CHECK64:             (%"struct.sret_thunk::C"* %this, %"struct.sret_thunk::Agg"* noalias sret %agg.result, %"struct.sret_thunk::Agg"* %x)
+// CHECK64:             (%"struct.sret_thunk::C"* %this, %"struct.sret_thunk::Agg"* noalias sret align 4 %agg.result, %"struct.sret_thunk::Agg"* %x)
 // CHECK64:   getelementptr i8, i8* %{{.*}}, i32 -8
 // CHECK64:   call void @"?foo at C@sret_thunk@@UEAA?AUAgg at 2@U32@@Z"
-// CHECK64:       (%"struct.sret_thunk::C"* %{{.*}}, %"struct.sret_thunk::Agg"* sret %agg.result, %"struct.sret_thunk::Agg"* %x)
+// CHECK64:       (%"struct.sret_thunk::C"* %{{.*}}, %"struct.sret_thunk::Agg"* sret align 4 %agg.result, %"struct.sret_thunk::Agg"* %x)
 // CHECK64-NOT: call
 // CHECK64:   ret void
 }

diff  --git a/clang/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp b/clang/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp
index 5a8bdf78100f..534aa7f80469 100644
--- a/clang/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp
+++ b/clang/test/CodeGenCXX/microsoft-abi-cdecl-method-sret.cpp
@@ -19,9 +19,9 @@ S C::variadic_sret(const char *f, ...) { return S(); }
 S C::cdecl_sret() { return S(); }
 S C::byval_and_sret(S a) { return S(); }
 
-// CHECK: define dso_local void @"?variadic_sret at C@@QAA?AUS@@PBDZZ"(%struct.C* %this, %struct.S* noalias sret %agg.result, i8* %f, ...)
-// CHECK: define dso_local void @"?cdecl_sret at C@@QAA?AUS@@XZ"(%struct.C* %this, %struct.S* noalias sret %agg.result)
-// CHECK: define dso_local void @"?byval_and_sret at C@@QAA?AUS@@U2@@Z"(%struct.C* %this, %struct.S* noalias sret %agg.result, %struct.S* byval(%struct.S) align 4 %a)
+// CHECK: define dso_local void @"?variadic_sret at C@@QAA?AUS@@PBDZZ"(%struct.C* %this, %struct.S* noalias sret align 4 %agg.result, i8* %f, ...)
+// CHECK: define dso_local void @"?cdecl_sret at C@@QAA?AUS@@XZ"(%struct.C* %this, %struct.S* noalias sret align 4 %agg.result)
+// CHECK: define dso_local void @"?byval_and_sret at C@@QAA?AUS@@U2@@Z"(%struct.C* %this, %struct.S* noalias sret align 4 %agg.result, %struct.S* byval(%struct.S) align 4 %a)
 
 int main() {
   C c;
@@ -41,4 +41,4 @@ struct A {
 S A::f(int x) {
   return S();
 }
-// CHECK-LABEL: define dso_local x86_fastcallcc void @"?f at A@@QAI?AUS@@H at Z"(%struct.A* inreg %this, %struct.S* inreg noalias sret %agg.result, i32 %x)
+// CHECK-LABEL: define dso_local x86_fastcallcc void @"?f at A@@QAI?AUS@@H at Z"(%struct.A* inreg %this, %struct.S* inreg noalias sret align 4 %agg.result, i32 %x)

diff  --git a/clang/test/CodeGenCXX/microsoft-abi-eh-cleanups.cpp b/clang/test/CodeGenCXX/microsoft-abi-eh-cleanups.cpp
index 7e8619b8b0ec..60fa5c799111 100644
--- a/clang/test/CodeGenCXX/microsoft-abi-eh-cleanups.cpp
+++ b/clang/test/CodeGenCXX/microsoft-abi-eh-cleanups.cpp
@@ -18,9 +18,9 @@ void HasEHCleanup() {
 // WIN32-LABEL: define dso_local void @"?HasEHCleanup@@YAXXZ"() {{.*}} {
 // WIN32:   %[[base:.*]] = call i8* @llvm.stacksave()
 //    If this call throws, we have to restore the stack.
-// WIN32:   call void @"?getA@@YA?AUA@@XZ"(%struct.A* sret %{{.*}})
+// WIN32:   call void @"?getA@@YA?AUA@@XZ"(%struct.A* sret align 4 %{{.*}})
 //    If this call throws, we have to cleanup the first temporary.
-// WIN32:   invoke void @"?getA@@YA?AUA@@XZ"(%struct.A* sret %{{.*}})
+// WIN32:   invoke void @"?getA@@YA?AUA@@XZ"(%struct.A* sret align 4 %{{.*}})
 //    If this call throws, we have to cleanup the stacksave.
 // WIN32:   call i32 @"?TakesTwo@@YAHUA@@0 at Z"
 // WIN32:   call void @llvm.stackrestore

diff  --git a/clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp b/clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp
index 9fb9f39cb083..8c8d4b7383d6 100644
--- a/clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp
+++ b/clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp
@@ -84,45 +84,45 @@ void call_bools_and_chars() {
 
 // Returning structs that fit into a register.
 Small small_return() { return Small(); }
-// LINUX-LABEL: define void @_Z12small_returnv(%struct.Small* noalias sret %agg.result)
+// LINUX-LABEL: define void @_Z12small_returnv(%struct.Small* noalias sret align 4 %agg.result)
 // WIN32: define dso_local i32 @"?small_return@@YA?AUSmall@@XZ"()
 // WIN64: define dso_local i32 @"?small_return@@YA?AUSmall@@XZ"()
 
 Medium medium_return() { return Medium(); }
-// LINUX-LABEL: define void @_Z13medium_returnv(%struct.Medium* noalias sret %agg.result)
+// LINUX-LABEL: define void @_Z13medium_returnv(%struct.Medium* noalias sret align 4 %agg.result)
 // WIN32: define dso_local i64 @"?medium_return@@YA?AUMedium@@XZ"()
 // WIN64: define dso_local i64 @"?medium_return@@YA?AUMedium@@XZ"()
 
 // Returning structs that fit into a register but are not POD.
 SmallCpp11NotCpp03Pod small_non_pod_return() { return SmallCpp11NotCpp03Pod(); }
-// LINUX-LABEL: define void @_Z20small_non_pod_returnv(%struct.SmallCpp11NotCpp03Pod* noalias sret %agg.result)
-// WIN32: define dso_local void @"?small_non_pod_return@@YA?AUSmallCpp11NotCpp03Pod@@XZ"(%struct.SmallCpp11NotCpp03Pod* noalias sret %agg.result)
-// WIN64: define dso_local void @"?small_non_pod_return@@YA?AUSmallCpp11NotCpp03Pod@@XZ"(%struct.SmallCpp11NotCpp03Pod* noalias sret %agg.result)
+// LINUX-LABEL: define void @_Z20small_non_pod_returnv(%struct.SmallCpp11NotCpp03Pod* noalias sret align 4 %agg.result)
+// WIN32: define dso_local void @"?small_non_pod_return@@YA?AUSmallCpp11NotCpp03Pod@@XZ"(%struct.SmallCpp11NotCpp03Pod* noalias sret align 4 %agg.result)
+// WIN64: define dso_local void @"?small_non_pod_return@@YA?AUSmallCpp11NotCpp03Pod@@XZ"(%struct.SmallCpp11NotCpp03Pod* noalias sret align 4 %agg.result)
 
 SmallWithCtor small_with_ctor_return() { return SmallWithCtor(); }
-// LINUX-LABEL: define void @_Z22small_with_ctor_returnv(%struct.SmallWithCtor* noalias sret %agg.result)
-// WIN32: define dso_local void @"?small_with_ctor_return@@YA?AUSmallWithCtor@@XZ"(%struct.SmallWithCtor* noalias sret %agg.result)
-// WIN64: define dso_local void @"?small_with_ctor_return@@YA?AUSmallWithCtor@@XZ"(%struct.SmallWithCtor* noalias sret %agg.result)
+// LINUX-LABEL: define void @_Z22small_with_ctor_returnv(%struct.SmallWithCtor* noalias sret align 4 %agg.result)
+// WIN32: define dso_local void @"?small_with_ctor_return@@YA?AUSmallWithCtor@@XZ"(%struct.SmallWithCtor* noalias sret align 4 %agg.result)
+// WIN64: define dso_local void @"?small_with_ctor_return@@YA?AUSmallWithCtor@@XZ"(%struct.SmallWithCtor* noalias sret align 4 %agg.result)
 // FIXME: The 'sret' mark here doesn't seem to be enough to convince LLVM to
 // preserve the hidden sret pointer in R0 across the function.
-// WOA: define dso_local arm_aapcs_vfpcc void @"?small_with_ctor_return@@YA?AUSmallWithCtor@@XZ"(%struct.SmallWithCtor* noalias sret %agg.result)
+// WOA: define dso_local arm_aapcs_vfpcc void @"?small_with_ctor_return@@YA?AUSmallWithCtor@@XZ"(%struct.SmallWithCtor* noalias sret align 4 %agg.result)
 
 SmallWithVftable small_with_vftable_return() { return SmallWithVftable(); }
-// LINUX-LABEL: define void @_Z25small_with_vftable_returnv(%struct.SmallWithVftable* noalias sret %agg.result)
-// WIN32: define dso_local void @"?small_with_vftable_return@@YA?AUSmallWithVftable@@XZ"(%struct.SmallWithVftable* noalias sret %agg.result)
-// WIN64: define dso_local void @"?small_with_vftable_return@@YA?AUSmallWithVftable@@XZ"(%struct.SmallWithVftable* noalias sret %agg.result)
+// LINUX-LABEL: define void @_Z25small_with_vftable_returnv(%struct.SmallWithVftable* noalias sret align 4 %agg.result)
+// WIN32: define dso_local void @"?small_with_vftable_return@@YA?AUSmallWithVftable@@XZ"(%struct.SmallWithVftable* noalias sret align 4 %agg.result)
+// WIN64: define dso_local void @"?small_with_vftable_return@@YA?AUSmallWithVftable@@XZ"(%struct.SmallWithVftable* noalias sret align 8 %agg.result)
 
 MediumWithCopyCtor medium_with_copy_ctor_return() { return MediumWithCopyCtor(); }
-// LINUX-LABEL: define void @_Z28medium_with_copy_ctor_returnv(%struct.MediumWithCopyCtor* noalias sret %agg.result)
-// WIN32: define dso_local void @"?medium_with_copy_ctor_return@@YA?AUMediumWithCopyCtor@@XZ"(%struct.MediumWithCopyCtor* noalias sret %agg.result)
-// WIN64: define dso_local void @"?medium_with_copy_ctor_return@@YA?AUMediumWithCopyCtor@@XZ"(%struct.MediumWithCopyCtor* noalias sret %agg.result)
-// WOA: define dso_local arm_aapcs_vfpcc void @"?medium_with_copy_ctor_return@@YA?AUMediumWithCopyCtor@@XZ"(%struct.MediumWithCopyCtor* noalias sret %agg.result)
+// LINUX-LABEL: define void @_Z28medium_with_copy_ctor_returnv(%struct.MediumWithCopyCtor* noalias sret align 4 %agg.result)
+// WIN32: define dso_local void @"?medium_with_copy_ctor_return@@YA?AUMediumWithCopyCtor@@XZ"(%struct.MediumWithCopyCtor* noalias sret align 4 %agg.result)
+// WIN64: define dso_local void @"?medium_with_copy_ctor_return@@YA?AUMediumWithCopyCtor@@XZ"(%struct.MediumWithCopyCtor* noalias sret align 4 %agg.result)
+// WOA: define dso_local arm_aapcs_vfpcc void @"?medium_with_copy_ctor_return@@YA?AUMediumWithCopyCtor@@XZ"(%struct.MediumWithCopyCtor* noalias sret align 4 %agg.result)
 
 // Returning a large struct that doesn't fit into a register.
 Big big_return() { return Big(); }
-// LINUX-LABEL: define void @_Z10big_returnv(%struct.Big* noalias sret %agg.result)
-// WIN32: define dso_local void @"?big_return@@YA?AUBig@@XZ"(%struct.Big* noalias sret %agg.result)
-// WIN64: define dso_local void @"?big_return@@YA?AUBig@@XZ"(%struct.Big* noalias sret %agg.result)
+// LINUX-LABEL: define void @_Z10big_returnv(%struct.Big* noalias sret align 4 %agg.result)
+// WIN32: define dso_local void @"?big_return@@YA?AUBig@@XZ"(%struct.Big* noalias sret align 4 %agg.result)
+// WIN64: define dso_local void @"?big_return@@YA?AUBig@@XZ"(%struct.Big* noalias sret align 4 %agg.result)
 
 
 void small_arg(Small s) {}
@@ -181,7 +181,7 @@ void small_arg_with_dtor(SmallWithDtor s) {}
 
 // Test that the eligible non-aggregate is passed directly, but returned
 // indirectly on ARM64 Windows.
-// WOA64: define dso_local void @"?small_arg_with_private_member@@YA?AUSmallWithPrivate@@U1@@Z"(%struct.SmallWithPrivate* inreg noalias sret %agg.result, i64 %s.coerce) {{.*}} {
+// WOA64: define dso_local void @"?small_arg_with_private_member@@YA?AUSmallWithPrivate@@U1@@Z"(%struct.SmallWithPrivate* inreg noalias sret align 4 %agg.result, i64 %s.coerce) {{.*}} {
 SmallWithPrivate small_arg_with_private_member(SmallWithPrivate s) { return s; }
 
 void call_small_arg_with_dtor() {
@@ -281,24 +281,24 @@ void pass_ref_field() {
 class Class {
  public:
   Small thiscall_method_small() { return Small(); }
-  // LINUX: define {{.*}} void @_ZN5Class21thiscall_method_smallEv(%struct.Small* noalias sret %agg.result, %class.Class* %this)
-  // WIN32: define {{.*}} x86_thiscallcc void @"?thiscall_method_small at Class@@QAE?AUSmall@@XZ"(%class.Class* %this, %struct.Small* noalias sret %agg.result)
-  // WIN64: define linkonce_odr dso_local void @"?thiscall_method_small at Class@@QEAA?AUSmall@@XZ"(%class.Class* %this, %struct.Small* noalias sret %agg.result)
+  // LINUX: define {{.*}} void @_ZN5Class21thiscall_method_smallEv(%struct.Small* noalias sret align 4 %agg.result, %class.Class* %this)
+  // WIN32: define {{.*}} x86_thiscallcc void @"?thiscall_method_small at Class@@QAE?AUSmall@@XZ"(%class.Class* %this, %struct.Small* noalias sret align 4 %agg.result)
+  // WIN64: define linkonce_odr dso_local void @"?thiscall_method_small at Class@@QEAA?AUSmall@@XZ"(%class.Class* %this, %struct.Small* noalias sret align 4 %agg.result)
 
   SmallWithCtor thiscall_method_small_with_ctor() { return SmallWithCtor(); }
-  // LINUX: define {{.*}} void @_ZN5Class31thiscall_method_small_with_ctorEv(%struct.SmallWithCtor* noalias sret %agg.result, %class.Class* %this)
-  // WIN32: define {{.*}} x86_thiscallcc void @"?thiscall_method_small_with_ctor at Class@@QAE?AUSmallWithCtor@@XZ"(%class.Class* %this, %struct.SmallWithCtor* noalias sret %agg.result)
-  // WIN64: define linkonce_odr dso_local void @"?thiscall_method_small_with_ctor at Class@@QEAA?AUSmallWithCtor@@XZ"(%class.Class* %this, %struct.SmallWithCtor* noalias sret %agg.result)
+  // LINUX: define {{.*}} void @_ZN5Class31thiscall_method_small_with_ctorEv(%struct.SmallWithCtor* noalias sret align 4 %agg.result, %class.Class* %this)
+  // WIN32: define {{.*}} x86_thiscallcc void @"?thiscall_method_small_with_ctor at Class@@QAE?AUSmallWithCtor@@XZ"(%class.Class* %this, %struct.SmallWithCtor* noalias sret align 4 %agg.result)
+  // WIN64: define linkonce_odr dso_local void @"?thiscall_method_small_with_ctor at Class@@QEAA?AUSmallWithCtor@@XZ"(%class.Class* %this, %struct.SmallWithCtor* noalias sret align 4 %agg.result)
 
   Small __cdecl cdecl_method_small() { return Small(); }
-  // LINUX: define {{.*}} void @_ZN5Class18cdecl_method_smallEv(%struct.Small* noalias sret %agg.result, %class.Class* %this)
-  // WIN32: define {{.*}} void @"?cdecl_method_small at Class@@QAA?AUSmall@@XZ"(%class.Class* %this, %struct.Small* noalias sret %agg.result)
-  // WIN64: define linkonce_odr dso_local void @"?cdecl_method_small at Class@@QEAA?AUSmall@@XZ"(%class.Class* %this, %struct.Small* noalias sret %agg.result)
+  // LINUX: define {{.*}} void @_ZN5Class18cdecl_method_smallEv(%struct.Small* noalias sret align 4 %agg.result, %class.Class* %this)
+  // WIN32: define {{.*}} void @"?cdecl_method_small at Class@@QAA?AUSmall@@XZ"(%class.Class* %this, %struct.Small* noalias sret align 4 %agg.result)
+  // WIN64: define linkonce_odr dso_local void @"?cdecl_method_small at Class@@QEAA?AUSmall@@XZ"(%class.Class* %this, %struct.Small* noalias sret align 4 %agg.result)
 
   Big __cdecl cdecl_method_big() { return Big(); }
-  // LINUX: define {{.*}} void @_ZN5Class16cdecl_method_bigEv(%struct.Big* noalias sret %agg.result, %class.Class* %this)
-  // WIN32: define {{.*}} void @"?cdecl_method_big at Class@@QAA?AUBig@@XZ"(%class.Class* %this, %struct.Big* noalias sret %agg.result)
-  // WIN64: define linkonce_odr dso_local void @"?cdecl_method_big at Class@@QEAA?AUBig@@XZ"(%class.Class* %this, %struct.Big* noalias sret %agg.result)
+  // LINUX: define {{.*}} void @_ZN5Class16cdecl_method_bigEv(%struct.Big* noalias sret align 4 %agg.result, %class.Class* %this)
+  // WIN32: define {{.*}} void @"?cdecl_method_big at Class@@QAA?AUBig@@XZ"(%class.Class* %this, %struct.Big* noalias sret align 4 %agg.result)
+  // WIN64: define linkonce_odr dso_local void @"?cdecl_method_big at Class@@QEAA?AUBig@@XZ"(%class.Class* %this, %struct.Big* noalias sret align 4 %agg.result)
 
   void thiscall_method_arg(Empty s) {}
   // LINUX: define {{.*}} void @_ZN5Class19thiscall_method_argE5Empty(%class.Class* %this)

diff  --git a/clang/test/CodeGenCXX/microsoft-abi-vmemptr-conflicts.cpp b/clang/test/CodeGenCXX/microsoft-abi-vmemptr-conflicts.cpp
index 607ec816aefb..1ab60a526128 100644
--- a/clang/test/CodeGenCXX/microsoft-abi-vmemptr-conflicts.cpp
+++ b/clang/test/CodeGenCXX/microsoft-abi-vmemptr-conflicts.cpp
@@ -65,7 +65,7 @@ void f(C *c) {
 
 // CHECK-LABEL: define dso_local void @"?f at sret@@YAXPAUC at 1@@Z"(%"struct.sret::C"* %c)
 // CHECK: call x86_thiscallcc i32 bitcast (void (%"struct.sret::C"*, ...)* @"??_9C at sret@@$BA at AE" to i32 (%"struct.sret::C"*)*)(%"struct.sret::C"* %{{.*}})
-// CHECK: call x86_thiscallcc void bitcast (void (%"struct.sret::C"*, ...)* @"??_9C at sret@@$BA at AE" to void (%"struct.sret::C"*, %"struct.sret::Big"*)*)(%"struct.sret::C"* %{{.*}}, %"struct.sret::Big"* sret %{{.*}})
+// CHECK: call x86_thiscallcc void bitcast (void (%"struct.sret::C"*, ...)* @"??_9C at sret@@$BA at AE" to void (%"struct.sret::C"*, %"struct.sret::Big"*)*)(%"struct.sret::C"* %{{.*}}, %"struct.sret::Big"* sret align 4 %{{.*}})
 
 // CHECK-LABEL: define linkonce_odr x86_thiscallcc void @"??_9C at sret@@$BA at AE"(%"struct.sret::C"* %this, ...) {{.*}} comdat
 // CHECK: musttail call x86_thiscallcc void (%"struct.sret::C"*, ...) %{{.*}}(%"struct.sret::C"* %{{.*}}, ...)

diff  --git a/clang/test/CodeGenCXX/regcall.cpp b/clang/test/CodeGenCXX/regcall.cpp
index bdf76964bf23..9eca868fc31d 100644
--- a/clang/test/CodeGenCXX/regcall.cpp
+++ b/clang/test/CodeGenCXX/regcall.cpp
@@ -74,8 +74,8 @@ bool __regcall operator ==(const test_class&, const test_class&){ --x; return fa
 // CHECK-WIN32-DAG: define dso_local x86_regcallcc zeroext i1 @"??8 at Yw_NABVtest_class@@0 at Z"
 
 test_class __regcall operator""_test_class (unsigned long long) { ++x; return test_class{};}
-// CHECK-LIN64-DAG: define x86_regcallcc void @_Zli11_test_classy(%class.test_class* noalias sret %agg.result, i64 %0)
-// CHECK-LIN32-DAG: define x86_regcallcc void @_Zli11_test_classy(%class.test_class* inreg noalias sret %agg.result, i64 %0)
+// CHECK-LIN64-DAG: define x86_regcallcc void @_Zli11_test_classy(%class.test_class* noalias sret align 4 %agg.result, i64 %0)
+// CHECK-LIN32-DAG: define x86_regcallcc void @_Zli11_test_classy(%class.test_class* inreg noalias sret align 4 %agg.result, i64 %0)
 // CHECK-WIN64-DAG: ??__K_test_class@@Yw?AVtest_class@@_K at Z"
 // CHECK-WIN32-DAG: ??__K_test_class@@Yw?AVtest_class@@_K at Z"
 
@@ -99,7 +99,7 @@ void force_gen() {
 long double _Complex __regcall foo(long double _Complex f) {
   return f;
 }
-// CHECK-LIN64-DAG: define x86_regcallcc void @_Z15__regcall3__fooCe({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16 %f)
-// CHECK-LIN32-DAG: define x86_regcallcc void @_Z15__regcall3__fooCe({ x86_fp80, x86_fp80 }* inreg noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 4 %f)
+// CHECK-LIN64-DAG: define x86_regcallcc void @_Z15__regcall3__fooCe({ x86_fp80, x86_fp80 }* noalias sret align 16 %agg.result, { x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 16 %f)
+// CHECK-LIN32-DAG: define x86_regcallcc void @_Z15__regcall3__fooCe({ x86_fp80, x86_fp80 }* inreg noalias sret align 4 %agg.result, { x86_fp80, x86_fp80 }* byval({ x86_fp80, x86_fp80 }) align 4 %f)
 // CHECK-WIN64-DAG: define dso_local x86_regcallcc { double, double } @"?foo@@YwU?$_Complex at O@__clang@@U12@@Z"(double %f.0, double %f.1)
 // CHECK-WIN32-DAG: define dso_local x86_regcallcc { double, double } @"?foo@@YwU?$_Complex at O@__clang@@U12@@Z"(double %f.0, double %f.1)

diff  --git a/clang/test/CodeGenCXX/stack-reuse-miscompile.cpp b/clang/test/CodeGenCXX/stack-reuse-miscompile.cpp
index 4e824d94f510..7d86ea8447b4 100644
--- a/clang/test/CodeGenCXX/stack-reuse-miscompile.cpp
+++ b/clang/test/CodeGenCXX/stack-reuse-miscompile.cpp
@@ -39,7 +39,7 @@ const char * f(S s)
 // CHECK: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[T3i8]])
 // CHECK: [[T5:%.*]] = call %class.T* @_ZN1TC1E1S(%class.T* [[T3]], [2 x i32] %{{.*}})
 //
-// CHECK: call void @_ZNK1T6concatERKS_(%class.T* sret [[T1]], %class.T* [[T2]], %class.T* dereferenceable(16) [[T3]])
+// CHECK: call void @_ZNK1T6concatERKS_(%class.T* sret align 4 [[T1]], %class.T* [[T2]], %class.T* dereferenceable(16) [[T3]])
 // CHECK: [[T6:%.*]] = call i8* @_ZNK1T3strEv(%class.T* [[T1]])
 //
 // CHECK: call void @llvm.lifetime.end.p0i8(

diff  --git a/clang/test/CodeGenCXX/stack-reuse.cpp b/clang/test/CodeGenCXX/stack-reuse.cpp
index 35dcb5b349c3..94e5e3d9b364 100644
--- a/clang/test/CodeGenCXX/stack-reuse.cpp
+++ b/clang/test/CodeGenCXX/stack-reuse.cpp
@@ -135,7 +135,7 @@ int large_combiner_test(S_large s) {
 // CHECK: [[T2:%.*]] = alloca %struct.Combiner
 // CHECK: [[T1:%.*]] = alloca %struct.Combiner
 // CHECK: [[T3:%.*]] = call %struct.Combiner* @_ZN8CombinerC1E7S_large(%struct.Combiner* nonnull [[T1]], [9 x i32] %s.coerce)
-// CHECK: call void @_ZN8Combiner1fEv(%struct.Combiner* nonnull sret [[T2]], %struct.Combiner* nonnull [[T1]])
+// CHECK: call void @_ZN8Combiner1fEv(%struct.Combiner* nonnull sret align 4 [[T2]], %struct.Combiner* nonnull [[T1]])
 // CHECK: [[T4:%.*]] = getelementptr inbounds %struct.Combiner, %struct.Combiner* [[T2]], i32 0, i32 0, i32 0, i32 0
 // CHECK: [[T5:%.*]] = load i32, i32* [[T4]]
 // CHECK: ret i32 [[T5]]

diff  --git a/clang/test/CodeGenCXX/temporaries.cpp b/clang/test/CodeGenCXX/temporaries.cpp
index d15e0fa05bd8..175b475c8cd7 100644
--- a/clang/test/CodeGenCXX/temporaries.cpp
+++ b/clang/test/CodeGenCXX/temporaries.cpp
@@ -403,13 +403,13 @@ namespace Elision {
     // CHECK-NEXT: call void @_ZN7Elision1AC1Ev([[A]]* [[I]])
     A i = (foo(), A());
 
-    // CHECK-NEXT: call void @_ZN7Elision4fooAEv([[A]]* sret [[T0]])
+    // CHECK-NEXT: call void @_ZN7Elision4fooAEv([[A]]* sret align 8 [[T0]])
     // CHECK-NEXT: call void @_ZN7Elision1AC1Ev([[A]]* [[J]])
     // CHECK-NEXT: call void @_ZN7Elision1AD1Ev([[A]]* [[T0]])
     A j = (fooA(), A());
 
     // CHECK-NEXT: call void @_ZN7Elision1AC1Ev([[A]]* [[T1]])
-    // CHECK-NEXT: call void @_ZN7Elision4fooAEv([[A]]* sret [[K]])
+    // CHECK-NEXT: call void @_ZN7Elision4fooAEv([[A]]* sret align 8 [[K]])
     // CHECK-NEXT: call void @_ZN7Elision1AD1Ev([[A]]* [[T1]])
     A k = (A(), fooA());
 
@@ -436,7 +436,7 @@ namespace Elision {
     // CHECK-NEXT: call void @_ZN7Elision1AD1Ev([[A]]* [[I]])
   }
 
-  // CHECK: define void @_ZN7Elision5test2Ev([[A]]* noalias sret
+  // CHECK: define void @_ZN7Elision5test2Ev([[A]]* noalias sret align 8
   A test2() {
     // CHECK:      call void @_ZN7Elision3fooEv()
     // CHECK-NEXT: call void @_ZN7Elision1AC1Ev([[A]]* [[RET:%.*]])
@@ -444,7 +444,7 @@ namespace Elision {
     return (foo(), A());
   }
 
-  // CHECK: define void @_ZN7Elision5test3EiNS_1AE([[A]]* noalias sret
+  // CHECK: define void @_ZN7Elision5test3EiNS_1AE([[A]]* noalias sret align 8
   A test3(int v, A x) {
     if (v < 5)
     // CHECK:      call void @_ZN7Elision1AC1Ev([[A]]* [[RET:%.*]])
@@ -485,7 +485,7 @@ namespace Elision {
   }
 
   // rdar://problem/8433352
-  // CHECK: define void @_ZN7Elision5test5Ev([[A]]* noalias sret
+  // CHECK: define void @_ZN7Elision5test5Ev([[A]]* noalias sret align 8
   struct B { A a; B(); };
   A test5() {
     // CHECK:      [[AT0:%.*]] = alloca [[A]], align 8
@@ -523,7 +523,7 @@ namespace Elision {
   void test6(const C *x) {
     // CHECK:      [[T0:%.*]] = alloca [[A]], align 8
     // CHECK:      [[X:%.*]] = load [[C]]*, [[C]]** {{%.*}}, align 8
-    // CHECK-NEXT: call void @_ZNK7Elision1CcvNS_1AEEv([[A]]* sret [[T0]], [[C]]* [[X]])
+    // CHECK-NEXT: call void @_ZNK7Elision1CcvNS_1AEEv([[A]]* sret align 8 [[T0]], [[C]]* [[X]])
     // CHECK-NEXT: call void @_ZNK7Elision1A3fooEv([[A]]* [[T0]])
     // CHECK-NEXT: call void @_ZN7Elision1AD1Ev([[A]]* [[T0]])
     // CHECK-NEXT: ret void

diff  --git a/clang/test/CodeGenCXX/thiscall-struct-return.cpp b/clang/test/CodeGenCXX/thiscall-struct-return.cpp
index a6be5aa494e1..35d5cc479177 100644
--- a/clang/test/CodeGenCXX/thiscall-struct-return.cpp
+++ b/clang/test/CodeGenCXX/thiscall-struct-return.cpp
@@ -34,8 +34,8 @@ void test( void ) {
 // CHECK: call void @_ZN1CC1Ev(%class.C* [[C:%.+]])
   C c;
 
-// CHECK: call x86_thiscallcc void @_ZNK1C5SmallEv(%struct.S* sret %{{.+}}, %class.C* [[C]])
+// CHECK: call x86_thiscallcc void @_ZNK1C5SmallEv(%struct.S* sret align 4 %{{.+}}, %class.C* [[C]])
   (void)c.Small();
-// CHECK: call x86_thiscallcc void @_ZNK1C6MediumEv(%struct.M* sret %{{.+}}, %class.C* [[C]])
+// CHECK: call x86_thiscallcc void @_ZNK1C6MediumEv(%struct.M* sret align 4 %{{.+}}, %class.C* [[C]])
   (void)c.Medium();
 }

diff  --git a/clang/test/CodeGenCXX/thunk-returning-memptr.cpp b/clang/test/CodeGenCXX/thunk-returning-memptr.cpp
index 0b7870c6d658..63bb3d68472d 100644
--- a/clang/test/CodeGenCXX/thunk-returning-memptr.cpp
+++ b/clang/test/CodeGenCXX/thunk-returning-memptr.cpp
@@ -23,5 +23,5 @@ C::C() {}
 // Because of the tail call, the return value cannot be copied into a local
 // alloca. (PR39901)
 
-// CHECK-LABEL: define linkonce_odr void @_ZThn4_N1C1fEv({ i32, i32 }* noalias sret %agg.result, %struct.C* %this)
-// CHECK: tail call void @_ZN1C1fEv({ i32, i32 }* sret %agg.result
+// CHECK-LABEL: define linkonce_odr void @_ZThn4_N1C1fEv({ i32, i32 }* noalias sret align 4 %agg.result, %struct.C* %this)
+// CHECK: tail call void @_ZN1C1fEv({ i32, i32 }* sret align 4 %agg.result

diff  --git a/clang/test/CodeGenCXX/thunks.cpp b/clang/test/CodeGenCXX/thunks.cpp
index fe7d656eb7e5..b5c2852f8770 100644
--- a/clang/test/CodeGenCXX/thunks.cpp
+++ b/clang/test/CodeGenCXX/thunks.cpp
@@ -206,13 +206,13 @@ namespace Test6 {
   // CHECK-LABEL: define void @_ZThn16_N5Test66Thunks1fEv
 	// CHECK-DBG-NOT: dbg.declare
   // CHECK-NOT: memcpy
-  // CHECK: {{call void @_ZN5Test66Thunks1fEv.*sret}}
+  // CHECK: {{call void @_ZN5Test66Thunks1fEv.*sret align 1}}
   // CHECK: ret void
   X Thunks::f() { return X(); }
 
-  // WIN64-LABEL: define linkonce_odr dso_local void @"?f at Thunks@Test6@@WBA at EAA?AUX at 2@XZ"({{.*}} sret %{{.*}})
+  // WIN64-LABEL: define linkonce_odr dso_local void @"?f at Thunks@Test6@@WBA at EAA?AUX at 2@XZ"({{.*}} sret align 1 %{{.*}})
   // WIN64-NOT: memcpy
-  // WIN64: tail call void @"?f at Thunks@Test6@@UEAA?AUX at 2@XZ"({{.*}} sret %{{.*}})
+  // WIN64: tail call void @"?f at Thunks@Test6@@UEAA?AUX at 2@XZ"({{.*}} sret align 1 %{{.*}})
 }
 
 namespace Test7 {

diff  --git a/clang/test/CodeGenCXX/trivial_abi.cpp b/clang/test/CodeGenCXX/trivial_abi.cpp
index 2cf07b22581a..23c589dacd7e 100644
--- a/clang/test/CodeGenCXX/trivial_abi.cpp
+++ b/clang/test/CodeGenCXX/trivial_abi.cpp
@@ -126,7 +126,7 @@ void testIgnoredSmall() {
 void testParamLarge(Large a) noexcept {
 }
 
-// CHECK: define void @_Z15testReturnLargev(%[[STRUCT_LARGE:.*]]* noalias sret %[[AGG_RESULT:.*]])
+// CHECK: define void @_Z15testReturnLargev(%[[STRUCT_LARGE:.*]]* noalias sret align 8 %[[AGG_RESULT:.*]])
 // CHECK: %[[CALL:.*]] = call %[[STRUCT_LARGE]]* @_ZN5LargeC1Ev(%[[STRUCT_LARGE]]* %[[AGG_RESULT]])
 // CHECK: ret void
 // CHECK: }
@@ -153,7 +153,7 @@ void testCallLarge0() {
 
 // CHECK: define void @_Z14testCallLarge1v()
 // CHECK: %[[AGG_TMP:.*]] = alloca %[[STRUCT_LARGE:.*]], align 8
-// CHECK: call void @_Z15testReturnLargev(%[[STRUCT_LARGE]]* sret %[[AGG_TMP]])
+// CHECK: call void @_Z15testReturnLargev(%[[STRUCT_LARGE]]* sret align 8 %[[AGG_TMP]])
 // CHECK: call void @_Z14testParamLarge5Large(%[[STRUCT_LARGE]]* %[[AGG_TMP]])
 // CHECK: ret void
 // CHECK: }
@@ -164,7 +164,7 @@ void testCallLarge1() {
 
 // CHECK: define void @_Z16testIgnoredLargev()
 // CHECK: %[[AGG_TMP_ENSURED:.*]] = alloca %[[STRUCT_LARGE:.*]], align 8
-// CHECK: call void @_Z15testReturnLargev(%[[STRUCT_LARGE]]* sret %[[AGG_TMP_ENSURED]])
+// CHECK: call void @_Z15testReturnLargev(%[[STRUCT_LARGE]]* sret align 8 %[[AGG_TMP_ENSURED]])
 // CHECK: %[[CALL:.*]] = call %[[STRUCT_LARGE]]* @_ZN5LargeD1Ev(%[[STRUCT_LARGE]]* %[[AGG_TMP_ENSURED]])
 // CHECK: ret void
 // CHECK: }
@@ -186,7 +186,7 @@ Trivial testReturnHasTrivial() {
   return t;
 }
 
-// CHECK: define void @_Z23testReturnHasNonTrivialv(%[[STRUCT_NONTRIVIAL:.*]]* noalias sret %[[AGG_RESULT:.*]])
+// CHECK: define void @_Z23testReturnHasNonTrivialv(%[[STRUCT_NONTRIVIAL:.*]]* noalias sret align 4 %[[AGG_RESULT:.*]])
 // CHECK: %[[CALL:.*]] = call %[[STRUCT_NONTRIVIAL]]* @_ZN10NonTrivialC1Ev(%[[STRUCT_NONTRIVIAL]]* %[[AGG_RESULT]])
 // CHECK: ret void
 // CHECK: }

diff  --git a/clang/test/CodeGenCXX/unknown-anytype.cpp b/clang/test/CodeGenCXX/unknown-anytype.cpp
index 42ed472380b1..0a7ab53b7af6 100644
--- a/clang/test/CodeGenCXX/unknown-anytype.cpp
+++ b/clang/test/CodeGenCXX/unknown-anytype.cpp
@@ -71,7 +71,7 @@ struct Test7 {
 };
 extern "C" __unknown_anytype test7_any(int);
 Test7 test7() {
-  // COMMON: call void @test7_any({{%.*}}* sret {{%.*}}, i32 5)
+  // COMMON: call void @test7_any({{%.*}}* sret align 1 {{%.*}}, i32 5)
   return (Test7) test7_any(5);
 }
 

diff  --git a/clang/test/CodeGenCXX/wasm-args-returns.cpp b/clang/test/CodeGenCXX/wasm-args-returns.cpp
index c547eb85390d..3c57961eb2fc 100644
--- a/clang/test/CodeGenCXX/wasm-args-returns.cpp
+++ b/clang/test/CodeGenCXX/wasm-args-returns.cpp
@@ -30,52 +30,52 @@ struct two_fields {
   double d, e;
 };
 test(two_fields);
-// CHECK: define void @_Z7forward10two_fields(%struct.two_fields* noalias nocapture sret %{{.*}}, %struct.two_fields* nocapture readonly byval(%struct.two_fields) align 8 %{{.*}})
+// CHECK: define void @_Z7forward10two_fields(%struct.two_fields* noalias nocapture sret align 8 %{{.*}}, %struct.two_fields* nocapture readonly byval(%struct.two_fields) align 8 %{{.*}})
 //
 // CHECK: define void @_Z15test_two_fieldsv()
 // CHECK: %[[tmp:.*]] = alloca %struct.two_fields, align 8
-// CHECK: call void @_Z14def_two_fieldsv(%struct.two_fields* nonnull sret %[[tmp]])
+// CHECK: call void @_Z14def_two_fieldsv(%struct.two_fields* nonnull sret align 8 %[[tmp]])
 // CHECK: call void @_Z3use10two_fields(%struct.two_fields* nonnull byval(%struct.two_fields) align 8 %[[tmp]])
 // CHECK: ret void
 //
 // CHECK: declare void @_Z3use10two_fields(%struct.two_fields* byval(%struct.two_fields) align 8)
-// CHECK: declare void @_Z14def_two_fieldsv(%struct.two_fields* sret)
+// CHECK: declare void @_Z14def_two_fieldsv(%struct.two_fields* sret align 8)
 
 struct copy_ctor {
   double d;
   copy_ctor(copy_ctor const &);
 };
 test(copy_ctor);
-// CHECK: define void @_Z7forward9copy_ctor(%struct.copy_ctor* noalias sret %{{.*}}, %struct.copy_ctor* nonnull %{{.*}})
+// CHECK: define void @_Z7forward9copy_ctor(%struct.copy_ctor* noalias sret align 8 %{{.*}}, %struct.copy_ctor* nonnull %{{.*}})
 //
 // CHECK: declare %struct.copy_ctor* @_ZN9copy_ctorC1ERKS_(%struct.copy_ctor* returned, %struct.copy_ctor* dereferenceable(8))
 //
 // CHECK: define void @_Z14test_copy_ctorv()
 // CHECK: %[[tmp:.*]] = alloca %struct.copy_ctor, align 8
-// CHECK: call void @_Z13def_copy_ctorv(%struct.copy_ctor* nonnull sret %[[tmp]])
+// CHECK: call void @_Z13def_copy_ctorv(%struct.copy_ctor* nonnull sret align 8 %[[tmp]])
 // CHECK: call void @_Z3use9copy_ctor(%struct.copy_ctor* nonnull %[[tmp]])
 // CHECK: ret void
 //
 // CHECK: declare void @_Z3use9copy_ctor(%struct.copy_ctor*)
-// CHECK: declare void @_Z13def_copy_ctorv(%struct.copy_ctor* sret)
+// CHECK: declare void @_Z13def_copy_ctorv(%struct.copy_ctor* sret align 8)
 
 struct __attribute__((aligned(16))) aligned_copy_ctor {
   double d, e;
   aligned_copy_ctor(aligned_copy_ctor const &);
 };
 test(aligned_copy_ctor);
-// CHECK: define void @_Z7forward17aligned_copy_ctor(%struct.aligned_copy_ctor* noalias sret %{{.*}}, %struct.aligned_copy_ctor* nonnull %{{.*}})
+// CHECK: define void @_Z7forward17aligned_copy_ctor(%struct.aligned_copy_ctor* noalias sret align 16 %{{.*}}, %struct.aligned_copy_ctor* nonnull %{{.*}})
 //
 // CHECK: declare %struct.aligned_copy_ctor* @_ZN17aligned_copy_ctorC1ERKS_(%struct.aligned_copy_ctor* returned, %struct.aligned_copy_ctor* dereferenceable(16))
 //
 // CHECK: define void @_Z22test_aligned_copy_ctorv()
 // CHECK: %[[tmp:.*]] = alloca %struct.aligned_copy_ctor, align 16
-// CHECK: call void @_Z21def_aligned_copy_ctorv(%struct.aligned_copy_ctor* nonnull sret %[[tmp]])
+// CHECK: call void @_Z21def_aligned_copy_ctorv(%struct.aligned_copy_ctor* nonnull sret align 16 %[[tmp]])
 // CHECK: call void @_Z3use17aligned_copy_ctor(%struct.aligned_copy_ctor* nonnull %[[tmp]])
 // CHECK: ret void
 //
 // CHECK: declare void @_Z3use17aligned_copy_ctor(%struct.aligned_copy_ctor*)
-// CHECK: declare void @_Z21def_aligned_copy_ctorv(%struct.aligned_copy_ctor* sret)
+// CHECK: declare void @_Z21def_aligned_copy_ctorv(%struct.aligned_copy_ctor* sret align 16)
 
 struct empty {};
 test(empty);

diff  --git a/clang/test/CodeGenCXX/x86_32-arguments.cpp b/clang/test/CodeGenCXX/x86_32-arguments.cpp
index 830168635b52..c7ff59e943d2 100644
--- a/clang/test/CodeGenCXX/x86_32-arguments.cpp
+++ b/clang/test/CodeGenCXX/x86_32-arguments.cpp
@@ -6,7 +6,7 @@ struct S {
   short s;
 };
 
-// CHECK-LABEL: define void @_Z1fv(%struct.S* noalias sret %
+// CHECK-LABEL: define void @_Z1fv(%struct.S* noalias sret align 2 %
 S f() { return S(); }
 // CHECK-LABEL: define void @_Z1f1S(%struct.S* %0)
 void f(S) { }
@@ -18,7 +18,7 @@ class C {
   double c;
 };
 
-// CHECK-LABEL: define void @_Z1gv(%class.C* noalias sret %
+// CHECK-LABEL: define void @_Z1gv(%class.C* noalias sret align 4 %
 C g() { return C(); }
 
 // CHECK-LABEL: define void @_Z1f1C(%class.C* %0) 
@@ -103,13 +103,13 @@ struct s7_1 { double x; };
 struct s7 : s7_0, s7_1 { };
 s7 f7() { return s7(); }
 
-// CHECK-LABEL: define void @_Z2f8v(%struct.s8* noalias sret %agg.result)
+// CHECK-LABEL: define void @_Z2f8v(%struct.s8* noalias sret align 4 %agg.result)
 struct s8_0 { };
 struct s8_1 { double x; };
 struct s8 { s8_0 a; s8_1 b; };
 s8 f8() { return s8(); }
 
-// CHECK-LABEL: define void @_Z2f9v(%struct.s9* noalias sret %agg.result)
+// CHECK-LABEL: define void @_Z2f9v(%struct.s9* noalias sret align 4 %agg.result)
 struct s9_0 { unsigned : 0; };
 struct s9_1 { double x; };
 struct s9 { s9_0 a; s9_1 b; };

diff  --git a/clang/test/CodeGenCXX/x86_64-arguments.cpp b/clang/test/CodeGenCXX/x86_64-arguments.cpp
index e90590778895..f7a898b220af 100644
--- a/clang/test/CodeGenCXX/x86_64-arguments.cpp
+++ b/clang/test/CodeGenCXX/x86_64-arguments.cpp
@@ -176,7 +176,7 @@ namespace test9 {
   // CHECK: define void @_ZN5test93fooEPNS_1SEPNS_1TE([[S:%.*]]* %0, [[T:%.*]]* %1)
   void foo(S*, T*) {}
 
-  // CHECK: define void @_ZN5test91aEiiiiNS_1TEPv([[S]]* noalias sret {{%.*}}, i32 %0, i32 %1, i32 %2, i32 %3, [[T]]* byval([[T]]) align 8 %4, i8* %5)
+  // CHECK: define void @_ZN5test91aEiiiiNS_1TEPv([[S]]* noalias sret align 8 {{%.*}}, i32 %0, i32 %1, i32 %2, i32 %3, [[T]]* byval([[T]]) align 8 %4, i8* %5)
   S a(int, int, int, int, T, void*) {
     return S();
   }
@@ -186,7 +186,7 @@ namespace test9 {
     return sret;
   }
 
-  // CHECK: define void @_ZN5test91cEiiiNS_1TEPv([[S]]* noalias sret {{%.*}}, i32 %0, i32 %1, i32 %2, i8* {{%.*}}, i8* {{%.*}}, i8* %3)
+  // CHECK: define void @_ZN5test91cEiiiNS_1TEPv([[S]]* noalias sret align 8 {{%.*}}, i32 %0, i32 %1, i32 %2, i8* {{%.*}}, i8* {{%.*}}, i8* %3)
   S c(int, int, int, T, void*) {
     return S();
   }

diff  --git a/clang/test/CodeGenCoroutines/coro-await.cpp b/clang/test/CodeGenCoroutines/coro-await.cpp
index 86bacc766db3..99097f376aa5 100644
--- a/clang/test/CodeGenCoroutines/coro-await.cpp
+++ b/clang/test/CodeGenCoroutines/coro-await.cpp
@@ -130,7 +130,7 @@ extern "C" void f1(int) {
   // CHECK: %[[PROMISE:.+]] = alloca %"struct.std::experimental::coroutine_traits<void, int>::promise_type"
   // CHECK: %[[FRAME:.+]] = call i8* @llvm.coro.begin(
   co_yield 42;
-  // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJviEE12promise_type11yield_valueEi(%struct.suspend_maybe* sret %[[AWAITER:.+]], %"struct.std::experimental::coroutine_traits<void, int>::promise_type"* %[[PROMISE]], i32 42)
+  // CHECK: call void @_ZNSt12experimental16coroutine_traitsIJviEE12promise_type11yield_valueEi(%struct.suspend_maybe* sret align 4 %[[AWAITER:.+]], %"struct.std::experimental::coroutine_traits<void, int>::promise_type"* %[[PROMISE]], i32 42)
 
   // See if we need to suspend:
   // --------------------------
@@ -197,20 +197,20 @@ extern "C" void UseAggr(Aggr&&);
 extern "C" void TestAggr() {
   UseAggr(co_await AggrAwaiter{});
   Whatever();
-  // CHECK: call void @_ZN11AggrAwaiter12await_resumeEv(%struct.Aggr* sret %[[AwaitResume:.+]],
+  // CHECK: call void @_ZN11AggrAwaiter12await_resumeEv(%struct.Aggr* sret align 4 %[[AwaitResume:.+]],
   // CHECK: call void @UseAggr(%struct.Aggr* dereferenceable(12) %[[AwaitResume]])
   // CHECK: call void @_ZN4AggrD1Ev(%struct.Aggr* %[[AwaitResume]])
   // CHECK: call void @Whatever()
 
   co_await AggrAwaiter{};
   Whatever();
-  // CHECK: call void @_ZN11AggrAwaiter12await_resumeEv(%struct.Aggr* sret %[[AwaitResume2:.+]],
+  // CHECK: call void @_ZN11AggrAwaiter12await_resumeEv(%struct.Aggr* sret align 4 %[[AwaitResume2:.+]],
   // CHECK: call void @_ZN4AggrD1Ev(%struct.Aggr* %[[AwaitResume2]])
   // CHECK: call void @Whatever()
 
   Aggr Val = co_await AggrAwaiter{};
   Whatever();
-  // CHECK: call void @_ZN11AggrAwaiter12await_resumeEv(%struct.Aggr* sret %[[AwaitResume3:.+]],
+  // CHECK: call void @_ZN11AggrAwaiter12await_resumeEv(%struct.Aggr* sret align 4 %[[AwaitResume3:.+]],
   // CHECK: call void @Whatever()
   // CHECK: call void @_ZN4AggrD1Ev(%struct.Aggr* %[[AwaitResume3]])
 }
@@ -253,7 +253,7 @@ extern "C" void TestOpAwait() {
 
   co_await MyAgg{};
   // CHECK: call void @_ZN5MyAggawEv(%struct.MyAgg* %
-  // CHECK: call void @_ZN11AggrAwaiter12await_resumeEv(%struct.Aggr* sret %
+  // CHECK: call void @_ZN11AggrAwaiter12await_resumeEv(%struct.Aggr* sret align 4 %
 }
 
 // CHECK-LABEL: EndlessLoop(

diff  --git a/clang/test/CodeGenCoroutines/coro-gro-nrvo.cpp b/clang/test/CodeGenCoroutines/coro-gro-nrvo.cpp
index 8a0b234e3ae2..42856f4479ec 100644
--- a/clang/test/CodeGenCoroutines/coro-gro-nrvo.cpp
+++ b/clang/test/CodeGenCoroutines/coro-gro-nrvo.cpp
@@ -34,14 +34,14 @@ struct coro {
 };
 
 // Verify that the NRVO is applied to the Gro object.
-// CHECK-LABEL: define void @_Z1fi(%struct.coro* noalias sret %agg.result, i32 %0)
+// CHECK-LABEL: define void @_Z1fi(%struct.coro* noalias sret align 8 %agg.result, i32 %0)
 coro f(int) {
 // CHECK: %call = call noalias nonnull i8* @_Znwm(
 // CHECK-NEXT: br label %[[CoroInit:.*]]
 
 // CHECK: {{.*}}[[CoroInit]]:
 // CHECK: store i1 false, i1* %gro.active
-// CHECK: call void @{{.*get_return_objectEv}}(%struct.coro* sret %agg.result
+// CHECK: call void @{{.*get_return_objectEv}}(%struct.coro* sret align 8 %agg.result
 // CHECK-NEXT: store i1 true, i1* %gro.active
   co_return;
 }
@@ -65,7 +65,7 @@ struct coro_two {
 };
 
 // Verify that the NRVO is applied to the Gro object.
-// CHECK-LABEL: define void @_Z1hi(%struct.coro_two* noalias sret %agg.result, i32 %0)
+// CHECK-LABEL: define void @_Z1hi(%struct.coro_two* noalias sret align 8 %agg.result, i32 %0)
  coro_two h(int) {
 
 // CHECK: %call = call noalias i8* @_ZnwmRKSt9nothrow_t
@@ -73,12 +73,12 @@ struct coro_two {
 // CHECK-NEXT: br i1 %[[CheckNull]], label %[[InitOnSuccess:.*]], label %[[InitOnFailure:.*]]
 
 // CHECK: {{.*}}[[InitOnFailure]]:
-// CHECK-NEXT: call void @{{.*get_return_object_on_allocation_failureEv}}(%struct.coro_two* sret %agg.result
+// CHECK-NEXT: call void @{{.*get_return_object_on_allocation_failureEv}}(%struct.coro_two* sret align 8 %agg.result
 // CHECK-NEXT: br label %[[RetLabel:.*]]
 
 // CHECK: {{.*}}[[InitOnSuccess]]:
 // CHECK: store i1 false, i1* %gro.active
-// CHECK: call void @{{.*get_return_objectEv}}(%struct.coro_two* sret %agg.result
+// CHECK: call void @{{.*get_return_objectEv}}(%struct.coro_two* sret align 8 %agg.result
 // CHECK-NEXT: store i1 true, i1* %gro.active
 
 // CHECK: [[RetLabel]]:

diff  --git a/clang/test/CodeGenObjC/arc.m b/clang/test/CodeGenObjC/arc.m
index 0eb69b23c8d2..375ad8ed7b41 100644
--- a/clang/test/CodeGenObjC/arc.m
+++ b/clang/test/CodeGenObjC/arc.m
@@ -1538,14 +1538,14 @@ void test70(id i) {
 void test71(void) {
   // CHECK: %[[T:[^ ]+]] = bitcast %struct.AggDtor* %[[TMP1:[^ ]+]] to i8*
   // CHECK: call void @llvm.lifetime.start.p0i8({{[^,]+}}, i8* %[[T]])
-  // CHECK: call void @getAggDtor(%struct.AggDtor* sret %[[TMP1]])
+  // CHECK: call void @getAggDtor(%struct.AggDtor* sret align 8 %[[TMP1]])
   // CHECK: %[[T:[^ ]+]] = bitcast %struct.AggDtor* %[[TMP1]] to i8**
   // CHECK: call void @__destructor_8_s40(i8** %[[T]])
   // CHECK: %[[T:[^ ]+]] = bitcast %struct.AggDtor* %[[TMP1:[^ ]+]] to i8*
   // CHECK: call void @llvm.lifetime.end.p0i8({{[^,]+}}, i8* %[[T]])
   // CHECK: %[[T:[^ ]+]] = bitcast %struct.AggDtor* %[[TMP2:[^ ]+]] to i8*
   // CHECK: call void @llvm.lifetime.start.p0i8({{[^,]+}}, i8* %[[T]])
-  // CHECK: call void @getAggDtor(%struct.AggDtor* sret %[[TMP2]])
+  // CHECK: call void @getAggDtor(%struct.AggDtor* sret align 8 %[[TMP2]])
   // CHECK: %[[T:[^ ]+]] = bitcast %struct.AggDtor* %[[TMP2]] to i8**
   // CHECK: call void @__destructor_8_s40(i8** %[[T]])
   // CHECK: %[[T:[^ ]+]] = bitcast %struct.AggDtor* %[[TMP2:[^ ]+]] to i8*

diff  --git a/clang/test/CodeGenObjC/direct-method.m b/clang/test/CodeGenObjC/direct-method.m
index e53c99bc0f5e..5bb84de1ddb5 100644
--- a/clang/test/CodeGenObjC/direct-method.m
+++ b/clang/test/CodeGenObjC/direct-method.m
@@ -120,7 +120,7 @@ + (struct my_complex_struct)classGetComplex __attribute__((objc_direct)) {
 
 // CHECK-LABEL: define hidden void @"\01-[Root getAggregate]"(
 - (struct my_aggregate_struct)getAggregate __attribute__((objc_direct)) {
-  // CHECK: %struct.my_aggregate_struct* noalias sret [[RETVAL:%[^,]*]],
+  // CHECK: %struct.my_aggregate_struct* noalias sret align 4 [[RETVAL:%[^,]*]],
 
   // loading parameters
   // CHECK-LABEL: entry:

diff  --git a/clang/test/CodeGenObjC/nontrivial-c-struct-exception.m b/clang/test/CodeGenObjC/nontrivial-c-struct-exception.m
index 1733a019026c..8d66485959a8 100644
--- a/clang/test/CodeGenObjC/nontrivial-c-struct-exception.m
+++ b/clang/test/CodeGenObjC/nontrivial-c-struct-exception.m
@@ -41,8 +41,8 @@ void testStrongException(void) {
 // CHECK: define void @testWeakException()
 // CHECK: %[[AGG_TMP:.*]] = alloca %[[STRUCT_WEAK]], align 8
 // CHECK: %[[AGG_TMP1:.*]] = alloca %[[STRUCT_WEAK]], align 8
-// CHECK: call void @genWeak(%[[STRUCT_WEAK]]* sret %[[AGG_TMP]])
-// CHECK: invoke void @genWeak(%[[STRUCT_WEAK]]* sret %[[AGG_TMP1]])
+// CHECK: call void @genWeak(%[[STRUCT_WEAK]]* sret align 8 %[[AGG_TMP]])
+// CHECK: invoke void @genWeak(%[[STRUCT_WEAK]]* sret align 8 %[[AGG_TMP1]])
 
 // CHECK: call void @calleeWeak(%[[STRUCT_WEAK]]* %[[AGG_TMP]], %[[STRUCT_WEAK]]* %[[AGG_TMP1]])
 // CHECK: ret void

diff  --git a/clang/test/CodeGenObjC/objc-non-trivial-struct-nrvo.m b/clang/test/CodeGenObjC/objc-non-trivial-struct-nrvo.m
index 53ff433989e2..93f348185412 100644
--- a/clang/test/CodeGenObjC/objc-non-trivial-struct-nrvo.m
+++ b/clang/test/CodeGenObjC/objc-non-trivial-struct-nrvo.m
@@ -37,7 +37,7 @@ Trivial testTrivial(void) {
 
 void func1(TrivialBig *);
 
-// CHECK: define void @testTrivialBig(%[[STRUCT_TRIVIALBIG]]* noalias sret %[[AGG_RESULT:.*]])
+// CHECK: define void @testTrivialBig(%[[STRUCT_TRIVIALBIG]]* noalias sret align 4 %[[AGG_RESULT:.*]])
 // CHECK: call void @func1(%[[STRUCT_TRIVIALBIG]]* %[[AGG_RESULT]])
 // CHECK-NEXT: ret void
 
@@ -69,7 +69,7 @@ Strong testStrong(void) {
   return a;
 }
 
-// CHECK: define void @testWeak(%[[STRUCT_WEAK]]* noalias sret %[[AGG_RESULT:.*]])
+// CHECK: define void @testWeak(%[[STRUCT_WEAK]]* noalias sret align 8 %[[AGG_RESULT:.*]])
 // CHECK: %[[NRVO:.*]] = alloca i1, align 1
 // CHECK: %[[V0:.*]] = bitcast %[[STRUCT_WEAK]]* %[[AGG_RESULT]] to i8**
 // CHECK: call void @__default_constructor_8_w0(i8** %[[V0]])
@@ -105,7 +105,7 @@ Weak testWeak2(int c) {
     return b;
 }
 
-// CHECK: define internal void @"\01-[C1 foo1]"(%[[STRUCT_WEAK]]* noalias sret %[[AGG_RESULT:.*]], %{{.*}}* %{{.*}}, i8* %{{.*}})
+// CHECK: define internal void @"\01-[C1 foo1]"(%[[STRUCT_WEAK]]* noalias sret align 8 %[[AGG_RESULT:.*]], %{{.*}}* %{{.*}}, i8* %{{.*}})
 // CHECK: %[[NRVO:.*]] = alloca i1, align 1
 // CHECK: %[[V0:.*]] = bitcast %[[STRUCT_WEAK]]* %[[AGG_RESULT]] to i8**
 // CHECK: call void @__default_constructor_8_w0(i8** %[[V0]])

diff  --git a/clang/test/CodeGenObjC/stret-1.m b/clang/test/CodeGenObjC/stret-1.m
index 1122c28a468b..f25c40438e59 100644
--- a/clang/test/CodeGenObjC/stret-1.m
+++ b/clang/test/CodeGenObjC/stret-1.m
@@ -14,19 +14,19 @@ int main(int argc, const char **argv)
 {
     struct stret s;
     s = [(id)(argc&~255) method];
-    // CHECK: call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (%struct.stret*, i8*, i8*)*)(%struct.stret* sret [[T0:%[^,]+]]
+    // CHECK: call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (%struct.stret*, i8*, i8*)*)(%struct.stret* sret align 4 [[T0:%[^,]+]]
     // CHECK: [[T0P:%.*]] = bitcast %struct.stret* [[T0]] to i8*
     // CHECK: call void @llvm.memset.p0i8.i64(i8* align 4 [[T0P]], i8 0, i64 400, i1 false)
 
     s = [Test method];
-    // CHECK: call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (%struct.stret*, i8*, i8*)*)(%struct.stret* sret [[T1:%[^,]+]]
+    // CHECK: call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (%struct.stret*, i8*, i8*)*)(%struct.stret* sret align 4 [[T1:%[^,]+]]
     // CHECK-NOT: call void @llvm.memset.p0i8.i64(
 
     [(id)(argc&~255) method];
-    // CHECK: call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (%struct.stret*, i8*, i8*)*)(%struct.stret* sret [[T1:%[^,]+]]
+    // CHECK: call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (%struct.stret*, i8*, i8*)*)(%struct.stret* sret align 4 [[T1:%[^,]+]]
     // CHECK-NOT: call void @llvm.memset.p0i8.i64(
 
     [Test method];
-    // CHECK: call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (%struct.stret*, i8*, i8*)*)(%struct.stret* sret [[T1:%[^,]+]]
+    // CHECK: call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (%struct.stret*, i8*, i8*)*)(%struct.stret* sret align 4 [[T1:%[^,]+]]
     // CHECK-NOT: call void @llvm.memset.p0i8.i64(
 }

diff  --git a/clang/test/CodeGenObjC/weak-in-c-struct.m b/clang/test/CodeGenObjC/weak-in-c-struct.m
index 001a7ed96dec..90c799298253 100644
--- a/clang/test/CodeGenObjC/weak-in-c-struct.m
+++ b/clang/test/CodeGenObjC/weak-in-c-struct.m
@@ -179,7 +179,7 @@ void test_argument_Weak(Weak *a) {
   calleeWeak(*a);
 }
 
-// COMMON: define void @test_return_Weak(%[[STRUCT_WEAK]]* noalias sret %[[AGG_RESULT:.*]], %[[STRUCT_WEAK]]* %[[A:.*]])
+// COMMON: define void @test_return_Weak(%[[STRUCT_WEAK]]* noalias sret align {{.*}} %[[AGG_RESULT:.*]], %[[STRUCT_WEAK]]* %[[A:.*]])
 // COMMON: %[[A_ADDR:.*]] = alloca %[[STRUCT_WEAK]]*
 // COMMON: store %[[STRUCT_WEAK]]* %[[A]], %[[STRUCT_WEAK]]** %[[A_ADDR]]
 // COMMON: %[[V0:.*]] = load %[[STRUCT_WEAK]]*, %[[STRUCT_WEAK]]** %[[A_ADDR]]

diff  --git a/clang/test/CodeGenObjCXX/objc-struct-cxx-abi.mm b/clang/test/CodeGenObjCXX/objc-struct-cxx-abi.mm
index dd9b88b0234d..8bb694705fef 100644
--- a/clang/test/CodeGenObjCXX/objc-struct-cxx-abi.mm
+++ b/clang/test/CodeGenObjCXX/objc-struct-cxx-abi.mm
@@ -90,7 +90,7 @@ void testCallStrongWeak(StrongWeak *a) {
   testParamStrongWeak(*a);
 }
 
-// CHECK: define void @_Z20testReturnStrongWeakP10StrongWeak(%[[STRUCT_STRONGWEAK:.*]]* noalias sret %[[AGG_RESULT:.*]], %[[STRUCT_STRONGWEAK]]* %[[A:.*]])
+// CHECK: define void @_Z20testReturnStrongWeakP10StrongWeak(%[[STRUCT_STRONGWEAK:.*]]* noalias sret align 8 %[[AGG_RESULT:.*]], %[[STRUCT_STRONGWEAK]]* %[[A:.*]])
 // CHECK: %[[A_ADDR:.*]] = alloca %[[STRUCT_STRONGWEAK]]*, align 8
 // CHECK: store %[[STRUCT_STRONGWEAK]]* %[[A]], %[[STRUCT_STRONGWEAK]]** %[[A_ADDR]], align 8
 // CHECK: %[[V0:.*]] = load %[[STRUCT_STRONGWEAK]]*, %[[STRUCT_STRONGWEAK]]** %[[A_ADDR]], align 8

diff  --git a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
index cdbf28bbcad8..35cc54c50d6f 100644
--- a/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
+++ b/clang/test/CodeGenOpenCL/addr-space-struct-arg.cl
@@ -43,7 +43,7 @@ struct LargeStructTwoMember {
 struct LargeStructOneMember g_s;
 #endif
 
-// X86-LABEL: define void @foo(%struct.Mat4X4* noalias sret %agg.result, %struct.Mat3X3* byval(%struct.Mat3X3) align 4 %in)
+// X86-LABEL: define void @foo(%struct.Mat4X4* noalias sret align 4 %agg.result, %struct.Mat3X3* byval(%struct.Mat3X3) align 4 %in)
 // AMDGCN-LABEL: define %struct.Mat4X4 @foo([9 x i32] %in.coerce)
 Mat4X4 __attribute__((noinline)) foo(Mat3X3 in) {
   Mat4X4 out;
@@ -63,8 +63,8 @@ kernel void ker(global Mat3X3 *in, global Mat4X4 *out) {
   out[0] = foo(in[1]);
 }
 
-// X86-LABEL: define void @foo_large(%struct.Mat64X64* noalias sret %agg.result, %struct.Mat32X32* byval(%struct.Mat32X32) align 4 %in)
-// AMDGCN-LABEL: define void @foo_large(%struct.Mat64X64 addrspace(5)* noalias sret %agg.result, %struct.Mat32X32 addrspace(5)* byval(%struct.Mat32X32) align 4 %in)
+// X86-LABEL: define void @foo_large(%struct.Mat64X64* noalias sret align 4 %agg.result, %struct.Mat32X32* byval(%struct.Mat32X32) align 4 %in)
+// AMDGCN-LABEL: define void @foo_large(%struct.Mat64X64 addrspace(5)* noalias sret align 4 %agg.result, %struct.Mat32X32 addrspace(5)* byval(%struct.Mat32X32) align 4 %in)
 Mat64X64 __attribute__((noinline)) foo_large(Mat32X32 in) {
   Mat64X64 out;
   return out;

diff  --git a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
index 0a7f289cb2f7..fd46d3cce22e 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-abi-struct-coerce.cl
@@ -404,14 +404,14 @@ struct_arr16 func_ret_struct_arr16()
   return s;
 }
 
-// CHECK: define void @func_ret_struct_arr32(%struct.struct_arr32 addrspace(5)* noalias nocapture sret %agg.result)
+// CHECK: define void @func_ret_struct_arr32(%struct.struct_arr32 addrspace(5)* noalias nocapture sret align 4 %agg.result)
 struct_arr32 func_ret_struct_arr32()
 {
   struct_arr32 s = { 0 };
   return s;
 }
 
-// CHECK: define void @func_ret_struct_arr33(%struct.struct_arr33 addrspace(5)* noalias nocapture sret %agg.result)
+// CHECK: define void @func_ret_struct_arr33(%struct.struct_arr33 addrspace(5)* noalias nocapture sret align 4 %agg.result)
 struct_arr33 func_ret_struct_arr33()
 {
   struct_arr33 s = { 0 };
@@ -440,7 +440,7 @@ 
diff erent_size_type_pair func_
diff erent_size_type_pair_ret()
   return s;
 }
 
-// CHECK: define void @func_flexible_array_ret(%struct.flexible_array addrspace(5)* noalias nocapture sret %agg.result)
+// CHECK: define void @func_flexible_array_ret(%struct.flexible_array addrspace(5)* noalias nocapture sret align 4 %agg.result)
 flexible_array func_flexible_array_ret()
 {
   flexible_array s = { 0 };

diff  --git a/clang/test/CodeGenOpenCLCXX/addrspace-of-this.cl b/clang/test/CodeGenOpenCLCXX/addrspace-of-this.cl
index 07e3b0b7314e..16495d38b942 100644
--- a/clang/test/CodeGenOpenCLCXX/addrspace-of-this.cl
+++ b/clang/test/CodeGenOpenCLCXX/addrspace-of-this.cl
@@ -114,7 +114,7 @@ __kernel void test__global() {
 // Test the address space of 'this' when invoking the operator+
 // COMMON: [[C1GEN:%[.a-z0-9]+]] = addrspacecast %class.C* %c1 to %class.C addrspace(4)*
 // COMMON: [[C2GEN:%[.a-z0-9]+]] = addrspacecast %class.C* %c2 to %class.C addrspace(4)*
-// COMMON: call spir_func void @_ZNU3AS41CplERU3AS4KS_(%class.C* sret %c3, %class.C addrspace(4)* [[C1GEN]], %class.C addrspace(4)* dereferenceable(4) [[C2GEN]])
+// COMMON: call spir_func void @_ZNU3AS41CplERU3AS4KS_(%class.C* sret align 4 %c3, %class.C addrspace(4)* [[C1GEN]], %class.C addrspace(4)* dereferenceable(4) [[C2GEN]])
 
 // Test the address space of 'this' when invoking the move constructor
 // COMMON: [[C4GEN:%[.a-z0-9]+]] = addrspacecast %class.C* %c4 to %class.C addrspace(4)*
@@ -134,7 +134,7 @@ __kernel void test__global() {
 
 // Tests address space of inline members
 //COMMON: @_ZNU3AS41C3getEv(%class.C addrspace(4)* %this)
-//COMMON: @_ZNU3AS41CplERU3AS4KS_(%class.C* noalias sret %agg.result, %class.C addrspace(4)* %this
+//COMMON: @_ZNU3AS41CplERU3AS4KS_(%class.C* noalias sret align 4 %agg.result, %class.C addrspace(4)* %this
 #define TEST(AS)             \
   __kernel void test##AS() { \
     AS C c;                  \

diff  --git a/clang/test/Modules/templates.mm b/clang/test/Modules/templates.mm
index 78206a980a8f..9d4e4b9d1617 100644
--- a/clang/test/Modules/templates.mm
+++ b/clang/test/Modules/templates.mm
@@ -125,7 +125,7 @@ void testWithAttributes() {
 
 // Check that returnNonTrivial doesn't return Class0<S0> directly in registers.
 
-// CHECK: declare void @_Z16returnNonTrivialv(%struct.Class0* sret)
+// CHECK: declare void @_Z16returnNonTrivialv(%struct.Class0* sret align 8)
 
 @import template_nontrivial0;
 @import template_nontrivial1;