r361372 - [ARM][AArch64] Fix incorrect handling of alignment in va_arg code generation

John Brawn via cfe-commits cfe-commits at lists.llvm.org
Wed May 22 04:42:54 PDT 2019


Author: john.brawn
Date: Wed May 22 04:42:54 2019
New Revision: 361372

URL: http://llvm.org/viewvc/llvm-project?rev=361372&view=rev
Log:
[ARM][AArch64] Fix incorrect handling of alignment in va_arg code generation

Overaligned and underaligned types (i.e. types where the alignment has been
increased or decreased using the aligned and packed attributes) weren't being
correctly handled in all cases: the adjusted alignment was being used where
the unadjusted alignment should have been used.

This patch also adjusts getTypeUnadjustedAlign to correctly handle typedefs of
non-aggregate types, which it appears it never had to handle before.

Differential Revision: https://reviews.llvm.org/D62152

Added:
    cfe/trunk/test/CodeGen/arm-varargs.c
Modified:
    cfe/trunk/lib/AST/ASTContext.cpp
    cfe/trunk/lib/CodeGen/TargetInfo.cpp
    cfe/trunk/test/CodeGen/aarch64-varargs.c

Modified: cfe/trunk/lib/AST/ASTContext.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/ASTContext.cpp?rev=361372&r1=361371&r2=361372&view=diff
==============================================================================
--- cfe/trunk/lib/AST/ASTContext.cpp (original)
+++ cfe/trunk/lib/AST/ASTContext.cpp Wed May 22 04:42:54 2019
@@ -2130,7 +2130,7 @@ unsigned ASTContext::getTypeUnadjustedAl
     const ASTRecordLayout &Layout = getASTObjCInterfaceLayout(ObjCI->getDecl());
     UnadjustedAlign = toBits(Layout.getUnadjustedAlignment());
   } else {
-    UnadjustedAlign = getTypeAlign(T);
+    UnadjustedAlign = getTypeAlign(T->getUnqualifiedDesugaredType());
   }
 
   MemoizedUnadjustedAlign[T] = UnadjustedAlign;

Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=361372&r1=361371&r2=361372&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Wed May 22 04:42:54 2019
@@ -5278,13 +5278,13 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(A
   llvm::BasicBlock *OnStackBlock = CGF.createBasicBlock("vaarg.on_stack");
   llvm::BasicBlock *ContBlock = CGF.createBasicBlock("vaarg.end");
 
-  auto TyInfo = getContext().getTypeInfoInChars(Ty);
-  CharUnits TyAlign = TyInfo.second;
+  CharUnits TySize = getContext().getTypeSizeInChars(Ty);
+  CharUnits TyAlign = getContext().getTypeUnadjustedAlignInChars(Ty);
 
   Address reg_offs_p = Address::invalid();
   llvm::Value *reg_offs = nullptr;
   int reg_top_index;
-  int RegSize = IsIndirect ? 8 : TyInfo.first.getQuantity();
+  int RegSize = IsIndirect ? 8 : TySize.getQuantity();
   if (!IsFPR) {
     // 3 is the field number of __gr_offs
     reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p");
@@ -5412,8 +5412,8 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(A
     CharUnits SlotSize = BaseAddr.getAlignment();
     if (CGF.CGM.getDataLayout().isBigEndian() && !IsIndirect &&
         (IsHFA || !isAggregateTypeForABI(Ty)) &&
-        TyInfo.first < SlotSize) {
-      CharUnits Offset = SlotSize - TyInfo.first;
+        TySize < SlotSize) {
+      CharUnits Offset = SlotSize - TySize;
       BaseAddr = CGF.Builder.CreateConstInBoundsByteGEP(BaseAddr, Offset);
     }
 
@@ -5455,7 +5455,7 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(A
   if (IsIndirect)
     StackSize = StackSlotSize;
   else
-    StackSize = TyInfo.first.alignTo(StackSlotSize);
+    StackSize = TySize.alignTo(StackSlotSize);
 
   llvm::Value *StackSizeC = CGF.Builder.getSize(StackSize);
   llvm::Value *NewStack =
@@ -5465,8 +5465,8 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(A
   CGF.Builder.CreateStore(NewStack, stack_p);
 
   if (CGF.CGM.getDataLayout().isBigEndian() && !isAggregateTypeForABI(Ty) &&
-      TyInfo.first < StackSlotSize) {
-    CharUnits Offset = StackSlotSize - TyInfo.first;
+      TySize < StackSlotSize) {
+    CharUnits Offset = StackSlotSize - TySize;
     OnStackAddr = CGF.Builder.CreateConstInBoundsByteGEP(OnStackAddr, Offset);
   }
 
@@ -5484,7 +5484,7 @@ Address AArch64ABIInfo::EmitAAPCSVAArg(A
 
   if (IsIndirect)
     return Address(CGF.Builder.CreateLoad(ResAddr, "vaarg.addr"),
-                   TyInfo.second);
+                   TyAlign);
 
   return ResAddr;
 }
@@ -6210,19 +6210,19 @@ Address ARMABIInfo::EmitVAArg(CodeGenFun
     return Addr;
   }
 
-  auto TyInfo = getContext().getTypeInfoInChars(Ty);
-  CharUnits TyAlignForABI = TyInfo.second;
+  CharUnits TySize = getContext().getTypeSizeInChars(Ty);
+  CharUnits TyAlignForABI = getContext().getTypeUnadjustedAlignInChars(Ty);
 
   // Use indirect if size of the illegal vector is bigger than 16 bytes.
   bool IsIndirect = false;
   const Type *Base = nullptr;
   uint64_t Members = 0;
-  if (TyInfo.first > CharUnits::fromQuantity(16) && isIllegalVectorType(Ty)) {
+  if (TySize > CharUnits::fromQuantity(16) && isIllegalVectorType(Ty)) {
     IsIndirect = true;
 
   // ARMv7k passes structs bigger than 16 bytes indirectly, in space
   // allocated by the caller.
-  } else if (TyInfo.first > CharUnits::fromQuantity(16) &&
+  } else if (TySize > CharUnits::fromQuantity(16) &&
              getABIKind() == ARMABIInfo::AAPCS16_VFP &&
              !isHomogeneousAggregate(Ty, Base, Members)) {
     IsIndirect = true;
@@ -6242,8 +6242,8 @@ Address ARMABIInfo::EmitVAArg(CodeGenFun
   } else {
     TyAlignForABI = CharUnits::fromQuantity(4);
   }
-  TyInfo.second = TyAlignForABI;
 
+  std::pair<CharUnits, CharUnits> TyInfo = { TySize, TyAlignForABI };
   return emitVoidPtrVAArg(CGF, VAListAddr, Ty, IsIndirect, TyInfo,
                           SlotSize, /*AllowHigherAlign*/ true);
 }

Modified: cfe/trunk/test/CodeGen/aarch64-varargs.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-varargs.c?rev=361372&r1=361371&r2=361372&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-varargs.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-varargs.c Wed May 22 04:42:54 2019
@@ -235,6 +235,653 @@ struct hfa simple_hfa(void) {
 // CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.hfa* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
 }
 
+// Over and under alignment on fundamental types has no effect on parameter
+// passing, so the code generated for va_arg should be the same as for
+// fundamental types without any alignment attributes.
+
+typedef int underaligned_int __attribute__((packed,aligned(2)));
+underaligned_int underaligned_int_test() {
+// CHECK-LABEL: define i32 @underaligned_int_test()
+  return va_arg(the_list, underaligned_int);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK-BE: [[REG_ADDR_ALIGNED:%[0-9]+]] = getelementptr inbounds i8, i8* [[REG_ADDR]], i64 4
+// CHECK-BE: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR_ALIGNED]] to i32*
+// CHECK-LE: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to i32*
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK-BE: [[STACK_ALIGNED:%[a-z_0-9]*]] = getelementptr inbounds i8, i8* [[STACK]], i64 4
+// CHECK-BE: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK_ALIGNED]] to i32*
+// CHECK-LE: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to i32*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi i32* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+// CHECK: [[RESULT:%[a-z_0-9]+]] = load i32, i32* [[ADDR]]
+// CHECK: ret i32 [[RESULT]]
+}
+
+typedef int overaligned_int __attribute__((aligned(32)));
+overaligned_int overaligned_int_test() {
+// CHECK-LABEL: define i32 @overaligned_int_test()
+  return va_arg(the_list, overaligned_int);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK-BE: [[REG_ADDR_ALIGNED:%[0-9]+]] = getelementptr inbounds i8, i8* [[REG_ADDR]], i64 4
+// CHECK-BE: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR_ALIGNED]] to i32*
+// CHECK-LE: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to i32*
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK-BE: [[STACK_ALIGNED:%[a-z_0-9]*]] = getelementptr inbounds i8, i8* [[STACK]], i64 4
+// CHECK-BE: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK_ALIGNED]] to i32*
+// CHECK-LE: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to i32*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi i32* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+// CHECK: [[RESULT:%[a-z_0-9]+]] = load i32, i32* [[ADDR]]
+// CHECK: ret i32 [[RESULT]]
+}
+
+typedef long long underaligned_long_long  __attribute__((packed,aligned(2)));
+underaligned_long_long underaligned_long_long_test() {
+// CHECK-LABEL: define i64 @underaligned_long_long_test()
+  return va_arg(the_list, underaligned_long_long);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to i64*
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to i64*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi i64* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+// CHECK: [[RESULT:%[a-z_0-9]+]] = load i64, i64* [[ADDR]]
+// CHECK: ret i64 [[RESULT]]
+}
+
+typedef long long overaligned_long_long  __attribute__((aligned(32)));
+overaligned_long_long overaligned_long_long_test() {
+// CHECK-LABEL: define i64 @overaligned_long_long_test()
+  return va_arg(the_list, overaligned_long_long);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to i64*
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to i64*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi i64* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+// CHECK: [[RESULT:%[a-z_0-9]+]] = load i64, i64* [[ADDR]]
+// CHECK: ret i64 [[RESULT]]
+}
+
+typedef __int128 underaligned_int128  __attribute__((packed,aligned(2)));
+underaligned_int128 underaligned_int128_test() {
+// CHECK-LABEL: define i128 @underaligned_int128_test()
+  return va_arg(the_list, underaligned_int128);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[ALIGN_REGOFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 15
+// CHECK: [[ALIGNED_REGOFFS:%[a-z_0-9]+]] = and i32 [[ALIGN_REGOFFS]], -16
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[ALIGNED_REGOFFS]], 16
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[ALIGNED_REGOFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to i128*
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[STACKINT:%[a-z_0-9]+]] = ptrtoint i8* [[STACK]] to i64
+// CHECK: [[ALIGN_STACK:%[a-z_0-9]+]] = add i64 [[STACKINT]], 15
+// CHECK: [[ALIGNED_STACK_INT:%[a-z_0-9]+]] = and i64 [[ALIGN_STACK]], -16
+// CHECK: [[ALIGNED_STACK_PTR:%[a-z_0-9]+]] = inttoptr i64 [[ALIGNED_STACK_INT]] to i8*
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[ALIGNED_STACK_PTR]], i64 16
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[ALIGNED_STACK_PTR]] to i128*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi i128* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+// CHECK: [[RESULT:%[a-z_0-9]+]] = load i128, i128* [[ADDR]]
+// CHECK: ret i128 [[RESULT]]
+}
+
+typedef __int128 overaligned_int128  __attribute__((aligned(32)));
+overaligned_int128 overaligned_int128_test() {
+// CHECK-LABEL: define i128 @overaligned_int128_test()
+  return va_arg(the_list, overaligned_int128);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[ALIGN_REGOFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 15
+// CHECK: [[ALIGNED_REGOFFS:%[a-z_0-9]+]] = and i32 [[ALIGN_REGOFFS]], -16
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[ALIGNED_REGOFFS]], 16
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[ALIGNED_REGOFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to i128*
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[STACKINT:%[a-z_0-9]+]] = ptrtoint i8* [[STACK]] to i64
+// CHECK: [[ALIGN_STACK:%[a-z_0-9]+]] = add i64 [[STACKINT]], 15
+// CHECK: [[ALIGNED_STACK_INT:%[a-z_0-9]+]] = and i64 [[ALIGN_STACK]], -16
+// CHECK: [[ALIGNED_STACK_PTR:%[a-z_0-9]+]] = inttoptr i64 [[ALIGNED_STACK_INT]] to i8*
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[ALIGNED_STACK_PTR]], i64 16
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[ALIGNED_STACK_PTR]] to i128*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi i128* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+// CHECK: [[RESULT:%[a-z_0-9]+]] = load i128, i128* [[ADDR]]
+// CHECK: ret i128 [[RESULT]]
+}
+
+// The way that attributes applied to a struct change parameter passing is a
+// little strange, in that the alignment due to attributes is used when
+// calculating the size of the struct, but the alignment used for parameter
+// passing is based only on the alignment of the members (which can be
+// affected by attributes). What this means is:
+//  * The only effect of the aligned attribute on a struct is to increase its
+//    size if the alignment is greater than the member alignment.
+//  * The packed attribute is considered as applying to the members, so it will
+//    affect the alignment.
+// Additionally the alignment can't go below 8 or above 16, so it's only
+// __int128 that can be affected by a change in alignment.
+
+typedef struct __attribute__((packed,aligned(2))) {
+  int val;
+} underaligned_int_struct;
+underaligned_int_struct underaligned_int_struct_test() {
+// CHECK-LABEL: define i64 @underaligned_int_struct_test()
+  return va_arg(the_list, underaligned_int_struct);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.underaligned_int_struct*
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.underaligned_int_struct*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.underaligned_int_struct* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+}
+
+typedef struct __attribute__((aligned(16))) {
+  int val;
+} overaligned_int_struct;
+overaligned_int_struct overaligned_int_struct_test() {
+// CHECK-LABEL: define i128 @overaligned_int_struct_test()
+  return va_arg(the_list, overaligned_int_struct);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 16
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.overaligned_int_struct*
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 16
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.overaligned_int_struct*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.overaligned_int_struct* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+}
+
+typedef struct __attribute__((packed,aligned(2))) {
+  long long val;
+} underaligned_long_long_struct;
+underaligned_long_long_struct underaligned_long_long_struct_test() {
+// CHECK-LABEL: define i64 @underaligned_long_long_struct_test()
+  return va_arg(the_list, underaligned_long_long_struct);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.underaligned_long_long_struct*
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.underaligned_long_long_struct*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.underaligned_long_long_struct* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+}
+
+typedef struct __attribute__((aligned(16))) {
+  long long val;
+} overaligned_long_long_struct;
+overaligned_long_long_struct overaligned_long_long_struct_test() {
+// CHECK-LABEL: define i128 @overaligned_long_long_struct_test()
+  return va_arg(the_list, overaligned_long_long_struct);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 16
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.overaligned_long_long_struct*
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 16
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.overaligned_long_long_struct*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.overaligned_long_long_struct* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+}
+
+typedef struct __attribute__((packed,aligned(2))) {
+  __int128 val;
+} underaligned_int128_struct;
+underaligned_int128_struct underaligned_int128_struct_test() {
+// CHECK-LABEL: define [2 x i64] @underaligned_int128_struct_test()
+  return va_arg(the_list, underaligned_int128_struct);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 16
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.underaligned_int128_struct*
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 16
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.underaligned_int128_struct*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.underaligned_int128_struct* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+}
+
+// A struct overaligned to 32 bytes is passed indirectly via a pointer
+typedef struct __attribute__((aligned(32))) {
+  __int128 val;
+} overaligned_int128_struct;
+overaligned_int128_struct overaligned_int128_struct_test() {
+// CHECK-LABEL: define void @overaligned_int128_struct_test(%struct.overaligned_int128_struct* noalias sret %agg.result)
+  return va_arg(the_list, overaligned_int128_struct);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.overaligned_int128_struct**
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.overaligned_int128_struct**
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.overaligned_int128_struct** [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+}
+
+// Overaligning or underaligning a struct member changes both the alignment and
+// the size of the enclosing struct when it is passed as an argument.
+
+typedef struct {
+  int val __attribute__((packed,aligned(2)));
+} underaligned_int_struct_member;
+underaligned_int_struct_member underaligned_int_struct_member_test() {
+// CHECK-LABEL: define i64 @underaligned_int_struct_member_test()
+  return va_arg(the_list, underaligned_int_struct_member);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.underaligned_int_struct_member*
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.underaligned_int_struct_member*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.underaligned_int_struct_member* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+}
+
+typedef struct {
+  int val __attribute__((aligned(16)));
+} overaligned_int_struct_member;
+overaligned_int_struct_member overaligned_int_struct_member_test() {
+// CHECK-LABEL: define i128 @overaligned_int_struct_member_test()
+  return va_arg(the_list, overaligned_int_struct_member);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[ALIGN_REGOFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 15
+// CHECK: [[ALIGNED_REGOFFS:%[a-z_0-9]+]] = and i32 [[ALIGN_REGOFFS]], -16
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[ALIGNED_REGOFFS]], 16
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[ALIGNED_REGOFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.overaligned_int_struct_member*
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[STACKINT:%[a-z_0-9]+]] = ptrtoint i8* [[STACK]] to i64
+// CHECK: [[ALIGN_STACK:%[a-z_0-9]+]] = add i64 [[STACKINT]], 15
+// CHECK: [[ALIGNED_STACK_INT:%[a-z_0-9]+]] = and i64 [[ALIGN_STACK]], -16
+// CHECK: [[ALIGNED_STACK_PTR:%[a-z_0-9]+]] = inttoptr i64 [[ALIGNED_STACK_INT]] to i8*
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[ALIGNED_STACK_PTR]], i64 16
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[ALIGNED_STACK_PTR]] to %struct.overaligned_int_struct_member*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.overaligned_int_struct_member* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+}
+
+typedef struct {
+  long long val __attribute__((packed,aligned(2)));
+} underaligned_long_long_struct_member;
+underaligned_long_long_struct_member underaligned_long_long_struct_member_test() {
+// CHECK-LABEL: define i64 @underaligned_long_long_struct_member_test()
+  return va_arg(the_list, underaligned_long_long_struct_member);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.underaligned_long_long_struct_member*
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.underaligned_long_long_struct_member*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.underaligned_long_long_struct_member* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+}
+
+typedef struct {
+  long long val __attribute__((aligned(16)));
+} overaligned_long_long_struct_member;
+overaligned_long_long_struct_member overaligned_long_long_struct_member_test() {
+// CHECK-LABEL: define i128 @overaligned_long_long_struct_member_test()
+  return va_arg(the_list, overaligned_long_long_struct_member);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[ALIGN_REGOFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 15
+// CHECK: [[ALIGNED_REGOFFS:%[a-z_0-9]+]] = and i32 [[ALIGN_REGOFFS]], -16
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[ALIGNED_REGOFFS]], 16
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[ALIGNED_REGOFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.overaligned_long_long_struct_member*
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[STACKINT:%[a-z_0-9]+]] = ptrtoint i8* [[STACK]] to i64
+// CHECK: [[ALIGN_STACK:%[a-z_0-9]+]] = add i64 [[STACKINT]], 15
+// CHECK: [[ALIGNED_STACK_INT:%[a-z_0-9]+]] = and i64 [[ALIGN_STACK]], -16
+// CHECK: [[ALIGNED_STACK_PTR:%[a-z_0-9]+]] = inttoptr i64 [[ALIGNED_STACK_INT]] to i8*
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[ALIGNED_STACK_PTR]], i64 16
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[ALIGNED_STACK_PTR]] to %struct.overaligned_long_long_struct_member*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.overaligned_long_long_struct_member* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+}
+
+typedef struct {
+  __int128 val __attribute__((packed,aligned(2)));
+} underaligned_int128_struct_member;
+underaligned_int128_struct_member underaligned_int128_struct_member_test() {
+// CHECK-LABEL: define [2 x i64] @underaligned_int128_struct_member_test()
+  return va_arg(the_list, underaligned_int128_struct_member);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 16
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.underaligned_int128_struct_member*
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 16
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.underaligned_int128_struct_member*
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.underaligned_int128_struct_member* [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+}
+
+// With a 32-byte-aligned member the struct is passed indirectly via a pointer
+typedef struct {
+  __int128 val __attribute__((aligned(32)));
+} overaligned_int128_struct_member;
+overaligned_int128_struct_member overaligned_int128_struct_member_test() {
+// CHECK-LABEL: define void @overaligned_int128_struct_member_test(%struct.overaligned_int128_struct_member* noalias sret %agg.result)
+  return va_arg(the_list, overaligned_int128_struct_member);
+// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0
+// CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]]
+
+// CHECK: [[VAARG_MAYBE_REG]]
+// CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8
+// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3)
+// CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0
+// CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]]
+
+// CHECK: [[VAARG_IN_REG]]
+// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1)
+// CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]]
+// CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.overaligned_int128_struct_member**
+// CHECK: br label %[[VAARG_END:[a-z._0-9]+]]
+
+// CHECK: [[VAARG_ON_STACK]]
+// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8
+// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0)
+// CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.overaligned_int128_struct_member**
+// CHECK: br label %[[VAARG_END]]
+
+// CHECK: [[VAARG_END]]
+// CHECK: [[ADDR:%[a-z._0-9]+]] = phi %struct.overaligned_int128_struct_member** [ [[FROMREG_ADDR]], %[[VAARG_IN_REG]] ], [ [[FROMSTACK_ADDR]], %[[VAARG_ON_STACK]] ]
+}
+
 void check_start(int n, ...) {
 // CHECK-LABEL: define void @check_start(i32 %n, ...)
 

Added: cfe/trunk/test/CodeGen/arm-varargs.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-varargs.c?rev=361372&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/arm-varargs.c (added)
+++ cfe/trunk/test/CodeGen/arm-varargs.c Wed May 22 04:42:54 2019
@@ -0,0 +1,322 @@
+// RUN: %clang_cc1 -triple arm-none-eabi -emit-llvm -o - %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-LE %s
+// RUN: %clang_cc1 -triple armeb-none-eabi -emit-llvm -o - %s | FileCheck --check-prefix=CHECK --check-prefix=CHECK-BE %s
+
+#include <stdarg.h>
+
+// Obviously there's more than one way to implement va_arg. This test should at
+// least prevent unintentional regressions caused by refactoring.
+
+va_list the_list;
+
+int simple_int(void) {
+// CHECK-LABEL: define i32 @simple_int
+  return va_arg(the_list, int);
+// CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR]], i32 4
+// CHECK: store i8* [[NEXT]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[ADDR:%[a-z0-9._]+]] = bitcast i8* [[CUR]] to i32*
+// CHECK: [[RESULT:%[a-z0-9._]+]] = load i32, i32* [[ADDR]]
+// CHECK: ret i32 [[RESULT]]
+}
+
+struct bigstruct {
+  int a[10];
+};
+
+struct bigstruct simple_struct(void) {
+// CHECK-LABEL: define void @simple_struct(%struct.bigstruct* noalias sret %agg.result)
+  return va_arg(the_list, struct bigstruct);
+// CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR]], i32 40
+// CHECK: store i8* [[NEXT]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[ADDR:%[a-z0-9._]+]] = bitcast i8* [[CUR]] to %struct.bigstruct*
+// CHECK: [[DEST_ADDR:%[a-z0-9._]+]] = bitcast %struct.bigstruct* %agg.result to i8*
+// CHECK: [[SRC_ADDR:%[a-z0-9._]+]] = bitcast %struct.bigstruct* [[ADDR]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[DEST_ADDR]], i8* align 4 [[SRC_ADDR]], i32 40, i1 false)
+// CHECK: ret void
+}
+
+struct aligned_bigstruct {
+  float a;
+  long double b;
+};
+
+struct aligned_bigstruct simple_aligned_struct(void) {
+// CHECK-LABEL: define void @simple_aligned_struct(%struct.aligned_bigstruct* noalias sret %agg.result)
+  return va_arg(the_list, struct aligned_bigstruct);
+// CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[CUR_INT:%[a-z0-9._]+]] = ptrtoint i8* [[CUR]] to i32
+// CHECK: [[CUR_INT_ADD:%[a-z0-9._]+]] = add i32 [[CUR_INT]], 7
+// CHECK: [[CUR_INT_ALIGNED:%[a-z0-9._]+]] = and i32 [[CUR_INT_ADD]], -8
+// CHECK: [[CUR_ALIGNED:%[a-z0-9._]+]] = inttoptr i32 [[CUR_INT_ALIGNED]] to i8*
+// CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR_ALIGNED]], i32 16
+// CHECK: store i8* [[NEXT]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[ADDR:%[a-z0-9._]+]] = bitcast i8* [[CUR_ALIGNED]] to %struct.aligned_bigstruct*
+// CHECK: [[DEST_ADDR:%[a-z0-9._]+]] = bitcast %struct.aligned_bigstruct* %agg.result to i8*
+// CHECK: [[SRC_ADDR:%[a-z0-9._]+]] = bitcast %struct.aligned_bigstruct* [[ADDR]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[DEST_ADDR]], i8* align 8 [[SRC_ADDR]], i32 16, i1 false)
+// CHECK: ret void
+}
+
+double simple_double(void) {
+// CHECK-LABEL: define double @simple_double
+  return va_arg(the_list, double);
+// CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[CUR_INT:%[a-z0-9._]+]] = ptrtoint i8* [[CUR]] to i32
+// CHECK: [[CUR_INT_ADD:%[a-z0-9._]+]] = add i32 [[CUR_INT]], 7
+// CHECK: [[CUR_INT_ALIGNED:%[a-z0-9._]+]] = and i32 [[CUR_INT_ADD]], -8
+// CHECK: [[CUR_ALIGNED:%[a-z0-9._]+]] = inttoptr i32 [[CUR_INT_ALIGNED]] to i8*
+// CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR_ALIGNED]], i32 8
+// CHECK: store i8* [[NEXT]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[ADDR:%[a-z0-9._]+]] = bitcast i8* [[CUR_ALIGNED]] to double*
+// CHECK: [[RESULT:%[a-z0-9._]+]] = load double, double* [[ADDR]]
+// CHECK: ret double [[RESULT]]
+}
+
+struct hfa {
+  float a, b;
+};
+
+struct hfa simple_hfa(void) {
+// CHECK-LABEL: define void @simple_hfa(%struct.hfa* noalias sret %agg.result)
+  return va_arg(the_list, struct hfa);
+// CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR]], i32 8
+// CHECK: store i8* [[NEXT]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[ADDR:%[a-z0-9._]+]] = bitcast i8* [[CUR]] to %struct.hfa*
+// CHECK: [[DEST_ADDR:%[a-z0-9._]+]] = bitcast %struct.hfa* %agg.result to i8*
+// CHECK: [[SRC_ADDR:%[a-z0-9._]+]] = bitcast %struct.hfa* [[ADDR]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[DEST_ADDR]], i8* align 4 [[SRC_ADDR]], i32 8, i1 false)
+// CHECK: ret void
+}
+
+// Over- and under-alignment of fundamental types has no effect on parameter
+// passing, so the code generated for va_arg should be the same as for
+// fundamental types that carry no alignment attribute.
+
+typedef int underaligned_int __attribute__((packed,aligned(2)));
+underaligned_int underaligned_int_test() {
+// CHECK-LABEL: define i32 @underaligned_int_test()
+  return va_arg(the_list, underaligned_int);
+// CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR]], i32 4
+// CHECK: store i8* [[NEXT]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[ADDR:%[a-z0-9._]+]] = bitcast i8* [[CUR]] to i32*
+// CHECK: [[RESULT:%[a-z0-9._]+]] = load i32, i32* [[ADDR]]
+// CHECK: ret i32 [[RESULT]]
+}
+
+typedef int overaligned_int __attribute__((aligned(32)));
+overaligned_int overaligned_int_test() {
+// CHECK-LABEL: define i32 @overaligned_int_test()
+  return va_arg(the_list, overaligned_int);
+// CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR]], i32 4
+// CHECK: store i8* [[NEXT]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[ADDR:%[a-z0-9._]+]] = bitcast i8* [[CUR]] to i32*
+// CHECK: [[RESULT:%[a-z0-9._]+]] = load i32, i32* [[ADDR]]
+// CHECK: ret i32 [[RESULT]]
+}
+
+typedef long long underaligned_long_long  __attribute__((packed,aligned(2)));
+underaligned_long_long underaligned_long_long_test() {
+// CHECK-LABEL: define i64 @underaligned_long_long_test()
+  return va_arg(the_list, underaligned_long_long);
+// CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[CUR_INT:%[a-z0-9._]+]] = ptrtoint i8* [[CUR]] to i32
+// CHECK: [[CUR_INT_ADD:%[a-z0-9._]+]] = add i32 [[CUR_INT]], 7
+// CHECK: [[CUR_INT_ALIGNED:%[a-z0-9._]+]] = and i32 [[CUR_INT_ADD]], -8
+// CHECK: [[CUR_ALIGNED:%[a-z0-9._]+]] = inttoptr i32 [[CUR_INT_ALIGNED]] to i8*
+// CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR_ALIGNED]], i32 8
+// CHECK: store i8* [[NEXT]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[ADDR:%[a-z0-9._]+]] = bitcast i8* [[CUR_ALIGNED]] to i64*
+// CHECK: [[RESULT:%[a-z0-9._]+]] = load i64, i64* [[ADDR]]
+// CHECK: ret i64 [[RESULT]]
+}
+
+typedef long long overaligned_long_long  __attribute__((aligned(32)));
+overaligned_long_long overaligned_long_long_test() {
+// CHECK-LABEL: define i64 @overaligned_long_long_test()
+  return va_arg(the_list, overaligned_long_long);
+// CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[CUR_INT:%[a-z0-9._]+]] = ptrtoint i8* [[CUR]] to i32
+// CHECK: [[CUR_INT_ADD:%[a-z0-9._]+]] = add i32 [[CUR_INT]], 7
+// CHECK: [[CUR_INT_ALIGNED:%[a-z0-9._]+]] = and i32 [[CUR_INT_ADD]], -8
+// CHECK: [[CUR_ALIGNED:%[a-z0-9._]+]] = inttoptr i32 [[CUR_INT_ALIGNED]] to i8*
+// CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR_ALIGNED]], i32 8
+// CHECK: store i8* [[NEXT]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[ADDR:%[a-z0-9._]+]] = bitcast i8* [[CUR_ALIGNED]] to i64*
+// CHECK: [[RESULT:%[a-z0-9._]+]] = load i64, i64* [[ADDR]]
+// CHECK: ret i64 [[RESULT]]
+}
+
+// The way that attributes applied to a struct change parameter passing is a
+// little strange, in that the alignment due to attributes is used when
+// calculating the size of the struct, but the alignment is based only on the
+// alignment of the members (which can be affected by attributes). What this
+// means is:
+//  * The only effect of the aligned attribute on a struct is to increase its
+//    size if the alignment is greater than the member alignment.
+//  * The packed attribute is considered as applying to the members, so it will
+//    affect the alignment.
+// Additionally the alignment can't go below 4 or above 8, so it's only
+// long long and double that can be affected by a change in alignment.
+
+typedef struct __attribute__((packed,aligned(2))) {
+  int val;
+} underaligned_int_struct;
+underaligned_int_struct underaligned_int_struct_test() {
+// CHECK-LABEL: define i32 @underaligned_int_struct_test()
+  return va_arg(the_list, underaligned_int_struct);
+// CHECK: [[RETVAL:%[a-z0-9._]+]] = alloca %struct.underaligned_int_struct, align 2
+// CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR]], i32 4
+// CHECK: store i8* [[NEXT]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[ADDR:%[a-z0-9._]+]] = bitcast i8* [[CUR]] to %struct.underaligned_int_struct*
+// CHECK: [[DEST_ADDR:%[a-z0-9._]+]] = bitcast %struct.underaligned_int_struct* [[RETVAL]] to i8*
+// CHECK: [[SRC_ADDR:%[a-z0-9._]+]] = bitcast %struct.underaligned_int_struct* [[ADDR]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[DEST_ADDR]], i8* align 4 [[SRC_ADDR]], i32 4, i1 false)
+// CHECK: [[COERCE:%[a-z0-9._]+]] = getelementptr inbounds %struct.underaligned_int_struct, %struct.underaligned_int_struct* [[RETVAL]], i32 0, i32 0
+// CHECK: [[RESULT:%[a-z0-9._]+]] = load i32, i32* [[COERCE]]
+// CHECK: ret i32 [[RESULT]]
+}
+
+typedef struct __attribute__((aligned(16))) {
+  int val;
+} overaligned_int_struct;
+overaligned_int_struct overaligned_int_struct_test() {
+// CHECK-LABEL: define void @overaligned_int_struct_test(%struct.overaligned_int_struct* noalias sret %agg.result)
+  return va_arg(the_list, overaligned_int_struct);
+// CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR]], i32 16
+// CHECK: store i8* [[NEXT]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[ADDR:%[a-z0-9._]+]] = bitcast i8* [[CUR]] to %struct.overaligned_int_struct*
+// CHECK: [[DEST_ADDR:%[a-z0-9._]+]] = bitcast %struct.overaligned_int_struct* %agg.result to i8*
+// CHECK: [[SRC_ADDR:%[a-z0-9._]+]] = bitcast %struct.overaligned_int_struct* [[ADDR]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[DEST_ADDR]], i8* align 4 [[SRC_ADDR]], i32 16, i1 false)
+// CHECK: ret void
+}
+
+typedef struct __attribute__((packed,aligned(2))) {
+  long long val;
+} underaligned_long_long_struct;
+underaligned_long_long_struct underaligned_long_long_struct_test() {
+// CHECK-LABEL: define void @underaligned_long_long_struct_test(%struct.underaligned_long_long_struct* noalias sret %agg.result)
+  return va_arg(the_list, underaligned_long_long_struct);
+// CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR]], i32 8
+// CHECK: store i8* [[NEXT]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[ADDR:%[a-z0-9._]+]] = bitcast i8* [[CUR]] to %struct.underaligned_long_long_struct*
+// CHECK: [[DEST_ADDR:%[a-z0-9._]+]] = bitcast %struct.underaligned_long_long_struct* %agg.result to i8*
+// CHECK: [[SRC_ADDR:%[a-z0-9._]+]] = bitcast %struct.underaligned_long_long_struct* [[ADDR]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[DEST_ADDR]], i8* align 4 [[SRC_ADDR]], i32 8, i1 false)
+// CHECK: ret void
+}
+
+typedef struct __attribute__((aligned(16))) {
+  long long val;
+} overaligned_long_long_struct;
+overaligned_long_long_struct overaligned_long_long_struct_test() {
+// CHECK-LABEL: define void @overaligned_long_long_struct_test(%struct.overaligned_long_long_struct* noalias sret %agg.result)
+  return va_arg(the_list, overaligned_long_long_struct);
+// CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[CUR_INT:%[a-z0-9._]+]] = ptrtoint i8* [[CUR]] to i32
+// CHECK: [[CUR_INT_ADD:%[a-z0-9._]+]] = add i32 [[CUR_INT]], 7
+// CHECK: [[CUR_INT_ALIGNED:%[a-z0-9._]+]] = and i32 [[CUR_INT_ADD]], -8
+// CHECK: [[CUR_ALIGNED:%[a-z0-9._]+]] = inttoptr i32 [[CUR_INT_ALIGNED]] to i8*
+// CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR_ALIGNED]], i32 16
+// CHECK: store i8* [[NEXT]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[ADDR:%[a-z0-9._]+]] = bitcast i8* [[CUR_ALIGNED]] to %struct.overaligned_long_long_struct*
+// CHECK: [[DEST_ADDR:%[a-z0-9._]+]] = bitcast %struct.overaligned_long_long_struct* %agg.result to i8*
+// CHECK: [[SRC_ADDR:%[a-z0-9._]+]] = bitcast %struct.overaligned_long_long_struct* [[ADDR]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[DEST_ADDR]], i8* align 8 [[SRC_ADDR]], i32 16, i1 false)
+// CHECK: ret void
+}
+
+// Overaligning or underaligning a struct member changes both the alignment and
+// the size of the enclosing struct when it is passed as an argument.
+
+typedef struct {  // struct with no attribute of its own; only the member is adjusted
+  int val __attribute__((packed,aligned(2)));  // int member marked packed and aligned(2)
+} underaligned_int_struct_member;
+underaligned_int_struct_member underaligned_int_struct_member_test() {  // va_arg of a struct with a packed, aligned(2) int member; the expected signature returns it directly as i32
+// CHECK-LABEL: define i32 @underaligned_int_struct_member_test()
+  return va_arg(the_list, underaligned_int_struct_member);  // expected IR below: no cursor rounding, cursor advanced by 4, 4-byte memcpy into a local temporary (dest align 2, src align 4), then an i32 load that is returned
+// CHECK: [[RETVAL:%[a-z0-9._]+]] = alloca %struct.underaligned_int_struct_member, align 2
+// CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR]], i32 4
+// CHECK: store i8* [[NEXT]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[ADDR:%[a-z0-9._]+]] = bitcast i8* [[CUR]] to %struct.underaligned_int_struct_member*
+// CHECK: [[DEST_ADDR:%[a-z0-9._]+]] = bitcast %struct.underaligned_int_struct_member* [[RETVAL]] to i8*
+// CHECK: [[SRC_ADDR:%[a-z0-9._]+]] = bitcast %struct.underaligned_int_struct_member* [[ADDR]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[DEST_ADDR]], i8* align 4 [[SRC_ADDR]], i32 4, i1 false)
+// CHECK: [[COERCE:%[a-z0-9._]+]] = getelementptr inbounds %struct.underaligned_int_struct_member, %struct.underaligned_int_struct_member* [[RETVAL]], i32 0, i32 0
+// CHECK: [[RESULT:%[a-z0-9._]+]] = load i32, i32* [[COERCE]]
+// CHECK: ret i32 [[RESULT]]
+}
+
+typedef struct {  // struct with no attribute of its own; only the member is adjusted
+  int val __attribute__((aligned(16)));  // int member marked aligned(16)
+} overaligned_int_struct_member;
+overaligned_int_struct_member overaligned_int_struct_member_test() {  // va_arg of a struct with an aligned(16) int member; result comes back through an sret pointer
+// CHECK-LABEL: define void @overaligned_int_struct_member_test(%struct.overaligned_int_struct_member* noalias sret %agg.result)
+  return va_arg(the_list, overaligned_int_struct_member);  // expected IR below: cursor rounded up to a multiple of 8 (add 7, mask -8), advanced by 16, then a 16-byte memcpy (dest align 16, src align 8)
+// CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[CUR_INT:%[a-z0-9._]+]] = ptrtoint i8* [[CUR]] to i32
+// CHECK: [[CUR_INT_ADD:%[a-z0-9._]+]] = add i32 [[CUR_INT]], 7
+// CHECK: [[CUR_INT_ALIGNED:%[a-z0-9._]+]] = and i32 [[CUR_INT_ADD]], -8
+// CHECK: [[CUR_ALIGNED:%[a-z0-9._]+]] = inttoptr i32 [[CUR_INT_ALIGNED]] to i8*
+// CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR_ALIGNED]], i32 16
+// CHECK: store i8* [[NEXT]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[ADDR:%[a-z0-9._]+]] = bitcast i8* [[CUR_ALIGNED]] to %struct.overaligned_int_struct_member*
+// CHECK: [[DEST_ADDR:%[a-z0-9._]+]] = bitcast %struct.overaligned_int_struct_member* %agg.result to i8*
+// CHECK: [[SRC_ADDR:%[a-z0-9._]+]] = bitcast %struct.overaligned_int_struct_member* [[ADDR]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[DEST_ADDR]], i8* align 8 [[SRC_ADDR]], i32 16, i1 false)
+// CHECK: ret void
+}
+
+typedef struct {  // struct with no attribute of its own; only the member is adjusted
+  long long val __attribute__((packed,aligned(2)));  // long long member marked packed and aligned(2)
+} underaligned_long_long_struct_member;
+underaligned_long_long_struct_member underaligned_long_long_struct_member_test() {  // va_arg of a struct with a packed, aligned(2) long long member; result comes back through an sret pointer
+// CHECK-LABEL: define void @underaligned_long_long_struct_member_test(%struct.underaligned_long_long_struct_member* noalias sret %agg.result)
+  return va_arg(the_list, underaligned_long_long_struct_member);  // expected IR below: no cursor rounding, cursor advanced by 8, then an 8-byte memcpy (dest align 2, src align 4)
+// CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR]], i32 8
+// CHECK: store i8* [[NEXT]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[ADDR:%[a-z0-9._]+]] = bitcast i8* [[CUR]] to %struct.underaligned_long_long_struct_member*
+// CHECK: [[DEST_ADDR:%[a-z0-9._]+]] = bitcast %struct.underaligned_long_long_struct_member* %agg.result to i8*
+// CHECK: [[SRC_ADDR:%[a-z0-9._]+]] = bitcast %struct.underaligned_long_long_struct_member* [[ADDR]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 [[DEST_ADDR]], i8* align 4 [[SRC_ADDR]], i32 8, i1 false)
+// CHECK: ret void
+}
+
+typedef struct {  // struct with no attribute of its own; only the member is adjusted
+  long long val __attribute__((aligned(16)));  // long long member marked aligned(16)
+} overaligned_long_long_struct_member;
+overaligned_long_long_struct_member overaligned_long_long_struct_member_test() {  // va_arg of a struct with an aligned(16) long long member; result comes back through an sret pointer
+// CHECK-LABEL: define void @overaligned_long_long_struct_member_test(%struct.overaligned_long_long_struct_member* noalias sret %agg.result)
+  return va_arg(the_list, overaligned_long_long_struct_member);  // expected IR below: cursor rounded up to a multiple of 8 (add 7, mask -8), advanced by 16, then a 16-byte memcpy (dest align 16, src align 8)
+// CHECK: [[CUR:%[a-z0-9._]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[CUR_INT:%[a-z0-9._]+]] = ptrtoint i8* [[CUR]] to i32
+// CHECK: [[CUR_INT_ADD:%[a-z0-9._]+]] = add i32 [[CUR_INT]], 7
+// CHECK: [[CUR_INT_ALIGNED:%[a-z0-9._]+]] = and i32 [[CUR_INT_ADD]], -8
+// CHECK: [[CUR_ALIGNED:%[a-z0-9._]+]] = inttoptr i32 [[CUR_INT_ALIGNED]] to i8*
+// CHECK: [[NEXT:%[a-z0-9._]+]] = getelementptr inbounds i8, i8* [[CUR_ALIGNED]], i32 16
+// CHECK: store i8* [[NEXT]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0), align 4
+// CHECK: [[ADDR:%[a-z0-9._]+]] = bitcast i8* [[CUR_ALIGNED]] to %struct.overaligned_long_long_struct_member*
+// CHECK: [[DEST_ADDR:%[a-z0-9._]+]] = bitcast %struct.overaligned_long_long_struct_member* %agg.result to i8*
+// CHECK: [[SRC_ADDR:%[a-z0-9._]+]] = bitcast %struct.overaligned_long_long_struct_member* [[ADDR]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[DEST_ADDR]], i8* align 8 [[SRC_ADDR]], i32 16, i1 false)
+// CHECK: ret void
+}
+
+void check_start(int n, ...) {  // variadic function used to pin the IR emitted for va_start
+// CHECK-LABEL: define void @check_start(i32 %n, ...)
+
+  va_list the_list;  // function-local list: the expected IR allocates it with alloca, unlike the @the_list global the other tests reference
+  va_start(the_list, n);  // expected IR below: alloca of %struct.__va_list, bitcast to i8*, then a call to @llvm.va_start
+// CHECK: [[THE_LIST:%[a-z0-9._]+]] = alloca %struct.__va_list
+// CHECK: [[VOIDP_THE_LIST:%[a-z0-9._]+]] = bitcast %struct.__va_list* [[THE_LIST]] to i8*
+// CHECK: call void @llvm.va_start(i8* [[VOIDP_THE_LIST]])
+}




More information about the cfe-commits mailing list