r222904 - AArch64: simplify PCS mapping.

Author: tnorthover
Date: Thu Nov 27 15:02:49 2014
New Revision: 222904

URL: http://llvm.org/viewvc/llvm-project?rev=222904&view=rev
Log:
AArch64: simplify PCS mapping.

Now that LLVM can count the registers needed to implement AAPCS rules, we don't
need to duplicate that logic here. This means we can drop the explicit padding
and also use more natural types in many cases (e.g. "struct { float arr[3]; }"
used to end up as "[2 x double]" to avoid holes on the stack).
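
As a concrete illustration (lifted from the arm64-aapcs-arguments.c test
updated below), an HFA of four floats is now coerced to a plain float array
rather than expanded member-by-member with explicit padding:

    typedef struct { float arr[4]; } HFA;
    void test3(HFA s0_s3, float s4, HFA sp, HFA sp16) {}

    ; IR signature before this patch:
    ;   void @test3(float %s0_s3.0, float %s0_s3.1, float %s0_s3.2,
    ;               float %s0_s3.3, float %s4, [3 x float],
    ;               [2 x double] %sp.coerce, [2 x double] %sp16.coerce)
    ; and after:
    ;   void @test3([4 x float] %s0_s3.coerce, float %s4,
    ;               [4 x float] %sp.coerce, [4 x float] %sp16.coerce)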

The one wrinkle is that AAPCS va_arg was also using the register-counting
machinery, but the local replacement isn't too bad.
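
Roughly, the new EmitAAPCSVAArg (full version in the TargetInfo.cpp hunk
below) recovers the register class and count from the coerced type itself,
instead of threading counters through classifyArgumentType. A condensed
sketch, with the indirect case elided:

    ABIArgInfo AI = classifyArgumentType(Ty);
    llvm::Type *BaseTy = AI.getCoerceToType() ? AI.getCoerceToType()
                                              : CGF.ConvertType(Ty);
    unsigned NumRegs = 1;
    if (llvm::ArrayType *ArrTy = llvm::dyn_cast<llvm::ArrayType>(BaseTy)) {
      BaseTy = ArrTy->getElementType(); // HFA/HVA: one register per member
      NumRegs = ArrTy->getNumElements();
    }
    // FP and vector arguments live in the 16-byte __vr_* va_list slots;
    // everything else uses the 8-byte __gr_* slots.
    bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy();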

Modified:
    cfe/trunk/lib/CodeGen/TargetInfo.cpp
    cfe/trunk/test/CodeGen/arm-aapcs-vfp.c
    cfe/trunk/test/CodeGen/arm-homogenous.c
    cfe/trunk/test/CodeGen/arm64-aapcs-arguments.c
    cfe/trunk/test/CodeGen/arm64-arguments.c
    cfe/trunk/test/CodeGenCXX/homogeneous-aggregates.cpp

Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=222904&r1=222903&r2=222904&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Thu Nov 27 15:02:49 2014
@@ -3813,9 +3813,7 @@ private:
   bool isDarwinPCS() const { return Kind == DarwinPCS; }
 
   ABIArgInfo classifyReturnType(QualType RetTy) const;
-  ABIArgInfo classifyArgumentType(QualType RetTy, unsigned &AllocatedVFP,
-                                  bool &IsHA, unsigned &AllocatedGPR,
-                                  bool &IsSmallAggr, bool IsNamedArg) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy) const;
   bool isHomogeneousAggregateBaseType(QualType Ty) const override;
   bool isHomogeneousAggregateSmallEnough(const Type *Ty,
                                          uint64_t Members) const override;
@@ -3823,68 +3821,11 @@ private:
   bool isIllegalVectorType(QualType Ty) const;
 
   void computeInfo(CGFunctionInfo &FI) const override {
-    // To correctly handle Homogeneous Aggregate, we need to keep track of the
-    // number of SIMD and Floating-point registers allocated so far.
-    // If the argument is an HFA or an HVA and there are sufficient unallocated
-    // SIMD and Floating-point registers, then the argument is allocated to SIMD
-    // and Floating-point Registers (with one register per member of the HFA or
-    // HVA). Otherwise, the NSRN is set to 8.
-    unsigned AllocatedVFP = 0;
-
-    // To correctly handle small aggregates, we need to keep track of the number
-    // of GPRs allocated so far. If the small aggregate can't all fit into
-    // registers, it will be on stack. We don't allow the aggregate to be
-    // partially in registers.
-    unsigned AllocatedGPR = 0;
-
-    // Find the number of named arguments. Variadic arguments get special
-    // treatment with the Darwin ABI.
-    unsigned NumRequiredArgs = FI.getNumRequiredArgs();
-
     if (!getCXXABI().classifyReturnType(FI))
       FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
-    unsigned ArgNo = 0;
-    for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
-         it != ie; ++it, ++ArgNo) {
-      unsigned PreAllocation = AllocatedVFP, PreGPR = AllocatedGPR;
-      bool IsHA = false, IsSmallAggr = false;
-      const unsigned NumVFPs = 8;
-      const unsigned NumGPRs = 8;
-      bool IsNamedArg = ArgNo < NumRequiredArgs;
-      it->info = classifyArgumentType(it->type, AllocatedVFP, IsHA,
-                                      AllocatedGPR, IsSmallAggr, IsNamedArg);
-
-      // Under AAPCS the 64-bit stack slot alignment means we can't pass HAs
-      // as sequences of floats since they'll get "holes" inserted as
-      // padding by the back end.
-      if (IsHA && AllocatedVFP > NumVFPs && !isDarwinPCS() &&
-          getContext().getTypeAlign(it->type) < 64) {
-        uint32_t NumStackSlots = getContext().getTypeSize(it->type);
-        NumStackSlots = llvm::RoundUpToAlignment(NumStackSlots, 64) / 64;
-
-        llvm::Type *CoerceTy = llvm::ArrayType::get(
-            llvm::Type::getDoubleTy(getVMContext()), NumStackSlots);
-        it->info = ABIArgInfo::getDirect(CoerceTy);
-      }
-
-      // If we do not have enough VFP registers for the HA, any VFP registers
-      // that are unallocated are marked as unavailable. To achieve this, we add
-      // padding of (NumVFPs - PreAllocation) floats.
-      if (IsHA && AllocatedVFP > NumVFPs && PreAllocation < NumVFPs) {
-        llvm::Type *PaddingTy = llvm::ArrayType::get(
-            llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocation);
-        it->info.setPaddingType(PaddingTy);
-      }
-
-      // If we do not have enough GPRs for the small aggregate, any GPR regs
-      // that are unallocated are marked as unavailable.
-      if (IsSmallAggr && AllocatedGPR > NumGPRs && PreGPR < NumGPRs) {
-        llvm::Type *PaddingTy = llvm::ArrayType::get(
-            llvm::Type::getInt32Ty(getVMContext()), NumGPRs - PreGPR);
-        it->info =
-            ABIArgInfo::getDirect(it->info.getCoerceToType(), 0, PaddingTy);
-      }
-    }
+
+    for (auto &it : FI.arguments())
+      it.info = classifyArgumentType(it.type);
   }
 
   llvm::Value *EmitDarwinVAArg(llvm::Value *VAListAddr, QualType Ty,
@@ -3915,12 +3856,7 @@ public:
 };
 }
 
-ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
-                                                unsigned &AllocatedVFP,
-                                                bool &IsHA,
-                                                unsigned &AllocatedGPR,
-                                                bool &IsSmallAggr,
-                                                bool IsNamedArg) const {
+ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
   Ty = useFirstFieldIfTransparentUnion(Ty);
 
   // Handle illegal vector types here.
@@ -3928,48 +3864,26 @@ ABIArgInfo AArch64ABIInfo::classifyArgum
     uint64_t Size = getContext().getTypeSize(Ty);
     if (Size <= 32) {
       llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext());
-      AllocatedGPR++;
       return ABIArgInfo::getDirect(ResType);
     }
     if (Size == 64) {
       llvm::Type *ResType =
           llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2);
-      AllocatedVFP++;
       return ABIArgInfo::getDirect(ResType);
     }
     if (Size == 128) {
       llvm::Type *ResType =
           llvm::VectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4);
-      AllocatedVFP++;
       return ABIArgInfo::getDirect(ResType);
     }
-    AllocatedGPR++;
     return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
   }
-  if (Ty->isVectorType())
-    // Size of a legal vector should be either 64 or 128.
-    AllocatedVFP++;
-  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
-    if (BT->getKind() == BuiltinType::Half ||
-        BT->getKind() == BuiltinType::Float ||
-        BT->getKind() == BuiltinType::Double ||
-        BT->getKind() == BuiltinType::LongDouble)
-      AllocatedVFP++;
-  }
 
   if (!isAggregateTypeForABI(Ty)) {
     // Treat an enum type as its underlying type.
     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
       Ty = EnumTy->getDecl()->getIntegerType();
 
-    if (!Ty->isFloatingType() && !Ty->isVectorType()) {
-      unsigned Alignment = getContext().getTypeAlign(Ty);
-      if (!isDarwinPCS() && Alignment > 64)
-        AllocatedGPR = llvm::RoundUpToAlignment(AllocatedGPR, Alignment / 64);
-
-      int RegsNeeded = getContext().getTypeSize(Ty) > 64 ? 2 : 1;
-      AllocatedGPR += RegsNeeded;
-    }
     return (Ty->isPromotableIntegerType() && isDarwinPCS()
                 ? ABIArgInfo::getExtend()
                 : ABIArgInfo::getDirect());
@@ -3978,9 +3892,8 @@ ABIArgInfo AArch64ABIInfo::classifyArgum
   // Structures with either a non-trivial destructor or a non-trivial
   // copy constructor are always indirect.
   if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) {
-    AllocatedGPR++;
     return ABIArgInfo::getIndirect(0, /*ByVal=*/RAA ==
-                                          CGCXXABI::RAA_DirectInMemory);
+                                   CGCXXABI::RAA_DirectInMemory);
   }
 
   // Empty records are always ignored on Darwin, but actually passed in C++ mode
@@ -3989,7 +3902,6 @@ ABIArgInfo AArch64ABIInfo::classifyArgum
     if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS())
       return ABIArgInfo::getIgnore();
 
-    ++AllocatedGPR;
     return ABIArgInfo::getDirect(llvm::Type::getInt8Ty(getVMContext()));
   }
 
@@ -3997,28 +3909,16 @@ ABIArgInfo AArch64ABIInfo::classifyArgum
   const Type *Base = nullptr;
   uint64_t Members = 0;
   if (isHomogeneousAggregate(Ty, Base, Members)) {
-    IsHA = true;
-    if (!IsNamedArg && isDarwinPCS()) {
-      // With the Darwin ABI, variadic arguments are always passed on the stack
-      // and should not be expanded. Treat variadic HFAs as arrays of doubles.
-      uint64_t Size = getContext().getTypeSize(Ty);
-      llvm::Type *BaseTy = llvm::Type::getDoubleTy(getVMContext());
-      return ABIArgInfo::getDirect(llvm::ArrayType::get(BaseTy, Size / 64));
-    }
-    AllocatedVFP += Members;
-    return ABIArgInfo::getExpand();
+    return ABIArgInfo::getDirect(
+        llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
   }
 
   // Aggregates <= 16 bytes are passed directly in registers or on the stack.
   uint64_t Size = getContext().getTypeSize(Ty);
   if (Size <= 128) {
     unsigned Alignment = getContext().getTypeAlign(Ty);
-    if (!isDarwinPCS() && Alignment > 64)
-      AllocatedGPR = llvm::RoundUpToAlignment(AllocatedGPR, Alignment / 64);
-
     Size = 64 * ((Size + 63) / 64); // round up to multiple of 8 bytes
-    AllocatedGPR += Size / 64;
-    IsSmallAggr = true;
+
     // We use a pair of i64 for 16-byte aggregate with 8-byte alignment.
     // For aggregates with 16-byte alignment, we use i128.
     if (Alignment < 128 && Size == 128) {
@@ -4028,7 +3928,6 @@ ABIArgInfo AArch64ABIInfo::classifyArgum
     return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(), Size));
   }
 
-  AllocatedGPR++;
   return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
 }
 
@@ -4104,14 +4003,25 @@ bool AArch64ABIInfo::isHomogeneousAggreg
   return Members <= 4;
 }
 
-llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr, QualType Ty,
-                                          CodeGenFunction &CGF) const {
-  unsigned AllocatedGPR = 0, AllocatedVFP = 0;
-  bool IsHA = false, IsSmallAggr = false;
-  ABIArgInfo AI = classifyArgumentType(Ty, AllocatedVFP, IsHA, AllocatedGPR,
-                                       IsSmallAggr, false /*IsNamedArg*/);
+llvm::Value *AArch64ABIInfo::EmitAAPCSVAArg(llvm::Value *VAListAddr,
+                                            QualType Ty,
+                                            CodeGenFunction &CGF) const {
+  ABIArgInfo AI = classifyArgumentType(Ty);
   bool IsIndirect = AI.isIndirect();
 
+  llvm::Type *BaseTy = CGF.ConvertType(Ty);
+  if (IsIndirect)
+    BaseTy = llvm::PointerType::getUnqual(BaseTy);
+  else if (AI.getCoerceToType())
+    BaseTy = AI.getCoerceToType();
+
+  unsigned NumRegs = 1;
+  if (llvm::ArrayType *ArrTy = dyn_cast<llvm::ArrayType>(BaseTy)) {
+    BaseTy = ArrTy->getElementType();
+    NumRegs = ArrTy->getNumElements();
+  }
+  bool IsFPR = BaseTy->isFloatingPointTy() || BaseTy->isVectorTy();
+
   // The AArch64 va_list type and handling is specified in the Procedure Call
   // Standard, section B.4:
   //
@@ -4131,21 +4041,19 @@ llvm::Value *AArch64ABIInfo::EmitAAPCSVA
 
   llvm::Value *reg_offs_p = nullptr, *reg_offs = nullptr;
   int reg_top_index;
-  int RegSize;
-  if (AllocatedGPR) {
-    assert(!AllocatedVFP && "Arguments never split between int & VFP regs");
+  int RegSize = IsIndirect ? 8 : getContext().getTypeSize(Ty) / 8;
+  if (!IsFPR) {
     // 3 is the field number of __gr_offs
     reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 3, "gr_offs_p");
     reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "gr_offs");
     reg_top_index = 1; // field number for __gr_top
-    RegSize = 8 * AllocatedGPR;
+    RegSize = llvm::RoundUpToAlignment(RegSize, 8);
   } else {
-    assert(!AllocatedGPR && "Argument must go in VFP or int regs");
     // 4 is the field number of __vr_offs.
     reg_offs_p = CGF.Builder.CreateStructGEP(VAListAddr, 4, "vr_offs_p");
     reg_offs = CGF.Builder.CreateLoad(reg_offs_p, "vr_offs");
     reg_top_index = 2; // field number for __vr_top
-    RegSize = 16 * AllocatedVFP;
+    RegSize = 16 * NumRegs;
   }
 
   //=======================================
@@ -4169,7 +4077,7 @@ llvm::Value *AArch64ABIInfo::EmitAAPCSVA
   // Integer arguments may need to correct register alignment (for example a
   // "struct { __int128 a; };" gets passed in x_2N, x_{2N+1}). In this case we
   // align __gr_offs to calculate the potential address.
-  if (AllocatedGPR && !IsIndirect && Ctx.getTypeAlign(Ty) > 64) {
+  if (!IsFPR && !IsIndirect && Ctx.getTypeAlign(Ty) > 64) {
     int Align = Ctx.getTypeAlign(Ty) / 8;
 
     reg_offs = CGF.Builder.CreateAdd(

Modified: cfe/trunk/test/CodeGen/arm-aapcs-vfp.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-aapcs-vfp.c?rev=222904&r1=222903&r2=222904&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/arm-aapcs-vfp.c (original)
+++ cfe/trunk/test/CodeGen/arm-aapcs-vfp.c Thu Nov 27 15:02:49 2014
@@ -29,7 +29,7 @@ struct homogeneous_struct {
   float f4;
 };
 // CHECK: define arm_aapcs_vfpcc %struct.homogeneous_struct @test_struct(%struct.homogeneous_struct %{{.*}})
-// CHECK64: define %struct.homogeneous_struct @test_struct(float %{{.*}}, float %{{.*}}, float %{{.*}}, float %{{.*}})
+// CHECK64: define %struct.homogeneous_struct @test_struct([4 x float] %{{.*}})
 extern struct homogeneous_struct struct_callee(struct homogeneous_struct);
 struct homogeneous_struct test_struct(struct homogeneous_struct arg) {
   return struct_callee(arg);
@@ -44,7 +44,7 @@ struct nested_array {
   double d[4];
 };
 // CHECK: define arm_aapcs_vfpcc void @test_array(%struct.nested_array %{{.*}})
-// CHECK64: define void @test_array(double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}})
+// CHECK64: define void @test_array([4 x double] %{{.*}})
 extern void array_callee(struct nested_array);
 void test_array(struct nested_array arg) {
   array_callee(arg);
@@ -52,7 +52,7 @@ void test_array(struct nested_array arg)
 
 extern void complex_callee(__complex__ double);
 // CHECK: define arm_aapcs_vfpcc void @test_complex({ double, double } %{{.*}})
-// CHECK64: define void @test_complex(double %{{.*}}, double %{{.*}})
+// CHECK64: define void @test_complex([2 x double] %cd.coerce)
 void test_complex(__complex__ double cd) {
   complex_callee(cd);
 }
@@ -98,7 +98,7 @@ void test_hetero(struct heterogeneous_st
 
 // Neon multi-vector types are homogeneous aggregates.
 // CHECK: define arm_aapcs_vfpcc <16 x i8> @f0(%struct.int8x16x4_t %{{.*}})
-// CHECK64: define <16 x i8> @f0(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+// CHECK64: define <16 x i8> @f0([4 x <16 x i8>] %{{.*}})
 int8x16_t f0(int8x16x4_t v4) {
   return vaddq_s8(v4.val[0], v4.val[3]);
 }
@@ -112,7 +112,7 @@ struct neon_struct {
   int16x4_t v4;
 };
 // CHECK: define arm_aapcs_vfpcc void @test_neon(%struct.neon_struct %{{.*}})
-// CHECK64: define void @test_neon(<8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <2 x i32> %{{.*}}, <4 x i16> %{{.*}})
+// CHECK64: define void @test_neon([4 x <8 x i8>] %{{.*}})
 extern void neon_callee(struct neon_struct);
 void test_neon(struct neon_struct arg) {
   neon_callee(arg);

Modified: cfe/trunk/test/CodeGen/arm-homogenous.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-homogenous.c?rev=222904&r1=222903&r2=222904&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/arm-homogenous.c (original)
+++ cfe/trunk/test/CodeGen/arm-homogenous.c Thu Nov 27 15:02:49 2014
@@ -5,7 +5,7 @@
 // RUN:  -ffreestanding -emit-llvm -w -o - %s | FileCheck -check-prefix=CHECK64 %s
 
 // RUN: %clang_cc1 -triple arm64-linux-gnu -ffreestanding -emit-llvm -w -o - %s \
-// RUN:   | FileCheck --check-prefix=CHECK64-AAPCS %s
+// RUN:   | FileCheck --check-prefix=CHECK64 %s
 typedef long long int64_t;
 typedef unsigned int uint32_t;
 
@@ -176,9 +176,7 @@ void test_struct_of_four_doubles(void) {
 // CHECK: test_struct_of_four_doubles
 // CHECK: call arm_aapcs_vfpcc void @takes_struct_of_four_doubles(double {{.*}}, %struct.struct_of_four_doubles {{.*}}, %struct.struct_of_four_doubles {{.*}}, double {{.*}})
 // CHECK64: test_struct_of_four_doubles
-// CHECK64: call void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [3 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}})
-// CHECK64-AAPCS: test_struct_of_four_doubles
-// CHECK64-AAPCS: call void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [3 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}})
+// CHECK64: call void @takes_struct_of_four_doubles(double {{.*}}, [4 x double] {{.*}}, [4 x double] {{.*}}, double {{.*}})
   takes_struct_of_four_doubles(3.0, g_s4d, g_s4d, 4.0);
 }
 
@@ -212,9 +210,7 @@ void test_struct_of_vecs(void) {
 // CHECK: test_struct_of_vecs
 // CHECK: call arm_aapcs_vfpcc void @takes_struct_of_vecs(double {{.*}}, %struct.struct_of_vecs {{.*}}, %struct.struct_of_vecs {{.*}}, double {{.*}})
 // CHECK64: test_struct_of_vecs
-// CHECK64: call void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [3 x float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, double {{.*}})
-// CHECK64-AAPCS: test_struct_of_vecs
-// CHECK64-AAPCS: call void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [3 x float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, double {{.*}})
+// CHECK64: call void @takes_struct_of_vecs(double {{.*}}, [4 x <8 x i8>] {{.*}}, [4 x <8 x i8>] {{.*}}, double {{.*}})
   takes_struct_of_vecs(3.0, g_vec, g_vec, 4.0);
 }
 

Modified: cfe/trunk/test/CodeGen/arm64-aapcs-arguments.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm64-aapcs-arguments.c?rev=222904&r1=222903&r2=222904&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/arm64-aapcs-arguments.c (original)
+++ cfe/trunk/test/CodeGen/arm64-aapcs-arguments.c Thu Nov 27 15:02:49 2014
@@ -17,7 +17,7 @@ void test2(int x0, Small x2_x3, int x4,
 // stack in order to avoid holes. Make sure we get all of them, and not just the
 // first:
 
-// CHECK: void @test3(float %s0_s3.0, float %s0_s3.1, float %s0_s3.2, float %s0_s3.3, float %s4, [3 x float], [2 x double] %sp.coerce, [2 x double] %sp16.coerce)
+// CHECK: void @test3([4 x float] %s0_s3.coerce, float %s4, [4 x float] %sp.coerce, [4 x float] %sp16.coerce)
 typedef struct { float arr[4]; } HFA;
 void test3(HFA s0_s3, float s4, HFA sp, HFA sp16) {
 }
@@ -28,7 +28,7 @@ void test3(HFA s0_s3, float s4, HFA sp,
 // fp128] or something, but leaving them as-is retains more information for
 // users to debug.
 
-//  CHECK: void @test4(<16 x i8> %v0_v2.0, <16 x i8> %v0_v2.1, <16 x i8> %v0_v2.2, <16 x i8> %v3_v5.0, <16 x i8> %v3_v5.1, <16 x i8> %v3_v5.2, [2 x float], <16 x i8> %sp.0, <16 x i8> %sp.1, <16 x i8> %sp.2, double %sp48, <16 x i8> %sp64.0, <16 x i8> %sp64.1, <16 x i8> %sp64.2)
+//  CHECK: void @test4([3 x <16 x i8>] %v0_v2.coerce, [3 x <16 x i8>] %v3_v5.coerce, [3 x <16 x i8>] %sp.coerce, double %sp48, [3 x <16 x i8>] %sp64.coerce)
 typedef __attribute__((neon_vector_type(16))) signed char int8x16_t;
 typedef struct { int8x16_t arr[3]; } BigHFA;
 void test4(BigHFA v0_v2, BigHFA v3_v5, BigHFA sp, double sp48, BigHFA sp64) {
@@ -46,6 +46,6 @@ unsigned char test5(unsigned char a, sig
 __fp16 test_half(__fp16 A) { }
 
 // __fp16 is a base type for homogeneous floating-point aggregates for AArch64 (but not 32-bit ARM).
-// CHECK: define %struct.HFA_half @test_half_hfa(half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
+// CHECK: define %struct.HFA_half @test_half_hfa([4 x half] %{{.*}})
 struct HFA_half { __fp16 a[4]; };
 struct HFA_half test_half_hfa(struct HFA_half A) { }

Modified: cfe/trunk/test/CodeGen/arm64-arguments.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm64-arguments.c?rev=222904&r1=222903&r2=222904&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/arm64-arguments.c (original)
+++ cfe/trunk/test/CodeGen/arm64-arguments.c Thu Nov 27 15:02:49 2014
@@ -123,8 +123,7 @@ void f31(struct s31 s) { }
 
 struct s32 { double x; };
 void f32(struct s32 s) { }
-// Expand Homogeneous Aggregate.
-// CHECK: @f32(double %{{.*}})
+// CHECK: @f32([1 x double] %{{.*}})
 
 // A composite type larger than 16 bytes should be passed indirectly.
 struct s33 { char buf[32*32]; };
@@ -197,7 +196,7 @@ typedef struct s35 s35_with_align;
 
 typedef __attribute__((neon_vector_type(4))) float float32x4_t;
 float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) {
-// CHECK: define <4 x float> @f35(i32 %i, float %s1.0, float %s1.1, float %s1.2, float %s1.3, float %s2.0, float %s2.1, float %s2.2, float %s2.3)
+// CHECK: define <4 x float> @f35(i32 %i, [4 x float] %s1.coerce, [4 x float] %s2.coerce)
 // CHECK: %s1 = alloca %struct.s35, align 16
 // CHECK: %s2 = alloca %struct.s35, align 16
 // CHECK: %[[a:.*]] = bitcast %struct.s35* %s1 to <4 x float>*
@@ -598,24 +597,24 @@ int caller43_stack() {
 __attribute__ ((noinline))
 int f40_split(int i, int i2, int i3, int i4, int i5, int i6, int i7,
               s40_no_align s1, s40_no_align s2) {
-// CHECK: define i32 @f40_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [1 x i32], [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
+// CHECK: define i32 @f40_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [2 x i64] %s1.coerce, [2 x i64] %s2.coerce)
   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s;
 }
 int caller40_split() {
 // CHECK: define i32 @caller40_split()
-// CHECK: call i32 @f40_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [1 x i32] undef, [2 x i64] %{{.*}} [2 x i64] %{{.*}})
+// CHECK: call i32 @f40_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [2 x i64] %{{.*}} [2 x i64] %{{.*}})
   return f40_split(1, 2, 3, 4, 5, 6, 7, g40, g40_2);
 }
 
 __attribute__ ((noinline))
 int f41_split(int i, int i2, int i3, int i4, int i5, int i6, int i7,
               s41_with_align s1, s41_with_align s2) {
-// CHECK: define i32 @f41_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, [1 x i32], i128 %s1.coerce, i128 %s2.coerce)
+// CHECK: define i32 @f41_split(i32 %i, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i128 %s1.coerce, i128 %s2.coerce)
   return s1.i + s2.i + i + i2 + i3 + i4 + i5 + i6 + i7 + s1.s + s2.s;
 }
 int caller41_split() {
 // CHECK: define i32 @caller41_split()
-// CHECK: call i32 @f41_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, [1 x i32] undef, i128 %{{.*}}, i128 %{{.*}})
+// CHECK: call i32 @f41_split(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i128 %{{.*}}, i128 %{{.*}})
   return f41_split(1, 2, 3, 4, 5, 6, 7, g41, g41_2);
 }
 
@@ -642,7 +641,7 @@ float test_hfa(int n, ...) {
 
 float test_hfa_call(struct HFA *a) {
 // CHECK-LABEL: define float @test_hfa_call(%struct.HFA* %a)
-// CHECK: call float (i32, ...)* @test_hfa(i32 1, [2 x double] {{.*}})
+// CHECK: call float (i32, ...)* @test_hfa(i32 1, [4 x float] {{.*}})
   test_hfa(1, *a);
 }
 

Modified: cfe/trunk/test/CodeGenCXX/homogeneous-aggregates.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCXX/homogeneous-aggregates.cpp?rev=222904&r1=222903&r2=222904&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCXX/homogeneous-aggregates.cpp (original)
+++ cfe/trunk/test/CodeGenCXX/homogeneous-aggregates.cpp Thu Nov 27 15:02:49 2014
@@ -46,7 +46,7 @@ D1 CC func_D1(D1 x) { return x; }
 
 // PPC: define [3 x double] @_Z7func_D22D2([3 x double] %x.coerce)
 // ARM32: define arm_aapcs_vfpcc %struct.D2 @_Z7func_D22D2(%struct.D2 %x.coerce)
-// ARM64: define %struct.D2 @_Z7func_D22D2(double %x.0, double %x.1, double %x.2)
+// ARM64: define %struct.D2 @_Z7func_D22D2([3 x double] %x.coerce)
 // X64: define x86_vectorcallcc %struct.D2 @"\01_Z7func_D22D2@@24"(double %x.0, double %x.1, double %x.2)
 D2 CC func_D2(D2 x) { return x; }
 
@@ -57,7 +57,7 @@ D3 CC func_D3(D3 x) { return x; }
 
 // PPC: define [4 x double] @_Z7func_D42D4([4 x double] %x.coerce)
 // ARM32: define arm_aapcs_vfpcc %struct.D4 @_Z7func_D42D4(%struct.D4 %x.coerce)
-// ARM64: define %struct.D4 @_Z7func_D42D4(double %x.0, double %x.1, double %x.2, double %x.3)
+// ARM64: define %struct.D4 @_Z7func_D42D4([4 x double] %x.coerce)
 D4 CC func_D4(D4 x) { return x; }
 
 D5 CC func_D5(D5 x) { return x; }
@@ -67,17 +67,9 @@ D5 CC func_D5(D5 x) { return x; }
 // The C++ multiple inheritance expansion case is a little more complicated, so
 // do some extra checking.
 //
-// ARM64-LABEL: define %struct.D5 @_Z7func_D52D5(double %x.0, double %x.1, double %x.2)
-// ARM64: bitcast %struct.D5* %{{.*}} to %struct.I1*
-// ARM64: bitcast %struct.I1* %{{.*}} to %struct.Base2*
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: store double %x.0, double*
-// ARM64: getelementptr inbounds i8* %{{.*}}, i64 8
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: store double %x.1, double*
-// ARM64: getelementptr inbounds i8* %{{.*}}, i64 16
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: store double %x.2, double*
+// ARM64-LABEL: define %struct.D5 @_Z7func_D52D5([3 x double] %x.coerce)
+// ARM64: bitcast %struct.D5* %{{.*}} to [3 x double]*
+// ARM64: store [3 x double] %x.coerce, [3 x double]*
 
 void call_D5(D5 *p) {
   func_D5(*p);
@@ -86,21 +78,8 @@ void call_D5(D5 *p) {
 // Check the call site.
 //
 // ARM64-LABEL: define void @_Z7call_D5P2D5(%struct.D5* %p)
-// ARM64: bitcast %struct.D5* %{{.*}} to %struct.I1*
-// ARM64: bitcast %struct.I1* %{{.*}} to %struct.Base2*
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: load double*
-// ARM64: getelementptr inbounds i8* %{{.*}}, i64 8
-// ARM64: bitcast i8* %{{.*}} to %struct.I2*
-// ARM64: bitcast %struct.I2* %{{.*}} to %struct.Base2*
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: load double*
-// ARM64: getelementptr inbounds i8* %{{.*}}, i64 16
-// ARM64: bitcast i8* %{{.*}} to %struct.I3*
-// ARM64: bitcast %struct.I3* %{{.*}} to %struct.Base2*
-// ARM64: getelementptr inbounds %struct.Base2* %{{.*}}, i32 0, i32 0
-// ARM64: load double*
-// ARM64: call %struct.D5 @_Z7func_D52D5(double %{{.*}}, double %{{.*}}, double %{{.*}})
+// ARM64: load [3 x double]*
+// ARM64: call %struct.D5 @_Z7func_D52D5([3 x double] %{{.*}})
 
 struct Empty { };
 struct Float1 { float x; };
@@ -108,7 +87,7 @@ struct Float2 { float y; };
 struct HVAWithEmptyBase : Float1, Empty, Float2 { float z; };
 
 // PPC: define void @_Z15with_empty_base16HVAWithEmptyBase([3 x float] %a.coerce)
-// ARM64: define void @_Z15with_empty_base16HVAWithEmptyBase(float %a.0, float %a.1, float %a.2)
+// ARM64: define void @_Z15with_empty_base16HVAWithEmptyBase([3 x float] %a.coerce)
 // ARM32: define arm_aapcs_vfpcc void @_Z15with_empty_base16HVAWithEmptyBase(%struct.HVAWithEmptyBase %a.coerce)
 void CC with_empty_base(HVAWithEmptyBase a) {}
 
@@ -121,7 +100,7 @@ struct HVAWithEmptyBitField : Float1, Fl
 };
 
 // PPC: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] %a.coerce)
-// ARM64: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField(float %a.0, float %a.1, float %a.2)
+// ARM64: define void @_Z19with_empty_bitfield20HVAWithEmptyBitField([3 x float] %a.coerce)
 // ARM32: define arm_aapcs_vfpcc void @_Z19with_empty_bitfield20HVAWithEmptyBitField(%struct.HVAWithEmptyBitField %a.coerce)
 // X64: define x86_vectorcallcc void @"\01_Z19with_empty_bitfield20HVAWithEmptyBitField@@16"(float %a.0, float %a.1, float %a.2)
 void CC with_empty_bitfield(HVAWithEmptyBitField a) {}
