r213494 - [PowerPC] Support the ELFv2 ABI

Ulrich Weigand ulrich.weigand at de.ibm.com
Sun Jul 20 17:48:10 PDT 2014


Author: uweigand
Date: Sun Jul 20 19:48:09 2014
New Revision: 213494

URL: http://llvm.org/viewvc/llvm-project?rev=213494&view=rev
Log:
[PowerPC] Support the ELFv2 ABI

This patch implements clang support for the PowerPC ELFv2 ABI.
Together with a series of companion patches in LLVM, this makes
clang/LLVM fully usable on powerpc64le-linux.

Most of the ELFv2 ABI changes are fully implemented on the LLVM side.
On the clang side, we only need to implement some changes in how
aggregate types are passed by value.  Specifically, we need to:
- pass (and return) "homogeneous" floating-point or vector aggregates in
  FPRs and VRs (this is similar to the ARM homogeneous aggregate ABI)
- return aggregates of up to 16 bytes in one or two GPRs (see the sketch
  below)
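
To give a rough idea of the second point, here is a sketch of the
expected lowering for small aggregate returns, mirroring cases from the
new ppc64le-aggregates.c test added below (the exact IR depends on the
aggregate's size):

    /* An 8-byte aggregate fits in a single GPR and is returned as i64:
         define i64 @ret_s8()                                            */
    struct s8 { char c[8]; };
    struct s8 ret_s8(void) { return (struct s8) { 0 }; }

    /* A 9- to 16-byte aggregate is returned in two GPRs as { i64, i64 }:
         define { i64, i64 } @ret_s9()                                   */
    struct s9 { char c[9]; };
    struct s9 ret_s9(void) { return (struct s9) { 0 }; }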

The second piece is trivial to implement in any case.  To implement
the first piece, this patch makes use of infrastructure recently
enabled in the LLVM PowerPC back-end to support passing array types
directly, where the array element type encodes properties needed to
handle homogeneous aggregates correctly.
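
As an illustration of the first point, a homogeneous float aggregate is
coerced to an array of its base element type; the signature below matches
the FileCheck expectations in the new test added by this patch:

    /* Two floats form a homogeneous aggregate; under ELFv2 it is passed
       and returned directly in FPRs, coerced to [2 x float]:
         define [2 x float] @func_f2([2 x float] %x.coerce)              */
    struct f2 { float f[2]; };
    struct f2 func_f2(struct f2 x) { return x; }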

Specifically, the array element type encodes:
- whether the parameter should be passed in FPRs, VRs, or just
  GPRs/stack slots (for float / vector / integer element types,
  respectively)
- what the alignment requirements of the parameter are when passed in
  GPRs/stack slots (8 for float / 16 for vector / the element type
  size for integer element types) -- this corresponds to the
  "byval align" field

With this support in place, the clang part simply needs to *detect*
whether an aggregate type implements a float / vector homogeneous
aggregate as defined by the ELFv2 ABI, and if so, pass/return it
as an array type using the appropriate float / vector element type.
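
For example (again mirroring the new test, which is compiled with
-faltivec), a homogeneous vector aggregate is passed as an array of
vectors, and once an aggregate exceeds the eight-register limit it falls
back to byval, with the base element type still determining alignment:

    /* Passed and returned in VRs, coerced to [2 x <4 x i32>]:
         define [2 x <4 x i32>] @func_v2([2 x <4 x i32>] %x.coerce)      */
    struct v2 { vector int v[2]; };
    struct v2 func_v2(struct v2 x) { return x; }

    /* Nine elements exceed the limit, so these are passed byval; note
       the differing alignment for float vs. vector base types:
         define void @func_f9(..., %struct.f9* byval align 8 %x)
         define void @func_v9(..., %struct.v9* byval align 16 %x)        */
    struct f9 { float f[9]; };
    struct f9 func_f9(struct f9 x) { return x; }
    struct v9 { vector int v[9]; };
    struct v9 func_v9(struct v9 x) { return x; }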


Added:
    cfe/trunk/test/CodeGen/ppc64le-aggregates.c
Modified:
    cfe/trunk/lib/CodeGen/TargetInfo.cpp

Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=213494&r1=213493&r2=213494&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Sun Jul 20 19:48:09 2014
@@ -2898,12 +2898,24 @@ PPC32TargetCodeGenInfo::initDwarfEHRegSi
 namespace {
 /// PPC64_SVR4_ABIInfo - The 64-bit PowerPC ELF (SVR4) ABI information.
 class PPC64_SVR4_ABIInfo : public DefaultABIInfo {
+public:
+  enum ABIKind {
+    ELFv1 = 0,
+    ELFv2
+  };
+
+private:
+  static const unsigned GPRBits = 64;
+  ABIKind Kind;
 
 public:
-  PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
+  PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT, ABIKind Kind)
+    : DefaultABIInfo(CGT), Kind(Kind) {}
 
   bool isPromotableTypeForABI(QualType Ty) const;
   bool isAlignedParamType(QualType Ty) const;
+  bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
+                              uint64_t &Members) const;
 
   ABIArgInfo classifyReturnType(QualType RetTy) const;
   ABIArgInfo classifyArgumentType(QualType Ty) const;
@@ -2941,8 +2953,9 @@ public:
 
 class PPC64_SVR4_TargetCodeGenInfo : public TargetCodeGenInfo {
 public:
-  PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT)
-    : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT)) {}
+  PPC64_SVR4_TargetCodeGenInfo(CodeGenTypes &CGT,
+                               PPC64_SVR4_ABIInfo::ABIKind Kind)
+    : TargetCodeGenInfo(new PPC64_SVR4_ABIInfo(CGT, Kind)) {}
 
   int getDwarfEHStackPointer(CodeGen::CodeGenModule &M) const override {
     // This is recovered from gcc output.
@@ -3019,6 +3032,13 @@ PPC64_SVR4_ABIInfo::isAlignedParamType(Q
       AlignAsType = EltType;
   }
 
+  // Likewise for ELFv2 homogeneous aggregates.
+  const Type *Base = nullptr;
+  uint64_t Members = 0;
+  if (!AlignAsType && Kind == ELFv2 &&
+      isAggregateTypeForABI(Ty) && isHomogeneousAggregate(Ty, Base, Members))
+    AlignAsType = Base;
+
   // With special case aggregates, only vector base types need alignment.
   if (AlignAsType)
     return AlignAsType->isVectorType();
@@ -3031,6 +3051,99 @@ PPC64_SVR4_ABIInfo::isAlignedParamType(Q
   return false;
 }
 
+/// isHomogeneousAggregate - Return true if a type is an ELFv2 homogeneous
+/// aggregate.  Base is set to the base element type, and Members is set
+/// to the number of base elements.
+bool
+PPC64_SVR4_ABIInfo::isHomogeneousAggregate(QualType Ty, const Type *&Base,
+                                           uint64_t &Members) const {
+  if (const ConstantArrayType *AT = getContext().getAsConstantArrayType(Ty)) {
+    uint64_t NElements = AT->getSize().getZExtValue();
+    if (NElements == 0)
+      return false;
+    if (!isHomogeneousAggregate(AT->getElementType(), Base, Members))
+      return false;
+    Members *= NElements;
+  } else if (const RecordType *RT = Ty->getAs<RecordType>()) {
+    const RecordDecl *RD = RT->getDecl();
+    if (RD->hasFlexibleArrayMember())
+      return false;
+
+    Members = 0;
+    for (const auto *FD : RD->fields()) {
+      // Ignore (non-zero arrays of) empty records.
+      QualType FT = FD->getType();
+      while (const ConstantArrayType *AT =
+             getContext().getAsConstantArrayType(FT)) {
+        if (AT->getSize().getZExtValue() == 0)
+          return false;
+        FT = AT->getElementType();
+      }
+      if (isEmptyRecord(getContext(), FT, true))
+        continue;
+
+      // For compatibility with GCC, ignore empty bitfields in C++ mode.
+      if (getContext().getLangOpts().CPlusPlus &&
+          FD->isBitField() && FD->getBitWidthValue(getContext()) == 0)
+        continue;
+
+      uint64_t FldMembers;
+      if (!isHomogeneousAggregate(FD->getType(), Base, FldMembers))
+        return false;
+
+      Members = (RD->isUnion() ?
+                 std::max(Members, FldMembers) : Members + FldMembers);
+    }
+
+    if (!Base)
+      return false;
+
+    // Ensure there is no padding.
+    if (getContext().getTypeSize(Base) * Members !=
+        getContext().getTypeSize(Ty))
+      return false;
+  } else {
+    Members = 1;
+    if (const ComplexType *CT = Ty->getAs<ComplexType>()) {
+      Members = 2;
+      Ty = CT->getElementType();
+    }
+
+    // Homogeneous aggregates for ELFv2 must have base types of float,
+    // double, long double, or 128-bit vectors.
+    if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
+      if (BT->getKind() != BuiltinType::Float &&
+          BT->getKind() != BuiltinType::Double &&
+          BT->getKind() != BuiltinType::LongDouble)
+        return false;
+    } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
+      if (getContext().getTypeSize(VT) != 128)
+        return false;
+    } else {
+      return false;
+    }
+
+    // The base type must be the same for all members.  Types that
+    // agree in both total size and mode (float vs. vector) are
+    // treated as being equivalent here.
+    const Type *TyPtr = Ty.getTypePtr();
+    if (!Base)
+      Base = TyPtr;
+
+    if (Base->isVectorType() != TyPtr->isVectorType() ||
+        getContext().getTypeSize(Base) != getContext().getTypeSize(TyPtr))
+      return false;
+  }
+
+  // Vector types require one register, floating point types require one
+  // or two registers depending on their size.
+  uint32_t NumRegs = Base->isVectorType() ? 1 :
+                       (getContext().getTypeSize(Base) + 63) / 64;
+
+  // Homogeneous Aggregates may occupy at most 8 registers.
+  return (Members > 0 && Members * NumRegs <= 8);
+}
+
 ABIArgInfo
 PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const {
   if (Ty->isAnyComplexType())
@@ -3054,6 +3167,18 @@ PPC64_SVR4_ABIInfo::classifyArgumentType
 
     uint64_t ABIAlign = isAlignedParamType(Ty)? 16 : 8;
     uint64_t TyAlign = getContext().getTypeAlign(Ty) / 8;
+
+    // ELFv2 homogeneous aggregates are passed as array types.
+    const Type *Base = nullptr;
+    uint64_t Members = 0;
+    if (Kind == ELFv2 &&
+        isHomogeneousAggregate(Ty, Base, Members)) {
+      llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0));
+      llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members);
+      return ABIArgInfo::getDirect(CoerceTy);
+    }
+
+    // All other aggregates are passed ByVal.
     return ABIArgInfo::getIndirect(ABIAlign, /*ByVal=*/true,
                                    /*Realign=*/TyAlign > ABIAlign);
   }
@@ -3082,8 +3207,36 @@ PPC64_SVR4_ABIInfo::classifyReturnType(Q
     }
   }
 
-  if (isAggregateTypeForABI(RetTy))
+  if (isAggregateTypeForABI(RetTy)) {
+    // ELFv2 homogeneous aggregates are returned as array types.
+    const Type *Base = nullptr;
+    uint64_t Members = 0;
+    if (Kind == ELFv2 &&
+        isHomogeneousAggregate(RetTy, Base, Members)) {
+      llvm::Type *BaseTy = CGT.ConvertType(QualType(Base, 0));
+      llvm::Type *CoerceTy = llvm::ArrayType::get(BaseTy, Members);
+      return ABIArgInfo::getDirect(CoerceTy);
+    }
+
+    // ELFv2 small aggregates are returned in up to two registers.
+    uint64_t Bits = getContext().getTypeSize(RetTy);
+    if (Kind == ELFv2 && Bits <= 2 * GPRBits) {
+      if (Bits == 0)
+        return ABIArgInfo::getIgnore();
+
+      llvm::Type *CoerceTy;
+      if (Bits > GPRBits) {
+        CoerceTy = llvm::IntegerType::get(getVMContext(), GPRBits);
+        CoerceTy = llvm::StructType::get(CoerceTy, CoerceTy, NULL);
+      } else
+        CoerceTy = llvm::IntegerType::get(getVMContext(),
+                                          llvm::RoundUpToAlignment(Bits, 8));
+      return ABIArgInfo::getDirect(CoerceTy);
+    }
+
+    // All other aggregates are returned indirectly.
     return ABIArgInfo::getIndirect(0);
+  }
 
   return (isPromotableTypeForABI(RetTy) ?
           ABIArgInfo::getExtend() : ABIArgInfo::getDirect());
@@ -6609,13 +6762,20 @@ const TargetCodeGenInfo &CodeGenModule::
   case llvm::Triple::ppc:
     return *(TheTargetCodeGenInfo = new PPC32TargetCodeGenInfo(Types));
   case llvm::Triple::ppc64:
-    if (Triple.isOSBinFormatELF())
-      return *(TheTargetCodeGenInfo = new PPC64_SVR4_TargetCodeGenInfo(Types));
-    else
+    if (Triple.isOSBinFormatELF()) {
+      // FIXME: Should be switchable via command-line option.
+      PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv1;
+      return *(TheTargetCodeGenInfo =
+               new PPC64_SVR4_TargetCodeGenInfo(Types, Kind));
+    } else
       return *(TheTargetCodeGenInfo = new PPC64TargetCodeGenInfo(Types));
-  case llvm::Triple::ppc64le:
+  case llvm::Triple::ppc64le: {
     assert(Triple.isOSBinFormatELF() && "PPC64 LE non-ELF not supported!");
-    return *(TheTargetCodeGenInfo = new PPC64_SVR4_TargetCodeGenInfo(Types));
+    // FIXME: Should be switchable via command-line option.
+    PPC64_SVR4_ABIInfo::ABIKind Kind = PPC64_SVR4_ABIInfo::ELFv2;
+    return *(TheTargetCodeGenInfo =
+             new PPC64_SVR4_TargetCodeGenInfo(Types, Kind));
+  }
 
   case llvm::Triple::nvptx:
   case llvm::Triple::nvptx64:

Added: cfe/trunk/test/CodeGen/ppc64le-aggregates.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/ppc64le-aggregates.c?rev=213494&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/ppc64le-aggregates.c (added)
+++ cfe/trunk/test/CodeGen/ppc64le-aggregates.c Sun Jul 20 19:48:09 2014
@@ -0,0 +1,422 @@
+// REQUIRES: powerpc-registered-target
+// RUN: %clang_cc1 -faltivec -triple powerpc64le-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s
+
+// Test homogeneous float aggregate passing and returning.
+
+struct f1 { float f[1]; };
+struct f2 { float f[2]; };
+struct f3 { float f[3]; };
+struct f4 { float f[4]; };
+struct f5 { float f[5]; };
+struct f6 { float f[6]; };
+struct f7 { float f[7]; };
+struct f8 { float f[8]; };
+struct f9 { float f[9]; };
+
+struct fab { float a; float b; };
+struct fabc { float a; float b; float c; };
+
+// CHECK: define [1 x float] @func_f1(float inreg %x.coerce)
+struct f1 func_f1(struct f1 x) { return x; }
+
+// CHECK: define [2 x float] @func_f2([2 x float] %x.coerce)
+struct f2 func_f2(struct f2 x) { return x; }
+
+// CHECK: define [3 x float] @func_f3([3 x float] %x.coerce)
+struct f3 func_f3(struct f3 x) { return x; }
+
+// CHECK: define [4 x float] @func_f4([4 x float] %x.coerce)
+struct f4 func_f4(struct f4 x) { return x; }
+
+// CHECK: define [5 x float] @func_f5([5 x float] %x.coerce)
+struct f5 func_f5(struct f5 x) { return x; }
+
+// CHECK: define [6 x float] @func_f6([6 x float] %x.coerce)
+struct f6 func_f6(struct f6 x) { return x; }
+
+// CHECK: define [7 x float] @func_f7([7 x float] %x.coerce)
+struct f7 func_f7(struct f7 x) { return x; }
+
+// CHECK: define [8 x float] @func_f8([8 x float] %x.coerce)
+struct f8 func_f8(struct f8 x) { return x; }
+
+// CHECK: define void @func_f9(%struct.f9* noalias sret %agg.result, %struct.f9* byval align 8 %x)
+struct f9 func_f9(struct f9 x) { return x; }
+
+// CHECK: define [2 x float] @func_fab([2 x float] %x.coerce)
+struct fab func_fab(struct fab x) { return x; }
+
+// CHECK: define [3 x float] @func_fabc([3 x float] %x.coerce)
+struct fabc func_fabc(struct fabc x) { return x; }
+
+// CHECK-LABEL: @call_f1
+// CHECK: %[[TMP:[^ ]+]] = load float* getelementptr inbounds (%struct.f1* @global_f1, i32 0, i32 0, i32 0), align 1
+// CHECK: call [1 x float] @func_f1(float inreg %[[TMP]])
+struct f1 global_f1;
+void call_f1(void) { global_f1 = func_f1(global_f1); }
+
+// CHECK-LABEL: @call_f2
+// CHECK: %[[TMP:[^ ]+]] = load [2 x float]* getelementptr inbounds (%struct.f2* @global_f2, i32 0, i32 0), align 1
+// CHECK: call [2 x float] @func_f2([2 x float] %[[TMP]])
+struct f2 global_f2;
+void call_f2(void) { global_f2 = func_f2(global_f2); }
+
+// CHECK-LABEL: @call_f3
+// CHECK: %[[TMP:[^ ]+]] = load [3 x float]* getelementptr inbounds (%struct.f3* @global_f3, i32 0, i32 0), align 1
+// CHECK: call [3 x float] @func_f3([3 x float] %[[TMP]])
+struct f3 global_f3;
+void call_f3(void) { global_f3 = func_f3(global_f3); }
+
+// CHECK-LABEL: @call_f4
+// CHECK: %[[TMP:[^ ]+]] = load [4 x float]* getelementptr inbounds (%struct.f4* @global_f4, i32 0, i32 0), align 1
+// CHECK: call [4 x float] @func_f4([4 x float] %[[TMP]])
+struct f4 global_f4;
+void call_f4(void) { global_f4 = func_f4(global_f4); }
+
+// CHECK-LABEL: @call_f5
+// CHECK: %[[TMP:[^ ]+]] = load [5 x float]* getelementptr inbounds (%struct.f5* @global_f5, i32 0, i32 0), align 1
+// CHECK: call [5 x float] @func_f5([5 x float] %[[TMP]])
+struct f5 global_f5;
+void call_f5(void) { global_f5 = func_f5(global_f5); }
+
+// CHECK-LABEL: @call_f6
+// CHECK: %[[TMP:[^ ]+]] = load [6 x float]* getelementptr inbounds (%struct.f6* @global_f6, i32 0, i32 0), align 1
+// CHECK: call [6 x float] @func_f6([6 x float] %[[TMP]])
+struct f6 global_f6;
+void call_f6(void) { global_f6 = func_f6(global_f6); }
+
+// CHECK-LABEL: @call_f7
+// CHECK: %[[TMP:[^ ]+]] = load [7 x float]* getelementptr inbounds (%struct.f7* @global_f7, i32 0, i32 0), align 1
+// CHECK: call [7 x float] @func_f7([7 x float] %[[TMP]])
+struct f7 global_f7;
+void call_f7(void) { global_f7 = func_f7(global_f7); }
+
+// CHECK-LABEL: @call_f8
+// CHECK: %[[TMP:[^ ]+]] = load [8 x float]* getelementptr inbounds (%struct.f8* @global_f8, i32 0, i32 0), align 1
+// CHECK: call [8 x float] @func_f8([8 x float] %[[TMP]])
+struct f8 global_f8;
+void call_f8(void) { global_f8 = func_f8(global_f8); }
+
+// CHECK-LABEL: @call_f9
+// CHECK: %[[TMP1:[^ ]+]] = alloca %struct.f9, align 8
+// CHECK: %[[TMP2:[^ ]+]] = bitcast %struct.f9* %[[TMP1]] to i8*
+// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %[[TMP2]], i8* bitcast (%struct.f9* @global_f9 to i8*), i64 36, i32 4, i1 false)
+// CHECK: call void @func_f9(%struct.f9* sret %{{[^ ]+}}, %struct.f9* byval align 8 %[[TMP1]])
+struct f9 global_f9;
+void call_f9(void) { global_f9 = func_f9(global_f9); }
+
+// CHECK-LABEL: @call_fab
+// CHECK: %[[TMP:[^ ]+]] = load [2 x float]* bitcast (%struct.fab* @global_fab to [2 x float]*)
+// CHECK: call [2 x float] @func_fab([2 x float] %[[TMP]])
+struct fab global_fab;
+void call_fab(void) { global_fab = func_fab(global_fab); }
+
+// CHECK-LABEL: @call_fabc
+// CHECK: %[[TMP:[^ ]+]] = load [3 x float]* bitcast (%struct.fabc* @global_fabc to [3 x float]*)
+// CHECK: call [3 x float] @func_fabc([3 x float] %[[TMP]])
+struct fabc global_fabc;
+void call_fabc(void) { global_fabc = func_fabc(global_fabc); }
+
+
+// Test homogeneous vector aggregate passing and returning.
+
+struct v1 { vector int v[1]; };
+struct v2 { vector int v[2]; };
+struct v3 { vector int v[3]; };
+struct v4 { vector int v[4]; };
+struct v5 { vector int v[5]; };
+struct v6 { vector int v[6]; };
+struct v7 { vector int v[7]; };
+struct v8 { vector int v[8]; };
+struct v9 { vector int v[9]; };
+
+struct vab { vector int a; vector int b; };
+struct vabc { vector int a; vector int b; vector int c; };
+
+// CHECK: define [1 x <4 x i32>] @func_v1(<4 x i32> inreg %x.coerce)
+struct v1 func_v1(struct v1 x) { return x; }
+
+// CHECK: define [2 x <4 x i32>] @func_v2([2 x <4 x i32>] %x.coerce)
+struct v2 func_v2(struct v2 x) { return x; }
+
+// CHECK: define [3 x <4 x i32>] @func_v3([3 x <4 x i32>] %x.coerce)
+struct v3 func_v3(struct v3 x) { return x; }
+
+// CHECK: define [4 x <4 x i32>] @func_v4([4 x <4 x i32>] %x.coerce)
+struct v4 func_v4(struct v4 x) { return x; }
+
+// CHECK: define [5 x <4 x i32>] @func_v5([5 x <4 x i32>] %x.coerce)
+struct v5 func_v5(struct v5 x) { return x; }
+
+// CHECK: define [6 x <4 x i32>] @func_v6([6 x <4 x i32>] %x.coerce)
+struct v6 func_v6(struct v6 x) { return x; }
+
+// CHECK: define [7 x <4 x i32>] @func_v7([7 x <4 x i32>] %x.coerce)
+struct v7 func_v7(struct v7 x) { return x; }
+
+// CHECK: define [8 x <4 x i32>] @func_v8([8 x <4 x i32>] %x.coerce)
+struct v8 func_v8(struct v8 x) { return x; }
+
+// CHECK: define void @func_v9(%struct.v9* noalias sret %agg.result, %struct.v9* byval align 16 %x)
+struct v9 func_v9(struct v9 x) { return x; }
+
+// CHECK: define [2 x <4 x i32>] @func_vab([2 x <4 x i32>] %x.coerce)
+struct vab func_vab(struct vab x) { return x; }
+
+// CHECK: define [3 x <4 x i32>] @func_vabc([3 x <4 x i32>] %x.coerce)
+struct vabc func_vabc(struct vabc x) { return x; }
+
+// CHECK-LABEL: @call_v1
+// CHECK: %[[TMP:[^ ]+]] = load <4 x i32>* getelementptr inbounds (%struct.v1* @global_v1, i32 0, i32 0, i32 0), align 1
+// CHECK: call [1 x <4 x i32>] @func_v1(<4 x i32> inreg %[[TMP]])
+struct v1 global_v1;
+void call_v1(void) { global_v1 = func_v1(global_v1); }
+
+// CHECK-LABEL: @call_v2
+// CHECK: %[[TMP:[^ ]+]] = load [2 x <4 x i32>]* getelementptr inbounds (%struct.v2* @global_v2, i32 0, i32 0), align 1
+// CHECK: call [2 x <4 x i32>] @func_v2([2 x <4 x i32>] %[[TMP]])
+struct v2 global_v2;
+void call_v2(void) { global_v2 = func_v2(global_v2); }
+
+// CHECK-LABEL: @call_v3
+// CHECK: %[[TMP:[^ ]+]] = load [3 x <4 x i32>]* getelementptr inbounds (%struct.v3* @global_v3, i32 0, i32 0), align 1
+// CHECK: call [3 x <4 x i32>] @func_v3([3 x <4 x i32>] %[[TMP]])
+struct v3 global_v3;
+void call_v3(void) { global_v3 = func_v3(global_v3); }
+
+// CHECK-LABEL: @call_v4
+// CHECK: %[[TMP:[^ ]+]] = load [4 x <4 x i32>]* getelementptr inbounds (%struct.v4* @global_v4, i32 0, i32 0), align 1
+// CHECK: call [4 x <4 x i32>] @func_v4([4 x <4 x i32>] %[[TMP]])
+struct v4 global_v4;
+void call_v4(void) { global_v4 = func_v4(global_v4); }
+
+// CHECK-LABEL: @call_v5
+// CHECK: %[[TMP:[^ ]+]] = load [5 x <4 x i32>]* getelementptr inbounds (%struct.v5* @global_v5, i32 0, i32 0), align 1
+// CHECK: call [5 x <4 x i32>] @func_v5([5 x <4 x i32>] %[[TMP]])
+struct v5 global_v5;
+void call_v5(void) { global_v5 = func_v5(global_v5); }
+
+// CHECK-LABEL: @call_v6
+// CHECK: %[[TMP:[^ ]+]] = load [6 x <4 x i32>]* getelementptr inbounds (%struct.v6* @global_v6, i32 0, i32 0), align 1
+// CHECK: call [6 x <4 x i32>] @func_v6([6 x <4 x i32>] %[[TMP]])
+struct v6 global_v6;
+void call_v6(void) { global_v6 = func_v6(global_v6); }
+
+// CHECK-LABEL: @call_v7
+// CHECK: %[[TMP:[^ ]+]] = load [7 x <4 x i32>]* getelementptr inbounds (%struct.v7* @global_v7, i32 0, i32 0), align 1
+// CHECK: call [7 x <4 x i32>] @func_v7([7 x <4 x i32>] %[[TMP]])
+struct v7 global_v7;
+void call_v7(void) { global_v7 = func_v7(global_v7); }
+
+// CHECK-LABEL: @call_v8
+// CHECK: %[[TMP:[^ ]+]] = load [8 x <4 x i32>]* getelementptr inbounds (%struct.v8* @global_v8, i32 0, i32 0), align 1
+// CHECK: call [8 x <4 x i32>] @func_v8([8 x <4 x i32>] %[[TMP]])
+struct v8 global_v8;
+void call_v8(void) { global_v8 = func_v8(global_v8); }
+
+// CHECK-LABEL: @call_v9
+// CHECK: call void @func_v9(%struct.v9* sret %{{[^ ]+}}, %struct.v9* byval align 16 @global_v9)
+struct v9 global_v9;
+void call_v9(void) { global_v9 = func_v9(global_v9); }
+
+// CHECK-LABEL: @call_vab
+// CHECK: %[[TMP:[^ ]+]] = load [2 x <4 x i32>]* bitcast (%struct.vab* @global_vab to [2 x <4 x i32>]*)
+// CHECK: call [2 x <4 x i32>] @func_vab([2 x <4 x i32>] %[[TMP]])
+struct vab global_vab;
+void call_vab(void) { global_vab = func_vab(global_vab); }
+
+// CHECK-LABEL: @call_vabc
+// CHECK: %[[TMP:[^ ]+]] = load [3 x <4 x i32>]* bitcast (%struct.vabc* @global_vabc to [3 x <4 x i32>]*)
+// CHECK: call [3 x <4 x i32>] @func_vabc([3 x <4 x i32>] %[[TMP]])
+struct vabc global_vabc;
+void call_vabc(void) { global_vabc = func_vabc(global_vabc); }
+
+
+// As clang extension, non-power-of-two vectors may also be part of
+// homogeneous aggregates.
+
+typedef float float3 __attribute__((vector_size (12)));
+
+struct v3f1 { float3 v[1]; };
+struct v3f2 { float3 v[2]; };
+struct v3f3 { float3 v[3]; };
+struct v3f4 { float3 v[4]; };
+struct v3f5 { float3 v[5]; };
+struct v3f6 { float3 v[6]; };
+struct v3f7 { float3 v[7]; };
+struct v3f8 { float3 v[8]; };
+struct v3f9 { float3 v[9]; };
+
+struct v3fab { float3 a; float3 b; };
+struct v3fabc { float3 a; float3 b; float3 c; };
+
+// CHECK: define [1 x <3 x float>] @func_v3f1(<3 x float> inreg %x.coerce)
+struct v3f1 func_v3f1(struct v3f1 x) { return x; }
+
+// CHECK: define [2 x <3 x float>] @func_v3f2([2 x <3 x float>] %x.coerce)
+struct v3f2 func_v3f2(struct v3f2 x) { return x; }
+
+// CHECK: define [3 x <3 x float>] @func_v3f3([3 x <3 x float>] %x.coerce)
+struct v3f3 func_v3f3(struct v3f3 x) { return x; }
+
+// CHECK: define [4 x <3 x float>] @func_v3f4([4 x <3 x float>] %x.coerce)
+struct v3f4 func_v3f4(struct v3f4 x) { return x; }
+
+// CHECK: define [5 x <3 x float>] @func_v3f5([5 x <3 x float>] %x.coerce)
+struct v3f5 func_v3f5(struct v3f5 x) { return x; }
+
+// CHECK: define [6 x <3 x float>] @func_v3f6([6 x <3 x float>] %x.coerce)
+struct v3f6 func_v3f6(struct v3f6 x) { return x; }
+
+// CHECK: define [7 x <3 x float>] @func_v3f7([7 x <3 x float>] %x.coerce)
+struct v3f7 func_v3f7(struct v3f7 x) { return x; }
+
+// CHECK: define [8 x <3 x float>] @func_v3f8([8 x <3 x float>] %x.coerce)
+struct v3f8 func_v3f8(struct v3f8 x) { return x; }
+
+// CHECK: define void @func_v3f9(%struct.v3f9* noalias sret %agg.result, %struct.v3f9* byval align 16 %x)
+struct v3f9 func_v3f9(struct v3f9 x) { return x; }
+
+// CHECK: define [2 x <3 x float>] @func_v3fab([2 x <3 x float>] %x.coerce)
+struct v3fab func_v3fab(struct v3fab x) { return x; }
+
+// CHECK: define [3 x <3 x float>] @func_v3fabc([3 x <3 x float>] %x.coerce)
+struct v3fabc func_v3fabc(struct v3fabc x) { return x; }
+
+// CHECK-LABEL: @call_v3f1
+// CHECK: %[[TMP:[^ ]+]] = load <3 x float>* getelementptr inbounds (%struct.v3f1* @global_v3f1, i32 0, i32 0, i32 0), align 1
+// CHECK: call [1 x <3 x float>] @func_v3f1(<3 x float> inreg %[[TMP]])
+struct v3f1 global_v3f1;
+void call_v3f1(void) { global_v3f1 = func_v3f1(global_v3f1); }
+
+// CHECK-LABEL: @call_v3f2
+// CHECK: %[[TMP:[^ ]+]] = load [2 x <3 x float>]* getelementptr inbounds (%struct.v3f2* @global_v3f2, i32 0, i32 0), align 1
+// CHECK: call [2 x <3 x float>] @func_v3f2([2 x <3 x float>] %[[TMP]])
+struct v3f2 global_v3f2;
+void call_v3f2(void) { global_v3f2 = func_v3f2(global_v3f2); }
+
+// CHECK-LABEL: @call_v3f3
+// CHECK: %[[TMP:[^ ]+]] = load [3 x <3 x float>]* getelementptr inbounds (%struct.v3f3* @global_v3f3, i32 0, i32 0), align 1
+// CHECK: call [3 x <3 x float>] @func_v3f3([3 x <3 x float>] %[[TMP]])
+struct v3f3 global_v3f3;
+void call_v3f3(void) { global_v3f3 = func_v3f3(global_v3f3); }
+
+// CHECK-LABEL: @call_v3f4
+// CHECK: %[[TMP:[^ ]+]] = load [4 x <3 x float>]* getelementptr inbounds (%struct.v3f4* @global_v3f4, i32 0, i32 0), align 1
+// CHECK: call [4 x <3 x float>] @func_v3f4([4 x <3 x float>] %[[TMP]])
+struct v3f4 global_v3f4;
+void call_v3f4(void) { global_v3f4 = func_v3f4(global_v3f4); }
+
+// CHECK-LABEL: @call_v3f5
+// CHECK: %[[TMP:[^ ]+]] = load [5 x <3 x float>]* getelementptr inbounds (%struct.v3f5* @global_v3f5, i32 0, i32 0), align 1
+// CHECK: call [5 x <3 x float>] @func_v3f5([5 x <3 x float>] %[[TMP]])
+struct v3f5 global_v3f5;
+void call_v3f5(void) { global_v3f5 = func_v3f5(global_v3f5); }
+
+// CHECK-LABEL: @call_v3f6
+// CHECK: %[[TMP:[^ ]+]] = load [6 x <3 x float>]* getelementptr inbounds (%struct.v3f6* @global_v3f6, i32 0, i32 0), align 1
+// CHECK: call [6 x <3 x float>] @func_v3f6([6 x <3 x float>] %[[TMP]])
+struct v3f6 global_v3f6;
+void call_v3f6(void) { global_v3f6 = func_v3f6(global_v3f6); }
+
+// CHECK-LABEL: @call_v3f7
+// CHECK: %[[TMP:[^ ]+]] = load [7 x <3 x float>]* getelementptr inbounds (%struct.v3f7* @global_v3f7, i32 0, i32 0), align 1
+// CHECK: call [7 x <3 x float>] @func_v3f7([7 x <3 x float>] %[[TMP]])
+struct v3f7 global_v3f7;
+void call_v3f7(void) { global_v3f7 = func_v3f7(global_v3f7); }
+
+// CHECK-LABEL: @call_v3f8
+// CHECK: %[[TMP:[^ ]+]] = load [8 x <3 x float>]* getelementptr inbounds (%struct.v3f8* @global_v3f8, i32 0, i32 0), align 1
+// CHECK: call [8 x <3 x float>] @func_v3f8([8 x <3 x float>] %[[TMP]])
+struct v3f8 global_v3f8;
+void call_v3f8(void) { global_v3f8 = func_v3f8(global_v3f8); }
+
+// CHECK-LABEL: @call_v3f9
+// CHECK: call void @func_v3f9(%struct.v3f9* sret %{{[^ ]+}}, %struct.v3f9* byval align 16 @global_v3f9)
+struct v3f9 global_v3f9;
+void call_v3f9(void) { global_v3f9 = func_v3f9(global_v3f9); }
+
+// CHECK-LABEL: @call_v3fab
+// CHECK: %[[TMP:[^ ]+]] = load [2 x <3 x float>]* bitcast (%struct.v3fab* @global_v3fab to [2 x <3 x float>]*)
+// CHECK: call [2 x <3 x float>] @func_v3fab([2 x <3 x float>] %[[TMP]])
+struct v3fab global_v3fab;
+void call_v3fab(void) { global_v3fab = func_v3fab(global_v3fab); }
+
+// CHECK-LABEL: @call_v3fabc
+// CHECK: %[[TMP:[^ ]+]] = load [3 x <3 x float>]* bitcast (%struct.v3fabc* @global_v3fabc to [3 x <3 x float>]*)
+// CHECK: call [3 x <3 x float>] @func_v3fabc([3 x <3 x float>] %[[TMP]])
+struct v3fabc global_v3fabc;
+void call_v3fabc(void) { global_v3fabc = func_v3fabc(global_v3fabc); }
+
+
+// Test returning small aggregates.
+
+struct s1 { char c[1]; };
+struct s2 { char c[2]; };
+struct s3 { char c[3]; };
+struct s4 { char c[4]; };
+struct s5 { char c[5]; };
+struct s6 { char c[6]; };
+struct s7 { char c[7]; };
+struct s8 { char c[8]; };
+struct s9 { char c[9]; };
+struct s16 { char c[16]; };
+struct s17 { char c[17]; };
+
+// CHECK: define i8 @ret_s1()
+struct s1 ret_s1() {
+  return (struct s1) { 17 };
+}
+
+// CHECK: define i16 @ret_s2()
+struct s2 ret_s2() {
+  return (struct s2) { 17, 18 };
+}
+
+// CHECK: define i24 @ret_s3()
+struct s3 ret_s3() {
+  return (struct s3) { 17, 18, 19 };
+}
+
+// CHECK: define i32 @ret_s4()
+struct s4 ret_s4() {
+  return (struct s4) { 17, 18, 19, 20 };
+}
+
+// CHECK: define i40 @ret_s5()
+struct s5 ret_s5() {
+  return (struct s5) { 17, 18, 19, 20, 21 };
+}
+
+// CHECK: define i48 @ret_s6()
+struct s6 ret_s6() {
+  return (struct s6) { 17, 18, 19, 20, 21, 22 };
+}
+
+// CHECK: define i56 @ret_s7()
+struct s7 ret_s7() {
+  return (struct s7) { 17, 18, 19, 20, 21, 22, 23 };
+}
+
+// CHECK: define i64 @ret_s8()
+struct s8 ret_s8() {
+  return (struct s8) { 17, 18, 19, 20, 21, 22, 23, 24 };
+}
+
+// CHECK: define { i64, i64 } @ret_s9()
+struct s9 ret_s9() {
+  return (struct s9) { 17, 18, 19, 20, 21, 22, 23, 24, 25 };
+}
+
+// CHECK: define { i64, i64 } @ret_s16()
+struct s16 ret_s16() {
+  return (struct s16) { 17, 18, 19, 20, 21, 22, 23, 24,
+                        25, 26, 27, 28, 29, 30, 31, 32 };
+}
+
+// CHECK: define void @ret_s17(%struct.s17*
+struct s17 ret_s17() {
+  return (struct s17) { 17, 18, 19, 20, 21, 22, 23, 24,
+                        25, 26, 27, 28, 29, 30, 31, 32, 33 };
+}
+




