r212743 - [PowerPC] ABI support for aligned by-value aggregates

Thu Jul 10 11:50:55 PDT 2014

On Thu, Jul 10, 2014 at 10:20 AM, Ulrich Weigand
<ulrich.weigand at de.ibm.com> wrote:
> Author: uweigand
> Date: Thu Jul 10 12:20:07 2014
> New Revision: 212743
>
> URL: http://llvm.org/viewvc/llvm-project?rev=212743&view=rev
> Log:
> [PowerPC] ABI support for aligned by-value aggregates
>
> This patch adds support for respecting the ABI and type alignment
> of aggregates passed by value.  Currently, all aggregates are aligned
> at 8 bytes in the parameter save area.  This is incorrect for two
> reasons:
>
> - Aggregates that need alignment of 16 bytes or more should be aligned
>   at 16 bytes in the parameter save area.  This is implemented by
>   using an appropriate "byval align" attribute in the IR.
>
> - Aggregates that need alignment beyond 16 bytes need to be dynamically
>   realigned by the caller.  This is implemented by setting the Realign
>   flag of the ABIArgInfo::getIndirect call.
>
> In addition, when expanding a va_arg call accessing a type that is
> aligned at 16 bytes in the argument save area (either one of the
> aggregate types as above, or a vector type which is already aligned
> at 16 bytes), code needs to align the va_list pointer accordingly.
>
> Reviewed by Hal Finkel.
>
>
> Added:
>     cfe/trunk/test/CodeGen/ppc64-align-struct.c
> Modified:
>     cfe/trunk/lib/CodeGen/TargetInfo.cpp
>     cfe/trunk/test/CodeGen/ppc64-vector.c
>
> Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=212743&r1=212742&r2=212743&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
> +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Thu Jul 10 12:20:07 2014
> @@ -2903,6 +2903,7 @@ public:
>    PPC64_SVR4_ABIInfo(CodeGen::CodeGenTypes &CGT) : DefaultABIInfo(CGT) {}
>
>    bool isPromotableTypeForABI(QualType Ty) const;
> +  bool isAlignedParamType(QualType Ty) const;
>
>    ABIArgInfo classifyReturnType(QualType RetTy) const;
>    ABIArgInfo classifyArgumentType(QualType Ty) const;
> @@ -2993,6 +2994,43 @@ PPC64_SVR4_ABIInfo::isPromotableTypeForA
>    return false;
>  }
>
> +/// isAlignedParamType - Determine whether a type requires 16-byte
> +/// alignment in the parameter area.
> +bool
> +PPC64_SVR4_ABIInfo::isAlignedParamType(QualType Ty) const {
> +  // Complex types are passed just like their elements.
> +  if (const ComplexType *CTy = Ty->getAs<ComplexType>())
> +    Ty = CTy->getElementType();
> +
> +  // Only vector types of size 16 bytes need alignment (larger types are
> +  // passed via reference, smaller types are not aligned).
> +  if (Ty->isVectorType())
> +    return getContext().getTypeSize(Ty) == 128;
> +
> +  // For single-element float/vector structs, we consider the whole type
> +  // to have the same alignment requirements as its single element.
> +  const Type *AlignAsType = nullptr;
> +  const Type *EltType = isSingleElementStruct(Ty, getContext());
> +  if (EltType) {
> +    const BuiltinType *BT = EltType->getAs<BuiltinType>();
> +    if ((EltType->isVectorType() &&
> +         getContext().getTypeSize(EltType) == 128) ||
> +        (BT && BT->isFloatingPoint()))
> +      AlignAsType = EltType;
> +  }
> +
> +  // With special case aggregates, only vector base types need alignment.
> +  if (AlignAsType)
> +    return AlignAsType->isVectorType();
> +
> +  // Otherwise, we only need alignment for any aggregate type that
> +  // has an alignment requirement of >= 16 bytes.
> +  if (isAggregateTypeForABI(Ty) && getContext().getTypeAlign(Ty) >= 128)
> +    return true;
> +
> +  return false;
> +}
> +
>  ABIArgInfo
>  PPC64_SVR4_ABIInfo::classifyArgumentType(QualType Ty) const {
>    if (Ty->isAnyComplexType())
> @@ -3014,7 +3052,10 @@ PPC64_SVR4_ABIInfo::classifyArgumentType
>      if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI()))
>        return ABIArgInfo::getIndirect(0, RAA == CGCXXABI::RAA_DirectInMemory);
>
> -    return ABIArgInfo::getIndirect(0);
> +    uint64_t ABIAlign = isAlignedParamType(Ty)? 16 : 8;
> +    uint64_t TyAlign = getContext().getTypeAlign(Ty) / 8;
> +    return ABIArgInfo::getIndirect(ABIAlign, /*ByVal=*/true,
> +                                   /*Realign=*/TyAlign > ABIAlign);
>    }
>
>    return (isPromotableTypeForABI(Ty) ?
> @@ -3059,6 +3100,14 @@ llvm::Value *PPC64_SVR4_ABIInfo::EmitVAA
>    llvm::Value *VAListAddrAsBPP = Builder.CreateBitCast(VAListAddr, BPP, "ap");
>    llvm::Value *Addr = Builder.CreateLoad(VAListAddrAsBPP, "ap.cur");
>
> +  // Handle types that require 16-byte alignment in the parameter save area.
> +  if (isAlignedParamType(Ty)) {
> +    llvm::Value *AddrAsInt = Builder.CreatePtrToInt(Addr, CGF.Int64Ty);
> +    AddrAsInt = Builder.CreateAdd(AddrAsInt, Builder.getInt64(15));
> +    AddrAsInt = Builder.CreateAnd(AddrAsInt, Builder.getInt64(-16));
> +    Addr = Builder.CreateIntToPtr(AddrAsInt, BP, "ap.align");
> +  }
> +
>    // Update the va_list pointer.  The pointer should be bumped by the
>    // size of the object.  We can trust getTypeSize() except for a complex
>    // type whose base type is smaller than a doubleword.  For these, the
>
> Added: cfe/trunk/test/CodeGen/ppc64-align-struct.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/ppc64-align-struct.c?rev=212743&view=auto
> ==============================================================================
> --- cfe/trunk/test/CodeGen/ppc64-align-struct.c (added)
> +++ cfe/trunk/test/CodeGen/ppc64-align-struct.c Thu Jul 10 12:20:07 2014
> @@ -0,0 +1,136 @@
> +// RUN: %clang_cc1 -faltivec -triple powerpc64-unknown-linux-gnu -emit-llvm -o - %s | FileCheck %s

This test case relies on named IR values (%x, etc.), which are not
present in non-asserts builds of LLVM (there, all IR values are simply
numbered). I've marked the test case as "REQUIRES: asserts" for now,
which should hopefully keep this test from running, and thus from
failing, in a non-asserts build. It would still be good if you could go
back and update the test by adding regex matches to ignore/match on
whatever the value names are, rather than hardcoding them into the
test. Then the REQUIRES line can be removed.

- David

> +
> +#include <stdarg.h>
> +
> +struct test1 { int x; int y; };
> +struct test2 { int x; int y; } __attribute__((aligned (16)));
> +struct test3 { int x; int y; } __attribute__((aligned (32)));
> +struct test4 { int x; int y; int z; };
> +
> +// CHECK: define void @test1(i32 signext %x, %struct.test1* byval align 8 %y)
> +void test1 (int x, struct test1 y)
> +{
> +}
> +
> +// CHECK: define void @test2(i32 signext %x, %struct.test2* byval align 16 %y)
> +void test2 (int x, struct test2 y)
> +{
> +}
> +
> +// This case requires run-time realignment of the incoming struct
> +// CHECK: define void @test3(i32 signext %x, %struct.test3* byval align 16)
> +// CHECK: %y = alloca %struct.test3, align 32
> +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
> +void test3 (int x, struct test3 y)
> +{
> +}
> +
> +// CHECK: define void @test4(i32 signext %x, %struct.test4* byval align 8 %y)
> +void test4 (int x, struct test4 y)
> +{
> +}
> +
> +// CHECK: define void @test1va(%struct.test1* noalias sret %agg.result, i32 signext %x, ...)
> +// CHECK: %ap.cur = load i8** %ap
> +// CHECK: %ap.next = getelementptr i8* %ap.cur, i64 8
> +// CHECK: store i8* %ap.next, i8** %ap
> +// CHECK: bitcast i8* %ap.cur to %struct.test1*
> +struct test1 test1va (int x, ...)
> +{
> +  struct test1 y;
> +  va_list ap;
> +  va_start(ap, x);
> +  y = va_arg (ap, struct test1);
> +  va_end(ap);
> +  return y;
> +}
> +
> +// CHECK: define void @test2va(%struct.test2* noalias sret %agg.result, i32 signext %x, ...)
> +// CHECK: %ap.cur = load i8** %ap
> +// CHECK: %[[TMP0:[0-9]+]] = ptrtoint i8* %ap.cur to i64
> +// CHECK: %[[TMP1:[0-9]+]] = add i64 %[[TMP0]], 15
> +// CHECK: %[[TMP2:[0-9]+]] = and i64 %[[TMP1]], -16
> +// CHECK: %ap.align = inttoptr i64 %[[TMP2]] to i8*
> +// CHECK: %ap.next = getelementptr i8* %ap.align, i64 16
> +// CHECK: store i8* %ap.next, i8** %ap
> +// CHECK: bitcast i8* %ap.align to %struct.test2*
> +struct test2 test2va (int x, ...)
> +{
> +  struct test2 y;
> +  va_list ap;
> +  va_start(ap, x);
> +  y = va_arg (ap, struct test2);
> +  va_end(ap);
> +  return y;
> +}
> +
> +// CHECK: define void @test3va(%struct.test3* noalias sret %agg.result, i32 signext %x, ...)
> +// CHECK: %ap.cur = load i8** %ap
> +// CHECK: %[[TMP0:[0-9]+]] = ptrtoint i8* %ap.cur to i64
> +// CHECK: %[[TMP1:[0-9]+]] = add i64 %[[TMP0]], 15
> +// CHECK: %[[TMP2:[0-9]+]] = and i64 %[[TMP1]], -16
> +// CHECK: %ap.align = inttoptr i64 %[[TMP2]] to i8*
> +// CHECK: %ap.next = getelementptr i8* %ap.align, i64 32
> +// CHECK: store i8* %ap.next, i8** %ap
> +// CHECK: bitcast i8* %ap.align to %struct.test3*
> +struct test3 test3va (int x, ...)
> +{
> +  struct test3 y;
> +  va_list ap;
> +  va_start(ap, x);
> +  y = va_arg (ap, struct test3);
> +  va_end(ap);
> +  return y;
> +}
> +
> +// CHECK: define void @test4va(%struct.test4* noalias sret %agg.result, i32 signext %x, ...)
> +// CHECK: %ap.cur = load i8** %ap
> +// CHECK: %ap.next = getelementptr i8* %ap.cur, i64 16
> +// CHECK: store i8* %ap.next, i8** %ap
> +// CHECK: bitcast i8* %ap.cur to %struct.test4*
> +struct test4 test4va (int x, ...)
> +{
> +  struct test4 y;
> +  va_list ap;
> +  va_start(ap, x);
> +  y = va_arg (ap, struct test4);
> +  va_end(ap);
> +  return y;
> +}
> +
> +// CHECK: define void @testva_longdouble(%struct.test_longdouble* noalias sret %agg.result, i32 signext %x, ...)
> +// CHECK: %ap.cur = load i8** %ap
> +// CHECK: %ap.next = getelementptr i8* %ap.cur, i64 16
> +// CHECK: store i8* %ap.next, i8** %ap
> +// CHECK: bitcast i8* %ap.cur to %struct.test_longdouble*
> +struct test_longdouble { long double x; };
> +struct test_longdouble testva_longdouble (int x, ...)
> +{
> +  struct test_longdouble y;
> +  va_list ap;
> +  va_start(ap, x);
> +  y = va_arg (ap, struct test_longdouble);
> +  va_end(ap);
> +  return y;
> +}
> +
> +// CHECK: define void @testva_vector(%struct.test_vector* noalias sret %agg.result, i32 signext %x, ...)
> +// CHECK: %ap.cur = load i8** %ap
> +// CHECK: %[[TMP0:[0-9]+]] = ptrtoint i8* %ap.cur to i64
> +// CHECK: %[[TMP1:[0-9]+]] = add i64 %[[TMP0]], 15
> +// CHECK: %[[TMP2:[0-9]+]] = and i64 %[[TMP1]], -16
> +// CHECK: %ap.align = inttoptr i64 %[[TMP2]] to i8*
> +// CHECK: %ap.next = getelementptr i8* %ap.align, i64 16
> +// CHECK: store i8* %ap.next, i8** %ap
> +// CHECK: bitcast i8* %ap.align to %struct.test_vector*
> +struct test_vector { vector int x; };
> +struct test_vector testva_vector (int x, ...)
> +{
> +  struct test_vector y;
> +  va_list ap;
> +  va_start(ap, x);
> +  y = va_arg (ap, struct test_vector);
> +  va_end(ap);
> +  return y;
> +}
> +
>
> Modified: cfe/trunk/test/CodeGen/ppc64-vector.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/ppc64-vector.c?rev=212743&r1=212742&r2=212743&view=diff
> ==============================================================================
> --- cfe/trunk/test/CodeGen/ppc64-vector.c (original)
> +++ cfe/trunk/test/CodeGen/ppc64-vector.c Thu Jul 10 12:20:07 2014
> @@ -45,7 +45,7 @@ v16i16 test_v16i16(v16i16 x)
>    return x;
>  }
>
> -// CHECK: define void @test_struct_v16i16(%struct.v16i16* noalias sret %agg.result, %struct.v16i16* byval %x)
> +// CHECK: define void @test_struct_v16i16(%struct.v16i16* noalias sret %agg.result, %struct.v16i16* byval align 16)
>  struct v16i16 test_struct_v16i16(struct v16i16 x)
>  {
>    return x;
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits