[cfe-commits] r167058 - in /cfe/trunk: lib/CodeGen/TargetInfo.cpp test/CodeGen/arm-homogenous.c
Bob Wilson
bob.wilson at apple.com
Wed Oct 31 09:29:33 PDT 2012
On Oct 30, 2012, at 4:21 PM, manman ren <mren at apple.com> wrote:
> Author: mren
> Date: Tue Oct 30 18:21:41 2012
> New Revision: 167058
>
> URL: http://llvm.org/viewvc/llvm-project?rev=167058&view=rev
> Log:
> ARM AAPCS-VFP: fix handling of homogeneous aggregates.
>
> If an HA can only partially fit into the VFP registers, we add padding to make
> sure the whole HA is passed on the stack and that later VFP CPRCs go on the
> stack as well.
Thanks for working on this, Manman. I noticed while reviewing this that the AAPCS-VFP ABI is even trickier than I remembered. The VFP registers aren't simply allocated in order. If you skip over some registers due to alignment constraints, you may need to "backfill" those registers for later arguments. See test/CodeGen/ARM/arguments_f64_backfill.ll for an example of this.
That means the front end will have to keep track of which individual registers are available for backfilling, so it can maintain an accurate count of how many remain free for homogeneous aggregates.
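Roughly, I'd expect the bookkeeping to look something like the sketch below. This is only an illustration of the AAPCS allocation rules as I understand them, not the actual TargetInfo.cpp code; the names (VFPAllocSketch, allocate) are made up. It tracks the sixteen S-register slots individually so that a float can later backfill a slot skipped when a double was aligned to an even register.

// Illustrative sketch only -- not the real TargetInfo.cpp code.  Models
// s0-s15 as individual slots so a later float can backfill a slot that was
// skipped when a double was aligned to an even S register.
#include <cstdio>

struct VFPAllocSketch {
  bool Used[16] = {};     // s0..s15
  bool OnStack = false;   // once a VFP CPRC spills, the remaining VFP
                          // registers are marked unavailable (C.2.vfp)

  // Allocate Count consecutive S registers starting at a multiple of Align
  // (1 for float, 2 for a double/D register, 4 for a Q register).  Returns
  // the first register number, or -1 if the argument goes on the stack.
  int allocate(unsigned Count, unsigned Align) {
    if (OnStack)
      return -1;
    for (unsigned Start = 0; Start + Count <= 16; Start += Align) {
      bool Free = true;
      for (unsigned I = Start; I < Start + Count; ++I)
        Free = Free && !Used[I];
      if (Free) {
        for (unsigned I = Start; I < Start + Count; ++I)
          Used[I] = true;
        return static_cast<int>(Start);
      }
    }
    OnStack = true;       // C.2.vfp: no backfilling after this point
    return -1;
  }
};

int main() {
  VFPAllocSketch A;
  std::printf("float  -> s%d\n", A.allocate(1, 1)); // s0
  std::printf("double -> s%d\n", A.allocate(2, 2)); // s2/s3 (d1), skips s1
  std::printf("float  -> s%d\n", A.allocate(1, 1)); // s1, backfilled
  return 0;
}

With that, a (float, double, float) argument list puts the second float into s1, which, if I remember arguments_f64_backfill.ll correctly, is exactly the case it exercises; a single running AllocatedVFP counter can't represent the hole left at s1.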
>
> Modified:
> cfe/trunk/lib/CodeGen/TargetInfo.cpp
> cfe/trunk/test/CodeGen/arm-homogenous.c
>
> Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=167058&r1=167057&r2=167058&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
> +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Tue Oct 30 18:21:41 2012
> @@ -2863,7 +2863,8 @@
> ABIKind getABIKind() const { return Kind; }
>
> ABIArgInfo classifyReturnType(QualType RetTy) const;
> - ABIArgInfo classifyArgumentType(QualType RetTy) const;
> + ABIArgInfo classifyArgumentType(QualType RetTy, unsigned &AllocatedVFP,
> + bool &IsHA) const;
> bool isIllegalVectorType(QualType Ty) const;
>
> virtual void computeInfo(CGFunctionInfo &FI) const;
> @@ -2907,10 +2908,32 @@
> }
>
> void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const {
> + // To correctly handle Homogeneous Aggregate, we need to keep track of the
> + // number of VFP registers allocated so far.
> + // C.1.vfp If the argument is a VFP CPRC and there are sufficient consecutive
> + // VFP registers of the appropriate type unallocated then the argument is
> + // allocated to the lowest-numbered sequence of such registers.
> + // C.2.vfp If the argument is a VFP CPRC then any VFP registers that are
> + // unallocated are marked as unavailable.
> + unsigned AllocatedVFP = 0;
> FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
> for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
> - it != ie; ++it)
> - it->info = classifyArgumentType(it->type);
> + it != ie; ++it) {
> + unsigned PreAllocation = AllocatedVFP;
> + bool IsHA = false;
> + // 6.1.2.3 There is one VFP co-processor register class using registers
> + // s0-s15 (d0-d7) for passing arguments.
> + const unsigned NumVFPs = 16;
> + it->info = classifyArgumentType(it->type, AllocatedVFP, IsHA);
> + // If we do not have enough VFP registers for the HA, any VFP registers
> + // that are unallocated are marked as unavailable. To achieve this, we add
> + // padding of (NumVFPs - PreAllocation) floats.
> + if (IsHA && AllocatedVFP > NumVFPs && PreAllocation < NumVFPs) {
> + llvm::Type *PaddingTy = llvm::ArrayType::get(
> + llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocation);
> + it->info = ABIArgInfo::getExpandWithPadding(false, PaddingTy);
> + }
> + }
>
> // Always honor user-specified calling convention.
> if (FI.getCallingConvention() != llvm::CallingConv::C)
> @@ -3012,7 +3035,17 @@
> return (Members > 0 && Members <= 4);
> }
>
> -ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty) const {
> +ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, unsigned &AllocatedVFP,
> + bool &IsHA) const {
> + // We update number of allocated VFPs according to
> + // 6.1.2.1 The following argument types are VFP CPRCs:
> + // A single-precision floating-point type (including promoted
> + // half-precision types); A double-precision floating-point type;
> + // A 64-bit or 128-bit containerized vector type; Homogeneous Aggregate
> + // with a Base Type of a single- or double-precision floating-point type,
> + // 64-bit containerized vectors or 128-bit containerized vectors with one
> + // to four Elements.
> +
> // Handle illegal vector types here.
> if (isIllegalVectorType(Ty)) {
> uint64_t Size = getContext().getTypeSize(Ty);
> @@ -3024,15 +3057,38 @@
> if (Size == 64) {
> llvm::Type *ResType = llvm::VectorType::get(
> llvm::Type::getInt32Ty(getVMContext()), 2);
> + // Align AllocatedVFP to an even number to use a D register.
> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2);
> + AllocatedVFP += 2; // 1 D register = 2 S registers
> return ABIArgInfo::getDirect(ResType);
> }
> if (Size == 128) {
> llvm::Type *ResType = llvm::VectorType::get(
> llvm::Type::getInt32Ty(getVMContext()), 4);
> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 4);
> + AllocatedVFP += 4; // 1 Q register = 4 S registers
> return ABIArgInfo::getDirect(ResType);
> }
> return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
> }
> + // Update AllocatedVFP for legal vector types.
> + if (const VectorType *VT = Ty->getAs<VectorType>()) {
> + uint64_t Size = getContext().getTypeSize(VT);
> + // Size of a legal vector should be power of 2 and above 64.
> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, Size >= 128 ? 4 : 2);
> + AllocatedVFP += (Size / 32);
> + }
> + // Update AllocatedVFP for floating point types.
> + if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
> + if (BT->getKind() == BuiltinType::Half ||
> + BT->getKind() == BuiltinType::Float)
> + AllocatedVFP += 1;
> + if (BT->getKind() == BuiltinType::Double ||
> + BT->getKind() == BuiltinType::LongDouble) {
> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2);
> + AllocatedVFP += 2;
> + }
> + }
>
> if (!isAggregateTypeForABI(Ty)) {
> // Treat an enum type as its underlying type.
> @@ -3053,10 +3109,28 @@
> return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
>
> if (getABIKind() == ARMABIInfo::AAPCS_VFP) {
> - // Homogeneous Aggregates need to be expanded.
> + // Homogeneous Aggregates need to be expanded when we can fit the aggregate
> + // into VFP registers.
> const Type *Base = 0;
> - if (isHomogeneousAggregate(Ty, Base, getContext())) {
> + uint64_t Members = 0;
> + if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) {
> assert(Base && "Base class should be set for homogeneous aggregate");
> + // Base can be a floating-point or a vector.
> + if (Base->isVectorType()) {
> + // ElementSize is in number of floats.
> + unsigned ElementSize = getContext().getTypeSize(Base) == 64 ? 2 : 4;
> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP,
> + ElementSize);
> + AllocatedVFP += Members * ElementSize;
> + } else if (Base->isSpecificBuiltinType(BuiltinType::Float))
> + AllocatedVFP += Members;
> + else {
> + assert(Base->isSpecificBuiltinType(BuiltinType::Double) ||
> + Base->isSpecificBuiltinType(BuiltinType::LongDouble));
> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2);
> + AllocatedVFP += Members * 2; // Base type is double.
> + }
> + IsHA = true;
> return ABIArgInfo::getExpand();
> }
> }
>
> Modified: cfe/trunk/test/CodeGen/arm-homogenous.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-homogenous.c?rev=167058&r1=167057&r2=167058&view=diff
> ==============================================================================
> --- cfe/trunk/test/CodeGen/arm-homogenous.c (original)
> +++ cfe/trunk/test/CodeGen/arm-homogenous.c Tue Oct 30 18:21:41 2012
> @@ -156,6 +156,40 @@
> }
> // CHECK: declare arm_aapcs_vfpcc %union.union_with_struct_with_fundamental_elems @returns_union_with_struct_with_fundamental_elems()
>
> +// Make sure HAs that can be partially fit into VFP registers will be allocated
> +// on stack and that later VFP candidates will go on stack as well.
> +typedef struct {
> + double x;
> + double a2;
> + double a3;
> + double a4;
> +} struct_of_four_doubles;
> +extern void takes_struct_of_four_doubles(double a, struct_of_four_doubles b, struct_of_four_doubles c, double d);
> +struct_of_four_doubles g_s4d;
> +
> +void test_struct_of_four_doubles(void) {
> +// CHECK: test_struct_of_four_doubles
> +// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [6 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}})
> + takes_struct_of_four_doubles(3.0, g_s4d, g_s4d, 4.0);
> +}
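(If I'm reading the new code right, the [6 x float] in the CHECK line above falls out like this: the leading double lands in d0, i.e. s0-s1, and the first struct_of_four_doubles in d1-d4, i.e. s2-s9, so 10 of the 16 S registers are allocated. The second copy would need 8 more, which no longer fit, so the front end emits 16 - 10 = 6 floats of padding to mark s10-s15 unavailable, and the second aggregate plus the trailing double are passed on the stack.)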
> +
> +typedef __attribute__(( ext_vector_type(8) )) char __char8;
> +typedef __attribute__(( ext_vector_type(4) )) short __short4;
> +typedef struct {
> + __char8 a1;
> + __short4 a2;
> + __char8 a3;
> + __short4 a4;
> +} struct_of_vecs;
> +extern void takes_struct_of_vecs(double a, struct_of_vecs b, struct_of_vecs c, double d);
> +struct_of_vecs g_vec;
> +
> +void test_struct_of_vecs(void) {
> +// CHECK: test_struct_of_vecs
> +// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [6 x float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, double {{.*}})
> + takes_struct_of_vecs(3.0, g_vec, g_vec, 4.0);
> +}
> +
> // FIXME: Tests necessary:
> // - Vectors
> // - C++ stuff
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits