[cfe-commits] r167058 - in /cfe/trunk: lib/CodeGen/TargetInfo.cpp test/CodeGen/arm-homogenous.c
Manman Ren
mren at apple.com
Wed Oct 31 09:42:28 PDT 2012
On Oct 31, 2012, at 9:29 AM, Bob Wilson <bob.wilson at apple.com> wrote:
>
> On Oct 30, 2012, at 4:21 PM, manman ren <mren at apple.com> wrote:
>
>> Author: mren
>> Date: Tue Oct 30 18:21:41 2012
>> New Revision: 167058
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=167058&view=rev
>> Log:
>> ARM AAPCS-VFP: fix handling of homogeneous aggregate.
>>
>> If an HA can only partially fit into VFP registers, we add padding to make sure
>> the HA will be on the stack and later VFP CPRCs will be on the stack as well.
>
> Thanks for working on this, Manman. I noticed while reviewing this that the AAPCS-VFP ABI is even trickier than I remembered. The VFP registers aren't simply allocated in order. If you skip over some registers due to alignment constraints, you may need to "backfill" those registers for later arguments. See test/CodeGen/ARM/arguments_f64_backfill.ll for an example of this.
Thanks, I will look into that.
-Manman
>
> That means that the front-end is going to have to keep track of the registers available for backfilling so you can have an accurate count of how many remain available for homogeneous aggregates.
>
>>
>> Modified:
>> cfe/trunk/lib/CodeGen/TargetInfo.cpp
>> cfe/trunk/test/CodeGen/arm-homogenous.c
>>
>> Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
>> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=167058&r1=167057&r2=167058&view=diff
>> ==============================================================================
>> --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
>> +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Tue Oct 30 18:21:41 2012
>> @@ -2863,7 +2863,8 @@
>> ABIKind getABIKind() const { return Kind; }
>>
>> ABIArgInfo classifyReturnType(QualType RetTy) const;
>> - ABIArgInfo classifyArgumentType(QualType RetTy) const;
>> + ABIArgInfo classifyArgumentType(QualType RetTy, unsigned &AllocatedVFP,
>> + bool &IsHA) const;
>> bool isIllegalVectorType(QualType Ty) const;
>>
>> virtual void computeInfo(CGFunctionInfo &FI) const;
>> @@ -2907,10 +2908,32 @@
>> }
>>
>> void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const {
>> + // To correctly handle Homogeneous Aggregate, we need to keep track of the
>> + // number of VFP registers allocated so far.
>> + // C.1.vfp If the argument is a VFP CPRC and there are sufficient consecutive
>> + // VFP registers of the appropriate type unallocated then the argument is
>> + // allocated to the lowest-numbered sequence of such registers.
>> + // C.2.vfp If the argument is a VFP CPRC then any VFP registers that are
>> + // unallocated are marked as unavailable.
>> + unsigned AllocatedVFP = 0;
>> FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
>> for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
>> - it != ie; ++it)
>> - it->info = classifyArgumentType(it->type);
>> + it != ie; ++it) {
>> + unsigned PreAllocation = AllocatedVFP;
>> + bool IsHA = false;
>> + // 6.1.2.3 There is one VFP co-processor register class using registers
>> + // s0-s15 (d0-d7) for passing arguments.
>> + const unsigned NumVFPs = 16;
>> + it->info = classifyArgumentType(it->type, AllocatedVFP, IsHA);
>> + // If we do not have enough VFP registers for the HA, any VFP registers
>> + // that are unallocated are marked as unavailable. To achieve this, we add
>> + // padding of (NumVFPs - PreAllocation) floats.
>> + if (IsHA && AllocatedVFP > NumVFPs && PreAllocation < NumVFPs) {
>> + llvm::Type *PaddingTy = llvm::ArrayType::get(
>> + llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocation);
>> + it->info = ABIArgInfo::getExpandWithPadding(false, PaddingTy);
>> + }
>> + }
>>
>> // Always honor user-specified calling convention.
>> if (FI.getCallingConvention() != llvm::CallingConv::C)
>> @@ -3012,7 +3035,17 @@
>> return (Members > 0 && Members <= 4);
>> }
>>
>> -ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty) const {
>> +ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, unsigned &AllocatedVFP,
>> + bool &IsHA) const {
>> + // We update number of allocated VFPs according to
>> + // 6.1.2.1 The following argument types are VFP CPRCs:
>> + // A single-precision floating-point type (including promoted
>> + // half-precision types); A double-precision floating-point type;
>> + // A 64-bit or 128-bit containerized vector type; Homogeneous Aggregate
>> + // with a Base Type of a single- or double-precision floating-point type,
>> + // 64-bit containerized vectors or 128-bit containerized vectors with one
>> + // to four Elements.
>> +
>> // Handle illegal vector types here.
>> if (isIllegalVectorType(Ty)) {
>> uint64_t Size = getContext().getTypeSize(Ty);
>> @@ -3024,15 +3057,38 @@
>> if (Size == 64) {
>> llvm::Type *ResType = llvm::VectorType::get(
>> llvm::Type::getInt32Ty(getVMContext()), 2);
>> + // Align AllocatedVFP to an even number to use a D register.
>> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2);
>> + AllocatedVFP += 2; // 1 D register = 2 S registers
>> return ABIArgInfo::getDirect(ResType);
>> }
>> if (Size == 128) {
>> llvm::Type *ResType = llvm::VectorType::get(
>> llvm::Type::getInt32Ty(getVMContext()), 4);
>> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 4);
>> + AllocatedVFP += 4; // 1 Q register = 4 S registers
>> return ABIArgInfo::getDirect(ResType);
>> }
>> return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
>> }
>> + // Update AllocatedVFP for legal vector types.
>> + if (const VectorType *VT = Ty->getAs<VectorType>()) {
>> + uint64_t Size = getContext().getTypeSize(VT);
>> + // Size of a legal vector should be a power of 2 and at least 64.
>> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, Size >= 128 ? 4 : 2);
>> + AllocatedVFP += (Size / 32);
>> + }
>> + // Update AllocatedVFP for floating point types.
>> + if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
>> + if (BT->getKind() == BuiltinType::Half ||
>> + BT->getKind() == BuiltinType::Float)
>> + AllocatedVFP += 1;
>> + if (BT->getKind() == BuiltinType::Double ||
>> + BT->getKind() == BuiltinType::LongDouble) {
>> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2);
>> + AllocatedVFP += 2;
>> + }
>> + }
>>
>> if (!isAggregateTypeForABI(Ty)) {
>> // Treat an enum type as its underlying type.
>> @@ -3053,10 +3109,28 @@
>> return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
>>
>> if (getABIKind() == ARMABIInfo::AAPCS_VFP) {
>> - // Homogeneous Aggregates need to be expanded.
>> + // Homogeneous Aggregates need to be expanded when we can fit the aggregate
>> + // into VFP registers.
>> const Type *Base = 0;
>> - if (isHomogeneousAggregate(Ty, Base, getContext())) {
>> + uint64_t Members = 0;
>> + if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) {
>> assert(Base && "Base class should be set for homogeneous aggregate");
>> + // Base can be a floating-point or a vector.
>> + if (Base->isVectorType()) {
>> + // ElementSize is in number of floats.
>> + unsigned ElementSize = getContext().getTypeSize(Base) == 64 ? 2 : 4;
>> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP,
>> + ElementSize);
>> + AllocatedVFP += Members * ElementSize;
>> + } else if (Base->isSpecificBuiltinType(BuiltinType::Float))
>> + AllocatedVFP += Members;
>> + else {
>> + assert(Base->isSpecificBuiltinType(BuiltinType::Double) ||
>> + Base->isSpecificBuiltinType(BuiltinType::LongDouble));
>> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2);
>> + AllocatedVFP += Members * 2; // Base type is double.
>> + }
>> + IsHA = true;
>> return ABIArgInfo::getExpand();
>> }
>> }
>>
>> Modified: cfe/trunk/test/CodeGen/arm-homogenous.c
>> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-homogenous.c?rev=167058&r1=167057&r2=167058&view=diff
>> ==============================================================================
>> --- cfe/trunk/test/CodeGen/arm-homogenous.c (original)
>> +++ cfe/trunk/test/CodeGen/arm-homogenous.c Tue Oct 30 18:21:41 2012
>> @@ -156,6 +156,40 @@
>> }
>> // CHECK: declare arm_aapcs_vfpcc %union.union_with_struct_with_fundamental_elems @returns_union_with_struct_with_fundamental_elems()
>>
>> +// Make sure HAs that can only partially fit into VFP registers are allocated
>> +// on the stack and that later VFP candidates go on the stack as well.
>> +typedef struct {
>> + double x;
>> + double a2;
>> + double a3;
>> + double a4;
>> +} struct_of_four_doubles;
>> +extern void takes_struct_of_four_doubles(double a, struct_of_four_doubles b, struct_of_four_doubles c, double d);
>> +struct_of_four_doubles g_s4d;
>> +
>> +void test_struct_of_four_doubles(void) {
>> +// CHECK: test_struct_of_four_doubles
>> +// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [6 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}})
>> + takes_struct_of_four_doubles(3.0, g_s4d, g_s4d, 4.0);
>> +}
>> +
>> +typedef __attribute__(( ext_vector_type(8) )) char __char8;
>> +typedef __attribute__(( ext_vector_type(4) )) short __short4;
>> +typedef struct {
>> + __char8 a1;
>> + __short4 a2;
>> + __char8 a3;
>> + __short4 a4;
>> +} struct_of_vecs;
>> +extern void takes_struct_of_vecs(double a, struct_of_vecs b, struct_of_vecs c, double d);
>> +struct_of_vecs g_vec;
>> +
>> +void test_struct_of_vecs(void) {
>> +// CHECK: test_struct_of_vecs
>> +// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [6 x float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, double {{.*}})
>> + takes_struct_of_vecs(3.0, g_vec, g_vec, 4.0);
>> +}
>> +
>> // FIXME: Tests necessary:
>> // - Vectors
>> // - C++ stuff
>>
>>
>> _______________________________________________
>> cfe-commits mailing list
>> cfe-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>
More information about the cfe-commits
mailing list