[cfe-commits] r167058 - in /cfe/trunk: lib/CodeGen/TargetInfo.cpp test/CodeGen/arm-homogenous.c

manman ren mren at apple.com
Wed Oct 31 12:04:39 PDT 2012


On Oct 31, 2012, at 9:29 AM, Bob Wilson <bob.wilson at apple.com> wrote:

> 
> On Oct 30, 2012, at 4:21 PM, manman ren <mren at apple.com> wrote:
> 
>> Author: mren
>> Date: Tue Oct 30 18:21:41 2012
>> New Revision: 167058
>> 
>> URL: http://llvm.org/viewvc/llvm-project?rev=167058&view=rev
>> Log:
>> ARM AAPCS-VFP: fix handling of homogeneous aggregate.
>> 
>> If an HA can only partially fit into VFP registers, we add padding to make
>> sure the HA will be on the stack and later VFP CPRCs will be on the stack as well.
> 
> Thanks for working on this, Manman.  I noticed while reviewing this that the AAPCS-VFP ABI is even trickier than I remembered.  The VFP registers aren't simply allocated in order.  If you skip over some registers due to alignment constraints, you may need to "backfill" those registers for later arguments.  See test/CodeGen/ARM/arguments_f64_backfill.ll for an example of this.
> 
> That means that the front-end is going to have to keep track of the registers available for backfilling so you can have an accurate count of how many remain available for homogeneous aggregates.

Committed r167159.
Used a 16-element array to track the allocation.

Thanks,
manman

> 
>> 
>> Modified:
>>   cfe/trunk/lib/CodeGen/TargetInfo.cpp
>>   cfe/trunk/test/CodeGen/arm-homogenous.c
>> 
>> Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
>> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=167058&r1=167057&r2=167058&view=diff
>> ==============================================================================
>> --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
>> +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Tue Oct 30 18:21:41 2012
>> @@ -2863,7 +2863,8 @@
>>  ABIKind getABIKind() const { return Kind; }
>> 
>>  ABIArgInfo classifyReturnType(QualType RetTy) const;
>> -  ABIArgInfo classifyArgumentType(QualType RetTy) const;
>> +  ABIArgInfo classifyArgumentType(QualType RetTy, unsigned &AllocatedVFP,
>> +                                  bool &IsHA) const;
>>  bool isIllegalVectorType(QualType Ty) const;
>> 
>>  virtual void computeInfo(CGFunctionInfo &FI) const;
>> @@ -2907,10 +2908,32 @@
>> }
>> 
>> void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const {
>> +  // To correctly handle Homogeneous Aggregate, we need to keep track of the
>> +  // number of VFP registers allocated so far.
>> +  // C.1.vfp If the argument is a VFP CPRC and there are sufficient consecutive
>> +  // VFP registers of the appropriate type unallocated then the argument is
>> +  // allocated to the lowest-numbered sequence of such registers.
>> +  // C.2.vfp If the argument is a VFP CPRC then any VFP registers that are
>> +  // unallocated are marked as unavailable. 
>> +  unsigned AllocatedVFP = 0;
>>  FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
>>  for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
>> -       it != ie; ++it)
>> -    it->info = classifyArgumentType(it->type);
>> +       it != ie; ++it) {
>> +    unsigned PreAllocation = AllocatedVFP;
>> +    bool IsHA = false;
>> +    // 6.1.2.3 There is one VFP co-processor register class using registers
>> +    // s0-s15 (d0-d7) for passing arguments.
>> +    const unsigned NumVFPs = 16;
>> +    it->info = classifyArgumentType(it->type, AllocatedVFP, IsHA);
>> +    // If we do not have enough VFP registers for the HA, any VFP registers
>> +    // that are unallocated are marked as unavailable. To achieve this, we add
>> +    // padding of (NumVFPs - PreAllocation) floats.
>> +    if (IsHA && AllocatedVFP > NumVFPs && PreAllocation < NumVFPs) {
>> +      llvm::Type *PaddingTy = llvm::ArrayType::get(
>> +          llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocation);
>> +      it->info = ABIArgInfo::getExpandWithPadding(false, PaddingTy);
>> +    }
>> +  }
>> 
>>  // Always honor user-specified calling convention.
>>  if (FI.getCallingConvention() != llvm::CallingConv::C)
>> @@ -3012,7 +3035,17 @@
>>  return (Members > 0 && Members <= 4);
>> }
>> 
>> -ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty) const {
>> +ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, unsigned &AllocatedVFP,
>> +                                            bool &IsHA) const {
>> +  // We update number of allocated VFPs according to
>> +  // 6.1.2.1 The following argument types are VFP CPRCs:
>> +  //   A single-precision floating-point type (including promoted
>> +  //   half-precision types); A double-precision floating-point type;
>> +  //   A 64-bit or 128-bit containerized vector type; Homogeneous Aggregate
>> +  //   with a Base Type of a single- or double-precision floating-point type,
>> +  //   64-bit containerized vectors or 128-bit containerized vectors with one
>> +  //   to four Elements.
>> +
>>  // Handle illegal vector types here.
>>  if (isIllegalVectorType(Ty)) {
>>    uint64_t Size = getContext().getTypeSize(Ty);
>> @@ -3024,15 +3057,38 @@
>>    if (Size == 64) {
>>      llvm::Type *ResType = llvm::VectorType::get(
>>          llvm::Type::getInt32Ty(getVMContext()), 2);
>> +      // Align AllocatedVFP to an even number to use a D register.
>> +      AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2);
>> +      AllocatedVFP += 2; // 1 D register = 2 S registers
>>      return ABIArgInfo::getDirect(ResType);
>>    }
>>    if (Size == 128) {
>>      llvm::Type *ResType = llvm::VectorType::get(
>>          llvm::Type::getInt32Ty(getVMContext()), 4);
>> +      AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 4);
>> +      AllocatedVFP += 4; // 1 Q register = 4 S registers
>>      return ABIArgInfo::getDirect(ResType);
>>    }
>>    return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
>>  }
>> +  // Update AllocatedVFP for legal vector types.
>> +  if (const VectorType *VT = Ty->getAs<VectorType>()) {
>> +    uint64_t Size = getContext().getTypeSize(VT);
>> +    // Size of a legal vector should be a power of 2 and at least 64 bits.
>> +    AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, Size >= 128 ? 4 : 2);
>> +    AllocatedVFP += (Size / 32);
>> +  }
>> +  // Update AllocatedVFP for floating point types.
>> +  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
>> +    if (BT->getKind() == BuiltinType::Half ||
>> +        BT->getKind() == BuiltinType::Float)
>> +      AllocatedVFP += 1;
>> +    if (BT->getKind() == BuiltinType::Double ||
>> +        BT->getKind() == BuiltinType::LongDouble) {
>> +      AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2);
>> +      AllocatedVFP += 2;
>> +    }
>> +  }
>> 
>>  if (!isAggregateTypeForABI(Ty)) {
>>    // Treat an enum type as its underlying type.
>> @@ -3053,10 +3109,28 @@
>>    return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
>> 
>>  if (getABIKind() == ARMABIInfo::AAPCS_VFP) {
>> -    // Homogeneous Aggregates need to be expanded.
>> +    // Homogeneous Aggregates need to be expanded when we can fit the aggregate
>> +    // into VFP registers.
>>    const Type *Base = 0;
>> -    if (isHomogeneousAggregate(Ty, Base, getContext())) {
>> +    uint64_t Members = 0;
>> +    if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) {
>>      assert(Base && "Base class should be set for homogeneous aggregate");
>> +      // Base can be a floating-point or a vector.
>> +      if (Base->isVectorType()) {
>> +        // ElementSize is in number of floats.
>> +        unsigned ElementSize = getContext().getTypeSize(Base) == 64 ? 2 : 4;
>> +        AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP,
>> +                       ElementSize);
>> +        AllocatedVFP += Members * ElementSize;
>> +      } else if (Base->isSpecificBuiltinType(BuiltinType::Float))
>> +        AllocatedVFP += Members;
>> +      else {
>> +        assert(Base->isSpecificBuiltinType(BuiltinType::Double) ||
>> +               Base->isSpecificBuiltinType(BuiltinType::LongDouble));
>> +        AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2);
>> +        AllocatedVFP += Members * 2; // Base type is double.
>> +      }
>> +      IsHA = true;
>>      return ABIArgInfo::getExpand();
>>    }
>>  }
>> 
>> Modified: cfe/trunk/test/CodeGen/arm-homogenous.c
>> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-homogenous.c?rev=167058&r1=167057&r2=167058&view=diff
>> ==============================================================================
>> --- cfe/trunk/test/CodeGen/arm-homogenous.c (original)
>> +++ cfe/trunk/test/CodeGen/arm-homogenous.c Tue Oct 30 18:21:41 2012
>> @@ -156,6 +156,40 @@
>> }
>> // CHECK: declare arm_aapcs_vfpcc %union.union_with_struct_with_fundamental_elems @returns_union_with_struct_with_fundamental_elems()
>> 
>> +// Make sure HAs that can only partially fit into VFP registers are allocated
>> +// on the stack and that later VFP candidates go on the stack as well.
>> +typedef struct {
>> +  double x;
>> +  double a2;
>> +  double a3;
>> +  double a4;
>> +} struct_of_four_doubles;
>> +extern void takes_struct_of_four_doubles(double a, struct_of_four_doubles b, struct_of_four_doubles c, double d);
>> +struct_of_four_doubles g_s4d;
>> +
>> +void test_struct_of_four_doubles(void) {
>> +// CHECK: test_struct_of_four_doubles
>> +// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [6 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}})
>> +  takes_struct_of_four_doubles(3.0, g_s4d, g_s4d, 4.0);
>> +}
>> +
>> +typedef __attribute__(( ext_vector_type(8) )) char __char8;
>> +typedef __attribute__(( ext_vector_type(4) ))  short __short4;
>> +typedef struct {
>> +  __char8  a1;
>> +  __short4 a2;
>> +  __char8  a3;
>> +  __short4 a4;
>> +} struct_of_vecs;
>> +extern void takes_struct_of_vecs(double a, struct_of_vecs b, struct_of_vecs c, double d);
>> +struct_of_vecs g_vec;
>> +
>> +void test_struct_of_vecs(void) {
>> +// CHECK: test_struct_of_vecs
>> +// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [6 x float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, double {{.*}})
>> +  takes_struct_of_vecs(3.0, g_vec, g_vec, 4.0);
>> +}
>> +
>> // FIXME: Tests necessary:
>> //         - Vectors
>> //         - C++ stuff
>> 
>> 
>> _______________________________________________
>> cfe-commits mailing list
>> cfe-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
> 




More information about the cfe-commits mailing list