[cfe-commits] r167058 - in /cfe/trunk: lib/CodeGen/TargetInfo.cpp test/CodeGen/arm-homogenous.c

Manman Ren mren at apple.com
Wed Oct 31 09:42:28 PDT 2012



On Oct 31, 2012, at 9:29 AM, Bob Wilson <bob.wilson at apple.com> wrote:

> 
> On Oct 30, 2012, at 4:21 PM, manman ren <mren at apple.com> wrote:
> 
>> Author: mren
>> Date: Tue Oct 30 18:21:41 2012
>> New Revision: 167058
>> 
>> URL: http://llvm.org/viewvc/llvm-project?rev=167058&view=rev
>> Log:
>> ARM AAPCS-VFP: fix handling of homogeneous aggregate.
>> 
>> If an HA can only partially fit into VFP registers, we add padding to make sure
>> the HA will be on the stack and later VFP CPRCs will be on the stack as well.
> 
> Thanks for working on this, Manman.  I noticed while reviewing this that the AAPCS-VFP ABI is even trickier than I remembered.  The VFP registers aren't simply allocated in order.  If you skip over some registers due to alignment constraints, you may need to "backfill" those registers for later arguments.  See test/CodeGen/ARM/arguments_f64_backfill.ll for an example of this.

Thanks, I will look into that.

-Manman
> 
> That means that the front-end is going to have to keep track of the registers available for backfilling so you can have an accurate count of how many remain available for homogeneous aggregates.
> 
>> 
>> Modified:
>>   cfe/trunk/lib/CodeGen/TargetInfo.cpp
>>   cfe/trunk/test/CodeGen/arm-homogenous.c
>> 
>> Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
>> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=167058&r1=167057&r2=167058&view=diff
>> ==============================================================================
>> --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
>> +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Tue Oct 30 18:21:41 2012
>> @@ -2863,7 +2863,8 @@
>>  ABIKind getABIKind() const { return Kind; }
>> 
>>  ABIArgInfo classifyReturnType(QualType RetTy) const;
>> -  ABIArgInfo classifyArgumentType(QualType RetTy) const;
>> +  ABIArgInfo classifyArgumentType(QualType RetTy, unsigned &AllocatedVFP,
>> +                                  bool &IsHA) const;
>>  bool isIllegalVectorType(QualType Ty) const;
>> 
>>  virtual void computeInfo(CGFunctionInfo &FI) const;
>> @@ -2907,10 +2908,32 @@
>> }
>> 
>> void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const {
>> +  // To correctly handle Homogeneous Aggregate, we need to keep track of the
>> +  // number of VFP registers allocated so far.
>> +  // C.1.vfp If the argument is a VFP CPRC and there are sufficient consecutive
>> +  // VFP registers of the appropriate type unallocated then the argument is
>> +  // allocated to the lowest-numbered sequence of such registers.
>> +  // C.2.vfp If the argument is a VFP CPRC then any VFP registers that are
>> +  // unallocated are marked as unavailable. 
>> +  unsigned AllocatedVFP = 0;
>>  FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
>>  for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
>> -       it != ie; ++it)
>> -    it->info = classifyArgumentType(it->type);
>> +       it != ie; ++it) {
>> +    unsigned PreAllocation = AllocatedVFP;
>> +    bool IsHA = false;
>> +    // 6.1.2.3 There is one VFP co-processor register class using registers
>> +    // s0-s15 (d0-d7) for passing arguments.
>> +    const unsigned NumVFPs = 16;
>> +    it->info = classifyArgumentType(it->type, AllocatedVFP, IsHA);
>> +    // If we do not have enough VFP registers for the HA, any VFP registers
>> +    // that are unallocated are marked as unavailable. To achieve this, we add
>> +    // padding of (NumVFPs - PreAllocation) floats.
>> +    if (IsHA && AllocatedVFP > NumVFPs && PreAllocation < NumVFPs) {
>> +      llvm::Type *PaddingTy = llvm::ArrayType::get(
>> +          llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocation);
>> +      it->info = ABIArgInfo::getExpandWithPadding(false, PaddingTy);
>> +    }
>> +  }
>> 
>>  // Always honor user-specified calling convention.
>>  if (FI.getCallingConvention() != llvm::CallingConv::C)
>> @@ -3012,7 +3035,17 @@
>>  return (Members > 0 && Members <= 4);
>> }
>> 
>> -ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty) const {
>> +ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, unsigned &AllocatedVFP,
>> +                                            bool &IsHA) const {
>> +  // We update number of allocated VFPs according to
>> +  // 6.1.2.1 The following argument types are VFP CPRCs:
>> +  //   A single-precision floating-point type (including promoted
>> +  //   half-precision types); A double-precision floating-point type;
>> +  //   A 64-bit or 128-bit containerized vector type; Homogeneous Aggregate
>> +  //   with a Base Type of a single- or double-precision floating-point type,
>> +  //   64-bit containerized vectors or 128-bit containerized vectors with one
>> +  //   to four Elements.
>> +
>>  // Handle illegal vector types here.
>>  if (isIllegalVectorType(Ty)) {
>>    uint64_t Size = getContext().getTypeSize(Ty);
>> @@ -3024,15 +3057,38 @@
>>    if (Size == 64) {
>>      llvm::Type *ResType = llvm::VectorType::get(
>>          llvm::Type::getInt32Ty(getVMContext()), 2);
>> +      // Align AllocatedVFP to an even number to use a D register.
>> +      AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2);
>> +      AllocatedVFP += 2; // 1 D register = 2 S registers
>>      return ABIArgInfo::getDirect(ResType);
>>    }
>>    if (Size == 128) {
>>      llvm::Type *ResType = llvm::VectorType::get(
>>          llvm::Type::getInt32Ty(getVMContext()), 4);
>> +      AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 4);
>> +      AllocatedVFP += 4; // 1 Q register = 4 S registers
>>      return ABIArgInfo::getDirect(ResType);
>>    }
>>    return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
>>  }
>> +  // Update AllocatedVFP for legal vector types.
>> +  if (const VectorType *VT = Ty->getAs<VectorType>()) {
>> +    uint64_t Size = getContext().getTypeSize(VT);
>> +    // Size of a legal vector should be power of 2 and above 64.
>> +    AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, Size >= 128 ? 4 : 2);
>> +    AllocatedVFP += (Size / 32);
>> +  }
>> +  // Update AllocatedVFP for floating point types.
>> +  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
>> +    if (BT->getKind() == BuiltinType::Half ||
>> +        BT->getKind() == BuiltinType::Float)
>> +      AllocatedVFP += 1;
>> +    if (BT->getKind() == BuiltinType::Double ||
>> +        BT->getKind() == BuiltinType::LongDouble) {
>> +      AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2);
>> +      AllocatedVFP += 2;
>> +    }
>> +  }
>> 
>>  if (!isAggregateTypeForABI(Ty)) {
>>    // Treat an enum type as its underlying type.
>> @@ -3053,10 +3109,28 @@
>>    return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
>> 
>>  if (getABIKind() == ARMABIInfo::AAPCS_VFP) {
>> -    // Homogeneous Aggregates need to be expanded.
>> +    // Homogeneous Aggregates need to be expanded when we can fit the aggregate
>> +    // into VFP registers.
>>    const Type *Base = 0;
>> -    if (isHomogeneousAggregate(Ty, Base, getContext())) {
>> +    uint64_t Members = 0;
>> +    if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) {
>>      assert(Base && "Base class should be set for homogeneous aggregate");
>> +      // Base can be a floating-point or a vector.
>> +      if (Base->isVectorType()) {
>> +        // ElementSize is in number of floats.
>> +        unsigned ElementSize = getContext().getTypeSize(Base) == 64 ? 2 : 4;
>> +        AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP,
>> +                       ElementSize);
>> +        AllocatedVFP += Members * ElementSize;
>> +      } else if (Base->isSpecificBuiltinType(BuiltinType::Float))
>> +        AllocatedVFP += Members;
>> +      else {
>> +        assert(Base->isSpecificBuiltinType(BuiltinType::Double) ||
>> +               Base->isSpecificBuiltinType(BuiltinType::LongDouble));
>> +        AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2);
>> +        AllocatedVFP += Members * 2; // Base type is double.
>> +      }
>> +      IsHA = true;
>>      return ABIArgInfo::getExpand();
>>    }
>>  }
>> 
>> Modified: cfe/trunk/test/CodeGen/arm-homogenous.c
>> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-homogenous.c?rev=167058&r1=167057&r2=167058&view=diff
>> ==============================================================================
>> --- cfe/trunk/test/CodeGen/arm-homogenous.c (original)
>> +++ cfe/trunk/test/CodeGen/arm-homogenous.c Tue Oct 30 18:21:41 2012
>> @@ -156,6 +156,40 @@
>> }
>> // CHECK: declare arm_aapcs_vfpcc %union.union_with_struct_with_fundamental_elems @returns_union_with_struct_with_fundamental_elems()
>> 
>> +// Make sure HAs that can be partially fit into VFP registers will be allocated
>> +// on stack and that later VFP candidates will go on stack as well.
>> +typedef struct {
>> +  double x;
>> +  double a2;
>> +  double a3;
>> +  double a4;
>> +} struct_of_four_doubles;
>> +extern void takes_struct_of_four_doubles(double a, struct_of_four_doubles b, struct_of_four_doubles c, double d);
>> +struct_of_four_doubles g_s4d;
>> +
>> +void test_struct_of_four_doubles(void) {
>> +// CHECK: test_struct_of_four_doubles
>> +// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [6 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}})
>> +  takes_struct_of_four_doubles(3.0, g_s4d, g_s4d, 4.0);
>> +}
>> +
>> +typedef __attribute__(( ext_vector_type(8) )) char __char8;
>> +typedef __attribute__(( ext_vector_type(4) ))  short __short4;
>> +typedef struct {
>> +  __char8  a1;
>> +  __short4 a2;
>> +  __char8  a3;
>> +  __short4 a4;
>> +} struct_of_vecs;
>> +extern void takes_struct_of_vecs(double a, struct_of_vecs b, struct_of_vecs c, double d);
>> +struct_of_vecs g_vec;
>> +
>> +void test_struct_of_vecs(void) {
>> +// CHECK: test_struct_of_vecs
>> +// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [6 x float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, double {{.*}})
>> +  takes_struct_of_vecs(3.0, g_vec, g_vec, 4.0);
>> +}
>> +
>> // FIXME: Tests necessary:
>> //         - Vectors
>> //         - C++ stuff
>> 
>> 
>> _______________________________________________
>> cfe-commits mailing list
>> cfe-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
> 



More information about the cfe-commits mailing list