[cfe-commits] r167058 - in /cfe/trunk: lib/CodeGen/TargetInfo.cpp test/CodeGen/arm-homogenous.c
Bob Wilson
bob.wilson at apple.com
Wed Oct 31 09:29:33 PDT 2012
On Oct 30, 2012, at 4:21 PM, manman ren <mren at apple.com> wrote:
> Author: mren
> Date: Tue Oct 30 18:21:41 2012
> New Revision: 167058
>
> URL: http://llvm.org/viewvc/llvm-project?rev=167058&view=rev
> Log:
> ARM AAPCS-VFP: fix handling of homogeneous aggregates.
>
> If an HA can only partially fit into the VFP registers, we add padding to make
> sure the whole HA is passed on the stack and that later VFP CPRCs go on the
> stack as well.
Thanks for working on this, Manman. I noticed while reviewing this that the AAPCS-VFP ABI is even trickier than I remembered. The VFP registers aren't simply allocated in order. If you skip over some registers due to alignment constraints, you may need to "backfill" those registers for later arguments. See test/CodeGen/ARM/arguments_f64_backfill.ll for an example of this.
That means the front end will have to keep track of which individual registers are available for backfilling, so it can maintain an accurate count of how many remain free for homogeneous aggregates.
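Roughly, I'd expect the bookkeeping to look something like the sketch below. This is only an illustration of the AAPCS allocation rules as I understand them, not the actual TargetInfo.cpp code; the names (VFPAllocSketch, allocate) are made up. It tracks the sixteen S-register slots individually so that a float can later backfill a slot skipped when a double was aligned to an even register.

// Illustrative sketch only -- not the real TargetInfo.cpp code.  Models
// s0-s15 as individual slots so a later float can backfill a slot that was
// skipped when a double was aligned to an even S register.
#include <cstdio>

struct VFPAllocSketch {
  bool Used[16] = {};     // s0..s15
  bool OnStack = false;   // once a VFP CPRC spills, the remaining VFP
                          // registers are marked unavailable (C.2.vfp)

  // Allocate Count consecutive S registers starting at a multiple of Align
  // (1 for float, 2 for a double/D register, 4 for a Q register).  Returns
  // the first register number, or -1 if the argument goes on the stack.
  int allocate(unsigned Count, unsigned Align) {
    if (OnStack)
      return -1;
    for (unsigned Start = 0; Start + Count <= 16; Start += Align) {
      bool Free = true;
      for (unsigned I = Start; I < Start + Count; ++I)
        Free = Free && !Used[I];
      if (Free) {
        for (unsigned I = Start; I < Start + Count; ++I)
          Used[I] = true;
        return static_cast<int>(Start);
      }
    }
    OnStack = true;       // C.2.vfp: no backfilling after this point
    return -1;
  }
};

int main() {
  VFPAllocSketch A;
  std::printf("float  -> s%d\n", A.allocate(1, 1)); // s0
  std::printf("double -> s%d\n", A.allocate(2, 2)); // s2/s3 (d1), skips s1
  std::printf("float  -> s%d\n", A.allocate(1, 1)); // s1, backfilled
  return 0;
}

With that, a (float, double, float) argument list puts the second float into s1, which, if I remember arguments_f64_backfill.ll correctly, is exactly the case it exercises; a single running AllocatedVFP counter can't represent the hole left at s1.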
>
> Modified:
> cfe/trunk/lib/CodeGen/TargetInfo.cpp
> cfe/trunk/test/CodeGen/arm-homogenous.c
>
> Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=167058&r1=167057&r2=167058&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
> +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Tue Oct 30 18:21:41 2012
> @@ -2863,7 +2863,8 @@
> ABIKind getABIKind() const { return Kind; }
>
> ABIArgInfo classifyReturnType(QualType RetTy) const;
> - ABIArgInfo classifyArgumentType(QualType RetTy) const;
> + ABIArgInfo classifyArgumentType(QualType RetTy, unsigned &AllocatedVFP,
> + bool &IsHA) const;
> bool isIllegalVectorType(QualType Ty) const;
>
> virtual void computeInfo(CGFunctionInfo &FI) const;
> @@ -2907,10 +2908,32 @@
> }
>
> void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const {
> + // To correctly handle Homogeneous Aggregate, we need to keep track of the
> + // number of VFP registers allocated so far.
> + // C.1.vfp If the argument is a VFP CPRC and there are sufficient consecutive
> + // VFP registers of the appropriate type unallocated then the argument is
> + // allocated to the lowest-numbered sequence of such registers.
> + // C.2.vfp If the argument is a VFP CPRC then any VFP registers that are
> + // unallocated are marked as unavailable.
> + unsigned AllocatedVFP = 0;
> FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
> for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
> - it != ie; ++it)
> - it->info = classifyArgumentType(it->type);
> + it != ie; ++it) {
> + unsigned PreAllocation = AllocatedVFP;
> + bool IsHA = false;
> + // 6.1.2.3 There is one VFP co-processor register class using registers
> + // s0-s15 (d0-d7) for passing arguments.
> + const unsigned NumVFPs = 16;
> + it->info = classifyArgumentType(it->type, AllocatedVFP, IsHA);
> + // If we do not have enough VFP registers for the HA, any VFP registers
> + // that are unallocated are marked as unavailable. To achieve this, we add
> + // padding of (NumVFPs - PreAllocation) floats.
> + if (IsHA && AllocatedVFP > NumVFPs && PreAllocation < NumVFPs) {
> + llvm::Type *PaddingTy = llvm::ArrayType::get(
> + llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocation);
> + it->info = ABIArgInfo::getExpandWithPadding(false, PaddingTy);
> + }
> + }
>
> // Always honor user-specified calling convention.
> if (FI.getCallingConvention() != llvm::CallingConv::C)
> @@ -3012,7 +3035,17 @@
> return (Members > 0 && Members <= 4);
> }
>
> -ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty) const {
> +ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, unsigned &AllocatedVFP,
> + bool &IsHA) const {
> + // We update number of allocated VFPs according to
> + // 6.1.2.1 The following argument types are VFP CPRCs:
> + // A single-precision floating-point type (including promoted
> + // half-precision types); A double-precision floating-point type;
> + // A 64-bit or 128-bit containerized vector type; Homogeneous Aggregate
> + // with a Base Type of a single- or double-precision floating-point type,
> + // 64-bit containerized vectors or 128-bit containerized vectors with one
> + // to four Elements.
> +
> // Handle illegal vector types here.
> if (isIllegalVectorType(Ty)) {
> uint64_t Size = getContext().getTypeSize(Ty);
> @@ -3024,15 +3057,38 @@
> if (Size == 64) {
> llvm::Type *ResType = llvm::VectorType::get(
> llvm::Type::getInt32Ty(getVMContext()), 2);
> + // Align AllocatedVFP to an even number to use a D register.
> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2);
> + AllocatedVFP += 2; // 1 D register = 2 S registers
> return ABIArgInfo::getDirect(ResType);
> }
> if (Size == 128) {
> llvm::Type *ResType = llvm::VectorType::get(
> llvm::Type::getInt32Ty(getVMContext()), 4);
> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 4);
> + AllocatedVFP += 4; // 1 Q register = 4 S registers
> return ABIArgInfo::getDirect(ResType);
> }
> return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
> }
> + // Update AllocatedVFP for legal vector types.
> + if (const VectorType *VT = Ty->getAs<VectorType>()) {
> + uint64_t Size = getContext().getTypeSize(VT);
> + // Size of a legal vector should be power of 2 and above 64.
> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, Size >= 128 ? 4 : 2);
> + AllocatedVFP += (Size / 32);
> + }
> + // Update AllocatedVFP for floating point types.
> + if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
> + if (BT->getKind() == BuiltinType::Half ||
> + BT->getKind() == BuiltinType::Float)
> + AllocatedVFP += 1;
> + if (BT->getKind() == BuiltinType::Double ||
> + BT->getKind() == BuiltinType::LongDouble) {
> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2);
> + AllocatedVFP += 2;
> + }
> + }
>
> if (!isAggregateTypeForABI(Ty)) {
> // Treat an enum type as its underlying type.
> @@ -3053,10 +3109,28 @@
> return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
>
> if (getABIKind() == ARMABIInfo::AAPCS_VFP) {
> - // Homogeneous Aggregates need to be expanded.
> + // Homogeneous Aggregates need to be expanded when we can fit the aggregate
> + // into VFP registers.
> const Type *Base = 0;
> - if (isHomogeneousAggregate(Ty, Base, getContext())) {
> + uint64_t Members = 0;
> + if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) {
> assert(Base && "Base class should be set for homogeneous aggregate");
> + // Base can be a floating-point or a vector.
> + if (Base->isVectorType()) {
> + // ElementSize is in number of floats.
> + unsigned ElementSize = getContext().getTypeSize(Base) == 64 ? 2 : 4;
> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP,
> + ElementSize);
> + AllocatedVFP += Members * ElementSize;
> + } else if (Base->isSpecificBuiltinType(BuiltinType::Float))
> + AllocatedVFP += Members;
> + else {
> + assert(Base->isSpecificBuiltinType(BuiltinType::Double) ||
> + Base->isSpecificBuiltinType(BuiltinType::LongDouble));
> + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2);
> + AllocatedVFP += Members * 2; // Base type is double.
> + }
> + IsHA = true;
> return ABIArgInfo::getExpand();
> }
> }
>
> Modified: cfe/trunk/test/CodeGen/arm-homogenous.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-homogenous.c?rev=167058&r1=167057&r2=167058&view=diff
> ==============================================================================
> --- cfe/trunk/test/CodeGen/arm-homogenous.c (original)
> +++ cfe/trunk/test/CodeGen/arm-homogenous.c Tue Oct 30 18:21:41 2012
> @@ -156,6 +156,40 @@
> }
> // CHECK: declare arm_aapcs_vfpcc %union.union_with_struct_with_fundamental_elems @returns_union_with_struct_with_fundamental_elems()
>
> +// Make sure HAs that can be partially fit into VFP registers will be allocated
> +// on stack and that later VFP candidates will go on stack as well.
> +typedef struct {
> + double x;
> + double a2;
> + double a3;
> + double a4;
> +} struct_of_four_doubles;
> +extern void takes_struct_of_four_doubles(double a, struct_of_four_doubles b, struct_of_four_doubles c, double d);
> +struct_of_four_doubles g_s4d;
> +
> +void test_struct_of_four_doubles(void) {
> +// CHECK: test_struct_of_four_doubles
> +// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [6 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}})
> + takes_struct_of_four_doubles(3.0, g_s4d, g_s4d, 4.0);
> +}
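(If I'm reading the new code right, the [6 x float] in the CHECK line above falls out like this: the leading double lands in d0, i.e. s0-s1, and the first struct_of_four_doubles in d1-d4, i.e. s2-s9, so 10 of the 16 S registers are allocated. The second copy would need 8 more, which no longer fit, so the front end emits 16 - 10 = 6 floats of padding to mark s10-s15 unavailable, and the second aggregate plus the trailing double are passed on the stack.)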
> +
> +typedef __attribute__(( ext_vector_type(8) )) char __char8;
> +typedef __attribute__(( ext_vector_type(4) )) short __short4;
> +typedef struct {
> + __char8 a1;
> + __short4 a2;
> + __char8 a3;
> + __short4 a4;
> +} struct_of_vecs;
> +extern void takes_struct_of_vecs(double a, struct_of_vecs b, struct_of_vecs c, double d);
> +struct_of_vecs g_vec;
> +
> +void test_struct_of_vecs(void) {
> +// CHECK: test_struct_of_vecs
> +// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [6 x float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, double {{.*}})
> + takes_struct_of_vecs(3.0, g_vec, g_vec, 4.0);
> +}
> +
> // FIXME: Tests necessary:
> // - Vectors
> // - C++ stuff
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits