[cfe-commits] r167058 - in /cfe/trunk: lib/CodeGen/TargetInfo.cpp test/CodeGen/arm-homogenous.c

Tue Oct 30 16:21:41 PDT 2012

Author: mren
Date: Tue Oct 30 18:21:41 2012
New Revision: 167058

URL: http://llvm.org/viewvc/llvm-project?rev=167058&view=rev
Log:
ARM AAPCS-VFP: fix handling of homogeneous aggregates.

If an HA can only partially fit into the VFP registers, we add padding to make
sure the HA is placed on the stack and that later VFP CPRCs go on the stack as well.

Modified:
    cfe/trunk/lib/CodeGen/TargetInfo.cpp
    cfe/trunk/test/CodeGen/arm-homogenous.c

Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=167058&r1=167057&r2=167058&view=diff
==============================================================================

--- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Tue Oct 30 18:21:41 2012
@@ -2863,7 +2863,8 @@
   ABIKind getABIKind() const { return Kind; }
 
   ABIArgInfo classifyReturnType(QualType RetTy) const;
-  ABIArgInfo classifyArgumentType(QualType RetTy) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy, unsigned &AllocatedVFP,
+                                  bool &IsHA) const;
   bool isIllegalVectorType(QualType Ty) const;
 
   virtual void computeInfo(CGFunctionInfo &FI) const;
@@ -2907,10 +2908,32 @@
 }
 
 void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const {
+  // To correctly handle Homogeneous Aggregate, we need to keep track of the
+  // number of VFP registers allocated so far.
+  // C.1.vfp If the argument is a VFP CPRC and there are sufficient consecutive
+  // VFP registers of the appropriate type unallocated then the argument is
+  // allocated to the lowest-numbered sequence of such registers.
+  // C.2.vfp If the argument is a VFP CPRC then any VFP registers that are
+  // unallocated are marked as unavailable. 
+  unsigned AllocatedVFP = 0;
   FI.getReturnInfo() = classifyReturnType(FI.getReturnType());
   for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end();
-       it != ie; ++it)
-    it->info = classifyArgumentType(it->type);
+       it != ie; ++it) {
+    unsigned PreAllocation = AllocatedVFP;
+    bool IsHA = false;
+    // 6.1.2.3 There is one VFP co-processor register class using registers
+    // s0-s15 (d0-d7) for passing arguments.
+    const unsigned NumVFPs = 16;
+    it->info = classifyArgumentType(it->type, AllocatedVFP, IsHA);
+    // If we do not have enough VFP registers for the HA, any VFP registers
+    // that are unallocated are marked as unavailable. To achieve this, we add
+    // padding of (NumVFPs - PreAllocation) floats.
+    if (IsHA && AllocatedVFP > NumVFPs && PreAllocation < NumVFPs) {
+      llvm::Type *PaddingTy = llvm::ArrayType::get(
+          llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocation);
+      it->info = ABIArgInfo::getExpandWithPadding(false, PaddingTy);
+    }
+  }
 
   // Always honor user-specified calling convention.
   if (FI.getCallingConvention() != llvm::CallingConv::C)
@@ -3012,7 +3035,17 @@
   return (Members > 0 && Members <= 4);
 }
 
-ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty) const {
+ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, unsigned &AllocatedVFP,
+                                            bool &IsHA) const {
+  // We update number of allocated VFPs according to
+  // 6.1.2.1 The following argument types are VFP CPRCs:
+  //   A single-precision floating-point type (including promoted
+  //   half-precision types); A double-precision floating-point type;
+  //   A 64-bit or 128-bit containerized vector type; Homogeneous Aggregate
+  //   with a Base Type of a single- or double-precision floating-point type,
+  //   64-bit containerized vectors or 128-bit containerized vectors with one
+  //   to four Elements.
+
   // Handle illegal vector types here.
   if (isIllegalVectorType(Ty)) {
     uint64_t Size = getContext().getTypeSize(Ty);
@@ -3024,15 +3057,38 @@
     if (Size == 64) {
       llvm::Type *ResType = llvm::VectorType::get(
           llvm::Type::getInt32Ty(getVMContext()), 2);
+      // Align AllocatedVFP to an even number to use a D register.
+      AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2);
+      AllocatedVFP += 2; // 1 D register = 2 S registers
       return ABIArgInfo::getDirect(ResType);
     }
     if (Size == 128) {
       llvm::Type *ResType = llvm::VectorType::get(
           llvm::Type::getInt32Ty(getVMContext()), 4);
+      AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 4);
+      AllocatedVFP += 4; // 1 Q register = 4 S registers
       return ABIArgInfo::getDirect(ResType);
     }
     return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
   }
+  // Update AllocatedVFP for legal vector types.
+  if (const VectorType *VT = Ty->getAs<VectorType>()) {
+    uint64_t Size = getContext().getTypeSize(VT);
+    // Size of a legal vector should be a power of 2 and at least 64 bits.
+    AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, Size >= 128 ? 4 : 2);
+    AllocatedVFP += (Size / 32);
+  }
+  // Update AllocatedVFP for floating point types.
+  if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
+    if (BT->getKind() == BuiltinType::Half ||
+        BT->getKind() == BuiltinType::Float)
+      AllocatedVFP += 1;
+    if (BT->getKind() == BuiltinType::Double ||
+        BT->getKind() == BuiltinType::LongDouble) {
+      AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2);
+      AllocatedVFP += 2;
+    }
+  }
 
   if (!isAggregateTypeForABI(Ty)) {
     // Treat an enum type as its underlying type.
@@ -3053,10 +3109,28 @@
     return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
 
   if (getABIKind() == ARMABIInfo::AAPCS_VFP) {
-    // Homogeneous Aggregates need to be expanded.
+    // Homogeneous Aggregates need to be expanded when we can fit the aggregate
+    // into VFP registers.
     const Type *Base = 0;
-    if (isHomogeneousAggregate(Ty, Base, getContext())) {
+    uint64_t Members = 0;
+    if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) {
       assert(Base && "Base class should be set for homogeneous aggregate");
+      // Base can be a floating-point or a vector.
+      if (Base->isVectorType()) {
+        // ElementSize is in number of floats.
+        unsigned ElementSize = getContext().getTypeSize(Base) == 64 ? 2 : 4;
+        AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP,
+                       ElementSize);
+        AllocatedVFP += Members * ElementSize;
+      } else if (Base->isSpecificBuiltinType(BuiltinType::Float))
+        AllocatedVFP += Members;
+      else {
+        assert(Base->isSpecificBuiltinType(BuiltinType::Double) ||
+               Base->isSpecificBuiltinType(BuiltinType::LongDouble));
+        AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2);
+        AllocatedVFP += Members * 2; // Base type is double.
+      }
+      IsHA = true;
       return ABIArgInfo::getExpand();
     }
   }

Modified: cfe/trunk/test/CodeGen/arm-homogenous.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-homogenous.c?rev=167058&r1=167057&r2=167058&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/arm-homogenous.c (original)
+++ cfe/trunk/test/CodeGen/arm-homogenous.c Tue Oct 30 18:21:41 2012
@@ -156,6 +156,40 @@
 }
 // CHECK: declare arm_aapcs_vfpcc %union.union_with_struct_with_fundamental_elems @returns_union_with_struct_with_fundamental_elems()
 
+// Make sure HAs that can only partially fit into VFP registers are allocated
+// on the stack and that later VFP candidates go on the stack as well.
+typedef struct {
+  double x;
+  double a2;
+  double a3;
+  double a4;
+} struct_of_four_doubles;
+extern void takes_struct_of_four_doubles(double a, struct_of_four_doubles b, struct_of_four_doubles c, double d);
+struct_of_four_doubles g_s4d;
+
+void test_struct_of_four_doubles(void) {
+// CHECK: test_struct_of_four_doubles
+// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_four_doubles(double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [6 x float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}})
+  takes_struct_of_four_doubles(3.0, g_s4d, g_s4d, 4.0);
+}
+
+typedef __attribute__(( ext_vector_type(8) )) char __char8;
+typedef __attribute__(( ext_vector_type(4) ))  short __short4;
+typedef struct {
+  __char8  a1;
+  __short4 a2;
+  __char8  a3;
+  __short4 a4;
+} struct_of_vecs;
+extern void takes_struct_of_vecs(double a, struct_of_vecs b, struct_of_vecs c, double d);
+struct_of_vecs g_vec;
+
+void test_struct_of_vecs(void) {
+// CHECK: test_struct_of_vecs
+// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_vecs(double {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, [6 x float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> {{.*}}, double {{.*}})
+  takes_struct_of_vecs(3.0, g_vec, g_vec, 4.0);
+}
+
 // FIXME: Tests necessary:
 //         - Vectors
 //         - C++ stuff