[cfe-commits] r166043 - in /cfe/trunk: lib/CodeGen/TargetInfo.cpp test/CodeGen/arm-abi-vector.c

Tue Oct 16 12:18:40 PDT 2012

Author: mren
Date: Tue Oct 16 14:18:39 2012
New Revision: 166043

URL: http://llvm.org/viewvc/llvm-project?rev=166043&view=rev
Log:
ARM ABI: passing illegal vector types as varargs.

We expand varargs in clang and the call site is handled in the back end, it is
hard to match exactly how illegal vectors are handled in the backend. Therefore,
we legalize the illegal vector types in clang:
if (Size <= 32), legalize to i32.
if (Size == 64), legalize to v2i32.
if (Size == 128), legalize to v4i32.
if (Size > 128), use indirect.

rdar://12439123

Modified:
    cfe/trunk/lib/CodeGen/TargetInfo.cpp
    cfe/trunk/test/CodeGen/arm-abi-vector.c

Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=166043&r1=166042&r2=166043&view=diff
==============================================================================

--- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Tue Oct 16 14:18:39 2012
@@ -2799,6 +2799,7 @@
 
   ABIArgInfo classifyReturnType(QualType RetTy) const;
   ABIArgInfo classifyArgumentType(QualType RetTy) const;
+  bool isIllegalVectorType(QualType Ty) const;
 
   virtual void computeInfo(CGFunctionInfo &FI) const;
 
@@ -2945,6 +2946,27 @@
 }
 
 ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty) const {
+  // Handle illegal vector types here.
+  if (isIllegalVectorType(Ty)) {
+    uint64_t Size = getContext().getTypeSize(Ty);
+    if (Size <= 32) {
+      llvm::Type *ResType =
+          llvm::Type::getInt32Ty(getVMContext());
+      return ABIArgInfo::getDirect(ResType);
+    }
+    if (Size == 64) {
+      llvm::Type *ResType = llvm::VectorType::get(
+          llvm::Type::getInt32Ty(getVMContext()), 2);
+      return ABIArgInfo::getDirect(ResType);
+    }
+    if (Size == 128) {
+      llvm::Type *ResType = llvm::VectorType::get(
+          llvm::Type::getInt32Ty(getVMContext()), 4);
+      return ABIArgInfo::getDirect(ResType);
+    }
+    return ABIArgInfo::getIndirect(0, /*ByVal=*/false);
+  }
+
   if (!isAggregateTypeForABI(Ty)) {
     // Treat an enum type as its underlying type.
     if (const EnumType *EnumTy = Ty->getAs<EnumType>())
@@ -3161,6 +3183,21 @@
   return ABIArgInfo::getIndirect(0);
 }
 
+/// isIllegalVector - check whether Ty is an illegal vector type.
+bool ARMABIInfo::isIllegalVectorType(QualType Ty) const {
+  if (const VectorType *VT = Ty->getAs<VectorType>()) {
+    // Check whether VT is legal.
+    unsigned NumElements = VT->getNumElements();
+    uint64_t Size = getContext().getTypeSize(VT);
+    // NumElements should be power of 2.
+    if ((NumElements & (NumElements - 1)) != 0)
+      return true;
+    // Size should be greater than 32 bits.
+    return Size <= 32;
+  }
+  return false;
+}
+
 llvm::Value *ARMABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty,
                                    CodeGenFunction &CGF) const {
   llvm::Type *BP = CGF.Int8PtrTy;
@@ -3172,6 +3209,7 @@
 
   uint64_t Size = CGF.getContext().getTypeSize(Ty) / 8;
   uint64_t TyAlign = CGF.getContext().getTypeAlign(Ty) / 8;
+  bool IsIndirect = false;
 
   // The ABI alignment for 64-bit or 128-bit vectors is 8 for AAPCS and 4 for
   // APCS. For AAPCS, the ABI alignment is at least 4-byte and at most 8-byte.
@@ -3182,6 +3220,12 @@
     else
       TyAlign = 4;
   }
+  // Use indirect if size of the illegal vector is bigger than 16 bytes.
+  if (isIllegalVectorType(Ty) && Size > 16) {
+    IsIndirect = true;
+    Size = 4;
+    TyAlign = 4;
+  }
 
   // Handle address alignment for ABI alignment > 4 bytes.
   if (TyAlign > 4) {
@@ -3200,8 +3244,10 @@
                       "ap.next");
   Builder.CreateStore(NextAddr, VAListAddrAsBPP);
 
-  if (Ty->getAs<VectorType>() &&
-      (TyAlign < CGF.getContext().getTypeAlign(Ty) / 8)) {
+  if (IsIndirect)
+    Addr = Builder.CreateLoad(Builder.CreateBitCast(Addr, BPP));
+  else if (Ty->getAs<VectorType>() &&
+           (TyAlign < CGF.getContext().getTypeAlign(Ty) / 8)) {
     // We can't directly cast ap.cur to pointer to a vector type, since ap.cur
     // may not be correctly aligned for the vector type. We create an aligned
     // temporary space and copy the content over from ap.cur to the temporary

Modified: cfe/trunk/test/CodeGen/arm-abi-vector.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-abi-vector.c?rev=166043&r1=166042&r2=166043&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/arm-abi-vector.c (original)
+++ cfe/trunk/test/CodeGen/arm-abi-vector.c Tue Oct 16 14:18:39 2012
@@ -4,6 +4,12 @@
 #include <stdarg.h>
 
 typedef __attribute__(( ext_vector_type(2) ))  int __int2;
+typedef __attribute__(( ext_vector_type(3) ))  char __char3;
+typedef __attribute__(( ext_vector_type(5) ))  char __char5;
+typedef __attribute__(( ext_vector_type(9) ))  char __char9;
+typedef __attribute__(( ext_vector_type(19) )) char __char19;
+typedef __attribute__(( ext_vector_type(3) ))  short __short3;
+typedef __attribute__(( ext_vector_type(5) ))  short __short5;
 
 // Passing legal vector types as varargs.
 double varargs_vec_2i(int fixed, ...) {
@@ -36,3 +42,187 @@
 // APCS-GNU: call double (i32, ...)* @varargs_vec_2i(i32 3, <2 x i32> %1)
   return varargs_vec_2i(3, *in);
 }
+
+double varargs_vec_3c(int fixed, ...) {
+// CHECK: varargs_vec_3c
+// CHECK: %c3 = alloca <3 x i8>, align 4
+// CHECK: %ap.next = getelementptr i8* %ap.cur, i32 4
+// CHECK: %1 = bitcast i8* %ap.cur to <3 x i8>*
+// APCS-GNU: varargs_vec_3c
+// APCS-GNU: %c3 = alloca <3 x i8>, align 4
+// APCS-GNU: %ap.next = getelementptr i8* %ap.cur, i32 4
+// APCS-GNU: bitcast i8* %ap.cur to <3 x i8>*
+  va_list ap;
+  double sum = fixed;
+  va_start(ap, fixed);
+  __char3 c3 = va_arg(ap, __char3);
+  sum = sum + c3.x + c3.y;
+  va_end(ap);
+  return sum;
+}
+
+double test_3c(__char3 *in) {
+// CHECK: test_3c
+// CHECK: call arm_aapcscc double (i32, ...)* @varargs_vec_3c(i32 3, i32 %2)
+// APCS-GNU: test_3c
+// APCS-GNU: call double (i32, ...)* @varargs_vec_3c(i32 3, i32 %2)
+  return varargs_vec_3c(3, *in);
+}
+
+double varargs_vec_5c(int fixed, ...) {
+// CHECK: varargs_vec_5c
+// CHECK: %c5 = alloca <5 x i8>, align 8
+// CHECK: %3 = and i32 %2, -8
+// CHECK: %ap.align = inttoptr i32 %3 to i8*
+// CHECK: %ap.next = getelementptr i8* %ap.align, i32 8
+// CHECK: bitcast i8* %ap.align to <5 x i8>*
+// APCS-GNU: varargs_vec_5c
+// APCS-GNU: %c5 = alloca <5 x i8>, align 8
+// APCS-GNU: %var.align = alloca <5 x i8>
+// APCS-GNU: %ap.next = getelementptr i8* %ap.cur, i32 8
+// APCS-GNU: %1 = bitcast <5 x i8>* %var.align to i8*
+// APCS-GNU: call void @llvm.memcpy
+// APCS-GNU: %2 = load <5 x i8>* %var.align
+  va_list ap;
+  double sum = fixed;
+  va_start(ap, fixed);
+  __char5 c5 = va_arg(ap, __char5);
+  sum = sum + c5.x + c5.y;
+  va_end(ap);
+  return sum;
+}
+
+double test_5c(__char5 *in) {
+// CHECK: test_5c
+// CHECK: call arm_aapcscc double (i32, ...)* @varargs_vec_5c(i32 5, <2 x i32> %3)
+// APCS-GNU: test_5c
+// APCS-GNU: call double (i32, ...)* @varargs_vec_5c(i32 5, <2 x i32> %3)
+  return varargs_vec_5c(5, *in);
+}
+
+double varargs_vec_9c(int fixed, ...) {
+// CHECK: varargs_vec_9c
+// CHECK: %c9 = alloca <9 x i8>, align 16
+// CHECK: %var.align = alloca <9 x i8>
+// CHECK: %3 = and i32 %2, -8
+// CHECK: %ap.align = inttoptr i32 %3 to i8*
+// CHECK: %ap.next = getelementptr i8* %ap.align, i32 16
+// CHECK: %4 = bitcast <9 x i8>* %var.align to i8*
+// CHECK: call void @llvm.memcpy
+// CHECK: %5 = load <9 x i8>* %var.align
+// APCS-GNU: varargs_vec_9c
+// APCS-GNU: %c9 = alloca <9 x i8>, align 16
+// APCS-GNU: %var.align = alloca <9 x i8>
+// APCS-GNU: %ap.next = getelementptr i8* %ap.cur, i32 16
+// APCS-GNU: %1 = bitcast <9 x i8>* %var.align to i8*
+// APCS-GNU: call void @llvm.memcpy
+// APCS-GNU: %2 = load <9 x i8>* %var.align
+  va_list ap;
+  double sum = fixed;
+  va_start(ap, fixed);
+  __char9 c9 = va_arg(ap, __char9);
+  sum = sum + c9.x + c9.y;
+  va_end(ap);
+  return sum;
+}
+
+double test_9c(__char9 *in) {
+// CHECK: test_9c
+// CHECK: call arm_aapcscc double (i32, ...)* @varargs_vec_9c(i32 9, <4 x i32> %3)
+// APCS-GNU: test_9c
+// APCS-GNU: call double (i32, ...)* @varargs_vec_9c(i32 9, <4 x i32> %3)
+  return varargs_vec_9c(9, *in);
+}
+
+double varargs_vec_19c(int fixed, ...) {
+// CHECK: varargs_vec_19c
+// CHECK: %ap.next = getelementptr i8* %ap.cur, i32 4
+// CHECK: %1 = bitcast i8* %ap.cur to i8**
+// CHECK: %2 = load i8** %1
+// CHECK: bitcast i8* %2 to <19 x i8>*
+// APCS-GNU: varargs_vec_19c
+// APCS-GNU: %ap.next = getelementptr i8* %ap.cur, i32 4
+// APCS-GNU: %1 = bitcast i8* %ap.cur to i8**
+// APCS-GNU: %2 = load i8** %1
+// APCS-GNU: bitcast i8* %2 to <19 x i8>*
+  va_list ap;
+  double sum = fixed;
+  va_start(ap, fixed);
+  __char19 c19 = va_arg(ap, __char19);
+  sum = sum + c19.x + c19.y;
+  va_end(ap);
+  return sum;
+}
+
+double test_19c(__char19 *in) {
+// CHECK: test_19c
+// CHECK: call arm_aapcscc double (i32, ...)* @varargs_vec_19c(i32 19, <19 x i8>* %tmp)
+// APCS-GNU: test_19c
+// APCS-GNU: call double (i32, ...)* @varargs_vec_19c(i32 19, <19 x i8>* %tmp)
+  return varargs_vec_19c(19, *in);
+}
+
+double varargs_vec_3s(int fixed, ...) {
+// CHECK: varargs_vec_3s
+// CHECK: %c3 = alloca <3 x i16>, align 8
+// CHECK: %3 = and i32 %2, -8
+// CHECK: %ap.align = inttoptr i32 %3 to i8*
+// CHECK: %ap.next = getelementptr i8* %ap.align, i32 8
+// CHECK: bitcast i8* %ap.align to <3 x i16>*
+// APCS-GNU: varargs_vec_3s
+// APCS-GNU: %c3 = alloca <3 x i16>, align 8
+// APCS-GNU: %var.align = alloca <3 x i16>
+// APCS-GNU: %ap.next = getelementptr i8* %ap.cur, i32 8
+// APCS-GNU: %1 = bitcast <3 x i16>* %var.align to i8*
+// APCS-GNU: call void @llvm.memcpy
+// APCS-GNU: %2 = load <3 x i16>* %var.align
+  va_list ap;
+  double sum = fixed;
+  va_start(ap, fixed);
+  __short3 c3 = va_arg(ap, __short3);
+  sum = sum + c3.x + c3.y;
+  va_end(ap);
+  return sum;
+}
+
+double test_3s(__short3 *in) {
+// CHECK: test_3s
+// CHECK: call arm_aapcscc double (i32, ...)* @varargs_vec_3s(i32 3, <2 x i32> %2)
+// APCS-GNU: test_3s
+// APCS-GNU: call double (i32, ...)* @varargs_vec_3s(i32 3, <2 x i32> %2)
+  return varargs_vec_3s(3, *in);
+}
+
+double varargs_vec_5s(int fixed, ...) {
+// CHECK: varargs_vec_5s
+// CHECK: %c5 = alloca <5 x i16>, align 16
+// CHECK: %var.align = alloca <5 x i16>
+// CHECK: %3 = and i32 %2, -8
+// CHECK: %ap.align = inttoptr i32 %3 to i8*
+// CHECK: %ap.next = getelementptr i8* %ap.align, i32 16
+// CHECK: %4 = bitcast <5 x i16>* %var.align to i8*
+// CHECK: call void @llvm.memcpy
+// CHECK: %5 = load <5 x i16>* %var.align
+// APCS-GNU: varargs_vec_5s
+// APCS-GNU: %c5 = alloca <5 x i16>, align 16
+// APCS-GNU: %var.align = alloca <5 x i16>
+// APCS-GNU: %ap.next = getelementptr i8* %ap.cur, i32 16
+// APCS-GNU: %1 = bitcast <5 x i16>* %var.align to i8*
+// APCS-GNU: call void @llvm.memcpy
+// APCS-GNU: %2 = load <5 x i16>* %var.align
+  va_list ap;
+  double sum = fixed;
+  va_start(ap, fixed);
+  __short5 c5 = va_arg(ap, __short5);
+  sum = sum + c5.x + c5.y;
+  va_end(ap);
+  return sum;
+}
+
+double test_5s(__short5 *in) {
+// CHECK: test_5s
+// CHECK: call arm_aapcscc double (i32, ...)* @varargs_vec_5s(i32 5, <4 x i32> %3)
+// APCS-GNU: test_5s
+// APCS-GNU: call double (i32, ...)* @varargs_vec_5s(i32 5, <4 x i32> %3)
+  return varargs_vec_5s(5, *in);
+}