[cfe-commits] r105599 - in /cfe/trunk: include/clang/Basic/BuiltinsARM.def lib/CodeGen/CGBuiltin.cpp lib/CodeGen/CodeGenFunction.h

Nate Begeman natebegeman at mac.com
Mon Jun 7 23:03:01 PDT 2010


Author: sampo
Date: Tue Jun  8 01:03:01 2010
New Revision: 105599

URL: http://llvm.org/viewvc/llvm-project?rev=105599&view=rev
Log:
Fix NEON intrinsic argument passing and add support for vext.  Most NEON intrinsics now successfully make it through codegen to the .s file.

Modified:
    cfe/trunk/include/clang/Basic/BuiltinsARM.def
    cfe/trunk/lib/CodeGen/CGBuiltin.cpp
    cfe/trunk/lib/CodeGen/CodeGenFunction.h

Modified: cfe/trunk/include/clang/Basic/BuiltinsARM.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsARM.def?rev=105599&r1=105598&r2=105599&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsARM.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsARM.def Tue Jun  8 01:03:01 2010
@@ -34,14 +34,14 @@
 BUILTIN(__builtin_neon_vaddw_v, "V16cV16cV8ci", "n")
 BUILTIN(__builtin_neon_vbsl_v, "V8cV8cV8cV8ci", "n")
 BUILTIN(__builtin_neon_vbslq_v, "V16cV16cV16cV16ci", "n")
-BUILTIN(__builtin_neon_vcage_v, "V8cV8cV8ci", "n")
-BUILTIN(__builtin_neon_vcageq_v, "V16cV16cV16ci", "n")
-BUILTIN(__builtin_neon_vcagt_v, "V8cV8cV8ci", "n")
-BUILTIN(__builtin_neon_vcagtq_v, "V16cV16cV16ci", "n")
-BUILTIN(__builtin_neon_vcale_v, "V8cV8cV8ci", "n")
-BUILTIN(__builtin_neon_vcaleq_v, "V16cV16cV16ci", "n")
-BUILTIN(__builtin_neon_vcalt_v, "V8cV8cV8ci", "n")
-BUILTIN(__builtin_neon_vcaltq_v, "V16cV16cV16ci", "n")
+BUILTIN(__builtin_neon_vcage_v, "V2iV8cV8ci", "n")
+BUILTIN(__builtin_neon_vcageq_v, "V4iV16cV16ci", "n")
+BUILTIN(__builtin_neon_vcagt_v, "V2iV8cV8ci", "n")
+BUILTIN(__builtin_neon_vcagtq_v, "V4iV16cV16ci", "n")
+BUILTIN(__builtin_neon_vcale_v, "V2iV8cV8ci", "n")
+BUILTIN(__builtin_neon_vcaleq_v, "V4iV16cV16ci", "n")
+BUILTIN(__builtin_neon_vcalt_v, "V2iV8cV8ci", "n")
+BUILTIN(__builtin_neon_vcaltq_v, "V4iV16cV16ci", "n")
 BUILTIN(__builtin_neon_vcls_v, "V8cV8ci", "n")
 BUILTIN(__builtin_neon_vclsq_v, "V16cV16ci", "n")
 BUILTIN(__builtin_neon_vclz_v, "V8cV8ci", "n")
@@ -54,14 +54,14 @@
 BUILTIN(__builtin_neon_vcvt_f32_f16, "V16cV8ci", "n")
 BUILTIN(__builtin_neon_vcvt_n_f32_v, "V2fV8cii", "n")
 BUILTIN(__builtin_neon_vcvtq_n_f32_v, "V4fV16cii", "n")
-BUILTIN(__builtin_neon_vcvt_n_s32_v, "V8cV8cii", "n")
-BUILTIN(__builtin_neon_vcvtq_n_s32_v, "V16cV16cii", "n")
-BUILTIN(__builtin_neon_vcvt_n_u32_v, "V8cV8cii", "n")
-BUILTIN(__builtin_neon_vcvtq_n_u32_v, "V16cV16cii", "n")
-BUILTIN(__builtin_neon_vcvt_s32_v, "V8cV8ci", "n")
-BUILTIN(__builtin_neon_vcvtq_s32_v, "V16cV16ci", "n")
-BUILTIN(__builtin_neon_vcvt_u32_v, "V8cV8ci", "n")
-BUILTIN(__builtin_neon_vcvtq_u32_v, "V16cV16ci", "n")
+BUILTIN(__builtin_neon_vcvt_n_s32_v, "V2iV8cii", "n")
+BUILTIN(__builtin_neon_vcvtq_n_s32_v, "V4iV16cii", "n")
+BUILTIN(__builtin_neon_vcvt_n_u32_v, "V2iV8cii", "n")
+BUILTIN(__builtin_neon_vcvtq_n_u32_v, "V4iV16cii", "n")
+BUILTIN(__builtin_neon_vcvt_s32_v, "V2iV8ci", "n")
+BUILTIN(__builtin_neon_vcvtq_s32_v, "V4iV16ci", "n")
+BUILTIN(__builtin_neon_vcvt_u32_v, "V2iV8ci", "n")
+BUILTIN(__builtin_neon_vcvtq_u32_v, "V4iV16ci", "n")
 BUILTIN(__builtin_neon_vext_v, "V8cV8cV8cii", "n")
 BUILTIN(__builtin_neon_vextq_v, "V16cV16cV16cii", "n")
 BUILTIN(__builtin_neon_vget_high_v, "V8cV16ci", "n")
@@ -203,15 +203,15 @@
 BUILTIN(__builtin_neon_vqrshl_v, "V8cV8cV8ci", "n")
 BUILTIN(__builtin_neon_vqrshlq_v, "V16cV16cV16ci", "n")
 BUILTIN(__builtin_neon_vqrshrn_n_v, "V8cV16cii", "n")
-BUILTIN(__builtin_neon_vqrshrun_n_v, "V8cV16cii", "n")
+BUILTIN(__builtin_neon_vqrshrun_n_v, "V2iV16cii", "n")
 BUILTIN(__builtin_neon_vqshl_v, "V8cV8cV8ci", "n")
 BUILTIN(__builtin_neon_vqshlq_v, "V16cV16cV16ci", "n")
-BUILTIN(__builtin_neon_vqshlu_n_v, "V8cV8cii", "n")
-BUILTIN(__builtin_neon_vqshluq_n_v, "V16cV16cii", "n")
+BUILTIN(__builtin_neon_vqshlu_n_v, "V2iV8cii", "n")
+BUILTIN(__builtin_neon_vqshluq_n_v, "V4iV16cii", "n")
 BUILTIN(__builtin_neon_vqshl_n_v, "V8cV8cii", "n")
 BUILTIN(__builtin_neon_vqshlq_n_v, "V16cV16cii", "n")
 BUILTIN(__builtin_neon_vqshrn_n_v, "V8cV16cii", "n")
-BUILTIN(__builtin_neon_vqshrun_n_v, "V8cV16cii", "n")
+BUILTIN(__builtin_neon_vqshrun_n_v, "V2iV16cii", "n")
 BUILTIN(__builtin_neon_vqsub_v, "V8cV8cV8ci", "n")
 BUILTIN(__builtin_neon_vqsubq_v, "V16cV16cV16ci", "n")
 BUILTIN(__builtin_neon_vraddhn_v, "V8cV16cV16ci", "n")
@@ -292,8 +292,8 @@
 BUILTIN(__builtin_neon_vtbx4_v, "V8cV8cV8cV8cV8cV8cV8ci", "n")
 BUILTIN(__builtin_neon_vtrn_v, "V16cV8cV8ci", "n")
 BUILTIN(__builtin_neon_vtrnq_v, "V32cV16cV16ci", "n")
-BUILTIN(__builtin_neon_vtst_v, "V8cV8cV8ci", "n")
-BUILTIN(__builtin_neon_vtstq_v, "V16cV16cV16ci", "n")
+BUILTIN(__builtin_neon_vtst_v, "V2iV8cV8ci", "n")
+BUILTIN(__builtin_neon_vtstq_v, "V4iV16cV16ci", "n")
 BUILTIN(__builtin_neon_vuzp_v, "V16cV8cV8ci", "n")
 BUILTIN(__builtin_neon_vuzpq_v, "V32cV16cV16ci", "n")
 BUILTIN(__builtin_neon_vzip_v, "V16cV8cV8ci", "n")

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=105599&r1=105598&r2=105599&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Tue Jun  8 01:03:01 2010
@@ -894,15 +894,25 @@
   return 0;
 }
 
+Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
+                                     const char *name) {
+  unsigned j = 0;
+  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
+       ai != ae; ++ai, ++j)
+    Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
+
+  return Builder.CreateCall(F, Ops.begin(), Ops.end(), name);
+}
+
 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
                                            const CallExpr *E) {
   llvm::SmallVector<Value*, 4> Ops;
-  bool usgn, poly, half;
+  bool usgn, quad, poly, half;
   const llvm::Type *Ty;
   unsigned Int;
   
   // Determine the type of this overloaded NEON intrinsic.
-  if (BuiltinID != ARM::BI__clear_cache) {
+  if (BuiltinID > ARM::BI__builtin_thread_pointer) {
     for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++)
       Ops.push_back(EmitScalarExpr(E->getArg(i)));
     
@@ -912,13 +922,14 @@
       return 0;
     
     unsigned type = Result.getZExtValue();
-    Ty = GetNeonType(VMContext, type & 0x7, type & 0x10);
-    if (!Ty)
-      return 0;
-    
     usgn = type & 0x08;
+    quad = type & 0x10;
     poly = type == 5 || type == 6;
     half = type == 7;
+
+    Ty = GetNeonType(VMContext, type & 0x7, quad);
+    if (!Ty)
+      return 0;
   }
   
   switch (BuiltinID) {
@@ -933,87 +944,80 @@
     return Builder.CreateCall2(CGM.CreateRuntimeFunction(FTy, Name),
                                a, b);
   }
-  // FIXME: bitcast args, return.
   case ARM::BI__builtin_neon_vaba_v:
-  case ARM::BI__builtin_neon_vabaq_v: {
+  case ARM::BI__builtin_neon_vabaq_v:
     Int = usgn ? Intrinsic::arm_neon_vabau : Intrinsic::arm_neon_vabas;
-    Value *F = CGM.getIntrinsic(Int, &Ty, 1);
-    return Builder.CreateCall(F, &Ops[0], &Ops[0] + 3, "vaba");
-  }
-  case ARM::BI__builtin_neon_vabal_v: {
+    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vaba");
+  case ARM::BI__builtin_neon_vabal_v:
     Int = usgn ? Intrinsic::arm_neon_vabalu : Intrinsic::arm_neon_vabals;
-    Value *F = CGM.getIntrinsic(Int, &Ty, 1);
-    return Builder.CreateCall(F, &Ops[0], &Ops[0] + 3, "vabal");
-  }
+    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vabal");
   case ARM::BI__builtin_neon_vabd_v:
-  case ARM::BI__builtin_neon_vabdq_v: {
+  case ARM::BI__builtin_neon_vabdq_v:
     Int = usgn ? Intrinsic::arm_neon_vabdu : Intrinsic::arm_neon_vabds;
-    Value *F = CGM.getIntrinsic(Int, &Ty, 1);
-    return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "vabd");
-  }
-  case ARM::BI__builtin_neon_vabdl_v: {
+    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vabd");
+  case ARM::BI__builtin_neon_vabdl_v:
     Int = usgn ? Intrinsic::arm_neon_vabdlu : Intrinsic::arm_neon_vabdls;
-    Value *F = CGM.getIntrinsic(Int, &Ty, 1);
-    return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "vabdl");
-  }
+    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vabdl");
   case ARM::BI__builtin_neon_vabs_v:
   case ARM::BI__builtin_neon_vabsq_v: {
-    Value *F = CGM.getIntrinsic(Intrinsic::arm_neon_vabs, &Ty, 1);
-    return Builder.CreateCall(F, &Ops[0], &Ops[0] + 1, "vabs");
+    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vabs, &Ty, 1);
+    return EmitNeonCall(F, Ops, "vabs");
   }
   case ARM::BI__builtin_neon_vaddhn_v: {
-    Value *F = CGM.getIntrinsic(Intrinsic::arm_neon_vaddhn, &Ty, 1);
-    return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "vaddhn");
+    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vaddhn, &Ty, 1);
+    return EmitNeonCall(F, Ops, "vaddhn");
   }
-  case ARM::BI__builtin_neon_vaddl_v: {
+  case ARM::BI__builtin_neon_vaddl_v:
     Int = usgn ? Intrinsic::arm_neon_vaddlu : Intrinsic::arm_neon_vaddls;
-    Value *F = CGM.getIntrinsic(Int, &Ty, 1);
-    return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "vaddl");
-  }
-  case ARM::BI__builtin_neon_vaddw_v: {
+    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vaddl");
+  case ARM::BI__builtin_neon_vaddw_v:
     Int = usgn ? Intrinsic::arm_neon_vaddws : Intrinsic::arm_neon_vaddwu;
-    Value *F = CGM.getIntrinsic(Int, &Ty, 1);
-    return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "vaddw");
-  }
+    return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vaddw");
   // FIXME: vbsl -> or ((0 & 1), (0 & 2)) in arm_neon.h
   case ARM::BI__builtin_neon_vcale_v:
     std::swap(Ops[0], Ops[1]);
-  case ARM::BI__builtin_neon_vcage_v:
-    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vacged),
-                              &Ops[0], &Ops[0] + 2, "vcage");
+  case ARM::BI__builtin_neon_vcage_v: {
+    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacged, &Ty, 1);
+    return EmitNeonCall(F, Ops, "vcage");
+  }
   case ARM::BI__builtin_neon_vcaleq_v:
     std::swap(Ops[0], Ops[1]);
-  case ARM::BI__builtin_neon_vcageq_v:
-    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq),
-                              &Ops[0], &Ops[0] + 2, "vcage");
+  case ARM::BI__builtin_neon_vcageq_v: {
+    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgeq, &Ty, 1);
+    return EmitNeonCall(F, Ops, "vcage");
+  }
   case ARM::BI__builtin_neon_vcalt_v:
     std::swap(Ops[0], Ops[1]);
-  case ARM::BI__builtin_neon_vcagt_v:
-    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd),
-                              &Ops[0], &Ops[0] + 2, "vcagt");
+  case ARM::BI__builtin_neon_vcagt_v: {
+    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtd, &Ty, 1);
+    return EmitNeonCall(F, Ops, "vcagt");
+  }
   case ARM::BI__builtin_neon_vcaltq_v:
     std::swap(Ops[0], Ops[1]);
-  case ARM::BI__builtin_neon_vcagtq_v:
-    return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq),
-                              &Ops[0], &Ops[0] + 2, "vcagt");
+  case ARM::BI__builtin_neon_vcagtq_v: {
+    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vacgtq, &Ty, 1);
+    return EmitNeonCall(F, Ops, "vcagt");
+  }
   case ARM::BI__builtin_neon_vcls_v:
   case ARM::BI__builtin_neon_vclsq_v: {
-    Value *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, &Ty, 1);
-    return Builder.CreateCall(F, &Ops[0], &Ops[0] + 1, "vcls");
+    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcls, &Ty, 1);
+    return EmitNeonCall(F, Ops, "vcls");
   }
   case ARM::BI__builtin_neon_vclz_v:
   case ARM::BI__builtin_neon_vclzq_v: {
-    Value *F = CGM.getIntrinsic(Intrinsic::arm_neon_vclz, &Ty, 1);
-    return Builder.CreateCall(F, &Ops[0], &Ops[0] + 1, "vclz");
+    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vclz, &Ty, 1);
+    return EmitNeonCall(F, Ops, "vclz");
   }
   case ARM::BI__builtin_neon_vcnt_v:
   case ARM::BI__builtin_neon_vcntq_v: {
-    Value *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcnt, &Ty, 1);
-    return Builder.CreateCall(F, &Ops[0], &Ops[0] + 1, "vcnt");
+    Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vcnt, &Ty, 1);
+    return EmitNeonCall(F, Ops, "vcnt");
   }
   // FIXME: intrinsics for f16<->f32 convert missing from ARM target.
   case ARM::BI__builtin_neon_vcvt_f32_v:
   case ARM::BI__builtin_neon_vcvtq_f32_v: {
+    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+    Ty = GetNeonType(VMContext, 4, quad);
     return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt") 
                 : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
   }
@@ -1021,22 +1025,44 @@
   case ARM::BI__builtin_neon_vcvt_u32_v:
   case ARM::BI__builtin_neon_vcvtq_s32_v:
   case ARM::BI__builtin_neon_vcvtq_u32_v: {
+    Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(VMContext, 4, quad));
     return usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt") 
                 : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
   }
+  // FIXME: these intrinsics often do not work due to the fragility of bitcasts
+  // coming and going during codegen.
   case ARM::BI__builtin_neon_vcvt_n_f32_v:
   case ARM::BI__builtin_neon_vcvtq_n_f32_v: {
+    const llvm::Type *Tys[2] = { GetNeonType(VMContext, 4, quad), Ty };
     Int = usgn ? Intrinsic::arm_neon_vcvtfxu2fp : Intrinsic::arm_neon_vcvtfxs2fp;
-    Value *F = CGM.getIntrinsic(Int, &Ty, 1);
-    return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "vcvt_n");
+    Function *F = CGM.getIntrinsic(Int, Tys, 2);
+    return EmitNeonCall(F, Ops, "vcvt_n");
   }
   case ARM::BI__builtin_neon_vcvt_n_s32_v:
   case ARM::BI__builtin_neon_vcvt_n_u32_v:
   case ARM::BI__builtin_neon_vcvtq_n_s32_v:
   case ARM::BI__builtin_neon_vcvtq_n_u32_v: {
+    const llvm::Type *Tys[2] = { Ty, GetNeonType(VMContext, 4, quad) };
     Int = usgn ? Intrinsic::arm_neon_vcvtfp2fxu : Intrinsic::arm_neon_vcvtfp2fxs;
-    Value *F = CGM.getIntrinsic(Int, &Ty, 1);
-    return Builder.CreateCall(F, &Ops[0], &Ops[0] + 2, "vcvt_n");
+    Function *F = CGM.getIntrinsic(Int, Tys, 2);
+    return EmitNeonCall(F, Ops, "vcvt_n");
+  }
+  case ARM::BI__builtin_neon_vext_v:
+  case ARM::BI__builtin_neon_vextq_v: {
+    ConstantInt *C = dyn_cast<ConstantInt>(Ops[2]);
+    int CV = C->getSExtValue();
+    
+    SmallVector<Constant*, 8> Indices;
+
+    const llvm::Type *I32Ty = llvm::Type::getInt32Ty(VMContext);
+    for (unsigned i = 0, e = cast<llvm::VectorType>(Ty)->getNumElements();
+         i != e; ++i)
+      Indices.push_back(ConstantInt::get(I32Ty, i+CV));
+    
+    Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+    Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
+    return Builder.CreateShuffleVector(Ops[0], Ops[1], SV);
   }
   }
 }

Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.h?rev=105599&r1=105598&r2=105599&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenFunction.h (original)
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.h Tue Jun  8 01:03:01 2010
@@ -1146,6 +1146,10 @@
   llvm::Value *EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
 
   llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+  llvm::Value *EmitNeonCall(llvm::Function *F, 
+                            llvm::SmallVectorImpl<llvm::Value*> &O,
+                            const char *name);
+  
   llvm::Value *EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
 





More information about the cfe-commits mailing list