r200471 - ARM & AArch64: fully share NEON implementation of permutation intrinsics

Tim Northover tnorthover at apple.com
Thu Jan 30 06:47:58 PST 2014


Author: tnorthover
Date: Thu Jan 30 08:47:57 2014
New Revision: 200471

URL: http://llvm.org/viewvc/llvm-project?rev=200471&view=rev
Log:
ARM & AArch64: fully share NEON implementation of permutation intrinsics

As a starting point, this moves the CodeGen for NEON permutation
instructions (vtrn, vzip, vuzp) into a new shared function.
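
For context, the builtins routed through the new helper correspond to the
<arm_neon.h> permutation intrinsics. A minimal source-level sketch
(illustrative only, not part of this patch; the function and variable names
are made up, and it assumes a NEON-enabled target):

    #include <arm_neon.h>

    // Each intrinsic returns both halves of the permutation as a
    // two-vector struct (int32x4x2_t here).
    void demo(int32x4_t a, int32x4_t b, int32x4x2_t out[3]) {
      out[0] = vtrnq_s32(a, b); // transpose: pair lane i of a with lane i of b
      out[1] = vuzpq_s32(a, b); // unzip: even lanes of {a, b}, then odd lanes
      out[2] = vzipq_s32(a, b); // zip: interleave low halves, then high halves
    }

Clang lowers each call with a pointer to the two-vector result (Ops[0]) plus
the two input vectors (Ops[1], Ops[2]); the shared helper below emits one
shufflevector per result half and stores it through that pointer, identically
for ARM and AArch64.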

Modified:
    cfe/trunk/lib/CodeGen/CGBuiltin.cpp
    cfe/trunk/lib/CodeGen/CodeGenFunction.h

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=200471&r1=200470&r2=200471&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Jan 30 08:47:57 2014
@@ -1754,6 +1754,76 @@ CodeGenFunction::EmitPointerWithAlignmen
   return std::make_pair(EmitScalarExpr(Addr), Align);
 }
 
+Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(unsigned BuiltinID,
+                                                  const CallExpr *E,
+                                                  SmallVectorImpl<Value *> &Ops,
+                                                  llvm::VectorType *VTy) {
+  switch (BuiltinID) {
+  default: break;
+  case NEON::BI__builtin_neon_vtrn_v:
+  case NEON::BI__builtin_neon_vtrnq_v: {
+    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
+    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
+    Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
+    Value *SV = 0;
+
+    for (unsigned vi = 0; vi != 2; ++vi) {
+      SmallVector<Constant*, 16> Indices;
+      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
+        Indices.push_back(Builder.getInt32(i+vi));
+        Indices.push_back(Builder.getInt32(i+e+vi));
+      }
+      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+      SV = llvm::ConstantVector::get(Indices);
+      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
+      SV = Builder.CreateStore(SV, Addr);
+    }
+    return SV;
+  }
+  case NEON::BI__builtin_neon_vuzp_v:
+  case NEON::BI__builtin_neon_vuzpq_v: {
+    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
+    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
+    Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
+    Value *SV = 0;
+
+    for (unsigned vi = 0; vi != 2; ++vi) {
+      SmallVector<Constant*, 16> Indices;
+      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
+        Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
+
+      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+      SV = llvm::ConstantVector::get(Indices);
+      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
+      SV = Builder.CreateStore(SV, Addr);
+    }
+    return SV;
+  }
+  case NEON::BI__builtin_neon_vzip_v:
+  case NEON::BI__builtin_neon_vzipq_v: {
+    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
+    Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
+    Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
+    Value *SV = 0;
+
+    for (unsigned vi = 0; vi != 2; ++vi) {
+      SmallVector<Constant*, 16> Indices;
+      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
+        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
+        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
+      }
+      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+      SV = llvm::ConstantVector::get(Indices);
+      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
+      SV = Builder.CreateStore(SV, Addr);
+    }
+    return SV;
+  }
+  }
+
+  return 0;
+}
+
 static Value *EmitAArch64ScalarBuiltinExpr(CodeGenFunction &CGF,
                                            unsigned BuiltinID,
                                            const CallExpr *E) {
@@ -2981,6 +3051,11 @@ Value *CodeGenFunction::EmitAArch64Built
   if (!Ty)
     return 0;
 
+  // Many NEON builtins have identical semantics and uses in ARM and
+  // AArch64. Emit these in a single function.
+  if (Value *Result = EmitCommonNeonBuiltinExpr(BuiltinID, E, Ops, VTy))
+    return Result;
+
   unsigned Int;
   switch (BuiltinID) {
   default:
@@ -2989,18 +3064,6 @@ Value *CodeGenFunction::EmitAArch64Built
   // AArch64 builtins mapping to legacy ARM v7 builtins.
   // FIXME: the mapped builtins listed correspond to what has been tested
   // in aarch64-neon-intrinsics.c so far.
-  case NEON::BI__builtin_neon_vuzp_v:
-    return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vuzp_v, E);
-  case NEON::BI__builtin_neon_vuzpq_v:
-    return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vuzpq_v, E);
-  case NEON::BI__builtin_neon_vzip_v:
-    return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vzip_v, E);
-  case NEON::BI__builtin_neon_vzipq_v:
-    return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vzipq_v, E);
-  case NEON::BI__builtin_neon_vtrn_v:
-    return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vtrn_v, E);
-  case NEON::BI__builtin_neon_vtrnq_v:
-    return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vtrnq_v, E);
   case NEON::BI__builtin_neon_vext_v:
     return EmitARMBuiltinExpr(NEON::BI__builtin_neon_vext_v, E);
   case NEON::BI__builtin_neon_vextq_v:
@@ -4213,6 +4276,11 @@ Value *CodeGenFunction::EmitARMBuiltinEx
   if (!Ty)
     return 0;
 
+  // Many NEON builtins have identical semantics and uses in ARM and
+  // AArch64. Emit these in a single function.
+  if (Value *Result = EmitCommonNeonBuiltinExpr(BuiltinID, E, Ops, VTy))
+    return Result;
+
   unsigned Int;
   switch (BuiltinID) {
   default: return 0;
@@ -4869,65 +4937,6 @@ Value *CodeGenFunction::EmitARMBuiltinEx
                                 ConstantAggregateZero::get(Ty));
     return Builder.CreateSExt(Ops[0], Ty, "vtst");
   }
-  case NEON::BI__builtin_neon_vtrn_v:
-  case NEON::BI__builtin_neon_vtrnq_v: {
-    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
-    Value *SV = 0;
-
-    for (unsigned vi = 0; vi != 2; ++vi) {
-      SmallVector<Constant*, 16> Indices;
-      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
-        Indices.push_back(Builder.getInt32(i+vi));
-        Indices.push_back(Builder.getInt32(i+e+vi));
-      }
-      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
-      SV = llvm::ConstantVector::get(Indices);
-      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
-      SV = Builder.CreateStore(SV, Addr);
-    }
-    return SV;
-  }
-  case NEON::BI__builtin_neon_vuzp_v:
-  case NEON::BI__builtin_neon_vuzpq_v: {
-    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
-    Value *SV = 0;
-
-    for (unsigned vi = 0; vi != 2; ++vi) {
-      SmallVector<Constant*, 16> Indices;
-      for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
-        Indices.push_back(ConstantInt::get(Int32Ty, 2*i+vi));
-
-      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
-      SV = llvm::ConstantVector::get(Indices);
-      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
-      SV = Builder.CreateStore(SV, Addr);
-    }
-    return SV;
-  }
-  case NEON::BI__builtin_neon_vzip_v:
-  case NEON::BI__builtin_neon_vzipq_v: {
-    Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
-    Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
-    Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
-    Value *SV = 0;
-
-    for (unsigned vi = 0; vi != 2; ++vi) {
-      SmallVector<Constant*, 16> Indices;
-      for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
-        Indices.push_back(ConstantInt::get(Int32Ty, (i + vi*e) >> 1));
-        Indices.push_back(ConstantInt::get(Int32Ty, ((i + vi*e) >> 1)+e));
-      }
-      Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
-      SV = llvm::ConstantVector::get(Indices);
-      SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
-      SV = Builder.CreateStore(SV, Addr);
-    }
-    return SV;
-  }
   }
 }
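
For concreteness, the index loops in EmitCommonNeonBuiltinExpr above yield the
following shuffle masks for a four-element vector (e == 4), with lanes 0-3
taken from the first input and lanes 4-7 from the second. This is a worked
example derived from the loops, not additional code in the patch:

    vtrn: half 0 -> <0, 4, 2, 6>   half 1 -> <1, 5, 3, 7>
    vuzp: half 0 -> <0, 2, 4, 6>   half 1 -> <1, 3, 5, 7>
    vzip: half 0 -> <0, 4, 1, 5>   half 1 -> <2, 6, 3, 7>

Each mask drives one shufflevector, and the result is stored to element vi of
the pointer in Ops[0], filling both halves of the intrinsic's two-vector
result.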
 

Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.h?rev=200471&r1=200470&r2=200471&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenFunction.h (original)
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.h Thu Jan 30 08:47:57 2014
@@ -2177,6 +2177,9 @@ public:
   llvm::Value *EmitAArch64CompareBuiltinExpr(llvm::Value *Op, llvm::Type *Ty);
   llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E);
   llvm::Value *EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E);
+  llvm::Value *EmitCommonNeonBuiltinExpr(unsigned BuiltinID, const CallExpr *E,
+                                         SmallVectorImpl<llvm::Value *> &Ops,
+                                         llvm::VectorType *VTy);
   llvm::Value *EmitNeonCall(llvm::Function *F,
                             SmallVectorImpl<llvm::Value*> &O,
                             const char *name,




