[cfe-commits] r106407 - in /cfe/trunk: include/clang/Basic/arm_neon.td lib/CodeGen/CGBuiltin.cpp
Nate Begeman
natebegeman at mac.com
Sun Jun 20 16:05:28 PDT 2010
Author: sampo
Date: Sun Jun 20 18:05:28 2010
New Revision: 106407
URL: http://llvm.org/viewvc/llvm-project?rev=106407&view=rev
Log:
Implement remaining codegen for NEON, all operations should now work.
Modified:
cfe/trunk/include/clang/Basic/arm_neon.td
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
Modified: cfe/trunk/include/clang/Basic/arm_neon.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=106407&r1=106406&r2=106407&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/arm_neon.td (original)
+++ cfe/trunk/include/clang/Basic/arm_neon.td Sun Jun 20 18:05:28 2010
@@ -200,7 +200,7 @@
////////////////////////////////////////////////////////////////////////////////
// E.3.14 Loads and stores of a single vector
def VLD1 : WInst<"dc", "QUcQUsQUiQUlQcQsQiQlQhQfQPcQPsUcUsUiUlcsilhfPcPs">;
-def VLD1_LANE : WInst<"dci", "QUcQUsQUiQUlQcQsQiQlQhQfQPcQPsUcUsUiUlcsilhfPcPs">;
+def VLD1_LANE : WInst<"dcdi", "QUcQUsQUiQUlQcQsQiQlQhQfQPcQPsUcUsUiUlcsilhfPcPs">;
def VLD1_DUP : WInst<"dc", "QUcQUsQUiQUlQcQsQiQlQhQfQPcQPsUcUsUiUlcsilhfPcPs">;
def VST1 : WInst<"vpd", "QUcQUsQUiQUlQcQsQiQlQhQfQPcQPsUcUsUiUlcsilhfPcPs">;
def VST1_LANE : WInst<"vpdi", "QUcQUsQUiQUlQcQsQiQlQhQfQPcQPsUcUsUiUlcsilhfPcPs">;
@@ -213,9 +213,9 @@
def VLD2_DUP : WInst<"2c", "UcUsUiUlcsilhfPcPs">;
def VLD3_DUP : WInst<"3c", "UcUsUiUlcsilhfPcPs">;
def VLD4_DUP : WInst<"4c", "UcUsUiUlcsilhfPcPs">;
-def VLD2_LANE : WInst<"2ci", "QUsQUiQsQiQhQfQPsUcUsUicsihfPcPs">;
-def VLD3_LANE : WInst<"3ci", "QUsQUiQsQiQhQfQPsUcUsUicsihfPcPs">;
-def VLD4_LANE : WInst<"4ci", "QUsQUiQsQiQhQfQPsUcUsUicsihfPcPs">;
+def VLD2_LANE : WInst<"2c2i", "QUsQUiQsQiQhQfQPsUcUsUicsihfPcPs">;
+def VLD3_LANE : WInst<"3c3i", "QUsQUiQsQiQhQfQPsUcUsUicsihfPcPs">;
+def VLD4_LANE : WInst<"4c4i", "QUsQUiQsQiQhQfQPsUcUsUicsihfPcPs">;
def VST2 : WInst<"vp2", "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPs">;
def VST3 : WInst<"vp3", "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPs">;
def VST4 : WInst<"vp4", "QUcQUsQUiQcQsQiQhQfQPcQPsUcUsUiUlcsilhfPcPs">;
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=106407&r1=106406&r2=106407&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Sun Jun 20 18:05:28 2010
@@ -948,17 +948,17 @@
}
}
-const llvm::Type *GetNeonType(LLVMContext &Ctx, unsigned type, bool q) {
+const llvm::VectorType *GetNeonType(LLVMContext &C, unsigned type, bool q) {
switch (type) {
default: break;
case 0:
- case 5: return llvm::VectorType::get(llvm::Type::getInt8Ty(Ctx), 8 << (int)q);
+ case 5: return llvm::VectorType::get(llvm::Type::getInt8Ty(C), 8 << (int)q);
case 6:
case 7:
- case 1: return llvm::VectorType::get(llvm::Type::getInt16Ty(Ctx), 4 << (int)q);
- case 2: return llvm::VectorType::get(llvm::Type::getInt32Ty(Ctx), 2 << (int)q);
- case 3: return llvm::VectorType::get(llvm::Type::getInt64Ty(Ctx), 1 << (int)q);
- case 4: return llvm::VectorType::get(llvm::Type::getFloatTy(Ctx), 2 << (int)q);
+ case 1: return llvm::VectorType::get(llvm::Type::getInt16Ty(C),4 << (int)q);
+ case 2: return llvm::VectorType::get(llvm::Type::getInt32Ty(C),2 << (int)q);
+ case 3: return llvm::VectorType::get(llvm::Type::getInt64Ty(C),1 << (int)q);
+ case 4: return llvm::VectorType::get(llvm::Type::getFloatTy(C),2 << (int)q);
};
return 0;
}
@@ -1030,7 +1030,8 @@
bool poly = (type & 0x7) == 5 || (type & 0x7) == 6;
bool splat = false;
- const llvm::Type *Ty = GetNeonType(VMContext, type & 0x7, quad);
+ const llvm::VectorType *VTy = GetNeonType(VMContext, type & 0x7, quad);
+ const llvm::Type *Ty = VTy;
if (!Ty)
return 0;
@@ -1142,8 +1143,7 @@
const llvm::Type *I32Ty = llvm::Type::getInt32Ty(VMContext);
SmallVector<Constant*, 16> Indices;
- for (unsigned i = 0, e = cast<llvm::VectorType>(Ty)->getNumElements();
- i != e; ++i)
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
Indices.push_back(ConstantInt::get(I32Ty, i+CV));
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
@@ -1171,7 +1171,122 @@
case ARM::BI__builtin_neon_vhsubq_v:
Int = usgn ? Intrinsic::arm_neon_vhsubu : Intrinsic::arm_neon_vhsubs;
return EmitNeonCall(CGM.getIntrinsic(Int, &Ty, 1), Ops, "vhsub");
- // FIXME: vld*
+ case ARM::BI__builtin_neon_vld1_v:
+ case ARM::BI__builtin_neon_vld1q_v:
+ return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, &Ty, 1),
+ Ops, "vld1");
+ case ARM::BI__builtin_neon_vld1_lane_v:
+ case ARM::BI__builtin_neon_vld1q_lane_v:
+ Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+ Ty = llvm::PointerType::getUnqual(VTy->getElementType());
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ Ops[0] = Builder.CreateLoad(Ops[0]);
+ return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
+ case ARM::BI__builtin_neon_vld1_dup_v:
+ case ARM::BI__builtin_neon_vld1q_dup_v: {
+ Value *V = UndefValue::get(Ty);
+ Ty = llvm::PointerType::getUnqual(VTy->getElementType());
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ Ops[0] = Builder.CreateLoad(Ops[0]);
+ llvm::Constant *CI = ConstantInt::get(llvm::Type::getInt32Ty(VMContext), 0);
+ Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
+ return EmitNeonSplat(Ops[0], CI);
+ }
+ case ARM::BI__builtin_neon_vld2_v:
+ case ARM::BI__builtin_neon_vld2q_v: {
+ Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2, &Ty, 1);
+ Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
+ Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ return Builder.CreateStore(Ops[1], Ops[0]);
+ }
+ case ARM::BI__builtin_neon_vld3_v:
+ case ARM::BI__builtin_neon_vld3q_v: {
+ Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3, &Ty, 1);
+ Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
+ Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ return Builder.CreateStore(Ops[1], Ops[0]);
+ }
+ case ARM::BI__builtin_neon_vld4_v:
+ case ARM::BI__builtin_neon_vld4q_v: {
+ Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4, &Ty, 1);
+ Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
+ Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ return Builder.CreateStore(Ops[1], Ops[0]);
+ }
+ case ARM::BI__builtin_neon_vld2_lane_v:
+ case ARM::BI__builtin_neon_vld2q_lane_v: {
+ Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld2lane, &Ty, 1);
+ Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
+ Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
+ Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld2_lane");
+ Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ return Builder.CreateStore(Ops[1], Ops[0]);
+ }
+ case ARM::BI__builtin_neon_vld3_lane_v:
+ case ARM::BI__builtin_neon_vld3q_lane_v: {
+ Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld3lane, &Ty, 1);
+ Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
+ Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
+ Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
+ Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld3_lane");
+ Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ return Builder.CreateStore(Ops[1], Ops[0]);
+ }
+ case ARM::BI__builtin_neon_vld4_lane_v:
+ case ARM::BI__builtin_neon_vld4q_lane_v: {
+ Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld4lane, &Ty, 1);
+ Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
+ Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
+ Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
+ Ops[5] = Builder.CreateBitCast(Ops[5], Ty);
+ Ops[1] = Builder.CreateCall(F, Ops.begin() + 1, Ops.end(), "vld3_lane");
+ Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ return Builder.CreateStore(Ops[1], Ops[0]);
+ }
+ case ARM::BI__builtin_neon_vld2_dup_v:
+ case ARM::BI__builtin_neon_vld3_dup_v:
+ case ARM::BI__builtin_neon_vld4_dup_v: {
+ switch (BuiltinID) {
+ case ARM::BI__builtin_neon_vld2_dup_v:
+ Int = Intrinsic::arm_neon_vld2lane;
+ break;
+ case ARM::BI__builtin_neon_vld3_dup_v:
+ Int = Intrinsic::arm_neon_vld2lane;
+ break;
+ case ARM::BI__builtin_neon_vld4_dup_v:
+ Int = Intrinsic::arm_neon_vld2lane;
+ break;
+ default: assert(0 && "unknown vld_dup intrinsic?");
+ }
+ Function *F = CGM.getIntrinsic(Int, &Ty, 1);
+ const llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
+
+ SmallVector<Value*, 6> Args;
+ Args.push_back(Ops[1]);
+ Args.append(STy->getNumElements(), UndefValue::get(Ty));
+
+ llvm::Constant *CI = ConstantInt::get(llvm::Type::getInt32Ty(VMContext), 0);
+ Args.push_back(CI);
+
+ Ops[1] = Builder.CreateCall(F, Args.begin(), Args.end(), "vld_dup");
+ // splat lane 0 to all elts in each vector of the result.
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ Value *Val = Builder.CreateExtractValue(Ops[1], i);
+ Value *Elt = Builder.CreateBitCast(Val, Ty);
+ Elt = EmitNeonSplat(Elt, CI);
+ Elt = Builder.CreateBitCast(Elt, Val->getType());
+ Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
+ }
+ Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ return Builder.CreateStore(Ops[1], Ops[0]);
+ }
case ARM::BI__builtin_neon_vmax_v:
case ARM::BI__builtin_neon_vmaxq_v:
Int = usgn ? Intrinsic::arm_neon_vmaxu : Intrinsic::arm_neon_vmaxs;
@@ -1474,50 +1589,67 @@
ConstantAggregateZero::get(Ty));
return Builder.CreateSExt(Ops[0], Ty, "vtst");
}
- // FIXME: transpose/zip/unzip don't currently match patterns for
- // the non-q variants, but emitting 2 shufflevectors seems like a hack.
case ARM::BI__builtin_neon_vtrn_v:
case ARM::BI__builtin_neon_vtrnq_v: {
- const llvm::Type *I32Ty = llvm::Type::getInt32Ty(VMContext);
- SmallVector<Constant*, 32> Indices;
- unsigned nElts = cast<llvm::VectorType>(Ty)->getNumElements();
+ Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
+ Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+ Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
+ Ty = llvm::Type::getInt32Ty(VMContext);
+ Value *SV;
+
for (unsigned vi = 0; vi != 2; ++vi) {
- for (unsigned i = 0; i != nElts; i += 2) {
- Indices.push_back(ConstantInt::get(I32Ty, i+vi));
- Indices.push_back(ConstantInt::get(I32Ty, i+nElts+vi));
+ SmallVector<Constant*, 16> Indices;
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
+ Indices.push_back(ConstantInt::get(Ty, i+vi));
+ Indices.push_back(ConstantInt::get(Ty, i+e+vi));
}
+ Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+ SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
+ SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vtrn");
+ SV = Builder.CreateStore(SV, Addr);
}
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
- Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
- return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vtrn");
+ return SV;
}
case ARM::BI__builtin_neon_vuzp_v:
case ARM::BI__builtin_neon_vuzpq_v: {
- const llvm::Type *I32Ty = llvm::Type::getInt32Ty(VMContext);
- SmallVector<Constant*, 32> Indices;
- unsigned nElts = cast<llvm::VectorType>(Ty)->getNumElements();
- for (unsigned vi = 0; vi != 2; ++vi)
- for (unsigned i = 0; i != nElts; ++i)
- Indices.push_back(ConstantInt::get(I32Ty, 2*i+vi));
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
- return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vuzp");
+ Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
+ Ty = llvm::Type::getInt32Ty(VMContext);
+ Value *SV;
+
+ for (unsigned vi = 0; vi != 2; ++vi) {
+ SmallVector<Constant*, 16> Indices;
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
+ Indices.push_back(ConstantInt::get(Ty, 2*i+vi));
+
+ Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+ SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
+ SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vuzp");
+ SV = Builder.CreateStore(SV, Addr);
+ }
+ return SV;
}
case ARM::BI__builtin_neon_vzip_v:
case ARM::BI__builtin_neon_vzipq_v: {
- const llvm::Type *I32Ty = llvm::Type::getInt32Ty(VMContext);
- SmallVector<Constant*, 32> Indices;
- unsigned nElts = cast<llvm::VectorType>(Ty)->getNumElements();
- for (unsigned i = 0; i != nElts; ++i) {
- Indices.push_back(ConstantInt::get(I32Ty, i));
- Indices.push_back(ConstantInt::get(I32Ty, i+nElts));
- }
- Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
- Value* SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
- return Builder.CreateShuffleVector(Ops[0], Ops[1], SV, "vzip");
+ Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
+ Ty = llvm::Type::getInt32Ty(VMContext);
+ Value *SV;
+
+ for (unsigned vi = 0; vi != 2; ++vi) {
+ SmallVector<Constant*, 16> Indices;
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
+ Indices.push_back(ConstantInt::get(Ty, (i >> 1)));
+ Indices.push_back(ConstantInt::get(Ty, (i >> 1)+e));
+ }
+ Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ops[0], vi);
+ SV = llvm::ConstantVector::get(Indices.begin(), Indices.size());
+ SV = Builder.CreateShuffleVector(Ops[1], Ops[2], SV, "vzip");
+ SV = Builder.CreateStore(SV, Addr);
+ }
+ return SV;
}
}
}
More information about the cfe-commits
mailing list