r189463 - ARM: Emit normal IR for vaddhn/vsubhn NEON intrinsics
Tim Northover
tnorthover at apple.com
Wed Aug 28 02:46:37 PDT 2013
Author: tnorthover
Date: Wed Aug 28 04:46:37 2013
New Revision: 189463
URL: http://llvm.org/viewvc/llvm-project?rev=189463&view=rev
Log:
ARM: Emit normal IR for vaddhn/vsubhn NEON intrinsics
These operations "vector add high-half narrow" actually correspond to the
sequence:
%sum = add <4 x i32> %lhs, %rhs
%high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
%res = trunc <4 x i32> %high to <4 x i16>
Now that LLVM can spot this, Clang should emit the corresponding LLVM IR.
Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=189463&r1=189462&r2=189463&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed Aug 28 04:46:37 2013
@@ -2202,9 +2202,24 @@ Value *CodeGenFunction::EmitARMBuiltinEx
case ARM::BI__builtin_neon_vabsq_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vabs, Ty),
Ops, "vabs");
- case ARM::BI__builtin_neon_vaddhn_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vaddhn, Ty),
- Ops, "vaddhn");
+ case ARM::BI__builtin_neon_vaddhn_v: {
+ llvm::VectorType *SrcTy =
+ llvm::VectorType::getExtendedElementVectorType(VTy);
+
+ // %sum = add <4 x i32> %lhs, %rhs
+ Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
+ Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
+ Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
+
+ // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
+ Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(),
+ SrcTy->getScalarSizeInBits() / 2);
+ ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt);
+ Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
+
+ // %res = trunc <4 x i32> %high to <4 x i16>
+ return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
+ }
case ARM::BI__builtin_neon_vcale_v:
std::swap(Ops[0], Ops[1]);
case ARM::BI__builtin_neon_vcage_v: {
@@ -2782,9 +2797,24 @@ Value *CodeGenFunction::EmitARMBuiltinEx
Ops.push_back(Align);
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst4lane, Ty),
Ops, "");
- case ARM::BI__builtin_neon_vsubhn_v:
- return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vsubhn, Ty),
- Ops, "vsubhn");
+ case ARM::BI__builtin_neon_vsubhn_v: {
+ llvm::VectorType *SrcTy =
+ llvm::VectorType::getExtendedElementVectorType(VTy);
+
+ // %sum = add <4 x i32> %lhs, %rhs
+ Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
+ Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
+ Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
+
+ // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
+ Constant *ShiftAmt = ConstantInt::get(SrcTy->getElementType(),
+ SrcTy->getScalarSizeInBits() / 2);
+ ShiftAmt = ConstantVector::getSplat(VTy->getNumElements(), ShiftAmt);
+ Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
+
+ // %res = trunc <4 x i32> %high to <4 x i16>
+ return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
+ }
case ARM::BI__builtin_neon_vtbl1_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
Ops, "vtbl1");
More information about the cfe-commits
mailing list