r194889 - Implemented AArch64 Neon scalar vmulx_lane intrinsics
Ana Pazos
apazos at codeaurora.org
Fri Nov 15 15:33:32 PST 2013
Author: apazos
Date: Fri Nov 15 17:33:31 2013
New Revision: 194889
URL: http://llvm.org/viewvc/llvm-project?rev=194889&view=rev
Log:
Implemented AArch64 Neon scalar vmulx_lane intrinsics.
Implemented AArch64 Neon scalar vfma_lane intrinsics.
Implemented AArch64 Neon scalar vfms_lane intrinsics.
Implemented legacy vmul_n_f64, vmul_lane_f64, and vmul_laneq_f64
intrinsics (v1f64 parameter type) using Neon scalar instructions.
Implemented legacy vfma_lane_f64, vfms_lane_f64, vfma_laneq_f64,
and vfms_laneq_f64 intrinsics (v1f64 parameter type) using Neon
scalar instructions.
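For reference, the new scalar by-element intrinsics are used like this
(a minimal usage sketch; the wrapper function names are illustrative,
but the intrinsic signatures match the tests added below):

  #include <arm_neon.h>

  float64_t mulx_by_lane(float64_t a, float64x2_t v) {
    // Scalar floating-point multiply extended with lane 1 of v.
    return vmulxd_laneq_f64(a, v, 1);
  }

  float64_t fma_by_lane(float64_t acc, float64_t x, float64x1_t v) {
    // Scalar fused multiply-add: acc + x * v[0].
    return vfmad_lane_f64(acc, x, v, 0);
  }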
Added:
cfe/trunk/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c
Modified:
cfe/trunk/include/clang/Basic/arm_neon.td
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/aarch64-neon-2velem.c
cfe/trunk/utils/TableGen/NeonEmitter.cpp
Modified: cfe/trunk/include/clang/Basic/arm_neon.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=194889&r1=194888&r2=194889&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/arm_neon.td (original)
+++ cfe/trunk/include/clang/Basic/arm_neon.td Fri Nov 15 17:33:31 2013
@@ -113,6 +113,12 @@ def OP_MOVL_HI : Op;
def OP_COPY_LN : Op;
def OP_COPYQ_LN : Op;
def OP_COPY_LNQ : Op;
+def OP_SCALAR_MUL_LN : Op;
+def OP_SCALAR_MUL_LNQ : Op;
+def OP_SCALAR_MULX_LN : Op;
+def OP_SCALAR_MULX_LNQ : Op;
+def OP_SCALAR_VMULX_LN : Op;
+def OP_SCALAR_VMULX_LNQ : Op;
class Inst <string n, string p, string t, Op o> {
string Name = n;
@@ -845,10 +851,12 @@ def VQDMLSL_HIGH_LANEQ : SOpInst<"vqdml
OP_QDMLSLHi_LN>;
// Newly added double parameter for vmul_lane in AArch64
-def VMUL_LANE_A64 : IOpInst<"vmul_lane", "ddgi", "dQd", OP_MUL_LN>;
+// Note: d type is handled by SCALAR_VMUL_LANE
+def VMUL_LANE_A64 : IOpInst<"vmul_lane", "ddgi", "Qd", OP_MUL_LN>;
+// Note: d type is handled by SCALAR_VMUL_LANEQ
def VMUL_LANEQ : IOpInst<"vmul_laneq", "ddji",
- "sifdUsUiQsQiQfQUsQUiQfQd", OP_MUL_LN>;
+ "sifUsUiQsQiQfQUsQUiQfQd", OP_MUL_LN>;
def VMULL_LANEQ : SOpInst<"vmull_laneq", "wdki", "siUsUi", OP_MULL_LN>;
def VMULL_HIGH_LANE : SOpInst<"vmull_high_lane", "wkdi", "siUsUi",
OP_MULLHi_LN>;
@@ -864,8 +872,10 @@ def VQDMULL_HIGH_LANEQ : SOpInst<"vqdmu
def VQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "ddji", "siQsQi", OP_QDMULH_LN>;
def VQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ddji", "siQsQi", OP_QRDMULH_LN>;
-def VMULX_LANE : IOpInst<"vmulx_lane", "ddgi", "fdQfQd", OP_MULX_LN>;
-def VMULX_LANEQ : IOpInst<"vmulx_laneq", "ddji", "fdQfQd", OP_MULX_LN>;
+// Note: d type is implemented by SCALAR_VMULX_LANE
+def VMULX_LANE : IOpInst<"vmulx_lane", "ddgi", "fQfQd", OP_MULX_LN>;
+// Note: d type is implemented by SCALAR_VMULX_LANEQ
+def VMULX_LANEQ : IOpInst<"vmulx_laneq", "ddji", "fQfQd", OP_MULX_LN>;
////////////////////////////////////////////////////////////////////////////////
// Across vectors class
@@ -1145,4 +1155,34 @@ def SCALAR_SQXTN : SInst<"vqmovn", "zs",
////////////////////////////////////////////////////////////////////////////////
// Scalar Unsigned Saturating Extract Narrow
def SCALAR_UQXTN : SInst<"vqmovn", "zs", "SUsSUiSUl">;
+
+// Scalar Floating Point multiply (scalar, by element)
+def SCALAR_FMUL_LANE : IOpInst<"vmul_lane", "ssdi", "SfSd", OP_SCALAR_MUL_LN>;
+def SCALAR_FMUL_LANEQ : IOpInst<"vmul_laneq", "ssji", "SfSd", OP_SCALAR_MUL_LNQ>;
+
+// Scalar Floating Point multiply extended (scalar, by element)
+def SCALAR_FMULX_LANE : IOpInst<"vmulx_lane", "ssdi", "SfSd", OP_SCALAR_MULX_LN>;
+def SCALAR_FMULX_LANEQ : IOpInst<"vmulx_laneq", "ssji", "SfSd", OP_SCALAR_MULX_LNQ>;
+
+def SCALAR_VMUL_N : IInst<"vmul_n", "dds", "d">;
+
+// VMUL_LANE_A64 d type implemented using scalar mul lane
+def SCALAR_VMUL_LANE : IInst<"vmul_lane", "ddgi", "d">;
+
+// VMUL_LANEQ d type implemented using scalar mul lane
+def SCALAR_VMUL_LANEQ : IInst<"vmul_laneq", "ddji", "d">;
+
+// VMULX_LANE d type implemented using scalar vmulx_lane
+def SCALAR_VMULX_LANE : IOpInst<"vmulx_lane", "ddgi", "d", OP_SCALAR_VMULX_LN>;
+
+// VMULX_LANEQ d type implemented using scalar vmulx_laneq
+def SCALAR_VMULX_LANEQ : IOpInst<"vmulx_laneq", "ddji", "d", OP_SCALAR_VMULX_LNQ>;
+
+// Scalar Floating Point fused multiply-add (scalar, by element)
+def SCALAR_FMLA_LANE : IInst<"vfma_lane", "sssdi", "SfSd">;
+def SCALAR_FMLA_LANEQ : IInst<"vfma_laneq", "sssji", "SfSd">;
+
+// Scalar Floating Point fused multiply-subtract (scalar, by element)
+def SCALAR_FMLS_LANE : IOpInst<"vfms_lane", "sssdi", "SfSd", OP_FMS_LN>;
+def SCALAR_FMLS_LANEQ : IOpInst<"vfms_laneq", "sssji", "SfSd", OP_FMS_LNQ>;
}
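For the scalar by-element operations above, NeonEmitter generates inline
code in arm_neon.h rather than a direct builtin call. As a rough sketch
(reconstructed from the NeonEmitter changes below, not the verbatim
generated header), OP_SCALAR_MULX_LN expands vmulxd_lane_f64 to:

  #define vmulxd_lane_f64(__a, __b, __c) __extension__ ({ \
    float64_t __d1 = vget_lane_f64(__b, __c);             \
    vmulxd_f64(__a, __d1); })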
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=194889&r1=194888&r2=194889&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Nov 15 17:33:31 2013
@@ -1772,6 +1772,37 @@ static Value *EmitAArch64ScalarBuiltinEx
// argument that specifies the vector type, need to handle each case.
switch (BuiltinID) {
default: break;
+ case AArch64::BI__builtin_neon_vfmas_lane_f32:
+ case AArch64::BI__builtin_neon_vfmas_laneq_f32:
+ case AArch64::BI__builtin_neon_vfmad_lane_f64:
+ case AArch64::BI__builtin_neon_vfmad_laneq_f64: {
+ bool Quad = false;
+ if (BuiltinID == AArch64::BI__builtin_neon_vfmas_laneq_f32 ||
+ BuiltinID == AArch64::BI__builtin_neon_vfmad_laneq_f64)
+ Quad = true;
+ llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
+ Value *F = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
+ // Extract the lane, then compute acc += x * v[lane]
+ Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
+ return CGF.Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
+ }
+ // Scalar Floating-point Multiply Extended
+ case AArch64::BI__builtin_neon_vmulxs_f32:
+ case AArch64::BI__builtin_neon_vmulxd_f64: {
+ Int = Intrinsic::aarch64_neon_vmulx;
+ llvm::Type *Ty = CGF.ConvertType(E->getCallReturnType());
+ return CGF.EmitNeonCall(CGF.CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
+ }
+ case AArch64::BI__builtin_neon_vmul_n_f64: {
+ // v1f64 vmul_n_f64 should be mapped to Neon scalar mul lane
+ llvm::Type *VTy = GetNeonType(&CGF,
+ NeonTypeFlags(NeonTypeFlags::Float64, false, false));
+ Ops[0] = CGF.Builder.CreateBitCast(Ops[0], VTy);
+ llvm::Value *Idx = llvm::ConstantInt::get(CGF.Int32Ty, 0);
+ Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], Idx, "extract");
+ Value *Result = CGF.Builder.CreateFMul(Ops[0], Ops[1]);
+ return CGF.Builder.CreateBitCast(Result, VTy);
+ }
case AArch64::BI__builtin_neon_vget_lane_i8:
case AArch64::BI__builtin_neon_vget_lane_i16:
case AArch64::BI__builtin_neon_vget_lane_i32:
@@ -2006,11 +2037,6 @@ static Value *EmitAArch64ScalarBuiltinEx
case AArch64::BI__builtin_neon_vqrdmulhs_s32:
Int = Intrinsic::arm_neon_vqrdmulh;
s = "vqrdmulh"; OverloadInt = true; break;
- // Scalar Floating-point Multiply Extended
- case AArch64::BI__builtin_neon_vmulxs_f32:
- case AArch64::BI__builtin_neon_vmulxd_f64:
- Int = Intrinsic::aarch64_neon_vmulx;
- s = "vmulx"; OverloadInt = true; break;
// Scalar Floating-point Reciprocal Step and
case AArch64::BI__builtin_neon_vrecpss_f32:
case AArch64::BI__builtin_neon_vrecpsd_f64:
@@ -3094,9 +3120,22 @@ Value *CodeGenFunction::EmitAArch64Built
case AArch64::BI__builtin_neon_vsha256su1q_v:
return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha256su1, Ty),
Ops, "sha256su1");
+ case AArch64::BI__builtin_neon_vmul_lane_v:
+ case AArch64::BI__builtin_neon_vmul_laneq_v: {
+ // v1f64 vmul_lane should be mapped to Neon scalar mul lane
+ bool Quad = false;
+ if (BuiltinID == AArch64::BI__builtin_neon_vmul_laneq_v)
+ Quad = true;
+ Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
+ llvm::Type *VTy = GetNeonType(this,
+ NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
+ Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
+ Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
+ Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
+ return Builder.CreateBitCast(Result, Ty);
+ }
// AArch64-only builtins
- case AArch64::BI__builtin_neon_vfma_lane_v:
case AArch64::BI__builtin_neon_vfmaq_laneq_v: {
Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
@@ -3121,12 +3160,46 @@ Value *CodeGenFunction::EmitAArch64Built
return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
}
- case AArch64::BI__builtin_neon_vfma_laneq_v: {
+ case AArch64::BI__builtin_neon_vfma_lane_v: {
+ llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
+ // v1f64 fma should be mapped to Neon scalar f64 fma
+ if (VTy && VTy->getElementType() == DoubleTy) {
+ Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
+ Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
+ llvm::Type *VTy = GetNeonType(this,
+ NeonTypeFlags(NeonTypeFlags::Float64, false, false));
+ Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
+ Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
+ Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
+ Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
+ return Builder.CreateBitCast(Result, Ty);
+ }
Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+ Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
+ Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
+ return Builder.CreateCall3(F, Ops[2], Ops[1], Ops[0]);
+ }
+ case AArch64::BI__builtin_neon_vfma_laneq_v: {
llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
+ // v1f64 fma should be mapped to Neon scalar f64 fma
+ if (VTy && VTy->getElementType() == DoubleTy) {
+ Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
+ Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
+ llvm::Type *VTy = GetNeonType(this,
+ NeonTypeFlags(NeonTypeFlags::Float64, false, true));
+ Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
+ Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
+ Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
+ Value *Result = Builder.CreateCall3(F, Ops[1], Ops[2], Ops[0]);
+ return Builder.CreateBitCast(Result, Ty);
+ }
+ Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
+ Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
+ Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+
llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
VTy->getNumElements() * 2);
Ops[2] = Builder.CreateBitCast(Ops[2], STy);
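The net effect of the CGBuiltin.cpp changes is that v1f64 lane operations
are emitted as scalar double-precision arithmetic instead of vector ops.
A minimal C model of the implemented semantics (illustrative helper names,
not the actual generated code):

  // vmul_lane_f64(a, v, 0) lowers to a scalar fmul: a[0] * v[0].
  double mul_lane_model(double a, double v0) {
    return a * v0;
  }

  // vfma_lane_f64(a, b, v, 0) lowers to llvm.fma.f64(b[0], v[0], a[0]);
  // note the operand order of CreateCall3(F, Ops[1], Ops[2], Ops[0]) above.
  double fma_lane_model(double a, double b, double v0) {
    return b * v0 + a; // fused into one fmadd when contraction is permitted
  }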
Modified: cfe/trunk/test/CodeGen/aarch64-neon-2velem.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-2velem.c?rev=194889&r1=194888&r2=194889&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-2velem.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-neon-2velem.c Fri Nov 15 17:33:31 2013
@@ -722,6 +722,14 @@ float32x2_t test_vmul_lane_f32(float32x2
// CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
}
+
+float64x1_t test_vmul_lane_f64(float64x1_t a, float64x1_t v) {
+ // CHECK: test_vmul_lane_f64
+ return vmul_lane_f64(a, v, 0);
+ // CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+}
+
+
float32x4_t test_vmulq_lane_f32(float32x4_t a, float32x2_t v) {
// CHECK: test_vmulq_lane_f32
return vmulq_lane_f32(a, v, 1);
@@ -740,6 +748,13 @@ float32x2_t test_vmul_laneq_f32(float32x
// CHECK: fmul {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
}
+float64x1_t test_vmul_laneq_f64_0(float64x1_t a, float64x2_t v) {
+ // CHECK: test_vmul_laneq_f64_0
+ return vmul_laneq_f64(a, v, 0);
+ // CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+}
+
+
float32x4_t test_vmulq_laneq_f32(float32x4_t a, float32x4_t v) {
// CHECK: test_vmulq_laneq_f32
return vmulq_laneq_f32(a, v, 1);
Added: cfe/trunk/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c?rev=194889&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c (added)
+++ cfe/trunk/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c Fri Nov 15 17:33:31 2013
@@ -0,0 +1,131 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon \
+// RUN: -ffp-contract=fast -S -O3 -o - %s | FileCheck %s
+
+// Test new aarch64 intrinsics and types
+
+#include <arm_neon.h>
+
+
+float32_t test_vmuls_lane_f32(float32_t a, float32x2_t b) {
+ // CHECK: test_vmuls_lane_f32
+ return vmuls_lane_f32(a, b, 1);
+ // CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+}
+
+float64_t test_vmuld_lane_f64(float64_t a, float64x1_t b) {
+ // CHECK: test_vmuld_lane_f64
+ return vmuld_lane_f64(a, b, 0);
+ // CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+}
+
+float32_t test_vmuls_laneq_f32(float32_t a, float32x4_t b) {
+ // CHECK: test_vmuls_laneq_f32
+ return vmuls_laneq_f32(a, b, 3);
+ // CHECK: fmul {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+}
+
+float64_t test_vmuld_laneq_f64(float64_t a, float64x2_t b) {
+ // CHECK: test_vmuld_laneq_f64
+ return vmuld_laneq_f64(a, b, 1);
+ // CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+}
+
+float64x1_t test_vmul_n_f64(float64x1_t a, float64_t b) {
+ // CHECK: test_vmul_n_f64
+ return vmul_n_f64(a, b);
+ // CHECK: fmul {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+}
+
+float32_t test_vmulxs_lane_f32(float32_t a, float32x2_t b) {
+ // CHECK: test_vmulxs_lane_f32
+ return vmulxs_lane_f32(a, b, 1);
+ // CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+}
+
+float32_t test_vmulxs_laneq_f32(float32_t a, float32x4_t b) {
+ // CHECK: test_vmulxs_laneq_f32
+ return vmulxs_laneq_f32(a, b, 3);
+ // CHECK: fmulx {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+}
+
+float64_t test_vmulxd_lane_f64(float64_t a, float64x1_t b) {
+ // CHECK: test_vmulxd_lane_f64
+ return vmulxd_lane_f64(a, b, 0);
+ // CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+}
+
+float64_t test_vmulxd_laneq_f64(float64_t a, float64x2_t b) {
+ // CHECK: test_vmulxd_laneq_f64
+ return vmulxd_laneq_f64(a, b, 1);
+ // CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+}
+
+// CHECK: test_vmulx_lane_f64
+float64x1_t test_vmulx_lane_f64(float64x1_t a, float64x1_t b) {
+ return vmulx_lane_f64(a, b, 0);
+ // CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+}
+
+
+// CHECK: test_vmulx_laneq_f64_0
+float64x1_t test_vmulx_laneq_f64_0(float64x1_t a, float64x2_t b) {
+ return vmulx_laneq_f64(a, b, 0);
+ // CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+}
+
+// CHECK: test_vmulx_laneq_f64_1
+float64x1_t test_vmulx_laneq_f64_1(float64x1_t a, float64x2_t b) {
+ return vmulx_laneq_f64(a, b, 1);
+ // CHECK: fmulx {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+}
+
+
+// CHECK: test_vfmas_lane_f32
+float32_t test_vfmas_lane_f32(float32_t a, float32_t b, float32x2_t c) {
+ return vfmas_lane_f32(a, b, c, 1);
+ // CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+}
+
+// CHECK: test_vfmad_lane_f64
+float64_t test_vfmad_lane_f64(float64_t a, float64_t b, float64x1_t c) {
+ return vfmad_lane_f64(a, b, c, 0);
+ // CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+}
+
+// CHECK: test_vfmad_laneq_f64
+float64_t test_vfmad_laneq_f64(float64_t a, float64_t b, float64x2_t c) {
+ return vfmad_laneq_f64(a, b, c, 1);
+ // CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+}
+
+// CHECK: test_vfmss_lane_f32
+float32_t test_vfmss_lane_f32(float32_t a, float32_t b, float32x2_t c) {
+ return vfmss_lane_f32(a, b, c, 1);
+ // CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+}
+
+// CHECK: test_vfma_lane_f64
+float64x1_t test_vfma_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
+ return vfma_lane_f64(a, b, v, 0);
+ // CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+}
+
+// CHECK: test_vfms_lane_f64
+float64x1_t test_vfms_lane_f64(float64x1_t a, float64x1_t b, float64x1_t v) {
+ return vfms_lane_f64(a, b, v, 0);
+ // CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+}
+
+// CHECK: test_vfma_laneq_f64
+float64x1_t test_vfma_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
+ return vfma_laneq_f64(a, b, v, 0);
+ // CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+}
+
+// CHECK: test_vfms_laneq_f64
+float64x1_t test_vfms_laneq_f64(float64x1_t a, float64x1_t b, float64x2_t v) {
+ return vfms_laneq_f64(a, b, v, 0);
+ // CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[0]
+}
+
Modified: cfe/trunk/utils/TableGen/NeonEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/utils/TableGen/NeonEmitter.cpp?rev=194889&r1=194888&r2=194889&view=diff
==============================================================================
--- cfe/trunk/utils/TableGen/NeonEmitter.cpp (original)
+++ cfe/trunk/utils/TableGen/NeonEmitter.cpp Fri Nov 15 17:33:31 2013
@@ -134,7 +134,13 @@ enum OpKind {
OpMovlHi,
OpCopyLane,
OpCopyQLane,
- OpCopyLaneQ
+ OpCopyLaneQ,
+ OpScalarMulLane,
+ OpScalarMulLaneQ,
+ OpScalarMulXLane,
+ OpScalarMulXLaneQ,
+ OpScalarVMulXLane,
+ OpScalarVMulXLaneQ
};
enum ClassKind {
@@ -295,6 +301,12 @@ public:
OpMap["OP_COPY_LN"] = OpCopyLane;
OpMap["OP_COPYQ_LN"] = OpCopyQLane;
OpMap["OP_COPY_LNQ"] = OpCopyLaneQ;
+ OpMap["OP_SCALAR_MUL_LN"]= OpScalarMulLane;
+ OpMap["OP_SCALAR_MUL_LNQ"]= OpScalarMulLaneQ;
+ OpMap["OP_SCALAR_MULX_LN"]= OpScalarMulXLane;
+ OpMap["OP_SCALAR_MULX_LNQ"]= OpScalarMulXLaneQ;
+ OpMap["OP_SCALAR_VMULX_LN"]= OpScalarVMulXLane;
+ OpMap["OP_SCALAR_VMULX_LNQ"]= OpScalarVMulXLaneQ;
Record *SI = R.getClass("SInst");
Record *II = R.getClass("IInst");
@@ -2004,6 +2016,77 @@ static std::string GenOpString(const std
"(__c1, __d1); \\\n vset_lane_" + typeCode + "(__c2, __a1, __b1);";
break;
}
+ case OpScalarMulLane: {
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
+ "(__b, __c);\\\n __a * __d1;";
+ break;
+ }
+ case OpScalarMulLaneQ: {
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += TypeString('s', typestr) + " __d1 = vgetq_lane_" + typeCode +
+ "(__b, __c);\\\n __a * __d1;";
+ break;
+ }
+ case OpScalarMulXLane: {
+ bool dummy = false;
+ char type = ClassifyType(typestr, dummy, dummy, dummy);
+ if (type == 'f') type = 's';
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += TypeString('s', typestr) + " __d1 = vget_lane_" + typeCode +
+ "(__b, __c);\\\n vmulx" + type + "_" +
+ typeCode + "(__a, __d1);";
+ break;
+ }
+ case OpScalarMulXLaneQ: {
+ bool dummy = false;
+ char type = ClassifyType(typestr, dummy, dummy, dummy);
+ if (type == 'f') type = 's';
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += TypeString('s', typestr) + " __d1 = vgetq_lane_" +
+ typeCode + "(__b, __c);\\\n vmulx" + type +
+ "_" + typeCode + "(__a, __d1);";
+ break;
+ }
+
+ case OpScalarVMulXLane: {
+ bool dummy = false;
+ char type = ClassifyType(typestr, dummy, dummy, dummy);
+ if (type == 'f') type = 's';
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += TypeString('s', typestr) + " __d1 = vget_lane_" +
+ typeCode + "(__a, 0);\\\n" +
+ " " + TypeString('s', typestr) + " __e1 = vget_lane_" +
+ typeCode + "(__b, __c);\\\n" +
+ " " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
+ typeCode + "(__d1, __e1);\\\n" +
+ " " + TypeString('d', typestr) + " __g1;\\\n" +
+ " vset_lane_" + typeCode + "(__f1, __g1, __c);";
+ break;
+ }
+
+ case OpScalarVMulXLaneQ: {
+ bool dummy = false;
+ char type = ClassifyType(typestr, dummy, dummy, dummy);
+ if (type == 'f') type = 's';
+ std::string typeCode = "";
+ InstructionTypeCode(typestr, ClassS, quad, typeCode);
+ s += TypeString('s', typestr) + " __d1 = vget_lane_" +
+ typeCode + "(__a, 0);\\\n" +
+ " " + TypeString('s', typestr) + " __e1 = vgetq_lane_" +
+ typeCode + "(__b, __c);\\\n" +
+ " " + TypeString('s', typestr) + " __f1 = vmulx" + type + "_" +
+ typeCode + "(__d1, __e1);\\\n" +
+ " " + TypeString('d', typestr) + " __g1;\\\n" +
+ " vset_lane_" + typeCode + "(__f1, __g1, 0);";
+ break;
+ }
+
default:
PrintFatalError("unknown OpKind!");
}
@@ -2972,8 +3055,8 @@ static std::string GenTest(const std::st
StringRef outTypeStr, StringRef inTypeStr,
bool isShift, bool isHiddenLOp,
ClassKind ck, const std::string &InstName,
- bool isA64,
- std::string & testFuncProto) {
+ bool isA64,
+ std::string & testFuncProto) {
assert(!proto.empty() && "");
std::string s;
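For the v1f64 case, the OpScalarVMulXLane emitter code above produces
(approximately; reconstructed from GenOpString, not the verbatim header
output) a macro of this shape:

  #define vmulx_lane_f64(__a, __b, __c) __extension__ ({ \
    float64_t __d1 = vget_lane_f64(__a, 0);              \
    float64_t __e1 = vget_lane_f64(__b, __c);            \
    float64_t __f1 = vmulxd_f64(__d1, __e1);             \
    float64x1_t __g1;                                    \
    vset_lane_f64(__f1, __g1, __c); })

Both one-element vectors are unpacked to scalars, multiplied with
vmulxd_f64, and the result is packed back into a float64x1_t.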