r328038 - [AArch64] Add vmulxh_lane fp16 vector intrinsic
Abderrazek Zaafrani via cfe-commits
cfe-commits at lists.llvm.org
Tue Mar 20 13:37:31 PDT 2018
Author: az
Date: Tue Mar 20 13:37:31 2018
New Revision: 328038
URL: http://llvm.org/viewvc/llvm-project?rev=328038&view=rev
Log:
[AArch64] Add vmulxh_lane fp16 vector intrinsic
https://reviews.llvm.org/D44591
Modified:
cfe/trunk/include/clang/Basic/arm_neon.td
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
Modified: cfe/trunk/include/clang/Basic/arm_neon.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=328038&r1=328037&r2=328038&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/arm_neon.td (original)
+++ cfe/trunk/include/clang/Basic/arm_neon.td Tue Mar 20 13:37:31 2018
@@ -1499,11 +1499,9 @@ let ArchGuard = "defined(__ARM_FEATURE_F
def VMULX_LANEH : IOpInst<"vmulx_lane", "ddgi", "hQh", OP_MULX_LN>;
def VMULX_LANEQH : IOpInst<"vmulx_laneq", "ddji", "hQh", OP_MULX_LN>;
def VMULX_NH : IOpInst<"vmulx_n", "dds", "hQh", OP_MULX_N>;
- // TODO: Scalar floating point multiply extended (scalar, by element)
- // Below ones are commented out because they need vmulx_f16(float16_t, float16_t)
- // which will be implemented later with fp16 scalar intrinsic (arm_fp16.h)
- //def SCALAR_FMULX_LANEH : IOpInst<"vmulx_lane", "ssdi", "Sh", OP_SCALAR_MUL_LN>;
- //def SCALAR_FMULX_LANEQH : IOpInst<"vmulx_laneq", "ssji", "Sh", OP_SCALAR_MUL_LN>;
+ // Scalar floating point mulx (scalar, by element)
+ def SCALAR_FMULX_LANEH : IInst<"vmulx_lane", "ssdi", "Sh">;
+ def SCALAR_FMULX_LANEQH : IInst<"vmulx_laneq", "ssji", "Sh">;
// ARMv8.2-A FP16 reduction vector intrinsics.
def VMAXVH : SInst<"vmaxv", "sd", "hQh">;
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=328038&r1=328037&r2=328038&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Tue Mar 20 13:37:31 2018
@@ -7238,6 +7238,16 @@ Value *CodeGenFunction::EmitAArch64Built
Int = Intrinsic::aarch64_neon_fmulx;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
}
+ case NEON::BI__builtin_neon_vmulxh_lane_f16:
+ case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
+ // vmulx_lane should be mapped to Neon scalar mulx after
+ // extracting the scalar element
+ Ops.push_back(EmitScalarExpr(E->getArg(2)));
+ Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
+ Ops.pop_back();
+ Int = Intrinsic::aarch64_neon_fmulx;
+ return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
+ }
case NEON::BI__builtin_neon_vmul_lane_v:
case NEON::BI__builtin_neon_vmul_laneq_v: {
// v1f64 vmul_lane should be mapped to Neon scalar mul lane
Modified: cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c?rev=328038&r1=328037&r2=328038&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c (original)
+++ cfe/trunk/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c Tue Mar 20 13:37:31 2018
@@ -1223,27 +1223,25 @@ float16x8_t test_vmulxq_n_f16(float16x8_
return vmulxq_n_f16(a, b);
}
-/* TODO: Not implemented yet (needs scalar intrinsic from arm_fp16.h)
-// CCHECK-LABEL: test_vmulxh_lane_f16
-// CCHECK: [[CONV0:%.*]] = fpext half %a to float
-// CCHECK: [[CONV1:%.*]] = fpext half %{{.*}} to float
-// CCHECK: [[MUL:%.*]] = fmul float [[CONV0:%.*]], [[CONV0:%.*]]
-// CCHECK: [[CONV3:%.*]] = fptrunc float %mul to half
-// CCHECK: ret half [[CONV3:%.*]]
+// CHECK-LABEL: test_vmulxh_lane_f16
+// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %b to <8 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half>
+// CHECK: [[EXTR:%.*]] = extractelement <4 x half> [[TMP1]], i32 3
+// CHECK: [[MULX:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a, half [[EXTR]]
+// CHECK: ret half [[MULX]]
float16_t test_vmulxh_lane_f16(float16_t a, float16x4_t b) {
return vmulxh_lane_f16(a, b, 3);
}
-// CCHECK-LABEL: test_vmulxh_laneq_f16
-// CCHECK: [[CONV0:%.*]] = fpext half %a to float
-// CCHECK: [[CONV1:%.*]] = fpext half %{{.*}} to float
-// CCHECK: [[MUL:%.*]] = fmul float [[CONV0:%.*]], [[CONV0:%.*]]
-// CCHECK: [[CONV3:%.*]] = fptrunc float %mul to half
-// CCHECK: ret half [[CONV3:%.*]]
+// CHECK-LABEL: test_vmulxh_laneq_f16
+// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %b to <16 x i8>
+// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half>
+// CHECK: [[EXTR:%.*]] = extractelement <8 x half> [[TMP1]], i32 7
+// CHECK: [[MULX:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a, half [[EXTR]])
+// CHECK: ret half [[MULX]]
float16_t test_vmulxh_laneq_f16(float16_t a, float16x8_t b) {
return vmulxh_laneq_f16(a, b, 7);
}
-*/
// CHECK-LABEL: test_vmaxv_f16
// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
More information about the cfe-commits
mailing list