[clang] dbeb3d0 - Add missing vrnd intrinsics
Victor Campos via cfe-commits
cfe-commits at lists.llvm.org
Mon Sep 11 05:00:00 PDT 2023
Author: Max Iyengar
Date: 2023-09-11T12:59:18+01:00
New Revision: dbeb3d029d8e3120668288a284d0babeb81545fd
URL: https://github.com/llvm/llvm-project/commit/dbeb3d029d8e3120668288a284d0babeb81545fd
DIFF: https://github.com/llvm/llvm-project/commit/dbeb3d029d8e3120668288a284d0babeb81545fd.diff
LOG: Add missing vrnd intrinsics
This patch adds 8 missing intrinsics as specified in the Arm ACLE document, section 2.12.1.1: https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#rounding-3
The intrinsics implemented are:
- vrnd32z_f64
- vrnd32zq_f64
- vrnd64z_f64
- vrnd64zq_f64
- vrnd32x_f64
- vrnd32xq_f64
- vrnd64x_f64
- vrnd64xq_f64
Reviewed By: dmgreen
Differential Revision: https://reviews.llvm.org/D158626
Added:
Modified:
clang/include/clang/Basic/arm_neon.td
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index ba3764d2f778e86..9cb7e0981384b0b 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -1232,6 +1232,11 @@ def FRINT32X_S32 : SInst<"vrnd32x", "..", "fQf">;
def FRINT32Z_S32 : SInst<"vrnd32z", "..", "fQf">;
def FRINT64X_S32 : SInst<"vrnd64x", "..", "fQf">;
def FRINT64Z_S32 : SInst<"vrnd64z", "..", "fQf">;
+
+def FRINT32X_S64 : SInst<"vrnd32x", "..", "dQd">;
+def FRINT32Z_S64 : SInst<"vrnd32z", "..", "dQd">;
+def FRINT64X_S64 : SInst<"vrnd64x", "..", "dQd">;
+def FRINT64Z_S64 : SInst<"vrnd64z", "..", "dQd">;
}
////////////////////////////////////////////////////////////////////////////////
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index df0c4cc6354d0f2..27e4eb630356412 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6430,13 +6430,21 @@ static const ARMVectorIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
NEONMAP1(vrnd32x_f32, aarch64_neon_frint32x, Add1ArgType),
+ NEONMAP1(vrnd32x_f64, aarch64_neon_frint32x, Add1ArgType),
NEONMAP1(vrnd32xq_f32, aarch64_neon_frint32x, Add1ArgType),
+ NEONMAP1(vrnd32xq_f64, aarch64_neon_frint32x, Add1ArgType),
NEONMAP1(vrnd32z_f32, aarch64_neon_frint32z, Add1ArgType),
+ NEONMAP1(vrnd32z_f64, aarch64_neon_frint32z, Add1ArgType),
NEONMAP1(vrnd32zq_f32, aarch64_neon_frint32z, Add1ArgType),
+ NEONMAP1(vrnd32zq_f64, aarch64_neon_frint32z, Add1ArgType),
NEONMAP1(vrnd64x_f32, aarch64_neon_frint64x, Add1ArgType),
+ NEONMAP1(vrnd64x_f64, aarch64_neon_frint64x, Add1ArgType),
NEONMAP1(vrnd64xq_f32, aarch64_neon_frint64x, Add1ArgType),
+ NEONMAP1(vrnd64xq_f64, aarch64_neon_frint64x, Add1ArgType),
NEONMAP1(vrnd64z_f32, aarch64_neon_frint64z, Add1ArgType),
+ NEONMAP1(vrnd64z_f64, aarch64_neon_frint64z, Add1ArgType),
NEONMAP1(vrnd64zq_f32, aarch64_neon_frint64z, Add1ArgType),
+ NEONMAP1(vrnd64zq_f64, aarch64_neon_frint64z, Add1ArgType),
NEONMAP0(vrndi_v),
NEONMAP0(vrndiq_v),
NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
@@ -11798,25 +11806,33 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
}
case NEON::BI__builtin_neon_vrnd32x_f32:
- case NEON::BI__builtin_neon_vrnd32xq_f32: {
+ case NEON::BI__builtin_neon_vrnd32xq_f32:
+ case NEON::BI__builtin_neon_vrnd32x_f64:
+ case NEON::BI__builtin_neon_vrnd32xq_f64: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Int = Intrinsic::aarch64_neon_frint32x;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32x");
}
case NEON::BI__builtin_neon_vrnd32z_f32:
- case NEON::BI__builtin_neon_vrnd32zq_f32: {
+ case NEON::BI__builtin_neon_vrnd32zq_f32:
+ case NEON::BI__builtin_neon_vrnd32z_f64:
+ case NEON::BI__builtin_neon_vrnd32zq_f64: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Int = Intrinsic::aarch64_neon_frint32z;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd32z");
}
case NEON::BI__builtin_neon_vrnd64x_f32:
- case NEON::BI__builtin_neon_vrnd64xq_f32: {
+ case NEON::BI__builtin_neon_vrnd64xq_f32:
+ case NEON::BI__builtin_neon_vrnd64x_f64:
+ case NEON::BI__builtin_neon_vrnd64xq_f64: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Int = Intrinsic::aarch64_neon_frint64x;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64x");
}
case NEON::BI__builtin_neon_vrnd64z_f32:
- case NEON::BI__builtin_neon_vrnd64zq_f32: {
+ case NEON::BI__builtin_neon_vrnd64zq_f32:
+ case NEON::BI__builtin_neon_vrnd64z_f64:
+ case NEON::BI__builtin_neon_vrnd64zq_f64: {
Ops.push_back(EmitScalarExpr(E->getArg(0)));
Int = Intrinsic::aarch64_neon_frint64z;
return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnd64z");
diff --git a/clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c b/clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c
index 537815c8b26ef4f..b6362d4bc21f502 100644
--- a/clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c
+++ b/clang/test/CodeGen/aarch64-v8.5a-neon-frint3264-intrinsic.c
@@ -62,3 +62,59 @@ float32x2_t test_vrnd64z_f32(float32x2_t a) {
float32x4_t test_vrnd64zq_f32(float32x4_t a) {
return vrnd64zq_f32(a);
}
+
+// CHECK-LABEL: test_vrnd32x_f64
+// CHECK: [[RND:%.*]] = call <1 x double> @llvm.aarch64.neon.frint32x.v1f64(<1 x double> %a)
+// CHECK: ret <1 x double> [[RND]]
+float64x1_t test_vrnd32x_f64(float64x1_t a) {
+ return vrnd32x_f64(a);
+}
+
+// CHECK-LABEL: test_vrnd32xq_f64
+// CHECK: [[RND:%.*]] = call <2 x double> @llvm.aarch64.neon.frint32x.v2f64(<2 x double> %a)
+// CHECK: ret <2 x double> [[RND]]
+float64x2_t test_vrnd32xq_f64(float64x2_t a) {
+ return vrnd32xq_f64(a);
+}
+
+// CHECK-LABEL: test_vrnd32z_f64
+// CHECK: [[RND:%.*]] = call <1 x double> @llvm.aarch64.neon.frint32z.v1f64(<1 x double> %a)
+// CHECK: ret <1 x double> [[RND]]
+float64x1_t test_vrnd32z_f64(float64x1_t a) {
+ return vrnd32z_f64(a);
+}
+
+// CHECK-LABEL: test_vrnd32zq_f64
+// CHECK: [[RND:%.*]] = call <2 x double> @llvm.aarch64.neon.frint32z.v2f64(<2 x double> %a)
+// CHECK: ret <2 x double> [[RND]]
+float64x2_t test_vrnd32zq_f64(float64x2_t a) {
+ return vrnd32zq_f64(a);
+}
+
+// CHECK-LABEL: test_vrnd64x_f64
+// CHECK: [[RND:%.*]] = call <1 x double> @llvm.aarch64.neon.frint64x.v1f64(<1 x double> %a)
+// CHECK: ret <1 x double> [[RND]]
+float64x1_t test_vrnd64x_f64(float64x1_t a) {
+ return vrnd64x_f64(a);
+}
+
+// CHECK-LABEL: test_vrnd64xq_f64
+// CHECK: [[RND:%.*]] = call <2 x double> @llvm.aarch64.neon.frint64x.v2f64(<2 x double> %a)
+// CHECK: ret <2 x double> [[RND]]
+float64x2_t test_vrnd64xq_f64(float64x2_t a) {
+ return vrnd64xq_f64(a);
+}
+
+// CHECK-LABEL: test_vrnd64z_f64
+// CHECK: [[RND:%.*]] = call <1 x double> @llvm.aarch64.neon.frint64z.v1f64(<1 x double> %a)
+// CHECK: ret <1 x double> [[RND]]
+float64x1_t test_vrnd64z_f64(float64x1_t a) {
+ return vrnd64z_f64(a);
+}
+
+// CHECK-LABEL: test_vrnd64zq_f64
+// CHECK: [[RND:%.*]] = call <2 x double> @llvm.aarch64.neon.frint64z.v2f64(<2 x double> %a)
+// CHECK: ret <2 x double> [[RND]]
+float64x2_t test_vrnd64zq_f64(float64x2_t a) {
+ return vrnd64zq_f64(a);
+}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 4a1f46f2576aeca..82b79cd7232cc90 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4447,6 +4447,16 @@ let Predicates = [HasFRInt3264] in {
defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>;
} // HasFRInt3264
+// Pattern to convert 1x64 vector intrinsics to equivalent scalar instructions
+def : Pat<(v1f64 (int_aarch64_neon_frint32z (v1f64 FPR64:$Rn))),
+ (FRINT32ZDr FPR64:$Rn)>;
+def : Pat<(v1f64 (int_aarch64_neon_frint64z (v1f64 FPR64:$Rn))),
+ (FRINT64ZDr FPR64:$Rn)>;
+def : Pat<(v1f64 (int_aarch64_neon_frint32x (v1f64 FPR64:$Rn))),
+ (FRINT32XDr FPR64:$Rn)>;
+def : Pat<(v1f64 (int_aarch64_neon_frint64x (v1f64 FPR64:$Rn))),
+ (FRINT64XDr FPR64:$Rn)>;
+
// Emitting strict_lrint as two instructions is valid as any exceptions that
// occur will happen in exactly one of the instructions (e.g. if the input is
// not an integer the inexact exception will happen in the FRINTX but not then
diff --git a/llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll b/llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll
index 3a4dd734fb5bd4a..1979d97952cb8f8 100644
--- a/llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll
+++ b/llvm/test/CodeGen/AArch64/v8.5a-neon-frint3264-intrinsic.ll
@@ -81,3 +81,85 @@ entry:
%val = tail call <4 x float> @llvm.aarch64.neon.frint64z.v4f32(<4 x float> %a)
ret <4 x float> %val
}
+
+declare <1 x double> @llvm.aarch64.neon.frint32x.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint32x.v2f64(<2 x double>)
+declare <1 x double> @llvm.aarch64.neon.frint32z.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint32z.v2f64(<2 x double>)
+
+define dso_local <1 x double> @t_vrnd32x_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd32x_f64:
+; CHECK: frint32x d0, d0
+; CHECK-NEXT: ret
+entry:
+ %val = tail call <1 x double> @llvm.aarch64.neon.frint32x.v1f64(<1 x double> %a)
+ ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd32xq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd32xq_f64:
+; CHECK: frint32x v0.2d, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %val = tail call <2 x double> @llvm.aarch64.neon.frint32x.v2f64(<2 x double> %a)
+ ret <2 x double> %val
+}
+
+define dso_local <1 x double> @t_vrnd32z_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd32z_f64:
+; CHECK: frint32z d0, d0
+; CHECK-NEXT: ret
+entry:
+ %val = tail call <1 x double> @llvm.aarch64.neon.frint32z.v1f64(<1 x double> %a)
+ ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd32zq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd32zq_f64:
+; CHECK: frint32z v0.2d, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %val = tail call <2 x double> @llvm.aarch64.neon.frint32z.v2f64(<2 x double> %a)
+ ret <2 x double> %val
+}
+
+declare <1 x double> @llvm.aarch64.neon.frint64x.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint64x.v2f64(<2 x double>)
+declare <1 x double> @llvm.aarch64.neon.frint64z.v1f64(<1 x double>)
+declare <2 x double> @llvm.aarch64.neon.frint64z.v2f64(<2 x double>)
+
+define dso_local <1 x double> @t_vrnd64x_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd64x_f64:
+; CHECK: frint64x d0, d0
+; CHECK-NEXT: ret
+entry:
+ %val = tail call <1 x double> @llvm.aarch64.neon.frint64x.v1f64(<1 x double> %a)
+ ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd64xq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd64xq_f64:
+; CHECK: frint64x v0.2d, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %val = tail call <2 x double> @llvm.aarch64.neon.frint64x.v2f64(<2 x double> %a)
+ ret <2 x double> %val
+}
+
+define dso_local <1 x double> @t_vrnd64z_f64(<1 x double> %a) {
+; CHECK-LABEL: t_vrnd64z_f64:
+; CHECK: frint64z d0, d0
+; CHECK-NEXT: ret
+entry:
+ %val = tail call <1 x double> @llvm.aarch64.neon.frint64z.v1f64(<1 x double> %a)
+ ret <1 x double> %val
+}
+
+define dso_local <2 x double> @t_vrnd64zq_f64(<2 x double> %a) {
+; CHECK-LABEL: t_vrnd64zq_f64:
+; CHECK: frint64z v0.2d, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %val = tail call <2 x double> @llvm.aarch64.neon.frint64z.v2f64(<2 x double> %a)
+ ret <2 x double> %val
+}
More information about the cfe-commits mailing list