[clang] c8b3196 - [ARM,MVE] Add intrinsics for FP rounding operations.

Tue Feb 18 01:36:39 PST 2020

Author: Simon Tatham
Date: 2020-02-18T09:34:50Z
New Revision: c8b3196e54308b0113d2a0888d13ccc92e3b7ccc

URL: https://github.com/llvm/llvm-project/commit/c8b3196e54308b0113d2a0888d13ccc92e3b7ccc
DIFF: https://github.com/llvm/llvm-project/commit/c8b3196e54308b0113d2a0888d13ccc92e3b7ccc.diff

LOG: [ARM,MVE] Add intrinsics for FP rounding operations.

Summary:
This adds the unpredicated forms of six different MVE intrinsics which
all round a vector of floating-point numbers to integer values,
leaving them still in FP format, differing only in rounding mode and
exception settings.

Five of them map to existing target-independent intrinsics in LLVM IR,
such as @llvm.trunc and @llvm.rint. The sixth, mapping to the `vrintn`
instruction, is done by inventing a target-specific intrinsic.

(`vrintn` behaves the same as `vrintx` in terms of the output value:
the side effects on the FPSCR flags are the only difference between
the two. But ACLE specifies separate user-callable intrinsics for the
two, so the side effects matter enough to make sure we generate the
right one of the two instructions in each case.)

Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard

Reviewed By: miyuki

Subscribers: kristof.beyls, hiraditya, cfe-commits, llvm-commits

Tags: #clang, #llvm

Differential Revision: https://reviews.llvm.org/D74333

Added: 
    clang/test/CodeGen/arm-mve-intrinsics/vrnd.c
    llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll

Modified: 
    clang/include/clang/Basic/arm_mve.td
    llvm/include/llvm/IR/IntrinsicsARM.td
    llvm/lib/Target/ARM/ARMInstrMVE.td

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
index 5b20f23c75c7..a2bf7afad41e 100644

--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -417,6 +417,21 @@ defm : float_int_conversions<f16, u16, fptoui, uitofp>;
 defm : float_int_conversions<f32, s32, fptosi, sitofp>;
 defm : float_int_conversions<f16, s16, fptosi, sitofp>;
 
+let params = T.Float in {
+  def vrndq: Intrinsic<Vector, (args Vector:$a),
+      (IRIntBase<"trunc", [Vector]> $a)>;
+  def vrndmq: Intrinsic<Vector, (args Vector:$a),
+      (IRIntBase<"floor", [Vector]> $a)>;
+  def vrndpq: Intrinsic<Vector, (args Vector:$a),
+      (IRIntBase<"ceil", [Vector]> $a)>;
+  def vrndaq: Intrinsic<Vector, (args Vector:$a),
+      (IRIntBase<"round", [Vector]> $a)>;
+  def vrndxq: Intrinsic<Vector, (args Vector:$a),
+      (IRIntBase<"rint", [Vector]> $a)>;
+  def vrndnq: Intrinsic<Vector, (args Vector:$a),
+      (IRInt<"vrintn", [Vector]> $a)>;
+}
+
 multiclass compare_with_pred<string condname, dag arguments,
                              dag cmp, string suffix> {
   // Make the predicated and unpredicated versions of a single comparison.

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c
new file mode 100644
index 000000000000..a324c36ed838
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c
@@ -0,0 +1,173 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg -sroa -early-cse | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vrndaq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.round.v8f16(<8 x half> [[A:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vrndaq_f16(float16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vrndaq(a);
+#else /* POLYMORPHIC */
+    return vrndaq_f16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndaq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.round.v4f32(<4 x float> [[A:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vrndaq_f32(float32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vrndaq(a);
+#else /* POLYMORPHIC */
+    return vrndaq_f32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndmq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.floor.v8f16(<8 x half> [[A:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vrndmq_f16(float16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vrndmq(a);
+#else /* POLYMORPHIC */
+    return vrndmq_f16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndmq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[A:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vrndmq_f32(float32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vrndmq(a);
+#else /* POLYMORPHIC */
+    return vrndmq_f32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndpq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.ceil.v8f16(<8 x half> [[A:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vrndpq_f16(float16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vrndpq(a);
+#else /* POLYMORPHIC */
+    return vrndpq_f16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndpq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[A:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vrndpq_f32(float32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vrndpq(a);
+#else /* POLYMORPHIC */
+    return vrndpq_f32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.trunc.v8f16(<8 x half> [[A:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vrndq_f16(float16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vrndq(a);
+#else /* POLYMORPHIC */
+    return vrndq_f16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[A:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vrndq_f32(float32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vrndq(a);
+#else /* POLYMORPHIC */
+    return vrndq_f32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndxq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.rint.v8f16(<8 x half> [[A:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vrndxq_f16(float16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vrndxq(a);
+#else /* POLYMORPHIC */
+    return vrndxq_f16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndxq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[A:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vrndxq_f32(float32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vrndxq(a);
+#else /* POLYMORPHIC */
+    return vrndxq_f32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndnq_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.v8f16(<8 x half> [[A:%.*]])
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vrndnq_f16(float16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vrndnq(a);
+#else /* POLYMORPHIC */
+    return vrndnq_f16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrndnq_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.v4f32(<4 x float> [[A:%.*]])
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vrndnq_f32(float32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vrndnq(a);
+#else /* POLYMORPHIC */
+    return vrndnq_f32(a);
+#endif /* POLYMORPHIC */
+}

diff  --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
index 9c9339cb9f88..80ab3a73d391 100644
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -1158,4 +1158,8 @@ defm int_arm_mve_vcvt_fix: MVEMXPredicated<
   [llvm_anyvector_ty /* output */], [llvm_i32_ty],
   [llvm_anyvector_ty /* input vector */, llvm_i32_ty /* scale */],
   LLVMMatchType<0>, llvm_anyvector_ty>;
+
+def int_arm_mve_vrintn: Intrinsic<
+  [llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+
 } // end TargetPrefix

diff  --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index 5a2bb9c89c28..b0ec20494b8a 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -3179,6 +3179,10 @@ let Predicates = [HasMVEFloat] in {
             (v4f32 (MVE_VRINTf32P (v4f32 MQPR:$val1)))>;
   def : Pat<(v8f16 (fceil (v8f16 MQPR:$val1))),
             (v8f16 (MVE_VRINTf16P (v8f16 MQPR:$val1)))>;
+  def : Pat<(v4f32 (int_arm_mve_vrintn (v4f32 MQPR:$val1))),
+            (v4f32 (MVE_VRINTf32N (v4f32 MQPR:$val1)))>;
+  def : Pat<(v8f16 (int_arm_mve_vrintn (v8f16 MQPR:$val1))),
+            (v8f16 (MVE_VRINTf16N (v8f16 MQPR:$val1)))>;
 }
 
 class MVEFloatArithNeon<string iname, string suffix, bit size,

diff  --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll
new file mode 100644
index 000000000000..a70975e1e731
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrintn.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <8 x half> @test_vrndnq_f16(<8 x half> %a) {
+; CHECK-LABEL: test_vrndnq_f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vrintn.f16 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <8 x half> @llvm.arm.mve.vrintn.v8f16(<8 x half> %a)
+  ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <4 x float> @test_vrndnq_f32(<4 x float> %a) {
+; CHECK-LABEL: test_vrndnq_f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vrintn.f32 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <4 x float> @llvm.arm.mve.vrintn.v4f32(<4 x float> %a)
+  ret <4 x float> %0
+}
+
+declare <8 x half> @llvm.arm.mve.vrintn.v8f16(<8 x half>)
+declare <4 x float> @llvm.arm.mve.vrintn.v4f32(<4 x float>)