[clang] df3ed6c - [ARM, MVE] Add intrinsics for int <-> float conversion.

Tue Feb 18 01:36:37 PST 2020

Author: Simon Tatham
Date: 2020-02-18T09:34:50Z
New Revision: df3ed6c0fe31094941e4cd814cdf924b63993c4e

URL: https://github.com/llvm/llvm-project/commit/df3ed6c0fe31094941e4cd814cdf924b63993c4e
DIFF: https://github.com/llvm/llvm-project/commit/df3ed6c0fe31094941e4cd814cdf924b63993c4e.diff

LOG: [ARM,MVE] Add intrinsics for int <-> float conversion.

Summary:
This adds the unpredicated versions of the family of vcvtq intrinsics
that convert between a vector of floats and a vector of the same size
of integer. These are represented in IR using the standard fptosi,
fptoui, sitofp and uitofp operations, which existing LLVM codegen
already handles.

Reviewers: dmgreen, miyuki, MarkMurrayARM, ostannard

Reviewed By: MarkMurrayARM

Subscribers: kristof.beyls, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D74332

Added: 
    

Modified: 
    clang/include/clang/Basic/arm_mve.td
    clang/include/clang/Basic/arm_mve_defs.td
    clang/test/CodeGen/arm-mve-intrinsics/vcvt.c

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
index dfc0ee87bb2f..5b20f23c75c7 100644

--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -400,6 +400,23 @@ foreach half = [ "b", "t" ] in {
   } // params = [f32], pnt = PNT_None
 } // loop over half = "b", "t"
 
+multiclass float_int_conversions<Type FScalar, Type IScalar, IRBuilderBase ftoi, IRBuilderBase itof> {
+  defvar FVector = VecOf<FScalar>;
+  defvar IVector = VecOf<IScalar>;
+
+  let params = [IScalar], pnt = PNT_2Type in
+    def : Intrinsic<FVector, (args IVector:$a), (itof $a, FVector)>,
+          NameOverride<"vcvtq_" # FScalar>;
+  let params = [FScalar], pnt = PNT_None in
+    def : Intrinsic<IVector, (args FVector:$a), (ftoi $a, IVector)>,
+          NameOverride<"vcvtq_" # IScalar>;
+}
+
+defm : float_int_conversions<f32, u32, fptoui, uitofp>;
+defm : float_int_conversions<f16, u16, fptoui, uitofp>;
+defm : float_int_conversions<f32, s32, fptosi, sitofp>;
+defm : float_int_conversions<f16, s16, fptosi, sitofp>;
+
 multiclass compare_with_pred<string condname, dag arguments,
                              dag cmp, string suffix> {
   // Make the predicated and unpredicated versions of a single comparison.

diff  --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
index 2d080f2653aa..c2e4a4232c23 100644
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -121,6 +121,10 @@ def fcmp_le: IRBuilder<"CreateFCmpOLE">;
 def splat: CGHelperFn<"ARMMVEVectorSplat">;
 def select: IRBuilder<"CreateSelect">;
 def fneg: IRBuilder<"CreateFNeg">;
+def sitofp: IRBuilder<"CreateSIToFP">;
+def uitofp: IRBuilder<"CreateUIToFP">;
+def fptosi: IRBuilder<"CreateFPToSI">;
+def fptoui: IRBuilder<"CreateFPToUI">;
 
 // A node that makes an Address out of a pointer-typed Value, by
 // providing an alignment as the second argument.

diff  --git a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
index a1c99de62ebb..3220100d7b89 100644
--- a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c
@@ -4,6 +4,102 @@
 
 #include <arm_mve.h>
 
+// CHECK-LABEL: @test_vcvtq_f16_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = sitofp <8 x i16> [[A:%.*]] to <8 x half>
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vcvtq_f16_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vcvtq(a);
+#else /* POLYMORPHIC */
+    return vcvtq_f16_s16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_f16_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = uitofp <8 x i16> [[A:%.*]] to <8 x half>
+// CHECK-NEXT:    ret <8 x half> [[TMP0]]
+//
+float16x8_t test_vcvtq_f16_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vcvtq(a);
+#else /* POLYMORPHIC */
+    return vcvtq_f16_u16(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_f32_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = sitofp <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vcvtq_f32_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vcvtq(a);
+#else /* POLYMORPHIC */
+    return vcvtq_f32_s32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_f32_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = uitofp <4 x i32> [[A:%.*]] to <4 x float>
+// CHECK-NEXT:    ret <4 x float> [[TMP0]]
+//
+float32x4_t test_vcvtq_f32_u32(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vcvtq(a);
+#else /* POLYMORPHIC */
+    return vcvtq_f32_u32(a);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vcvtq_s16_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = fptosi <8 x half> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vcvtq_s16_f16(float16x8_t a)
+{
+    return vcvtq_s16_f16(a);
+}
+
+// CHECK-LABEL: @test_vcvtq_s32_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = fptosi <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vcvtq_s32_f32(float32x4_t a)
+{
+    return vcvtq_s32_f32(a);
+}
+
+// CHECK-LABEL: @test_vcvtq_u16_f16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = fptoui <8 x half> [[A:%.*]] to <8 x i16>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vcvtq_u16_f16(float16x8_t a)
+{
+    return vcvtq_u16_f16(a);
+}
+
+// CHECK-LABEL: @test_vcvtq_u32_f32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = fptoui <4 x float> [[A:%.*]] to <4 x i32>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vcvtq_u32_f32(float32x4_t a)
+{
+    return vcvtq_u32_f32(a);
+}
+
 // CHECK-LABEL: @test_vcvttq_f16_f32(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half> [[A:%.*]], <4 x float> [[B:%.*]], i32 1)