r217242 - [ARMv8] Add support for 32-bit MIN/MAXNM and directed rounding.

Fri Sep 5 06:50:34 PDT 2014

Author: jamesm
Date: Fri Sep  5 08:50:34 2014
New Revision: 217242

URL: http://llvm.org/viewvc/llvm-project?rev=217242&view=rev
Log:
[ARMv8] Add support for 32-bit MIN/MAXNM and directed rounding.

This patch adds support for the 32bit numeric max/min and directed round-to-integral NEON intrinsics that were added as part of v8, along with unit tests.

Patch by Graham Hunter!


Added:
    cfe/trunk/test/CodeGen/arm-neon-directed-rounding.c
    cfe/trunk/test/CodeGen/arm-neon-numeric-maxmin.c
Modified:
    cfe/trunk/include/clang/Basic/arm_neon.td
    cfe/trunk/lib/CodeGen/CGBuiltin.cpp

Modified: cfe/trunk/include/clang/Basic/arm_neon.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/arm_neon.td?rev=217242&r1=217241&r2=217242&view=diff
==============================================================================

--- cfe/trunk/include/clang/Basic/arm_neon.td (original)
+++ cfe/trunk/include/clang/Basic/arm_neon.td Fri Sep  5 08:50:34 2014
@@ -944,13 +944,6 @@ def VCVT_F64 : SInst<"vcvt_f64", "Fd",
 def VCVT_HIGH_F64_F32 : SOpInst<"vcvt_high_f64", "wj", "f", OP_VCVT_EX_HI_F64>;
 def VCVTX_F32_F64 : SInst<"vcvtx_f32", "fj",  "d">;
 def VCVTX_HIGH_F32_F64 : SOpInst<"vcvtx_high_f32", "qfj", "d", OP_VCVTX_HI>;
-def FRINTN : SInst<"vrndn", "dd", "fdQfQd">;
-def FRINTA : SInst<"vrnda", "dd", "fdQfQd">;
-def FRINTP : SInst<"vrndp", "dd", "fdQfQd">;
-def FRINTM : SInst<"vrndm", "dd", "fdQfQd">;
-def FRINTX : SInst<"vrndx", "dd", "fdQfQd">;
-def FRINTZ : SInst<"vrnd", "dd", "fdQfQd">;
-def FRINTI : SInst<"vrndi", "dd", "fdQfQd">;
 def VCVT_S64 : SInst<"vcvt_s64", "xd",  "dQd">;
 def VCVT_U64 : SInst<"vcvt_u64", "ud",  "dQd">;
 def FRECPE  : SInst<"vrecpe", "dd", "dQd">;
@@ -983,11 +976,6 @@ def MAX : SInst<"vmax", "ddd", "dQd">;
 def MIN : SInst<"vmin", "ddd", "dQd">;
 
 ////////////////////////////////////////////////////////////////////////////////
-// MaxNum/MinNum Floating Point
-def FMAXNM : SInst<"vmaxnm", "ddd", "fdQfQd">;
-def FMINNM : SInst<"vminnm", "ddd", "fdQfQd">;
-
-////////////////////////////////////////////////////////////////////////////////
 // Pairwise Max/Min
 def MAXP : SInst<"vpmax", "ddd", "QcQsQiQUcQUsQUiQfQd">;
 def MINP : SInst<"vpmin", "ddd", "QcQsQiQUcQUsQUiQfQd">;
@@ -1223,6 +1211,41 @@ def FCVTAU_S64 : SInst<"vcvta_u64", "ud"
 }
 
 ////////////////////////////////////////////////////////////////////////////////
+// Round to Integral
+
+let ArchGuard = "__ARM_ARCH >= 8" in {
+def FRINTN_S32 : SInst<"vrndn", "dd", "fQf">;
+def FRINTA_S32 : SInst<"vrnda", "dd", "fQf">;
+def FRINTP_S32 : SInst<"vrndp", "dd", "fQf">;
+def FRINTM_S32 : SInst<"vrndm", "dd", "fQf">;
+def FRINTX_S32 : SInst<"vrndx", "dd", "fQf">;
+def FRINTZ_S32 : SInst<"vrnd", "dd", "fQf">;
+}
+
+let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__)" in {
+def FRINTN_S64 : SInst<"vrndn", "dd", "dQd">;
+def FRINTA_S64 : SInst<"vrnda", "dd", "dQd">;
+def FRINTP_S64 : SInst<"vrndp", "dd", "dQd">;
+def FRINTM_S64 : SInst<"vrndm", "dd", "dQd">;
+def FRINTX_S64 : SInst<"vrndx", "dd", "dQd">;
+def FRINTZ_S64 : SInst<"vrnd", "dd", "dQd">;
+def FRINTI_S64 : SInst<"vrndi", "dd", "fdQfQd">;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// MaxNum/MinNum Floating Point
+
+let ArchGuard = "__ARM_ARCH >= 8" in {
+def FMAXNM_S32 : SInst<"vmaxnm", "ddd", "fQf">;
+def FMINNM_S32 : SInst<"vminnm", "ddd", "fQf">;
+}
+
+let ArchGuard = "__ARM_ARCH >= 8 && defined(__aarch64__)" in {
+def FMAXNM_S64 : SInst<"vmaxnm", "ddd", "dQd">;
+def FMINNM_S64 : SInst<"vminnm", "ddd", "dQd">;
+}
+
+////////////////////////////////////////////////////////////////////////////////
 // Permutation
 def VTRN1 : SOpInst<"vtrn1", "ddd",
                     "csiUcUsUifPcPsQcQsQiQlQUcQUsQUiQUlQfQdQPcQPsQPl", OP_TRN1>;

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=217242&r1=217241&r2=217242&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Sep  5 08:50:34 2014
@@ -1995,8 +1995,12 @@ static NeonIntrinsicInfo ARMSIMDIntrinsi
   NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
   NEONMAP1(vld4q_v, arm_neon_vld4, 0),
   NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
+  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
+  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
   NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
   NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
+  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
+  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
   NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
   NEONMAP0(vmovl_v),
   NEONMAP0(vmovn_v),
@@ -2043,6 +2047,18 @@ static NeonIntrinsicInfo ARMSIMDIntrinsi
   NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
   NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
   NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
+  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
+  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
+  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
+  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
+  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
+  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
+  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
+  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
+  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
+  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
+  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
+  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
   NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
   NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
   NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),

Added: cfe/trunk/test/CodeGen/arm-neon-directed-rounding.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-neon-directed-rounding.c?rev=217242&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/arm-neon-directed-rounding.c (added)
+++ cfe/trunk/test/CodeGen/arm-neon-directed-rounding.c Fri Sep  5 08:50:34 2014
@@ -0,0 +1,75 @@
+// RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu cortex-a57 -ffreestanding -O1 -emit-llvm %s -o - | FileCheck %s
+
+#include <arm_neon.h>
+
+float32x2_t test_vrnda_f32(float32x2_t a) {
+  // CHECK-LABEL: test_vrnda_f32
+  // CHECK: call <2 x float> @llvm.arm.neon.vrinta.v2f32(<2 x float> %a)
+  return vrnda_f32(a);
+}
+
+float32x4_t test_vrndaq_f32(float32x4_t a) {
+  // CHECK-LABEL: test_vrndaq_f32
+  // CHECK: call <4 x float> @llvm.arm.neon.vrinta.v4f32(<4 x float> %a)
+  return vrndaq_f32(a);
+}
+
+float32x2_t test_vrndm_f32(float32x2_t a) {
+  // CHECK-LABEL: test_vrndm_f32
+  // CHECK: call <2 x float> @llvm.arm.neon.vrintm.v2f32(<2 x float> %a)
+  return vrndm_f32(a);
+}
+
+float32x4_t test_vrndmq_f32(float32x4_t a) {
+  // CHECK-LABEL: test_vrndmq_f32
+  // CHECK: call <4 x float> @llvm.arm.neon.vrintm.v4f32(<4 x float> %a)
+  return vrndmq_f32(a);
+}
+
+float32x2_t test_vrndn_f32(float32x2_t a) {
+  // CHECK-LABEL: test_vrndn_f32
+  // CHECK: call <2 x float> @llvm.arm.neon.vrintn.v2f32(<2 x float> %a)
+  return vrndn_f32(a);
+}
+
+float32x4_t test_vrndnq_f32(float32x4_t a) {
+  // CHECK-LABEL: test_vrndnq_f32
+  // CHECK: call <4 x float> @llvm.arm.neon.vrintn.v4f32(<4 x float> %a)
+  return vrndnq_f32(a);
+}
+
+float32x2_t test_vrndp_f32(float32x2_t a) {
+  // CHECK-LABEL: test_vrndp_f32
+  // CHECK: call <2 x float> @llvm.arm.neon.vrintp.v2f32(<2 x float> %a)
+  return vrndp_f32(a);
+}
+
+float32x4_t test_vrndpq_f32(float32x4_t a) {
+  // CHECK-LABEL: test_vrndpq_f32
+  // CHECK: call <4 x float> @llvm.arm.neon.vrintp.v4f32(<4 x float> %a)
+  return vrndpq_f32(a);
+}
+
+float32x2_t test_vrndx_f32(float32x2_t a) {
+  // CHECK-LABEL: test_vrndx_f32
+  // CHECK: call <2 x float> @llvm.arm.neon.vrintx.v2f32(<2 x float> %a)
+  return vrndx_f32(a);
+}
+
+float32x4_t test_vrndxq_f32(float32x4_t a) {
+  // CHECK-LABEL: test_vrndxq_f32
+  // CHECK: call <4 x float> @llvm.arm.neon.vrintx.v4f32(<4 x float> %a)
+  return vrndxq_f32(a);
+}
+
+float32x2_t test_vrnd_f32(float32x2_t a) {
+  // CHECK-LABEL: test_vrnd_f32
+  // CHECK: call <2 x float> @llvm.arm.neon.vrintz.v2f32(<2 x float> %a)
+  return vrnd_f32(a);
+}
+
+float32x4_t test_vrndq_f32(float32x4_t a) {
+  // CHECK-LABEL: test_vrndq_f32
+  // CHECK: call <4 x float> @llvm.arm.neon.vrintz.v4f32(<4 x float> %a)
+  return vrndq_f32(a);
+}

Added: cfe/trunk/test/CodeGen/arm-neon-numeric-maxmin.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-neon-numeric-maxmin.c?rev=217242&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/arm-neon-numeric-maxmin.c (added)
+++ cfe/trunk/test/CodeGen/arm-neon-numeric-maxmin.c Fri Sep  5 08:50:34 2014
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -triple thumbv8-linux-gnueabihf -target-cpu cortex-a57 -ffreestanding -O1 -emit-llvm %s -o - | FileCheck %s
+
+#include <arm_neon.h>
+
+float32x2_t test_vmaxnm_f32(float32x2_t a, float32x2_t b) {
+  // CHECK-LABEL: test_vmaxnm_f32
+  // CHECK: call <2 x float> @llvm.arm.neon.vmaxnm.v2f32(<2 x float> %a, <2 x float> %b)
+  return vmaxnm_f32(a, b);
+}
+
+float32x4_t test_vmaxnmq_f32(float32x4_t a, float32x4_t b) {
+  // CHECK-LABEL: test_vmaxnmq_f32
+  // CHECK: call <4 x float> @llvm.arm.neon.vmaxnm.v4f32(<4 x float> %a, <4 x float> %b)
+  return vmaxnmq_f32(a, b);
+}
+
+float32x2_t test_vminnm_f32(float32x2_t a, float32x2_t b) {
+  // CHECK-LABEL: test_vminnm_f32
+  // CHECK: call <2 x float> @llvm.arm.neon.vminnm.v2f32(<2 x float> %a, <2 x float> %b)
+  return vminnm_f32(a, b);
+}
+
+float32x4_t test_vminnmq_f32(float32x4_t a, float32x4_t b) {
+  // CHECK-LABEL: test_vminnmq_f32
+  // CHECK: call <4 x float> @llvm.arm.neon.vminnm.v4f32(<4 x float> %a, <4 x float> %b)
+  return vminnmq_f32(a, b);
+}