[llvm] r339227 - [ARM] FP16: support vector INT_TO_FP and FP_TO_INT

Wed Aug 8 02:45:34 PDT 2018

Author: sjoerdmeijer
Date: Wed Aug  8 02:45:34 2018
New Revision: 339227

URL: http://llvm.org/viewvc/llvm-project?rev=339227&view=rev
Log:
[ARM] FP16: support vector INT_TO_FP and FP_TO_INT

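This adds codegen support for the different vcvt_f16 variants: signed and
unsigned conversions between 16-bit integer vectors and half-precision
vectors, in both directions, for the 4-lane and 8-lane forms. The
corresponding tests, previously commented out under PR38404, are enabled.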

Differential Revision: https://reviews.llvm.org/D50393

Modified:
    llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
    llvm/trunk/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=339227&r1=339226&r2=339227&view=diff
==============================================================================

--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Wed Aug  8 02:45:34 2018
@@ -651,9 +651,13 @@ ARMTargetLowering::ARMTargetLowering(con
     // it have a FP_TO_[SU]INT instruction with a narrower destination than
     // source.
     setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
+    setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
     setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+    setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
     setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
+    setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Custom);
     setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
+    setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Custom);
 
     setOperationAction(ISD::FP_ROUND,   MVT::v2f32, Expand);
     setOperationAction(ISD::FP_EXTEND,  MVT::v2f64, Expand);
@@ -4844,12 +4848,24 @@ static SDValue LowerVectorFP_TO_INT(SDVa
     return DAG.UnrollVectorOp(Op.getNode());
   }
 
-  assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
-         "Invalid type for custom lowering!");
-  if (VT != MVT::v4i16)
+  const bool HasFullFP16 =
+    static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
+
+  EVT NewTy;
+  const EVT OpTy = Op.getOperand(0).getValueType();
+  if (OpTy == MVT::v4f32)
+    NewTy = MVT::v4i32;
+  else if (OpTy == MVT::v4f16 && HasFullFP16)
+    NewTy = MVT::v4i16;
+  else if (OpTy == MVT::v8f16 && HasFullFP16)
+    NewTy = MVT::v8i16;
+  else
+    llvm_unreachable("Invalid type for custom lowering!");
+
+  if (VT != MVT::v4i16 && VT != MVT::v8i16)
     return DAG.UnrollVectorOp(Op.getNode());
 
-  Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
+  Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
   return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
 }
 
@@ -4882,9 +4898,21 @@ static SDValue LowerVectorINT_TO_FP(SDVa
     return DAG.UnrollVectorOp(Op.getNode());
   }
 
-  assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
+  assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
+          Op.getOperand(0).getValueType() == MVT::v8i16) &&
          "Invalid type for custom lowering!");
-  if (VT != MVT::v4f32)
+
+  const bool HasFullFP16 =
+    static_cast<const ARMSubtarget&>(DAG.getSubtarget()).hasFullFP16();
+
+  EVT DestVecType;
+  if (VT == MVT::v4f32)
+    DestVecType = MVT::v4i32;
+  else if (VT == MVT::v4f16 && HasFullFP16)
+    DestVecType = MVT::v4i16;
+  else if (VT == MVT::v8f16 && HasFullFP16)
+    DestVecType = MVT::v8i16;
+  else
     return DAG.UnrollVectorOp(Op.getNode());
 
   unsigned CastOpc;
@@ -4901,7 +4929,7 @@ static SDValue LowerVectorINT_TO_FP(SDVa
     break;
   }
 
-  Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
+  Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
   return DAG.getNode(Opc, dl, VT, Op);
 }
 

Modified: llvm/trunk/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll?rev=339227&r1=339226&r2=339227&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll Wed Aug  8 02:45:34 2018
@@ -121,55 +121,77 @@ entry:
   ret <8 x i16> %vcltz.i
 }
 
-; FIXME (PR38404)
-;
-;define dso_local <4 x half> @test_vcvt_f16_s16(<4 x i16> %a) {
-;entry:
-;  %vcvt.i = sitofp <4 x i16> %a to <4 x half>
-;  ret <4 x half> %vcvt.i
-;}
-;
-;define dso_local <8 x half> @test_vcvtq_f16_s16(<8 x i16> %a) {
-;entry:
-;  %vcvt.i = sitofp <8 x i16> %a to <8 x half>
-;  ret <8 x half> %vcvt.i
-;}
-
-;define dso_local <4 x half> @test_vcvt_f16_u16(<4 x i16> %a) {
-;entry:
-;  %vcvt.i = uitofp <4 x i16> %a to <4 x half>
-;  ret <4 x half> %vcvt.i
-;}
-
-;define dso_local <8 x half> @test_vcvtq_f16_u16(<8 x i16> %a) {
-;entry:
-;  %vcvt.i = uitofp <8 x i16> %a to <8 x half>
-;  ret <8 x half> %vcvt.i
-;}
-
-;define dso_local <4 x i16> @test_vcvt_s16_f16(<4 x half> %a) {
-;entry:
-;  %vcvt.i = fptosi <4 x half> %a to <4 x i16>
-;  ret <4 x i16> %vcvt.i
-;}
-
-;define dso_local <8 x i16> @test_vcvtq_s16_f16(<8 x half> %a) {
-;entry:
-;  %vcvt.i = fptosi <8 x half> %a to <8 x i16>
-;  ret <8 x i16> %vcvt.i
-;}
-
-;define dso_local <4 x i16> @test_vcvt_u16_f16(<4 x half> %a) {
-;entry:
-;  %vcvt.i = fptoui <4 x half> %a to <4 x i16>
-;  ret <4 x i16> %vcvt.i
-;}
-
-;define dso_local <8 x i16> @test_vcvtq_u16_f16(<8 x half> %a) {
-;entry:
-;  %vcvt.i = fptoui <8 x half> %a to <8 x i16>
-;  ret <8 x i16> %vcvt.i
-;}
+define dso_local <4 x half> @test_vcvt_f16_s16(<4 x i16> %a) {
+; CHECK-LABEL: test_vcvt_f16_s16:
+; CHECK:         vcvt.f16.s16 d0, d0
+; CHECK-NEXT:    bx lr
+entry:
+  %vcvt.i = sitofp <4 x i16> %a to <4 x half>
+  ret <4 x half> %vcvt.i
+}
+
+define dso_local <8 x half> @test_vcvtq_f16_s16(<8 x i16> %a) {
+; CHECK-LABEL: test_vcvtq_f16_s16:
+; CHECK:         vcvt.f16.s16 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %vcvt.i = sitofp <8 x i16> %a to <8 x half>
+  ret <8 x half> %vcvt.i
+}
+
+define dso_local <4 x half> @test_vcvt_f16_u16(<4 x i16> %a) {
+; CHECK-LABEL: test_vcvt_f16_u16:
+; CHECK:         vcvt.f16.u16 d0, d0
+; CHECK-NEXT:    bx lr
+entry:
+  %vcvt.i = uitofp <4 x i16> %a to <4 x half>
+  ret <4 x half> %vcvt.i
+}
+
+define dso_local <8 x half> @test_vcvtq_f16_u16(<8 x i16> %a) {
+; CHECK-LABEL: test_vcvtq_f16_u16:
+; CHECK:         vcvt.f16.u16 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %vcvt.i = uitofp <8 x i16> %a to <8 x half>
+  ret <8 x half> %vcvt.i
+}
+
+define dso_local <4 x i16> @test_vcvt_s16_f16(<4 x half> %a) {
+; CHECK-LABEL: test_vcvt_s16_f16:
+; CHECK:         vcvt.s16.f16 d0, d0
+; CHECK-NEXT:    bx lr
+entry:
+  %vcvt.i = fptosi <4 x half> %a to <4 x i16>
+  ret <4 x i16> %vcvt.i
+}
+
+define dso_local <8 x i16> @test_vcvtq_s16_f16(<8 x half> %a) {
+; CHECK-LABEL: test_vcvtq_s16_f16:
+; CHECK:         vcvt.s16.f16 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %vcvt.i = fptosi <8 x half> %a to <8 x i16>
+  ret <8 x i16> %vcvt.i
+}
+
+define dso_local <4 x i16> @test_vcvt_u16_f16(<4 x half> %a) {
+; CHECK-LABEL: test_vcvt_u16_f16:
+; CHECK:         vcvt.u16.f16 d0, d0
+; CHECK-NEXT:    bx lr
+entry:
+  %vcvt.i = fptoui <4 x half> %a to <4 x i16>
+  ret <4 x i16> %vcvt.i
+}
+
+define dso_local <8 x i16> @test_vcvtq_u16_f16(<8 x half> %a) {
+; CHECK-LABEL: test_vcvtq_u16_f16:
+; CHECK:         vcvt.u16.f16 q0, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %vcvt.i = fptoui <8 x half> %a to <8 x i16>
+  ret <8 x i16> %vcvt.i
+}
 
 define dso_local <4 x i16> @test_vcvta_s16_f16(<4 x half> %a) {
 ; CHECK-LABEL: test_vcvta_s16_f16: