[llvm] r199462 - [AArch64 NEON] Custom-lower conversions between vector integer and vector floating-point types when the element bit-widths don't match.

Thu Jan 16 21:52:36 PST 2014

Author: kevinqin
Date: Thu Jan 16 23:52:35 2014
New Revision: 199462

URL: http://llvm.org/viewvc/llvm-project?rev=199462&view=rev
Log:
[AArch64 NEON] Custom-lower conversions between vector integer and vector floating-point types when the element bit-widths don't match.

Modified:
    llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/trunk/test/CodeGen/AArch64/neon-misc.ll

Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=199462&r1=199461&r2=199462&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Thu Jan 16 23:52:35 2014
@@ -375,6 +375,34 @@ AArch64TargetLowering::AArch64TargetLowe
     setOperationAction(ISD::FROUND, MVT::v1f64, Legal);
     setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
 
+    setOperationAction(ISD::SINT_TO_FP, MVT::v1i8, Custom);
+    setOperationAction(ISD::SINT_TO_FP, MVT::v1i16, Custom);
+    setOperationAction(ISD::SINT_TO_FP, MVT::v1i32, Custom);
+    setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
+    setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
+    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
+
+    setOperationAction(ISD::UINT_TO_FP, MVT::v1i8, Custom);
+    setOperationAction(ISD::UINT_TO_FP, MVT::v1i16, Custom);
+    setOperationAction(ISD::UINT_TO_FP, MVT::v1i32, Custom);
+    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
+    setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
+    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
+
+    setOperationAction(ISD::FP_TO_SINT, MVT::v1i8, Custom);
+    setOperationAction(ISD::FP_TO_SINT, MVT::v1i16, Custom);
+    setOperationAction(ISD::FP_TO_SINT, MVT::v1i32, Custom);
+    setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
+    setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
+    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Custom);
+
+    setOperationAction(ISD::FP_TO_UINT, MVT::v1i8, Custom);
+    setOperationAction(ISD::FP_TO_UINT, MVT::v1i16, Custom);
+    setOperationAction(ISD::FP_TO_UINT, MVT::v1i32, Custom);
+    setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
+    setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
+    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Custom);
+
     // Vector ExtLoad and TruncStore are expanded.
     for (unsigned I = MVT::FIRST_VECTOR_VALUETYPE;
          I <= MVT::LAST_VECTOR_VALUETYPE; ++I) {
@@ -2119,9 +2147,42 @@ AArch64TargetLowering::LowerFP_EXTEND(SD
   return LowerF128ToCall(Op, DAG, LC);
 }
 
+static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG,
+                                    bool IsSigned) {
+  SDLoc dl(Op);
+  EVT VT = Op.getValueType();
+  SDValue Vec = Op.getOperand(0);
+  EVT OpVT = Vec.getValueType();
+  unsigned Opc = IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
+
+  if (VT.getVectorNumElements() == 1) {
+    assert(OpVT == MVT::v1f64 && "Unexpected vector type!");
+    if (VT.getSizeInBits() == OpVT.getSizeInBits())
+      return Op;
+    return DAG.UnrollVectorOp(Op.getNode());
+  }
+
+  if (VT.getSizeInBits() > OpVT.getSizeInBits()) {
+    assert(Vec.getValueType() == MVT::v2f32 && VT == MVT::v2i64 &&
+           "Unexpected vector type!");
+    Vec = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, Vec);
+    return DAG.getNode(Opc, dl, VT, Vec);
+  } else if (VT.getSizeInBits() < OpVT.getSizeInBits()) {
+    EVT CastVT = EVT::getIntegerVT(*DAG.getContext(),
+                                   OpVT.getVectorElementType().getSizeInBits());
+    CastVT =
+        EVT::getVectorVT(*DAG.getContext(), CastVT, VT.getVectorNumElements());
+    Vec = DAG.getNode(Opc, dl, CastVT, Vec);
+    return DAG.getNode(ISD::TRUNCATE, dl, VT, Vec);
+  }
+  return DAG.getNode(Opc, dl, VT, Vec);
+}
+
 SDValue
 AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                       bool IsSigned) const {
+  if (Op.getValueType().isVector())
+    return LowerVectorFP_TO_INT(Op, DAG, IsSigned);
   if (Op.getOperand(0).getValueType() != MVT::f128) {
     // It's legal except when f128 is involved
     return Op;
@@ -2467,9 +2528,42 @@ AArch64TargetLowering::LowerGlobalTLSAdd
   return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
 }
 
+static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG,
+                                    bool IsSigned) {
+  SDLoc dl(Op);
+  EVT VT = Op.getValueType();
+  SDValue Vec = Op.getOperand(0);
+  unsigned Opc = IsSigned ? ISD::SINT_TO_FP : ISD::UINT_TO_FP;
+
+  if (VT.getVectorNumElements() == 1) {
+    assert(VT == MVT::v1f64 && "Unexpected vector type!");
+    if (VT.getSizeInBits() == Vec.getValueSizeInBits())
+      return Op;
+    return DAG.UnrollVectorOp(Op.getNode());
+  }
+
+  if (VT.getSizeInBits() < Vec.getValueSizeInBits()) {
+    assert(Vec.getValueType() == MVT::v2i64 && VT == MVT::v2f32 &&
+           "Unexpected vector type!");
+    Vec = DAG.getNode(Opc, dl, MVT::v2f64, Vec);
+    return DAG.getNode(ISD::FP_ROUND, dl, VT, Vec, DAG.getIntPtrConstant(0));
+  } else if (VT.getSizeInBits() > Vec.getValueSizeInBits()) {
+    unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+    EVT CastVT = EVT::getIntegerVT(*DAG.getContext(),
+                                   VT.getVectorElementType().getSizeInBits());
+    CastVT =
+        EVT::getVectorVT(*DAG.getContext(), CastVT, VT.getVectorNumElements());
+    Vec = DAG.getNode(CastOpc, dl, CastVT, Vec);
+  }
+
+  return DAG.getNode(Opc, dl, VT, Vec);
+}
+
 SDValue
 AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
                                       bool IsSigned) const {
+  if (Op.getValueType().isVector())
+    return LowerVectorINT_TO_FP(Op, DAG, IsSigned);
   if (Op.getValueType() != MVT::f128) {
     // Legal for everything except f128.
     return Op;

Modified: llvm/trunk/test/CodeGen/AArch64/neon-misc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/neon-misc.ll?rev=199462&r1=199461&r2=199462&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/neon-misc.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/neon-misc.ll Thu Jan 16 23:52:35 2014
@@ -1080,6 +1080,90 @@ define <2 x i64> @test_vcvtq_u64_f64(<2
   ret <2 x i64> %vcvt.i
 }
 
+define <2 x i64> @test_vcvt_s64_f32(<2 x float> %a) #0 {
+; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s
+; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vcvt.i = fptosi <2 x float> %a to <2 x i64>
+  ret <2 x i64> %vcvt.i
+}
+
+define <2 x i64> @test_vcvt_u64_f32(<2 x float> %a) #0 {
+; CHECK: fcvtl v{{[0-9]+}}.2d, v{{[0-9]+}}.2s
+; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vcvt.i = fptoui <2 x float> %a to <2 x i64>
+  ret <2 x i64> %vcvt.i
+}
+
+define <4 x i16> @test_vcvt_s16_f32(<4 x float> %a) #0 {
+; CHECK: fcvtzs v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
+  %vcvt.i = fptosi <4 x float> %a to <4 x i16>
+  ret <4 x i16> %vcvt.i
+}
+
+define <4 x i16> @test_vcvt_u16_f32(<4 x float> %a) #0 {
+; CHECK: fcvtzu v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+; CHECK: xtn v{{[0-9]+}}.4h, v{{[0-9]+}}.4s
+  %vcvt.i = fptoui <4 x float> %a to <4 x i16>
+  ret <4 x i16> %vcvt.i
+}
+
+define <2 x i32> @test_vcvt_s32_f64(<2 x double> %a) #0 {
+; CHECK: fcvtzs v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
+  %vcvt.i = fptosi <2 x double> %a to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
+
+define <2 x i32> @test_vcvt_u32_f64(<2 x double> %a) #0 {
+; CHECK: fcvtzu v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+; CHECK: xtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
+  %vcvt.i = fptoui <2 x double> %a to <2 x i32>
+  ret <2 x i32> %vcvt.i
+}
+
+define <1 x i8> @test_vcvt_s8_f64(<1 x double> %a) #0 {
+; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: ins v{{[0-9]+}}.b[0], w{{[0-9]+}}
+  %vcvt.i = fptosi <1 x double> %a to <1 x i8>
+  ret <1 x i8> %vcvt.i
+}
+
+define <1 x i8> @test_vcvt_u8_f64(<1 x double> %a) #0 {
+; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: ins v{{[0-9]+}}.b[0], w{{[0-9]+}}
+  %vcvt.i = fptoui <1 x double> %a to <1 x i8>
+  ret <1 x i8> %vcvt.i
+}
+
+define <1 x i16> @test_vcvt_s16_f64(<1 x double> %a) #0 {
+; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: ins v{{[0-9]+}}.h[0], w{{[0-9]+}}
+  %vcvt.i = fptosi <1 x double> %a to <1 x i16>
+  ret <1 x i16> %vcvt.i
+}
+
+define <1 x i16> @test_vcvt_u16_f64(<1 x double> %a) #0 {
+; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: ins v{{[0-9]+}}.h[0], w{{[0-9]+}}
+  %vcvt.i = fptoui <1 x double> %a to <1 x i16>
+  ret <1 x i16> %vcvt.i
+}
+
+define <1 x i32> @test_vcvt_s32_f64_v1(<1 x double> %a) #0 {
+; CHECK: fcvtzs w{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: fmov s{{[0-9]+}}, w{{[0-9]+}}
+  %vcvt.i = fptosi <1 x double> %a to <1 x i32>
+  ret <1 x i32> %vcvt.i
+}
+
+define <1 x i32> @test_vcvt_u32_f64_v1(<1 x double> %a) #0 {
+; CHECK: fcvtzu w{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: fmov s{{[0-9]+}}, w{{[0-9]+}}
+  %vcvt.i = fptoui <1 x double> %a to <1 x i32>
+  ret <1 x i32> %vcvt.i
+}
+
 define <2 x i32> @test_vcvtn_s32_f32(<2 x float> %a) {
 ; CHECK-LABEL: test_vcvtn_s32_f32
 ; CHECK: fcvtns v{{[0-9]+}}.2s, v{{[0-9]+}}.2s
@@ -1350,6 +1434,94 @@ define <2 x double> @test_vcvtq_f64_u64(
   ret <2 x double> %vcvt.i
 }
 
+define <2 x float> @test_vcvt_f32_s64(<2 x i64> %a) #0 {
+; CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+; CHECK: fcvtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
+  %vcvt.i = sitofp <2 x i64> %a to <2 x float>
+  ret <2 x float> %vcvt.i
+}
+
+define <2 x float> @test_vcvt_f32_u64(<2 x i64> %a) #0 {
+; CHECK: ucvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+; CHECK: fcvtn v{{[0-9]+}}.2s, v{{[0-9]+}}.2d
+  %vcvt.i = uitofp <2 x i64> %a to <2 x float>
+  ret <2 x float> %vcvt.i
+}
+
+define <4 x float> @test_vcvt_f32_s16(<4 x i16> %a) #0 {
+; CHECK: sshll v{{[0-9]+}}.4s, v{{[0-9]+}}.4h, #0
+; CHECK: scvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vcvt.i = sitofp <4 x i16> %a to <4 x float>
+  ret <4 x float> %vcvt.i
+}
+
+define <4 x float> @test_vcvt_f32_u16(<4 x i16> %a) #0 {
+; CHECK: ushll v{{[0-9]+}}.4s, v{{[0-9]+}}.4h, #0
+; CHECK: ucvtf v{{[0-9]+}}.4s, v{{[0-9]+}}.4s
+  %vcvt.i = uitofp <4 x i16> %a to <4 x float>
+  ret <4 x float> %vcvt.i
+}
+
+define <2 x double> @test_vcvt_f64_s32(<2 x i32> %a) #0 {
+; CHECK: sshll v{{[0-9]+}}.2d, v{{[0-9]+}}.2s, #0
+; CHECK: scvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vcvt.i = sitofp <2 x i32> %a to <2 x double>
+  ret <2 x double> %vcvt.i
+}
+
+define <2 x double> @test_vcvt_f64_u32(<2 x i32> %a) #0 {
+; CHECK: ushll v{{[0-9]+}}.2d, v{{[0-9]+}}.2s, #0
+; CHECK: ucvtf v{{[0-9]+}}.2d, v{{[0-9]+}}.2d
+  %vcvt.i = uitofp <2 x i32> %a to <2 x double>
+  ret <2 x double> %vcvt.i
+}
+
+define <1 x double> @test_vcvt_f64_s8(<1 x i8> %a) #0 {
+; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.b[0]
+; CHECK: sxtb w{{[0-9]+}}, w{{[0-9]+}}
+; CHECK: scvtf d{{[0-9]+}}, w{{[0-9]+}}
+  %vcvt.i = sitofp <1 x i8> %a to <1 x double>
+  ret <1 x double> %vcvt.i
+}
+
+define <1 x double> @test_vcvt_f64_u8(<1 x i8> %a) #0 {
+; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.b[0]
+; CHECK: and w{{[0-9]+}}, w{{[0-9]+}}, #0xff
+; CHECK: ucvtf d{{[0-9]+}}, w{{[0-9]+}}
+  %vcvt.i = uitofp <1 x i8> %a to <1 x double>
+  ret <1 x double> %vcvt.i
+}
+
+define <1 x double> @test_vcvt_f64_s16(<1 x i16> %a) #0 {
+; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.h[0]
+; CHECK: sxth w{{[0-9]+}}, w{{[0-9]+}}
+; CHECK: scvtf d{{[0-9]+}}, w{{[0-9]+}}
+  %vcvt.i = sitofp <1 x i16> %a to <1 x double>
+  ret <1 x double> %vcvt.i
+}
+
+define <1 x double> @test_vcvt_f64_u16(<1 x i16> %a) #0 {
+; CHECK: umov w{{[0-9]+}}, v{{[0-9]+}}.h[0]
+; CHECK: and w{{[0-9]+}}, w{{[0-9]+}}, #0xffff
+; CHECK: ucvtf d{{[0-9]+}}, w{{[0-9]+}}
+  %vcvt.i = uitofp <1 x i16> %a to <1 x double>
+  ret <1 x double> %vcvt.i
+}
+
+define <1 x double> @test_vcvt_f64_s32_v1(<1 x i32> %a) #0 {
+; CHECK: fmov w{{[0-9]+}}, s{{[0-9]+}}
+; CHECK: scvtf d{{[0-9]+}}, w{{[0-9]+}}
+  %vcvt.i = sitofp <1 x i32> %a to <1 x double>
+  ret <1 x double> %vcvt.i
+}
+
+define <1 x double> @test_vcvt_f64_u32_v1(<1 x i32> %a) #0 {
+; CHECK: fmov w{{[0-9]+}}, s{{[0-9]+}}
+; CHECK: ucvtf d{{[0-9]+}}, w{{[0-9]+}}
+  %vcvt.i = uitofp <1 x i32> %a to <1 x double>
+  ret <1 x double> %vcvt.i
+}
+
 declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #2
 
 declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #2