[llvm] r185158 - ARM: ensure fixed-point conversions have sane types

Fri Jun 28 08:29:25 PDT 2013

Author: tnorthover
Date: Fri Jun 28 10:29:25 2013
New Revision: 185158

URL: http://llvm.org/viewvc/llvm-project?rev=185158&view=rev
Log:
ARM: ensure fixed-point conversions have sane types

We were generating intrinsics for NEON fixed-point conversions that didn't
exist (e.g. float -> i16). There are two cases to consider:
  + iN is smaller than float. In this case we can do the conversion but need an
    extend or truncate as well.
  + iN is larger than float. In this case using the NEON conversion would be
    incorrect so we don't perform any combining.

Modified:
    llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
    llvm/trunk/test/CodeGen/ARM/vcvt.ll
    llvm/trunk/test/CodeGen/ARM/vdiv_combine.ll

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=185158&r1=185157&r2=185158&view=diff
==============================================================================

--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Fri Jun 28 10:29:25 2013
@@ -9141,12 +9141,27 @@ static SDValue PerformVCVTCombine(SDNode
       !isConstVecPow2(ConstVec, isSigned, C))
     return SDValue();
 
+  MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
+  MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
+  if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) {
+    // These instructions only exist converting from f32 to i32. We can handle
+    // smaller integers by generating an extra truncate, but larger ones would
+    // be lossy.
+    return SDValue();
+  }
+
   unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
     Intrinsic::arm_neon_vcvtfp2fxu;
-  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
-                     N->getValueType(0),
-                     DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
-                     DAG.getConstant(Log2_64(C), MVT::i32));
+  unsigned NumLanes = Op.getValueType().getVectorNumElements();
+  SDValue FixConv =  DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
+                                 NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
+                                 DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
+                                 DAG.getConstant(Log2_64(C), MVT::i32));
+
+  if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
+    FixConv = DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), FixConv);
+
+  return FixConv;
 }
 
 /// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
@@ -9177,12 +9192,28 @@ static SDValue PerformVDIVCombine(SDNode
       !isConstVecPow2(ConstVec, isSigned, C))
     return SDValue();
 
+  MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
+  MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
+  if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) {
+    // These instructions only exist converting from i32 to f32. We can handle
+    // smaller integers by generating an extra extend, but larger ones would
+    // be lossy.
+    return SDValue();
+  }
+
+  SDValue ConvInput = Op.getOperand(0);
+  unsigned NumLanes = Op.getValueType().getVectorNumElements();
+  if (IntTy.getSizeInBits() < FloatTy.getSizeInBits())
+    ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
+                            SDLoc(N), NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
+                            ConvInput);
+
   unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
     Intrinsic::arm_neon_vcvtfxu2fp;
   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
                      Op.getValueType(),
                      DAG.getConstant(IntrinsicOpcode, MVT::i32),
-                     Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32));
+                     ConvInput, DAG.getConstant(Log2_64(C), MVT::i32));
 }
 
 /// Getvshiftimm - Check if this is a valid build_vector for the immediate

Modified: llvm/trunk/test/CodeGen/ARM/vcvt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vcvt.ll?rev=185158&r1=185157&r2=185158&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vcvt.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vcvt.ll Fri Jun 28 10:29:25 2013
@@ -156,3 +156,44 @@ define <4 x i16> @vcvt_f32tof16(<4 x flo
 
 declare <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16>) nounwind readnone
 declare <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float>) nounwind readnone
+
+
+define <4 x i16> @fix_float_to_i16(<4 x float> %in) {
+; CHECK: fix_float_to_i16:
+; CHECK: vcvt.u32.f32 [[TMP:q[0-9]+]], {{q[0-9]+}}, #1
+; CHECK: vmovn.i32 {{d[0-9]+}}, [[TMP]]
+
+  %scale = fmul <4 x float> %in, <float 2.0, float 2.0, float 2.0, float 2.0>
+  %conv = fptoui <4 x float> %scale to <4 x i16>
+  ret <4 x i16> %conv
+}
+
+define <2 x i64> @fix_float_to_i64(<2 x float> %in) {
+; CHECK: fix_float_to_i64:
+; CHECK: bl
+; CHECK: bl
+
+  %scale = fmul <2 x float> %in, <float 2.0, float 2.0>
+  %conv = fptoui <2 x float> %scale to <2 x i64>
+  ret <2 x i64> %conv
+}
+
+define <4 x i16> @fix_double_to_i16(<4 x double> %in) {
+; CHECK: fix_double_to_i16:
+; CHECK: vcvt.s32.f64
+; CHECK: vcvt.s32.f64
+
+  %scale = fmul <4 x double> %in, <double 2.0, double 2.0, double 2.0, double 2.0>
+  %conv = fptoui <4 x double> %scale to <4 x i16>
+  ret <4 x i16> %conv
+}
+
+define <2 x i64> @fix_double_to_i64(<2 x double> %in) {
+; CHECK: fix_double_to_i64:
+; CHECK: bl
+; CHECK: bl
+  %scale = fmul <2 x double> %in, <double 2.0, double 2.0>
+  %conv = fptoui <2 x double> %scale to <2 x i64>
+  ret <2 x i64> %conv
+}
+

Modified: llvm/trunk/test/CodeGen/ARM/vdiv_combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/vdiv_combine.ll?rev=185158&r1=185157&r2=185158&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/vdiv_combine.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/vdiv_combine.ll Fri Jun 28 10:29:25 2013
@@ -95,3 +95,44 @@ entry:
 }
 
 declare void @foo_float32x4_t(<4 x float>)
+
+define <4 x float> @fix_unsigned_i16_to_float(<4 x i16> %in) {
+; CHECK: fix_unsigned_i16_to_float:
+; CHECK: vmovl.u16 [[TMP:q[0-9]+]], {{d[0-9]+}}
+; CHECK: vcvt.f32.u32 {{q[0-9]+}}, [[TMP]], #1
+
+    %conv = uitofp <4 x i16> %in to <4 x float>
+    %shift = fdiv <4 x float> %conv, <float 2.0, float 2.0, float 2.0, float 2.0>
+    ret <4 x float> %shift
+}
+
+define <4 x float> @fix_signed_i16_to_float(<4 x i16> %in) {
+; CHECK: fix_signed_i16_to_float:
+; CHECK: vmovl.s16 [[TMP:q[0-9]+]], {{d[0-9]+}}
+; CHECK: vcvt.f32.s32 {{q[0-9]+}}, [[TMP]], #1
+
+    %conv = sitofp <4 x i16> %in to <4 x float>
+    %shift = fdiv <4 x float> %conv, <float 2.0, float 2.0, float 2.0, float 2.0>
+    ret <4 x float> %shift
+}
+
+define <2 x float> @fix_i64_to_float(<2 x i64> %in) {
+; CHECK: fix_i64_to_float:
+; CHECK: bl
+; CHECK: bl
+
+    %conv = uitofp <2 x i64> %in to <2 x float>
+    %shift = fdiv <2 x float> %conv, <float 2.0, float 2.0>
+    ret <2 x float> %shift
+}
+
+define <2 x double> @fix_i64_to_double(<2 x i64> %in) {
+; CHECK: fix_i64_to_double:
+; CHECK: bl
+; CHECK: bl
+
+    %conv = uitofp <2 x i64> %in to <2 x double>
+    %shift = fdiv <2 x double> %conv, <double 2.0, double 2.0>
+    ret <2 x double> %shift
+}
+