[llvm-commits] [llvm] r161894 - in /llvm/trunk: lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h lib/Target/X86/X86InstrFragmentsSIMD.td lib/Target/X86/X86InstrSSE.td test/CodeGen/X86/pr11334.ll

Tue Aug 14 14:24:47 PDT 2012

Author: hliao
Date: Tue Aug 14 16:24:47 2012
New Revision: 161894

URL: http://llvm.org/viewvc/llvm-project?rev=161894&view=rev
Log:
fix PR11334

- FP_EXTEND only supports extending between vectors with a matching number
  of elements. This results in the scalarization of the extension from
  v2f32 to v2f64, because v2f32 is legalized to v4f32, which does not match
  v2f64.
- add X86-specific VFPEXT supporting extending from v4f32 to v2f64.
- add a BUILD_VECTOR lowering helper to recover the original
  extension from v4f32 to v2f64.
- the test case is enhanced to cover different vector widths.


Added:
    llvm/trunk/test/CodeGen/X86/pr11334.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86ISelLowering.h
    llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
    llvm/trunk/lib/Target/X86/X86InstrSSE.td

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=161894&r1=161893&r2=161894&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Aug 14 16:24:47 2012
@@ -5114,6 +5114,82 @@
   return SDValue();
 }
 
+// LowerVectorFpExtend - Recognize a BUILD_VECTOR of scalarized FP_EXTENDs from
+// v2f32 to v2f64 and rebuild it as X86ISD::VFPEXT of a shuffled source vector,
+// because ISD::FP_EXTEND requires matching input/output vector elements.
+SDValue
+X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  SDNode *N = Op.getNode();
+  EVT VT = Op.getValueType();
+  unsigned NumElts = Op.getNumOperands();
+
+  // Check supported types and sub-targets; an empty SDValue means no match.
+  //
+  // Only v2f32 -> v2f64 needs special handling.
+  if (VT != MVT::v2f64 || !Subtarget->hasSSE2())
+    return SDValue();
+
+  SDValue VecIn;
+  EVT VecInVT;
+  SmallVector<int, 8> Mask;
+  EVT SrcVT = MVT::Other;
+
+  // Check that every operand matches a pattern translatable into X86vfpext.
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue In = N->getOperand(i);
+    unsigned Opcode = In.getOpcode();
+
+    // Undefined elements get an undef mask entry and do not set VecIn/VecInVT.
+    if (Opcode == ISD::UNDEF) {
+      Mask.push_back(-1);
+      continue;
+    }
+
+    // Quit if this element is not produced by an FP_EXTEND.
+    if (Opcode != ISD::FP_EXTEND)
+      return SDValue();
+
+    // Inspect how the operand of the FP_EXTEND is defined.
+    SDValue L2In = In.getOperand(0);
+    EVT L2InVT = L2In.getValueType();
+
+    // Record the source type of the first FP_EXTEND seen.
+    if (SrcVT == MVT::Other)
+      SrcVT = L2InVT;
+    else if (SrcVT != L2InVT) // Quit if the source types differ.
+      return SDValue();
+
+    // Check whether the value being extended is extracted from the same
+    // source vector as the other elements.
+    Opcode = L2In.getOpcode();
+
+    // Quit if it's not extracted with a constant index.
+    if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
+        !isa<ConstantSDNode>(L2In.getOperand(1)))
+      return SDValue();
+
+    SDValue ExtractedFromVec = L2In.getOperand(0);
+
+    if (VecIn.getNode() == 0) {
+      VecIn = ExtractedFromVec;
+      VecInVT = ExtractedFromVec.getValueType();
+    } else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec.
+      return SDValue();
+
+    Mask.push_back(cast<ConstantSDNode>(L2In.getOperand(1))->getZExtValue());
+  }
+
+  // Pad the mask with undef. NOTE(review): VecInVT is unassigned if all undef.
+  for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i)
+    Mask.push_back(-1);
+
+  return DAG.getNode(X86ISD::VFPEXT, DL, VT,
+                     DAG.getVectorShuffle(VecInVT, DL,
+                                          VecIn, DAG.getUNDEF(VecInVT),
+                                          &Mask[0]));
+}
+
 SDValue
 X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
@@ -5146,6 +5222,10 @@
   if (Broadcast.getNode())
     return Broadcast;
 
+  SDValue FpExt = LowerVectorFpExtend(Op, DAG);
+  if (FpExt.getNode())
+    return FpExt;
+
   unsigned EVTBits = ExtVT.getSizeInBits();
 
   unsigned NumZero  = 0;
@@ -11343,6 +11423,7 @@
   case X86ISD::ATOMNAND64_DAG:     return "X86ISD::ATOMNAND64_DAG";
   case X86ISD::VZEXT_MOVL:         return "X86ISD::VZEXT_MOVL";
   case X86ISD::VZEXT_LOAD:         return "X86ISD::VZEXT_LOAD";
+  case X86ISD::VFPEXT:             return "X86ISD::VFPEXT";
   case X86ISD::VSHLDQ:             return "X86ISD::VSHLDQ";
   case X86ISD::VSRLDQ:             return "X86ISD::VSRLDQ";
   case X86ISD::VSHL:               return "X86ISD::VSHL";

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=161894&r1=161893&r2=161894&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Tue Aug 14 16:24:47 2012
@@ -227,6 +227,9 @@
       // VSEXT_MOVL - Vector move low and sign extend.
       VSEXT_MOVL,
 
+      // VFPEXT - Vector FP extend.
+      VFPEXT,
+
       // VSHL, VSRL - 128-bit vector logical left / right shift
       VSHLDQ, VSRLDQ,
 
@@ -828,6 +831,8 @@
     SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const;
     SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;
 
+    SDValue LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const;
+
     virtual SDValue
       LowerFormalArguments(SDValue Chain,
                            CallingConv::ID CallConv, bool isVarArg,

Modified: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td?rev=161894&r1=161893&r2=161894&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td Tue Aug 14 16:24:47 2012
@@ -81,6 +81,11 @@
 
 def X86vzload  : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
                         [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+def X86vfpext  : SDNode<"X86ISD::VFPEXT",
+                        SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+                                             SDTCisFP<0>, SDTCisFP<1>]>>;
+
 def X86vshldq  : SDNode<"X86ISD::VSHLDQ",    SDTIntShiftOp>;
 def X86vshrdq  : SDNode<"X86ISD::VSRLDQ",    SDTIntShiftOp>;
 def X86cmpp    : SDNode<"X86ISD::CMPP",      SDTX86VFCMP>;

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=161894&r1=161893&r2=161894&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Tue Aug 14 16:24:47 2012
@@ -2101,12 +2101,20 @@
   def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
             (VCVTPD2PSYrm addr:$src)>;
 
+  def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
+            (VCVTPS2PDrr VR128:$src)>;
   def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
             (VCVTPS2PDYrr VR128:$src)>;
   def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
             (VCVTPS2PDYrm addr:$src)>;
 }
 
+let Predicates = [HasSSE2] in {
+  // Match fextend for 128 conversions
+  def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
+            (CVTPS2PDrr VR128:$src)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Compare Instructions
 //===----------------------------------------------------------------------===//

Added: llvm/trunk/test/CodeGen/X86/pr11334.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr11334.ll?rev=161894&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr11334.ll (added)
+++ llvm/trunk/test/CodeGen/X86/pr11334.ll Tue Aug 14 16:24:47 2012
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=core-avx-i | FileCheck %s --check-prefix=AVX
+
+define <2 x double> @v2f2d_ext_vec(<2 x float> %v1) nounwind {
+entry:
+; CHECK: v2f2d_ext_vec
+; CHECK: cvtps2pd
+; AVX:   v2f2d_ext_vec
+; AVX:   vcvtps2pd
+  %f1 = fpext <2 x float> %v1 to <2 x double>
+  ret <2 x double> %f1
+}
+
+define <3 x double> @v3f2d_ext_vec(<3 x float> %v1) nounwind {
+entry:
+; CHECK: v3f2d_ext_vec
+; CHECK: cvtps2pd
+; CHECK: movhlps
+; CHECK: cvtps2pd
+; AVX:   v3f2d_ext_vec
+; AVX:   vcvtps2pd
+; AVX:   ret
+  %f1 = fpext <3 x float> %v1 to <3 x double>
+  ret <3 x double> %f1
+}
+
+define <4 x double> @v4f2d_ext_vec(<4 x float> %v1) nounwind {
+entry:
+; CHECK: v4f2d_ext_vec
+; CHECK: cvtps2pd
+; CHECK: movhlps
+; CHECK: cvtps2pd
+; AVX:   v4f2d_ext_vec
+; AVX:   vcvtps2pd
+; AVX:   ret
+  %f1 = fpext <4 x float> %v1 to <4 x double>
+  ret <4 x double> %f1
+}
+
+define <8 x double> @v8f2d_ext_vec(<8 x float> %v1) nounwind {
+entry:
+; CHECK: v8f2d_ext_vec
+; CHECK: cvtps2pd
+; CHECK: cvtps2pd
+; CHECK: movhlps
+; CHECK: cvtps2pd
+; CHECK: movhlps
+; CHECK: cvtps2pd
+; AVX:   v8f2d_ext_vec
+; AVX:   vcvtps2pd
+; AVX:   vextractf128
+; AVX:   vcvtps2pd
+; AVX:   ret
+  %f1 = fpext <8 x float> %v1 to <8 x double>
+  ret <8 x double> %f1
+}