[llvm-commits] [llvm] r142488 - in /llvm/trunk: lib/CodeGen/SelectionDAG/LegalizeTypes.h lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp test/CodeGen/X86/2011-10-19-widen_vselect.ll

Wed Oct 19 02:45:11 PDT 2011

Author: nadav
Date: Wed Oct 19 04:45:11 2011
New Revision: 142488

URL: http://llvm.org/viewvc/llvm-project?rev=142488&view=rev
Log:
Add support for the vector-widening of vselect and vector-setcc

Added:
    llvm/trunk/test/CodeGen/X86/2011-10-19-widen_vselect.ll
Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h
    llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h?rev=142488&r1=142487&r2=142488&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h Wed Oct 19 04:45:11 2011
@@ -633,6 +633,7 @@
   SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
   SDValue WidenVecOp_STORE(SDNode* N);
+  SDValue WidenVecOp_SETCC(SDNode* N, unsigned ResNo);
 
   SDValue WidenVecOp_Convert(SDNode *N);
 

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp?rev=142488&r1=142487&r2=142488&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp Wed Oct 19 04:45:11 2011
@@ -1239,6 +1239,7 @@
   case ISD::LOAD:              Res = WidenVecRes_LOAD(N); break;
   case ISD::SCALAR_TO_VECTOR:  Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
   case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
+  case ISD::VSELECT:
   case ISD::SELECT:            Res = WidenVecRes_SELECT(N); break;
   case ISD::SELECT_CC:         Res = WidenVecRes_SELECT_CC(N); break;
   case ISD::SETCC:             Res = WidenVecRes_SETCC(N); break;
@@ -1928,7 +1929,7 @@
   SDValue InOp1 = GetWidenedVector(N->getOperand(1));
   SDValue InOp2 = GetWidenedVector(N->getOperand(2));
   assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
-  return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
+  return DAG.getNode(N->getOpcode(), N->getDebugLoc(),
                      WidenVT, Cond1, InOp1, InOp2);
 }
 
@@ -2032,6 +2033,7 @@
   case ISD::EXTRACT_SUBVECTOR:  Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
   case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
   case ISD::STORE:              Res = WidenVecOp_STORE(N); break;
+  case ISD::SETCC:              Res = WidenVecOp_SETCC(N, ResNo); break;
 
   case ISD::FP_EXTEND:
   case ISD::FP_TO_SINT:
@@ -2165,6 +2167,30 @@
                        MVT::Other,&StChain[0],StChain.size());
 }
 
+// Widens both vector operands of the SETCC node N, re-emits the compare on
+// the widened values, and extracts a result of N's original type at index 0.
+SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N, unsigned ResNo) {
+  assert(ResNo < 2 && "Invalid res num to widen");
+  SDValue WideLHS = GetWidenedVector(N->getOperand(0));
+  SDValue WideRHS = GetWidenedVector(N->getOperand(1));
+  unsigned WideNumElts = WideLHS.getValueType().getVectorNumElements();
+  EVT OrigResVT = N->getValueType(0);
+  DebugLoc dl = N->getDebugLoc();
+
+  // WARNING: the lanes added by widening hold garbage, and the compare below
+  // reads them. For a float compare that garbage may be denormal values, which
+  // may be slow. Open question: should the unused lanes be zeroed first?
+  SDValue StartIdx = DAG.getIntPtrConstant(0);
+  // Same element type as N's result, but as many lanes as the wide operands.
+  EVT WideResVT = EVT::getVectorVT(*DAG.getContext(),
+                                   OrigResVT.getVectorElementType(),
+                                   WideNumElts);
+  SDValue WideCmp = DAG.getNode(ISD::SETCC, dl, WideResVT, WideLHS, WideRHS,
+                                N->getOperand(2));
+  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OrigResVT, WideCmp, StartIdx);
+}
+
+
 //===----------------------------------------------------------------------===//
 // Vector Widening Utilities
 //===----------------------------------------------------------------------===//

Added: llvm/trunk/test/CodeGen/X86/2011-10-19-widen_vselect.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2011-10-19-widen_vselect.ll?rev=142488&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2011-10-19-widen_vselect.ll (added)
+++ llvm/trunk/test/CodeGen/X86/2011-10-19-widen_vselect.ll Wed Oct 19 04:45:11 2011
@@ -0,0 +1,68 @@
+; RUN: llc < %s -march=x86-64 -mcpu=corei7 | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; Make sure that we don't crash when legalizing vselect and vsetcc and that
+; we are able to generate vector blend instructions.
+; Simplest case: a <2 x float> select whose mask and both inputs are undef.
+; CHECK: simple_widen
+; CHECK: blend
+; CHECK: ret
+define void @simple_widen() {
+entry:
+  %0 = select <2 x i1> undef, <2 x float> undef, <2 x float> undef
+  store <2 x float> %0, <2 x float>* undef
+  ret void
+}
+
+; CHECK: complex_inreg_work
+; CHECK: blend
+; CHECK: ret
+; Here the select mask is produced by a <2 x float> fcmp oeq on undef inputs.
+define void @complex_inreg_work() {
+entry:
+  %0 = fcmp oeq <2 x float> undef, undef
+  %1 = select <2 x i1> %0, <2 x float> undef, <2 x float> undef
+  store <2 x float> %1, <2 x float>* undef
+  ret void
+}
+
+; CHECK: zero_test
+; CHECK: blend
+; CHECK: ret
+; The false operand of the select is zeroinitializer; the rest is undef.
+define void @zero_test() {
+entry:
+  %0 = select <2 x i1> undef, <2 x float> undef, <2 x float> zeroinitializer
+  store <2 x float> %0, <2 x float>* undef
+  ret void
+}
+
+; CHECK: full_test
+; CHECK: blend
+; CHECK: ret
+; Two fcmp-driven selects chained together, fed by fptosi/sitofp and fadd.
+define void @full_test() {
+ entry:
+   %Cy300 = alloca <4 x float>
+   %Cy11a = alloca <2 x float>
+   %Cy118 = alloca <2 x float>
+   %Cy119 = alloca <2 x float>
+   br label %B1
+
+ B1:                                               ; preds = %entry
+   %0 = load <2 x float>* %Cy119
+   %1 = fptosi <2 x float> %0 to <2 x i32>
+   %2 = sitofp <2 x i32> %1 to <2 x float>
+   %3 = fcmp ogt <2 x float> %0, zeroinitializer
+   %4 = fadd <2 x float> %2, <float 1.000000e+00, float 1.000000e+00>
+   %5 = select <2 x i1> %3, <2 x float> %4, <2 x float> %2
+   %6 = fcmp oeq <2 x float> %2, %0
+   %7 = select <2 x i1> %6, <2 x float> %0, <2 x float> %5
+   store <2 x float> %7, <2 x float>* %Cy118
+   %8 = load <2 x float>* %Cy118
+   store <2 x float> %8, <2 x float>* %Cy11a
+   ret void
+}
+
+