[llvm-commits] [llvm] r139623 - in /llvm/trunk: lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp lib/CodeGen/SelectionDAG/SelectionDAG.cpp lib/Target/X86/X86ISelLowering.cpp test/CodeGen/X86/sse2-blend.ll
Bruno Cardoso Lopes
bruno.cardoso at gmail.com
Tue Sep 13 12:27:51 PDT 2011
Hi Nadav,
On Tue, Sep 13, 2011 at 12:17 PM, Nadav Rotem <nadav.rotem at intel.com> wrote:
> Author: nadav
> Date: Tue Sep 13 14:17:42 2011
> New Revision: 139623
>
> URL: http://llvm.org/viewvc/llvm-project?rev=139623&view=rev
> Log:
> Add vselect target support for targets that do not support blend but do support
> xor/and/or (For example SSE2).
>
>
> Added:
> llvm/trunk/test/CodeGen/X86/sse2-blend.ll
> Modified:
> llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
> llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp?rev=139623&r1=139622&r2=139623&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp Tue Sep 13 14:17:42 2011
> @@ -61,6 +61,9 @@
> // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
> // SINT_TO_FLOAT and SHR on vectors isn't legal.
> SDValue ExpandUINT_TO_FLOAT(SDValue Op);
> + // Implement vselect in terms of XOR, AND,OR when blend is not supported
> + // by the target.
> + SDValue ExpandVSELECT(SDValue Op);
> SDValue ExpandFNEG(SDValue Op);
> // Implements vector promotion; this is essentially just bitcasting the
> // operands to a different type and bitcasting the result back to the
> @@ -157,6 +160,7 @@
> case ISD::CTLZ:
> case ISD::CTPOP:
> case ISD::SELECT:
> + case ISD::VSELECT:
> case ISD::SELECT_CC:
> case ISD::SETCC:
> case ISD::ZERO_EXTEND:
> @@ -210,7 +214,9 @@
> // FALL THROUGH
> }
> case TargetLowering::Expand:
> - if (Node->getOpcode() == ISD::UINT_TO_FP)
> + if (Node->getOpcode() == ISD::VSELECT)
> + Result = ExpandVSELECT(Op);
> + else if (Node->getOpcode() == ISD::UINT_TO_FP)
> Result = ExpandUINT_TO_FLOAT(Op);
> else if (Node->getOpcode() == ISD::FNEG)
> Result = ExpandFNEG(Op);
> @@ -256,9 +262,42 @@
> return DAG.getNode(ISD::BITCAST, dl, VT, Op);
> }
>
> -SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
> +SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
> + // Implement VSELECT in terms of XOR, AND, OR
> + // on platforms which do not support blend natively.
> + EVT VT = Op.getOperand(0).getValueType();
> + EVT OVT = Op.getOperand(0).getValueType();
> + DebugLoc DL = Op.getDebugLoc();
> +
> + SDValue Mask = Op.getOperand(0);
> + SDValue Op1 = Op.getOperand(1);
> + SDValue Op2 = Op.getOperand(2);
> +
> + // If we can't even use the basic vector operations of
> + // AND,OR,XOR, we will have to scalarize the op.
> + if (!TLI.isOperationLegalOrCustom(ISD::AND, VT) ||
> + !TLI.isOperationLegalOrCustom(ISD::XOR, VT) ||
> + !TLI.isOperationLegalOrCustom(ISD::OR, VT)) {
> + return DAG.UnrollVectorOp(Op.getNode());
> + }
>
> + assert(VT.getSizeInBits() == OVT.getSizeInBits() && "Invalid mask size");
> + // Bitcast the operands to be the same type as the mask.
> + // This is needed when we select between FP types because
> + // the mask is a vector of integers.
> + Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
> + Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
> +
> + SDValue AllOnes = DAG.getConstant(
> + APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), VT);
> + SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
> +
> + Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
> + Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
> + return DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
> +}
>
> +SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
> EVT VT = Op.getOperand(0).getValueType();
> DebugLoc DL = Op.getDebugLoc();
>
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=139623&r1=139622&r2=139623&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Tue Sep 13 14:17:42 2011
> @@ -6440,6 +6440,10 @@
> Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
> &Operands[0], Operands.size()));
> break;
> + case ISD::VSELECT:
> + Scalars.push_back(getNode(ISD::SELECT, dl, EltVT,
> + &Operands[0], Operands.size()));
> + break;
> case ISD::SHL:
> case ISD::SRA:
> case ISD::SRL:
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=139623&r1=139622&r2=139623&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Sep 13 14:17:42 2011
> @@ -727,6 +727,7 @@
> setOperationAction(ISD::SIGN_EXTEND, (MVT::SimpleValueType)VT, Expand);
> setOperationAction(ISD::ZERO_EXTEND, (MVT::SimpleValueType)VT, Expand);
> setOperationAction(ISD::ANY_EXTEND, (MVT::SimpleValueType)VT, Expand);
> + setOperationAction(ISD::VSELECT, (MVT::SimpleValueType)VT, Expand);
> for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
> InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
> setTruncStoreAction((MVT::SimpleValueType)VT,
>
> Added: llvm/trunk/test/CodeGen/X86/sse2-blend.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse2-blend.ll?rev=139623&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/sse2-blend.ll (added)
> +++ llvm/trunk/test/CodeGen/X86/sse2-blend.ll Tue Sep 13 14:17:42 2011
> @@ -0,0 +1,37 @@
> +; RUN: llc < %s -march=x86 -mcpu=yonah -promote-elements -mattr=+sse2,-sse41
> +
> +define void @vsel_float(<4 x float>* %v1, <4 x float>* %v2) {
> + %A = load <4 x float>* %v1
> + %B = load <4 x float>* %v2
> + %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %A, <4 x float> %B
> + store <4 x float > %vsel, <4 x float>* %v1
> + ret void
> +}
> +
> +define void @vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
> + %A = load <4 x i32>* %v1
> + %B = load <4 x i32>* %v2
> + %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> %A, <4 x i32> %B
> + store <4 x i32 > %vsel, <4 x i32>* %v1
> + ret void
> +}
> +
> +
> +define void @vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) {
> + %A = load <4 x i64>* %v1
> + %B = load <4 x i64>* %v2
> + %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> %A, <4 x i64> %B
> + store <4 x i64 > %vsel, <4 x i64>* %v1
> + ret void
> +}
> +
> +
> +define void @vsel_double(<4 x double>* %v1, <4 x double>* %v2) {
> + %A = load <4 x double>* %v1
> + %B = load <4 x double>* %v2
> + %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> %A, <4 x double> %B
> + store <4 x double > %vsel, <4 x double>* %v1
> + ret void
> +}
> +
Can you make the testcase check for what you expect?
--
Bruno Cardoso Lopes
http://www.brunocardoso.cc
More information about the llvm-commits mailing list