[llvm] r174325 - SelectionDAG: Teach FoldConstantArithmetic how to deal with vectors.

Mon Feb 4 07:43:14 PST 2013

----- Original Message -----
> From: "Benjamin Kramer" <benny.kra at googlemail.com>
> To: llvm-commits at cs.uiuc.edu
> Sent: Monday, February 4, 2013 9:19:18 AM
> Subject: [llvm] r174325 - SelectionDAG: Teach FoldConstantArithmetic how to deal with vectors.
> 
> Author: d0k
> Date: Mon Feb  4 09:19:18 2013
> New Revision: 174325
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=174325&view=rev
> Log:
> SelectionDAG: Teach FoldConstantArithmetic how to deal with vectors.
> 
> This required disabling a PowerPC optimization that did the following:
> input:
> x = BUILD_VECTOR <i32 16, i32 16, i32 16, i32 16>
> lowered to:
> tmp = BUILD_VECTOR <i32 8, i32 8, i32 8, i32 8>
> x = ADD tmp, tmp
> 
> The add now gets folded immediately and we're back at the BUILD_VECTOR we
> started from. I don't see a way to fix this currently, so I left it disabled
> for now.

Can we add a TLI callback that lets a target disable this new constant folding, so that PPC can opt out of it?

Thanks again,
Hal

> 
> Fix some trivially foldable X86 tests too.
> 
> Modified:
>     llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
>     llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
>     llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
>     llvm/trunk/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
>     llvm/trunk/test/CodeGen/X86/2011-11-30-or.ll
>     llvm/trunk/test/CodeGen/X86/blend-msb.ll
>     llvm/trunk/test/CodeGen/X86/vector-gep.ll
> 
> Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=174325&r1=174324&r2=174325&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original)
> +++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Mon Feb  4
> 09:19:18 2013
> @@ -992,10 +992,8 @@ public:
>    SDValue CreateStackTemporary(EVT VT1, EVT VT2);
>  
>    /// FoldConstantArithmetic -
> -  SDValue FoldConstantArithmetic(unsigned Opcode,
> -                                 EVT VT,
> -                                 ConstantSDNode *Cst1,
> -                                 ConstantSDNode *Cst2);
> +  SDValue FoldConstantArithmetic(unsigned Opcode, EVT VT,
> +                                 SDNode *Cst1, SDNode *Cst2);
>  
>    /// FoldSetCC - Constant fold a setcc to true or false.
>    SDValue FoldSetCC(EVT VT, SDValue N1,
> 
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp?rev=174325&r1=174324&r2=174325&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp Mon Feb  4
> 09:19:18 2013
> @@ -2680,44 +2680,117 @@ SDValue SelectionDAG::getNode(unsigned O
>    return SDValue(N, 0);
>  }
>  
> -SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode,
> -                                             EVT VT,
> -                                             ConstantSDNode *Cst1,
> -                                             ConstantSDNode *Cst2) {
> -  const APInt &C1 = Cst1->getAPIntValue(), &C2 =
> Cst2->getAPIntValue();
> +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT
> VT,
> +                                             SDNode *Cst1, SDNode
> *Cst2) {
> +  SmallVector<std::pair<ConstantSDNode *, ConstantSDNode *>, 4>
> Inputs;
> +  SmallVector<SDValue, 4> Outputs;
> +  EVT SVT = VT.getScalarType();
> +
> +  ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1);
> +  ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2);
> +  if (Scalar1 && Scalar2) {
> +    // Scalar instruction.
> +    Inputs.push_back(std::make_pair(Scalar1, Scalar2));
> +  } else {
> +    // For vectors extract each constant element into Inputs so we
> can constant
> +    // fold them individually.
> +    BuildVectorSDNode *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
> +    BuildVectorSDNode *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);
> +    if (!BV1 || !BV2)
> +      return SDValue();
> +
> +    assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of
> sync!");
> +
> +    for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
> +      ConstantSDNode *V1 =
> dyn_cast<ConstantSDNode>(BV1->getOperand(I));
> +      ConstantSDNode *V2 =
> dyn_cast<ConstantSDNode>(BV2->getOperand(I));
> +      if (!V1 || !V2) // Not a constant, bail.
> +        return SDValue();
> +
> +      // Avoid BUILD_VECTOR nodes that perform implicit truncation.
> +      // FIXME: This is valid and could be handled by truncating the
> APInts.
> +      if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
> +        return SDValue();
> +
> +      Inputs.push_back(std::make_pair(V1, V2));
> +    }
> +  }
>  
> -  switch (Opcode) {
> -  case ISD::ADD:  return getConstant(C1 + C2, VT);
> -  case ISD::SUB:  return getConstant(C1 - C2, VT);
> -  case ISD::MUL:  return getConstant(C1 * C2, VT);
> -  case ISD::UDIV:
> -    if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT);
> -    break;
> -  case ISD::UREM:
> -    if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT);
> -    break;
> -  case ISD::SDIV:
> -    if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT);
> -    break;
> -  case ISD::SREM:
> -    if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT);
> -    break;
> -  case ISD::AND:  return getConstant(C1 & C2, VT);
> -  case ISD::OR:   return getConstant(C1 | C2, VT);
> -  case ISD::XOR:  return getConstant(C1 ^ C2, VT);
> -  case ISD::SHL:  return getConstant(C1 << C2, VT);
> -  case ISD::SRL:  return getConstant(C1.lshr(C2), VT);
> -  case ISD::SRA:  return getConstant(C1.ashr(C2), VT);
> -  case ISD::ROTL: return getConstant(C1.rotl(C2), VT);
> -  case ISD::ROTR: return getConstant(C1.rotr(C2), VT);
> -  default: break;
> +  // We have a number of constant values, constant fold them element
> by element.
> +  for (unsigned I = 0, E = Inputs.size(); I != E; ++I) {
> +    const APInt &C1 = Inputs[I].first->getAPIntValue();
> +    const APInt &C2 = Inputs[I].second->getAPIntValue();
> +
> +    switch (Opcode) {
> +    case ISD::ADD:
> +      Outputs.push_back(getConstant(C1 + C2, SVT));
> +      break;
> +    case ISD::SUB:
> +      Outputs.push_back(getConstant(C1 - C2, SVT));
> +      break;
> +    case ISD::MUL:
> +      Outputs.push_back(getConstant(C1 * C2, SVT));
> +      break;
> +    case ISD::UDIV:
> +      if (!C2.getBoolValue())
> +        return SDValue();
> +      Outputs.push_back(getConstant(C1.udiv(C2), SVT));
> +      break;
> +    case ISD::UREM:
> +      if (!C2.getBoolValue())
> +        return SDValue();
> +      Outputs.push_back(getConstant(C1.urem(C2), SVT));
> +      break;
> +    case ISD::SDIV:
> +      if (!C2.getBoolValue())
> +        return SDValue();
> +      Outputs.push_back(getConstant(C1.sdiv(C2), SVT));
> +      break;
> +    case ISD::SREM:
> +      if (!C2.getBoolValue())
> +        return SDValue();
> +      Outputs.push_back(getConstant(C1.srem(C2), SVT));
> +      break;
> +    case ISD::AND:
> +      Outputs.push_back(getConstant(C1 & C2, SVT));
> +      break;
> +    case ISD::OR:
> +      Outputs.push_back(getConstant(C1 | C2, SVT));
> +      break;
> +    case ISD::XOR:
> +      Outputs.push_back(getConstant(C1 ^ C2, SVT));
> +      break;
> +    case ISD::SHL:
> +      Outputs.push_back(getConstant(C1 << C2, SVT));
> +      break;
> +    case ISD::SRL:
> +      Outputs.push_back(getConstant(C1.lshr(C2), SVT));
> +      break;
> +    case ISD::SRA:
> +      Outputs.push_back(getConstant(C1.ashr(C2), SVT));
> +      break;
> +    case ISD::ROTL:
> +      Outputs.push_back(getConstant(C1.rotl(C2), SVT));
> +      break;
> +    case ISD::ROTR:
> +      Outputs.push_back(getConstant(C1.rotr(C2), SVT));
> +      break;
> +    default:
> +      return SDValue();
> +    }
>    }
>  
> -  return SDValue();
> +  // Handle the scalar case first.
> +  if (Outputs.size() == 1)
> +    return Outputs.back();
> +
> +  // Otherwise build a big vector out of the scalar elements we
> generated.
> +  return getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, Outputs.data(),
> +                 Outputs.size());
>  }
>  
> -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
> -                              SDValue N1, SDValue N2) {
> +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
> SDValue N1,
> +                              SDValue N2) {
>    ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
>    ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
>    switch (Opcode) {
> @@ -3013,16 +3086,14 @@ SDValue SelectionDAG::getNode(unsigned O
>    }
>    }
>  
> -  if (N1C) {
> -    if (N2C) {
> -      SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C);
> -      if (SV.getNode()) return SV;
> -    } else {      // Cannonicalize constant to RHS if commutative
> -      if (isCommutativeBinOp(Opcode)) {
> -        std::swap(N1C, N2C);
> -        std::swap(N1, N2);
> -      }
> -    }
> +  // Perform trivial constant folding.
> +  SDValue SV = FoldConstantArithmetic(Opcode, VT, N1.getNode(),
> N2.getNode());
> +  if (SV.getNode()) return SV;
> +
> +  // Canonicalize constant to RHS if commutative.
> +  if (N1C && !N2C && isCommutativeBinOp(Opcode)) {
> +    std::swap(N1C, N2C);
> +    std::swap(N1, N2);
>    }
>  
>    // Constant fold FP operations.
> @@ -3030,7 +3101,7 @@ SDValue SelectionDAG::getNode(unsigned O
>    ConstantFPSDNode *N2CFP =
>    dyn_cast<ConstantFPSDNode>(N2.getNode());
>    if (N1CFP) {
>      if (!N2CFP && isCommutativeBinOp(Opcode)) {
> -      // Cannonicalize constant to RHS if commutative
> +      // Canonicalize constant to RHS if commutative.
>        std::swap(N1CFP, N2CFP);
>        std::swap(N1, N2);
>      } else if (N2CFP) {
> 
> Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=174325&r1=174324&r2=174325&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Mon Feb  4
> 09:19:18 2013
> @@ -5032,9 +5032,14 @@ SDValue PPCTargetLowering::LowerBUILD_VE
>    // If this value is in the range [-32,30] and is even, use:
>    //    tmp = VSPLTI[bhw], result = add tmp, tmp
>    if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
> +    // FIXME: This is currently disabled because the ADD will be
> folded back
> +    // into an invalid BUILD_VECTOR immediately.
> +    return SDValue();
> +#if 0
>      SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other,
>      DAG, dl);
>      Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
>      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
> +#endif
>    }
>  
>    // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it
>    is
> 
> Modified:
> llvm/trunk/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll?rev=174325&r1=174324&r2=174325&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll
> (original)
> +++ llvm/trunk/test/CodeGen/PowerPC/2008-07-10-SplatMiscompile.ll Mon
> Feb  4 09:19:18 2013
> @@ -1,5 +1,6 @@
>  ; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vadduhm
>  ; RUN: llc < %s -march=ppc32 -mcpu=g5 | grep vsubuhm
> +; XFAIL: *
>  
>  define <4 x i32> @test() nounwind {
>  	ret <4 x i32> < i32 4293066722, i32 4293066722, i32 4293066722, i32
>  	4293066722>
> 
> Modified: llvm/trunk/test/CodeGen/X86/2011-11-30-or.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2011-11-30-or.ll?rev=174325&r1=174324&r2=174325&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/2011-11-30-or.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/2011-11-30-or.ll Mon Feb  4 09:19:18
> 2013
> @@ -8,9 +8,9 @@ target triple = "x86_64-apple-macosx10.6
>  ; CHECK: pblendvb        %xmm1, %xmm2
>  ; CHECK: ret
>  
> -define void @select_func() {
> +define void @select_func(<8 x i16> %in) {
>  entry:
> -  %c.lobit.i.i.i = ashr <8 x i16> <i16 17, i16 5, i16 1, i16 15, i16
> 19, i16 15, i16 4, i16 1> , <i16 15, i16 15, i16 15, i16 15, i16 15,
> i16 15, i16 15, i16 15>
> +  %c.lobit.i.i.i = ashr <8 x i16> %in, <i16 15, i16 15, i16 15, i16
> 15, i16 15, i16 15, i16 15, i16 15>
>    %and.i56.i.i.i = and <8 x i16> %c.lobit.i.i.i, <i16 25, i16 8, i16
>    65, i16 25, i16 8, i16 95, i16 15, i16 45>
>    %and.i5.i.i.i = bitcast <8 x i16> %and.i56.i.i.i to <2 x i64>
>    %neg.i.i.i.i = xor <8 x i16> %c.lobit.i.i.i, <i16 -1, i16 -1, i16
>    -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
> 
> Modified: llvm/trunk/test/CodeGen/X86/blend-msb.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/blend-msb.ll?rev=174325&r1=174324&r2=174325&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/blend-msb.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/blend-msb.ll Mon Feb  4 09:19:18 2013
> @@ -5,7 +5,8 @@
>  ; shifting the needed bit to the MSB, and not using shl+sra.
>  
>  ;CHECK: vsel_float
> -;CHECK: pslld
> +;CHECK: movl $-2147483648
> +;CHECK-NEXT: movd
>  ;CHECK-NEXT: blendvps
>  ;CHECK: ret
>  define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
> @@ -14,7 +15,8 @@ define <4 x float> @vsel_float(<4 x floa
>  }
>  
>  ;CHECK: vsel_4xi8
> -;CHECK: pslld
> +;CHECK: movl $-2147483648
> +;CHECK-NEXT: movd
>  ;CHECK-NEXT: blendvps
>  ;CHECK: ret
>  define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
> 
> Modified: llvm/trunk/test/CodeGen/X86/vector-gep.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-gep.ll?rev=174325&r1=174324&r2=174325&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/vector-gep.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/vector-gep.ll Mon Feb  4 09:19:18
> 2013
> @@ -8,10 +8,8 @@ entry:
>    %vecinit2.i = insertelement <4 x i32*> %vecinit.i, i32* %ptr, i32
>    1
>    %vecinit4.i = insertelement <4 x i32*> %vecinit2.i, i32* %ptr, i32
>    2
>    %vecinit6.i = insertelement <4 x i32*> %vecinit4.i, i32* %ptr, i32
>    3
> -;CHECK: pslld $2
>  ;CHECK: padd
>    %A2 = getelementptr <4 x i32*> %vecinit6.i, <4 x i32> <i32 1, i32
>    2, i32 3, i32 4>
> -;CHECK: pslld $2
>  ;CHECK: padd
>    %A3 = getelementptr <4 x i32*> %A2, <4 x i32> <i32 10, i32 14, i32
>    19, i32 233>
>    ret <4 x i32*> %A3
> @@ -21,7 +19,6 @@ entry:
>  ;CHECK: AGEP1:
>  define i32 @AGEP1(<4 x i32*> %param) nounwind {
>  entry:
> -;CHECK: pslld $2
>  ;CHECK: padd
>    %A2 = getelementptr <4 x i32*> %param, <4 x i32> <i32 1, i32 2,
>    i32 3, i32 4>
>    %k = extractelement <4 x i32*> %A2, i32 3
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>