[llvm-commits] [PATCH] PowerPC: More support for Altivec compare operations

Wed Oct 24 16:23:45 PDT 2012

On Oct 24, 2012, at 7:49 AM, Adhemerval Zanella wrote:

> My previous patch for altivec compare support was incomplete, it didn't handle
> the other operators (!=, <, >, etc.). This patch add a better and more complete
> support for comparisons for altivec supported types (v16i8, v8i16, v4i32, and
> v4f32).
> 
> The testcase also covers all the supported comparison operators for the altivec
> types.
> 
> Any tips, suggestions, comments?
> From d129514d6fcde602109dcb27f108ca5642988685 Mon Sep 17 00:00:00 2001
> From: Adhemerval Zanella <azanella at linux.vnet.ibm.com>
> Date: Wed, 24 Oct 2012 12:43:56 -0200
> Subject: [PATCH] PowerPC: More support for Altivec compare operations
> 
> This patch adds more support for vector type comparisons using altivec.
> It adds correct support for v16i8, v8i16, v4i32, and v4f32 vector types
> for comparison operators ==, !=, >, >=, <, and <=.
> ---
>  lib/Target/PowerPC/PPCISelDAGToDAG.cpp |  174 +++++++++++-
>  lib/Target/PowerPC/PPCInstrAltivec.td  |    2 +-
>  test/CodeGen/PowerPC/vec_cmp.ll        |  470 +++++++++++++++++++++++++++++++-
>  3 files changed, 617 insertions(+), 29 deletions(-)
> 
> diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
> index 6195441..c3e89f0 100644
> --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
> +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
> @@ -623,6 +623,104 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) {
>    }
>  }
>  
> +// getVSPLATInst: return the vector splat instruction based on the specified
> +// vector type. Since this is for altivec specific code, only support the
> +// altivec types (v16i8, v8i16, v4i32, and v4f32).
> +static unsigned int getVSPLATInst(MVT::SimpleValueType VecVT) {
> +  switch (VecVT) {
> +    case MVT::v16i8: return PPC::VSPLTISB;
> +    case MVT::v8i16: return PPC::VSPLTISH;
> +    case MVT::v4i32: return PPC::VSPLTISW;
> +    case MVT::v4f32: return PPC::VSPLTISW;
> +    default:
> +      llvm_unreachable("Invalid vector type");
> +  }
> +}
> +
> +// getVCMPInst: return the vector compare instruction for the specified
> +// vector type and condition code. Since this is for altivec specific code,
> +// only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
> +static unsigned int getVCMPInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
> +  switch (CC) {
> +    case ISD::SETEQ:
> +    case ISD::SETUEQ:
> +    case ISD::SETNE:
> +    case ISD::SETUNE:
> +      switch (VecVT) {
> +        case MVT::v16i8: return PPC::VCMPEQUB;
> +        case MVT::v8i16: return PPC::VCMPEQUH;
> +        case MVT::v4i32: return PPC::VCMPEQUW;
> +        // v4f32 != v4f32 could be translate to unordered not equal
> +        case MVT::v4f32: return PPC::VCMPEQFP;
> +        default:
> +          llvm_unreachable("Invalid vector type with conditional code");
> +      }
> +    case ISD::SETLT:
> +    case ISD::SETGT:
> +    case ISD::SETLE:
> +    case ISD::SETGE:
> +      switch (VecVT) {
> +        case MVT::v16i8: return PPC::VCMPGTSB;
> +        case MVT::v8i16: return PPC::VCMPGTSH;
> +        case MVT::v4i32: return PPC::VCMPGTSW;
> +        default:
> +          llvm_unreachable("Invalid vector type with conditional code");
> +      }
> +    case ISD::SETULT:
> +    case ISD::SETUGT:
> +    case ISD::SETUGE:
> +    case ISD::SETULE:
> +      switch (VecVT) {
> +        case MVT::v16i8: return PPC::VCMPGTUB;
> +        case MVT::v8i16: return PPC::VCMPGTUH;
> +        case MVT::v4i32: return PPC::VCMPGTUW;
> +        default:
> +          llvm_unreachable("Invalid vector type with conditional code");
> +      }
> +    case ISD::SETOEQ:
> +      switch (VecVT) {
> +        case MVT::v4f32: return PPC::VCMPEQFP;
> +        default:
> +          llvm_unreachable("Invalid vector type with conditional code");
> +      }
> +    case ISD::SETOLT:
> +    case ISD::SETOGT:
> +    case ISD::SETOLE:
> +      switch (VecVT) {
> +        case MVT::v4f32: return PPC::VCMPGTFP;
> +        default:
> +          llvm_unreachable("Invalid vector type with conditional code");
> +      }
> +    case ISD::SETOGE:
> +      switch (VecVT) {
> +        case MVT::v4f32: return PPC::VCMPGEFP;
> +        default:
> +          llvm_unreachable("Invalid vector type with conditional code");
> +      }
> +    default:
> +      llvm_unreachable("Invalid integer vector compare condition");
> +  }
> +}
> +
> +// getVCMPEQInst: return the equal compare instruction for the specified vector
> +// type. Since this is for altivec specific code, only support the altivec
> +// types (v16i8, v8i16, v4i32, and v4f32).
> +static unsigned int getVCMPEQInst(MVT::SimpleValueType VecVT) {
> +  switch (VecVT) {
> +    case MVT::v16i8:
> +      return PPC::VCMPEQUB;
> +    case MVT::v8i16:
> +      return PPC::VCMPEQUH;
> +    case MVT::v4i32:
> +      return PPC::VCMPEQUW;
> +    case MVT::v4f32:
> +      return PPC::VCMPEQFP;
> +    default:
> +      llvm_unreachable("Invalid integer vector compare condition");
> +  }
> +}
> +
> +
>  SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
>    DebugLoc dl = N->getDebugLoc();
>    unsigned Imm;
> @@ -706,20 +804,70 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
>    SDValue LHS = N->getOperand(0);
>    SDValue RHS = N->getOperand(1);
>  
> -  // Altivec Vector compare instructions do not set any CR register by default
> +  // Altivec Vector compare instructions do not set any CR register by default and
> +  // vector compare operations return the same type as the operands.
>    if (LHS.getValueType().isVector()) {
> -    unsigned int Opc;
> -    if (LHS.getValueType() == MVT::v16i8)
> -      Opc = PPC::VCMPEQUB;
> -    else if (LHS.getValueType() == MVT::v4i32)
> -      Opc = PPC::VCMPEQUW;
> -    else if (LHS.getValueType() == MVT::v8i16)
> -      Opc = PPC::VCMPEQUH;
> -    else if (LHS.getValueType() == MVT::v4f32)
> -      Opc = PPC::VCMPEQFP;
> -    else
> -      llvm_unreachable("Invalid vector compare type: should be expanded by legalize");
> -    return CurDAG->SelectNodeTo(N, Opc, LHS.getValueType(), LHS, RHS);
> +    EVT VecVT = LHS.getValueType();
> +    MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy;
> +    unsigned int VSPLATInst = getVSPLATInst(VT);
> +    unsigned int VCMPInst = getVCMPInst(VT, CC);
> +
> +    // The VSET0 contains all bits sets to '0' while VMASK all '1'. They will be
> +    // used later on a 'vsel' instruction to select the result from the compare
> +    // instructions.
> +    SDValue VSET0(CurDAG->getMachineNode(PPC::V_SET0, dl, VecVT), 0);
> +    SDValue VMASK(CurDAG->getMachineNode(VSPLATInst, dl, VecVT, getI32Imm(-1)), 0);

The vcmpXXX instructions already emit lane masks. At most, one of these requires inverting the result. Materializing a 0 and a -1 value and using a vsel seems unnecessary.

> +    switch (CC) {
> +      case ISD::SETEQ:
> +      case ISD::SETOEQ:
> +      case ISD::SETUEQ: {
> +        SDValue VCMP(CurDAG->getMachineNode(VCMPInst, dl, VecVT, LHS, RHS), 0);
> +        return CurDAG->SelectNodeTo(N, PPC::VSEL, VecVT, VSET0, VMASK, VCMP);
> +      }
> +      case ISD::SETNE:
> +      case ISD::SETONE:
> +      case ISD::SETUNE: {
> +        SDValue VCMP(CurDAG->getMachineNode(VCMPInst, dl, VecVT, LHS, RHS), 0);
> +        return CurDAG->SelectNodeTo(N, PPC::VSEL, VecVT, VMASK, VSET0, VCMP);
> +      }
> +      case ISD::SETLT:
> +      case ISD::SETOLT:
> +      case ISD::SETULT: {
> +        SDValue VCMP(CurDAG->getMachineNode(VCMPInst, dl, VecVT, RHS, LHS), 0);
> +        return CurDAG->SelectNodeTo(N, PPC::VSEL, VecVT, VSET0, VMASK, VCMP);
> +      }
> +      case ISD::SETGT:
> +      case ISD::SETOGT:
> +      case ISD::SETUGT: {
> +        SDValue VCMP(CurDAG->getMachineNode(VCMPInst, dl, VecVT, LHS, RHS), 0);
> +        return CurDAG->SelectNodeTo(N, PPC::VSEL, VecVT, VSET0, VMASK, VCMP);
> +      }
> +      case ISD::SETGE:
> +      case ISD::SETOGE:
> +      case ISD::SETUGE: {
> +        SDValue VCMP(CurDAG->getMachineNode(VCMPInst, dl, VecVT, LHS, RHS), 0);
> +        // Small optimization: Altivec provides a 'Vector Compare Greater Than
> +        // or Equal To' instruction (vcmpgefp), so in this case there is no
> +        // need for extra logic for the equal compare.
> +        if (!VecVT.getSimpleVT().isFloatingPoint()) {
> +          unsigned int VCMPEQInst = getVCMPEQInst(VT);
> +          SDValue VCMPEQ(CurDAG->getMachineNode(VCMPEQInst, dl, VecVT, LHS, RHS), 0);
> +          VCMP = SDValue(CurDAG->getMachineNode(PPC::VOR, dl, VecVT, VCMP, VCMPEQ), 0);
> +        }
> +        return CurDAG->SelectNodeTo(N, PPC::VSEL, VecVT, VSET0, VMASK, VCMP);
> +      }
> +      case ISD::SETLE:
> +      case ISD::SETOLE:
> +      case ISD::SETULE: {
> +        SDValue VCMP(CurDAG->getMachineNode(VCMPInst, dl, VecVT, RHS, LHS), 0);
> +        unsigned int VCMPEQInst = getVCMPEQInst(VT);
> +        SDValue VCMPEQ(CurDAG->getMachineNode(VCMPEQInst, dl, VecVT, LHS, RHS), 0);
> +        VCMP = SDValue(CurDAG->getMachineNode(PPC::VOR, dl, VecVT, VCMP, VCMPEQ), 0);
> +        return CurDAG->SelectNodeTo(N, PPC::VSEL, VecVT, VSET0, VMASK, VCMP);
> +      }
> +      default:
> +        llvm_unreachable("Invalid vector compare type: should be expanded by legalize");
> +    }
>    }
>  
>    bool Inv;
> diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
> index ba58c3e..9f406ae 100644
> --- a/lib/Target/PowerPC/PPCInstrAltivec.td
> +++ b/lib/Target/PowerPC/PPCInstrAltivec.td
> @@ -671,7 +671,7 @@ def : Pat<(v4i32 (vnot_ppc (or VRRC:$A, VRRC:$B))),
>  def : Pat<(v4i32 (and VRRC:$A, (vnot_ppc VRRC:$B))),
>            (VANDC VRRC:$A, VRRC:$B)>;
>  
> -def : Pat<(fmul VRRC:$vA, VRRC:$vB),
> +def : Pat<(v4f32 (fmul VRRC:$vA, VRRC:$vB)),
>            (VMADDFP VRRC:$vA, VRRC:$vB, (v4i32 (V_SET0)))>; 

This is incorrect for an input of -0.0. You should add -0.0 to produce the correct results, at least without -ffast-math or similar choice. -0.0 can be materialized with a vslw of -1 by -1.

-----------------------------------------
Alex Rosenberg
Manager, Platform Architecture
Sony Computer Entertainment America, Inc.