[llvm-commits] [PATCH] PowerPC: More support for Altivec compare operations
Alex Rosenberg
alexr at leftfield.org
Wed Oct 24 16:23:45 PDT 2012
On Oct 24, 2012, at 7:49 AM, Adhemerval Zanella wrote:
> My previous patch for altivec compare support was incomplete, it didn't handle
> the other operators (!=, <, >, etc.). This patch add a better and more complete
> support for comparisons for altivec supported types (v16i8, v8i16, v4i32, and
> v4f32).
>
> The testcase also covers all the supported comparison operators for the altivec
> types.
>
> Any tips, suggestions, comments?
> From d129514d6fcde602109dcb27f108ca5642988685 Mon Sep 17 00:00:00 2001
> From: Adhemerval Zanella <azanella at linux.vnet.ibm.com>
> Date: Wed, 24 Oct 2012 12:43:56 -0200
> Subject: [PATCH] PowerPC: More support for Altivec compare operations
>
> This patch adds more support for vector type comparisons using altivec.
> It adds correct support for v16i8, v8i16, v4i32, and v4f32 vector types
> for comparison operators ==, !=, >, >=, <, and <=.
> ---
> lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 174 +++++++++++-
> lib/Target/PowerPC/PPCInstrAltivec.td | 2 +-
> test/CodeGen/PowerPC/vec_cmp.ll | 470 +++++++++++++++++++++++++++++++-
> 3 files changed, 617 insertions(+), 29 deletions(-)
>
> diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
> index 6195441..c3e89f0 100644
> --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
> +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
> @@ -623,6 +623,104 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) {
> }
> }
>
> +// getVSPLATInst: return the vector splat instruction based on the specified
> +// vector type. Since this is for altivec specific code, only support the
> +// altivec types (v16i8, v8i16, v4i32, and v4f32).
> +static unsigned int getVSPLATInst(MVT::SimpleValueType VecVT) {
> + switch (VecVT) {
> + case MVT::v16i8: return PPC::VSPLTISB;
> + case MVT::v8i16: return PPC::VSPLTISH;
> + case MVT::v4i32: return PPC::VSPLTISW;
> + case MVT::v4f32: return PPC::VSPLTISW;
> + default:
> + llvm_unreachable("Invalid vector type");
> + }
> +}
> +
> +// getVCMPInst: return the vector compare instruction for the specified
> +// vector type and condition code. Since this is for altivec specific code,
> +// only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
> +static unsigned int getVCMPInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) {
> + switch (CC) {
> + case ISD::SETEQ:
> + case ISD::SETUEQ:
> + case ISD::SETNE:
> + case ISD::SETUNE:
> + switch (VecVT) {
> + case MVT::v16i8: return PPC::VCMPEQUB;
> + case MVT::v8i16: return PPC::VCMPEQUH;
> + case MVT::v4i32: return PPC::VCMPEQUW;
> + // v4f32 != v4f32 could be translate to unordered not equal
> + case MVT::v4f32: return PPC::VCMPEQFP;
> + default:
> + llvm_unreachable("Invalid vector type with conditional code");
> + }
> + case ISD::SETLT:
> + case ISD::SETGT:
> + case ISD::SETLE:
> + case ISD::SETGE:
> + switch (VecVT) {
> + case MVT::v16i8: return PPC::VCMPGTSB;
> + case MVT::v8i16: return PPC::VCMPGTSH;
> + case MVT::v4i32: return PPC::VCMPGTSW;
> + default:
> + llvm_unreachable("Invalid vector type with conditional code");
> + }
> + case ISD::SETULT:
> + case ISD::SETUGT:
> + case ISD::SETUGE:
> + case ISD::SETULE:
> + switch (VecVT) {
> + case MVT::v16i8: return PPC::VCMPGTUB;
> + case MVT::v8i16: return PPC::VCMPGTUH;
> + case MVT::v4i32: return PPC::VCMPGTUW;
> + default:
> + llvm_unreachable("Invalid vector type with conditional code");
> + }
> + case ISD::SETOEQ:
> + switch (VecVT) {
> + case MVT::v4f32: return PPC::VCMPEQFP;
> + default:
> + llvm_unreachable("Invalid vector type with conditional code");
> + }
> + case ISD::SETOLT:
> + case ISD::SETOGT:
> + case ISD::SETOLE:
> + switch (VecVT) {
> + case MVT::v4f32: return PPC::VCMPGTFP;
> + default:
> + llvm_unreachable("Invalid vector type with conditional code");
> + }
> + case ISD::SETOGE:
> + switch (VecVT) {
> + case MVT::v4f32: return PPC::VCMPGEFP;
> + default:
> + llvm_unreachable("Invalid vector type with conditional code");
> + }
> + default:
> + llvm_unreachable("Invalid integer vector compare condition");
> + }
> +}
> +
> +// getVCMPEQInst: return the equal compare instruction for the specified vector
> +// type. Since this is for altivec specific code, only support the altivec
> +// types (v16i8, v8i16, v4i32, and v4f32).
> +static unsigned int getVCMPEQInst(MVT::SimpleValueType VecVT) {
> + switch (VecVT) {
> + case MVT::v16i8:
> + return PPC::VCMPEQUB;
> + case MVT::v8i16:
> + return PPC::VCMPEQUH;
> + case MVT::v4i32:
> + return PPC::VCMPEQUW;
> + case MVT::v4f32:
> + return PPC::VCMPEQFP;
> + default:
> + llvm_unreachable("Invalid integer vector compare condition");
> + }
> +}
> +
> +
> SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
> DebugLoc dl = N->getDebugLoc();
> unsigned Imm;
> @@ -706,20 +804,70 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
> SDValue LHS = N->getOperand(0);
> SDValue RHS = N->getOperand(1);
>
> - // Altivec Vector compare instructions do not set any CR register by default
> + // Altivec Vector compare instructions do not set any CR register by default and
> + // vector compare operations return the same type as the operands.
> if (LHS.getValueType().isVector()) {
> - unsigned int Opc;
> - if (LHS.getValueType() == MVT::v16i8)
> - Opc = PPC::VCMPEQUB;
> - else if (LHS.getValueType() == MVT::v4i32)
> - Opc = PPC::VCMPEQUW;
> - else if (LHS.getValueType() == MVT::v8i16)
> - Opc = PPC::VCMPEQUH;
> - else if (LHS.getValueType() == MVT::v4f32)
> - Opc = PPC::VCMPEQFP;
> - else
> - llvm_unreachable("Invalid vector compare type: should be expanded by legalize");
> - return CurDAG->SelectNodeTo(N, Opc, LHS.getValueType(), LHS, RHS);
> + EVT VecVT = LHS.getValueType();
> + MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy;
> + unsigned int VSPLATInst = getVSPLATInst(VT);
> + unsigned int VCMPInst = getVCMPInst(VT, CC);
> +
> + // The VSET0 contains all bits sets to '0' while VMASK all '1'. They will be
> + // used later on a 'vsel' instruction to select the result from the compare
> + // instructions.
> + SDValue VSET0(CurDAG->getMachineNode(PPC::V_SET0, dl, VecVT), 0);
> + SDValue VMASK(CurDAG->getMachineNode(VSPLATInst, dl, VecVT, getI32Imm(-1)), 0);
The vcmpXXX instructions already emit lane masks. At most, one of these requires inverting the result. Materializing a 0 and a -1 value and using a vsel seems unnecessary.
> + switch (CC) {
> + case ISD::SETEQ:
> + case ISD::SETOEQ:
> + case ISD::SETUEQ: {
> + SDValue VCMP(CurDAG->getMachineNode(VCMPInst, dl, VecVT, LHS, RHS), 0);
> + return CurDAG->SelectNodeTo(N, PPC::VSEL, VecVT, VSET0, VMASK, VCMP);
> + }
> + case ISD::SETNE:
> + case ISD::SETONE:
> + case ISD::SETUNE: {
> + SDValue VCMP(CurDAG->getMachineNode(VCMPInst, dl, VecVT, LHS, RHS), 0);
> + return CurDAG->SelectNodeTo(N, PPC::VSEL, VecVT, VMASK, VSET0, VCMP);
> + }
> + case ISD::SETLT:
> + case ISD::SETOLT:
> + case ISD::SETULT: {
> + SDValue VCMP(CurDAG->getMachineNode(VCMPInst, dl, VecVT, RHS, LHS), 0);
> + return CurDAG->SelectNodeTo(N, PPC::VSEL, VecVT, VSET0, VMASK, VCMP);
> + }
> + case ISD::SETGT:
> + case ISD::SETOGT:
> + case ISD::SETUGT: {
> + SDValue VCMP(CurDAG->getMachineNode(VCMPInst, dl, VecVT, LHS, RHS), 0);
> + return CurDAG->SelectNodeTo(N, PPC::VSEL, VecVT, VSET0, VMASK, VCMP);
> + }
> + case ISD::SETGE:
> + case ISD::SETOGE:
> + case ISD::SETUGE: {
> + SDValue VCMP(CurDAG->getMachineNode(VCMPInst, dl, VecVT, LHS, RHS), 0);
> + // Small optimization: Altivec provides a 'Vector Compare Greater Than
> + // or Equal To' instruction (vcmpgefp), so in this case there is no
> + // need for extra logic for the equal compare.
> + if (!VecVT.getSimpleVT().isFloatingPoint()) {
> + unsigned int VCMPEQInst = getVCMPEQInst(VT);
> + SDValue VCMPEQ(CurDAG->getMachineNode(VCMPEQInst, dl, VecVT, LHS, RHS), 0);
> + VCMP = SDValue(CurDAG->getMachineNode(PPC::VOR, dl, VecVT, VCMP, VCMPEQ), 0);
> + }
> + return CurDAG->SelectNodeTo(N, PPC::VSEL, VecVT, VSET0, VMASK, VCMP);
> + }
> + case ISD::SETLE:
> + case ISD::SETOLE:
> + case ISD::SETULE: {
> + SDValue VCMP(CurDAG->getMachineNode(VCMPInst, dl, VecVT, RHS, LHS), 0);
> + unsigned int VCMPEQInst = getVCMPEQInst(VT);
> + SDValue VCMPEQ(CurDAG->getMachineNode(VCMPEQInst, dl, VecVT, LHS, RHS), 0);
> + VCMP = SDValue(CurDAG->getMachineNode(PPC::VOR, dl, VecVT, VCMP, VCMPEQ), 0);
> + return CurDAG->SelectNodeTo(N, PPC::VSEL, VecVT, VSET0, VMASK, VCMP);
> + }
> + default:
> + llvm_unreachable("Invalid vector compare type: should be expanded by legalize");
> + }
> }
>
> bool Inv;
> diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
> index ba58c3e..9f406ae 100644
> --- a/lib/Target/PowerPC/PPCInstrAltivec.td
> +++ b/lib/Target/PowerPC/PPCInstrAltivec.td
> @@ -671,7 +671,7 @@ def : Pat<(v4i32 (vnot_ppc (or VRRC:$A, VRRC:$B))),
> def : Pat<(v4i32 (and VRRC:$A, (vnot_ppc VRRC:$B))),
> (VANDC VRRC:$A, VRRC:$B)>;
>
> -def : Pat<(fmul VRRC:$vA, VRRC:$vB),
> +def : Pat<(v4f32 (fmul VRRC:$vA, VRRC:$vB)),
> (VMADDFP VRRC:$vA, VRRC:$vB, (v4i32 (V_SET0)))>;
This is incorrect for an input of -0.0. You should add -0.0 to produce the correct results, at least without -ffast-math or similar choice. -0.0 can be materialized with a vslw of -1 by -1.
-----------------------------------------
Alex Rosenberg
Manager, Platform Architecture
Sony Computer Entertainment America, Inc.
More information about the llvm-commits
mailing list