PATCH: SelectionDAG: More efficient legalization of unsupported SELECT_CC/SETCC condition codes + Fix NAN handling on R600
Tom Stellard
tom at stellard.net
Wed Sep 18 08:32:53 PDT 2013
Ping.
On Thu, Sep 12, 2013 at 02:54:31PM -0700, Tom Stellard wrote:
> Hi,
>
> The first three patches improve the legalization of SELECT_CC and SETCC
> nodes with illegal conditions. The current code legalizes
> conditions by lowering to AND/OR opcodes, but for targets like R600
> which support a limited number of conditions (oeq, oge, ogt, and une)
> it is usually better to legalize by either swapping the arguments or
> inverting the condition and swapping the true / false values.
>
> The last patch fixes the handling of NAN in comparison instructions for
> the R600 target.
>
> Please Review.
>
> -Tom
> From c6088b1cad0869d878613a1c4c71e774bd45874a Mon Sep 17 00:00:00 2001
> From: Tom Stellard <thomas.stellard at amd.com>
> Date: Wed, 11 Sep 2013 09:41:21 -0700
> Subject: [PATCH 1/4] SelectionDAG: Clean up LegalizeSetCCCondCode() function
>
> Interpreting the results of this function is not very intuitive, so I
> cleaned it up to make it more clear whether or not a SETCC op was
> legalized and how it was legalized (either by swapping LHS and RHS or
> replacing with AND/OR).
>
> This patch does change functionality in the LHS and RHS swapping case,
> but unfortunately there are no in-tree tests for this. However, this
> patch is a prerequisite for R600 to take advantage of the LHS and RHS
> swapping, so tests will be added in subsequent commits.
> ---
> lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 77 +++++++++++++++++++++-----------
> 1 file changed, 51 insertions(+), 26 deletions(-)
>
> diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> index a252796..c58b8fd 100644
> --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> @@ -95,7 +95,7 @@ private:
> SDValue N1, SDValue N2,
> ArrayRef<int> Mask) const;
>
> - void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
> + bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
> SDLoc dl);
>
> SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
> @@ -1596,9 +1596,14 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
> }
>
> /// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and
> -/// condition code CC on the current target. This routine expands SETCC with
> -/// illegal condition code into AND / OR of multiple SETCC values.
> -void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
> +/// condition code CC on the current target.
> +/// If the SETCC has been legalized using AND / OR, then the legalized node
> +/// will be stored in LHS and RHS and CC will be set to SDValue().
> +/// If the SETCC has been legalized by using getSetCCSwappedOperands(),
> +/// then the values of LHS and RHS will be swapped and CC will be set to the
> +/// new condition.
> +/// \returns true if the SetCC has been legalized, false if it hasn't.
> +bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
> SDValue &LHS, SDValue &RHS,
> SDValue &CC,
> SDLoc dl) {
> @@ -1659,10 +1664,9 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
> // different manner of supporting expanding these cases.
> llvm_unreachable("Don't know how to expand this condition!");
> }
> - LHS = DAG.getSetCC(dl, VT, RHS, LHS, InvCC);
> - RHS = SDValue();
> - CC = SDValue();
> - return;
> + std::swap(LHS, RHS);
> + CC = DAG.getCondCode(InvCC);
> + return true;
> }
>
> SDValue SetCC1, SetCC2;
> @@ -1679,9 +1683,10 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
> LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
> RHS = SDValue();
> CC = SDValue();
> - break;
> + return true;
> }
> }
> + return false;
> }
>
> /// EmitStackConvert - Emit a store/load combination to the stack. This stores
> @@ -3620,10 +3625,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
> Tmp1 = Node->getOperand(0);
> Tmp2 = Node->getOperand(1);
> Tmp3 = Node->getOperand(2);
> - LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl);
> + bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2,
> + Tmp3, dl);
> +
> + if (Legalized) {
> + // If we exapanded the SETCC by swapping LHS and RHS, create a new SETCC
> + // node..
> + if (Tmp3.getNode())
> + Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
> + Tmp1, Tmp2, Tmp3);
>
> - // If we expanded the SETCC into an AND/OR, return the new node
> - if (Tmp2.getNode() == 0) {
> Results.push_back(Tmp1);
> break;
> }
> @@ -3654,14 +3665,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
> Tmp4 = Node->getOperand(3); // False
> SDValue CC = Node->getOperand(4);
>
> - LegalizeSetCCCondCode(getSetCCResultType(Tmp1.getValueType()),
> - Tmp1, Tmp2, CC, dl);
> + bool Legalized = LegalizeSetCCCondCode(
> + getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, dl);
>
> - assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!");
> - Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
> - CC = DAG.getCondCode(ISD::SETNE);
> - Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
> - Tmp3, Tmp4, CC);
> + assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
> + // If we exapanded the SETCC by swapping LHS and RHS, create a new SELECT_CC
> + // node and return it.
> + if (CC.getNode()) {
> + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0),
> + Tmp1, Tmp2, Tmp3, Tmp4, CC);
> + } else {
> + Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
> + CC = DAG.getCondCode(ISD::SETNE);
> + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
> + Tmp3, Tmp4, CC);
> + }
> Results.push_back(Tmp1);
> break;
> }
> @@ -3671,14 +3689,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
> Tmp3 = Node->getOperand(3); // RHS
> Tmp4 = Node->getOperand(1); // CC
>
> - LegalizeSetCCCondCode(getSetCCResultType(Tmp2.getValueType()),
> - Tmp2, Tmp3, Tmp4, dl);
> + bool Legalized = LegalizeSetCCCondCode(getSetCCResultType(
> + Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, dl);
> + assert(Legalized && "Can't legalize BR_CC with legal condition!");
>
> - assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
> - Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
> - Tmp4 = DAG.getCondCode(ISD::SETNE);
> - Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2,
> - Tmp3, Node->getOperand(4));
> + // If we exapanded the SETCC by swapping LHS and RHS, create a new BR_CC
> + // node.
> + if (Tmp4.getNode()) {
> + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1,
> + Tmp4, Tmp2, Tmp3, Node->getOperand(4));
> + } else {
> + Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
> + Tmp4 = DAG.getCondCode(ISD::SETNE);
> + Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2,
> + Tmp3, Node->getOperand(4));
> + }
> Results.push_back(Tmp1);
> break;
> }
> --
> 1.7.11.4
>
> From b591d35726cc34cba57e59a98e07b552f990e2ef Mon Sep 17 00:00:00 2001
> From: Tom Stellard <thomas.stellard at amd.com>
> Date: Tue, 10 Sep 2013 19:32:55 -0700
> Subject: [PATCH 2/4] SelectionDAG: Try to expand all condition codes using
> getCCSwappedOperands()
>
> This is useful for targets like R600, which only support GT, GE, NE, and EQ
> condition codes, as it removes the need to handle unsupported condition
> codes in target-specific code.
>
> There are no tests with this commit, but R600 has been updated to take
> advantage of this new feature, so its existing selectcc tests are now
> testing the swapped operands path.
> ---
> lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 19 +++----
> lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 7 ++-
> lib/CodeGen/SelectionDAG/TargetLowering.cpp | 25 ++++++---
> lib/Target/R600/R600ISelLowering.cpp | 86 +++++++++++++++++++----------
> lib/Target/R600/R600Instructions.td | 48 ----------------
> 5 files changed, 91 insertions(+), 94 deletions(-)
>
> diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> index c58b8fd..f6406b2 100644
> --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> @@ -1615,8 +1615,13 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
> // Nothing to do.
> break;
> case TargetLowering::Expand: {
> + ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
> + if (TLI.isCondCodeLegal(InvCC, OpVT)) {
> + std::swap(LHS, RHS);
> + CC = DAG.getCondCode(InvCC);
> + return true;
> + }
> ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
> - ISD::CondCode InvCC = ISD::SETCC_INVALID;
> unsigned Opc = 0;
> switch (CCCode) {
> default: llvm_unreachable("Don't know how to expand this condition!");
> @@ -1658,15 +1663,9 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
> case ISD::SETLT:
> case ISD::SETNE:
> case ISD::SETEQ:
> - InvCC = ISD::getSetCCSwappedOperands(CCCode);
> - if (TLI.getCondCodeAction(InvCC, OpVT) == TargetLowering::Expand) {
> - // We only support using the inverted operation and not a
> - // different manner of supporting expanding these cases.
> - llvm_unreachable("Don't know how to expand this condition!");
> - }
> - std::swap(LHS, RHS);
> - CC = DAG.getCondCode(InvCC);
> - return true;
> + // We only support using the inverted operation, which is computed above
> + // and not a different manner of supporting expanding these cases.
> + llvm_unreachable("Don't know how to expand this condition!");
> }
>
> SDValue SetCC1, SetCC2;
> diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> index 845b9a3..f7836d3 100644
> --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> @@ -1645,7 +1645,12 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
> }
> } else {
> // Ensure that the constant occurs on the RHS.
> - return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond));
> + ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond);
> + MVT CompVT = N1.getValueType().getSimpleVT();
> + if (!TM.getTargetLowering()->isCondCodeLegal(SwappedCond, CompVT))
> + return SDValue();
> +
> + return getSetCC(dl, VT, N2, N1, SwappedCond);
> }
> }
>
> diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
> index f2199d7..2d70d7d 100644
> --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
> +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
> @@ -1089,8 +1089,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
>
> // Ensure that the constant occurs on the RHS, and fold constant
> // comparisons.
> - if (isa<ConstantSDNode>(N0.getNode()))
> - return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
> + ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
> + if (isa<ConstantSDNode>(N0.getNode()) &&
> + (DCI.isBeforeLegalizeOps() ||
> + isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
> + return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
>
> if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
> const APInt &C1 = N1C->getAPIntValue();
> @@ -1329,7 +1332,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
> ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
> CC = ISD::getSetCCInverse(CC,
> N0.getOperand(0).getValueType().isInteger());
> - return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
> + if (DCI.isBeforeLegalizeOps() ||
> + isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
> + return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
> }
>
> if ((N0.getOpcode() == ISD::XOR ||
> @@ -1766,16 +1771,22 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
> if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) {
> if (ValueHasExactlyOneBitSet(N1, DAG)) {
> Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
> - SDValue Zero = DAG.getConstant(0, N1.getValueType());
> - return DAG.getSetCC(dl, VT, N0, Zero, Cond);
> + if (DCI.isBeforeLegalizeOps() ||
> + isCondCodeLegal(Cond, N0.getSimpleValueType())) {
> + SDValue Zero = DAG.getConstant(0, N1.getValueType());
> + return DAG.getSetCC(dl, VT, N0, Zero, Cond);
> + }
> }
> }
> if (N1.getOpcode() == ISD::AND)
> if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) {
> if (ValueHasExactlyOneBitSet(N0, DAG)) {
> Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
> - SDValue Zero = DAG.getConstant(0, N0.getValueType());
> - return DAG.getSetCC(dl, VT, N1, Zero, Cond);
> + if (DCI.isBeforeLegalizeOps() ||
> + isCondCodeLegal(Cond, N1.getSimpleValueType())) {
> + SDValue Zero = DAG.getConstant(0, N0.getValueType());
> + return DAG.getSetCC(dl, VT, N1, Zero, Cond);
> + }
> }
> }
> }
> diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
> index ff9ba52..778ee59 100644
> --- a/lib/Target/R600/R600ISelLowering.cpp
> +++ b/lib/Target/R600/R600ISelLowering.cpp
> @@ -38,6 +38,18 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
>
> computeRegisterProperties();
>
> + setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
> + setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
> + setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
> + setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
> + setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
> + setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
> +
> + setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
> + setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
> + setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
> + setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
> +
> setOperationAction(ISD::FCOS, MVT::f32, Custom);
> setOperationAction(ISD::FSIN, MVT::f32, Custom);
>
> @@ -841,16 +853,19 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
> //
> // SET* can match the following patterns:
> //
> - // select_cc f32, f32, -1, 0, cc_any
> - // select_cc f32, f32, 1.0f, 0.0f, cc_any
> - // select_cc i32, i32, -1, 0, cc_any
> + // select_cc f32, f32, -1, 0, cc_supported
> + // select_cc f32, f32, 1.0f, 0.0f, cc_supported
> + // select_cc i32, i32, -1, 0, cc_supported
> //
>
> // Move hardware True/False values to the correct operand.
> - if (isHWTrueValue(False) && isHWFalseValue(True)) {
> - ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
> + ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
> + ISD::CondCode InverseCC =
> + ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
> + if (isHWTrueValue(False) && isHWFalseValue(True) &&
> + isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
> std::swap(False, True);
> - CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
> + CC = DAG.getCondCode(InverseCC);
> }
>
> if (isHWTrueValue(True) && isHWFalseValue(False) &&
> @@ -863,14 +878,34 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
> //
> // CND* can match the following patterns:
> //
> - // select_cc f32, 0.0, f32, f32, cc_any
> - // select_cc f32, 0.0, i32, i32, cc_any
> - // select_cc i32, 0, f32, f32, cc_any
> - // select_cc i32, 0, i32, i32, cc_any
> + // select_cc f32, 0.0, f32, f32, cc_supported
> + // select_cc f32, 0.0, i32, i32, cc_supported
> + // select_cc i32, 0, f32, f32, cc_supported
> + // select_cc i32, 0, i32, i32, cc_supported
> //
> - if (isZero(LHS) || isZero(RHS)) {
> - SDValue Cond = (isZero(LHS) ? RHS : LHS);
> - SDValue Zero = (isZero(LHS) ? LHS : RHS);
> +
> + // Try to move the zero value to the RHS
> + if (isZero(LHS)) {
> + ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
> + // Try swapping the operands
> + ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
> + if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
> + std::swap(LHS, RHS);
> + CC = DAG.getCondCode(CCSwapped);
> + } else {
> + // Try inverting the conditon and then swapping the operands
> + ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
> + CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
> + if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
> + std::swap(True, False);
> + std::swap(LHS, RHS);
> + CC = DAG.getCondCode(CCSwapped);
> + }
> + }
> + }
> + if (isZero(RHS)) {
> + SDValue Cond = LHS;
> + SDValue Zero = RHS;
> ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
> if (CompareVT != VT) {
> // Bitcast True / False to the correct types. This will end up being
> @@ -880,20 +915,11 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
> True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
> False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
> }
> - if (isZero(LHS)) {
> - CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
> - }
>
> switch (CCOpcode) {
> case ISD::SETONE:
> case ISD::SETUNE:
> case ISD::SETNE:
> - case ISD::SETULE:
> - case ISD::SETULT:
> - case ISD::SETOLE:
> - case ISD::SETOLT:
> - case ISD::SETLE:
> - case ISD::SETLT:
> CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
> Temp = True;
> True = False;
> @@ -1567,14 +1593,18 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
> ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
> LHSCC = ISD::getSetCCInverse(LHSCC,
> LHS.getOperand(0).getValueType().isInteger());
> - return DAG.getSelectCC(SDLoc(N),
> - LHS.getOperand(0),
> - LHS.getOperand(1),
> - LHS.getOperand(2),
> - LHS.getOperand(3),
> - LHSCC);
> + if (DCI.isBeforeLegalizeOps() ||
> + isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
> + return DAG.getSelectCC(SDLoc(N),
> + LHS.getOperand(0),
> + LHS.getOperand(1),
> + LHS.getOperand(2),
> + LHS.getOperand(3),
> + LHSCC);
> + break;
> }
> }
> + return SDValue();
> }
>
> case AMDGPUISD::EXPORT: {
> diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
> index 24bc6b0..65ea04b 100644
> --- a/lib/Target/R600/R600Instructions.td
> +++ b/lib/Target/R600/R600Instructions.td
> @@ -2324,54 +2324,6 @@ def KIL : Pat <
> (MASK_WRITE (KILLGT (f32 ZERO), $src0))
> >;
>
> -// SGT Reverse args
> -def : Pat <
> - (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LT),
> - (SGT $src1, $src0)
> ->;
> -
> -// SGE Reverse args
> -def : Pat <
> - (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LE),
> - (SGE $src1, $src0)
> ->;
> -
> -// SETGT_DX10 reverse args
> -def : Pat <
> - (selectcc f32:$src0, f32:$src1, -1, 0, COND_LT),
> - (SETGT_DX10 $src1, $src0)
> ->;
> -
> -// SETGE_DX10 reverse args
> -def : Pat <
> - (selectcc f32:$src0, f32:$src1, -1, 0, COND_LE),
> - (SETGE_DX10 $src1, $src0)
> ->;
> -
> -// SETGT_INT reverse args
> -def : Pat <
> - (selectcc i32:$src0, i32:$src1, -1, 0, SETLT),
> - (SETGT_INT $src1, $src0)
> ->;
> -
> -// SETGE_INT reverse args
> -def : Pat <
> - (selectcc i32:$src0, i32:$src1, -1, 0, SETLE),
> - (SETGE_INT $src1, $src0)
> ->;
> -
> -// SETGT_UINT reverse args
> -def : Pat <
> - (selectcc i32:$src0, i32:$src1, -1, 0, SETULT),
> - (SETGT_UINT $src1, $src0)
> ->;
> -
> -// SETGE_UINT reverse args
> -def : Pat <
> - (selectcc i32:$src0, i32:$src1, -1, 0, SETULE),
> - (SETGE_UINT $src1, $src0)
> ->;
> -
> // The next two patterns are special cases for handling 'true if ordered' and
> // 'true if unordered' conditionals. The assumption here is that the behavior of
> // SETE and SNE conforms to the Direct3D 10 rules for floating point values
> --
> 1.7.11.4
>
> From d62adcc8de93d4986535c2cd4ef5113a2afdc676 Mon Sep 17 00:00:00 2001
> From: Tom Stellard <thomas.stellard at amd.com>
> Date: Thu, 12 Sep 2013 08:18:29 -0700
> Subject: [PATCH 3/4] SelectionDAG: Improve legalization of SELECT_CC with
> illegal condition codes
>
> SelectionDAG will now attempt to invert an illegal condition in order to
> find a legal one, and if that doesn't work, it will attempt to swap the
> operands using the inverted condition.
>
> There are no new test cases for this, but a number of the existing R600
> tests hit this path.
> ---
> lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 50 +++++++++++++++++++++++---------
> lib/Target/R600/R600ISelLowering.cpp | 16 +++++++---
> 2 files changed, 49 insertions(+), 17 deletions(-)
>
> diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> index f6406b2..e78caba 100644
> --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> @@ -3664,20 +3664,44 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
> Tmp4 = Node->getOperand(3); // False
> SDValue CC = Node->getOperand(4);
>
> - bool Legalized = LegalizeSetCCCondCode(
> - getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, dl);
> -
> - assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
> - // If we exapanded the SETCC by swapping LHS and RHS, create a new SELECT_CC
> - // node and return it.
> - if (CC.getNode()) {
> - Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0),
> - Tmp1, Tmp2, Tmp3, Tmp4, CC);
> + bool Legalized = false;
> + // Try to legalize by inverting the condition. This is for targets that
> + // might support an ordered version of a condition, but not the unordered
> + // version (or vice versa).
> + ISD::CondCode InvCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
> + Tmp1.getValueType().isInteger());
> + if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) {
> + // Use the new condition code and swap true and false
> + Legalized = true;
> + Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC);
> } else {
> - Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
> - CC = DAG.getCondCode(ISD::SETNE);
> - Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
> - Tmp3, Tmp4, CC);
> + // If The inverse is not legal, then try to swap the arguments using
> + // the inverse condition code.
> + ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InvCC);
> + if (TLI.isCondCodeLegal(SwapInvCC, Tmp1.getSimpleValueType())) {
> + // The swapped inverse condition is legal, so swap true and false,
> + // lhs and rhs.
> + Legalized = true;
> + Tmp1 = DAG.getSelectCC(dl, Tmp2, Tmp1, Tmp4, Tmp3, SwapInvCC);
> + }
> + }
> +
> + if (!Legalized) {
> + Legalized = LegalizeSetCCCondCode(
> + getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, dl);
> +
> + assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
> + // If we exapanded the SETCC by swapping LHS and RHS, create a new
> + // SELECT_CC node.
> + if (CC.getNode()) {
> + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0),
> + Tmp1, Tmp2, Tmp3, Tmp4, CC);
> + } else {
> + Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
> + CC = DAG.getCondCode(ISD::SETNE);
> + Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
> + Tmp3, Tmp4, CC);
> + }
> }
> Results.push_back(Tmp1);
> break;
> diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
> index 778ee59..6a02bdb 100644
> --- a/lib/Target/R600/R600ISelLowering.cpp
> +++ b/lib/Target/R600/R600ISelLowering.cpp
> @@ -862,10 +862,18 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
> ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
> ISD::CondCode InverseCC =
> ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
> - if (isHWTrueValue(False) && isHWFalseValue(True) &&
> - isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
> - std::swap(False, True);
> - CC = DAG.getCondCode(InverseCC);
> + if (isHWTrueValue(False) && isHWFalseValue(True)) {
> + if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
> + std::swap(False, True);
> + CC = DAG.getCondCode(InverseCC);
> + } else {
> + ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
> + if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
> + std::swap(False, True);
> + std::swap(LHS, RHS);
> + CC = DAG.getCondCode(SwapInvCC);
> + }
> + }
> }
>
> if (isHWTrueValue(True) && isHWFalseValue(False) &&
> --
> 1.7.11.4
>
> From 859f3f6346bb8b5d7bfb181cca9f103ac18f116f Mon Sep 17 00:00:00 2001
> From: Tom Stellard <thomas.stellard at amd.com>
> Date: Thu, 12 Sep 2013 08:18:38 -0700
> Subject: [PATCH 4/4] R600: Fix handling of NAN in comparison instructions
>
> We were completely ignoring the unordered/ordered attributes of condition
> codes and also incorrectly lowering seto and setuo.
> ---
> lib/Target/R600/AMDGPUInstructions.td | 21 ++++++++++++
> lib/Target/R600/R600ISelLowering.cpp | 9 +++++-
> lib/Target/R600/R600Instructions.td | 54 +++++++------------------------
> test/CodeGen/R600/fmax.ll | 2 +-
> test/CodeGen/R600/kcache-fold.ll | 16 +++++-----
> test/CodeGen/R600/pv.ll | 2 +-
> test/CodeGen/R600/selectcc-opt.ll | 4 +--
> test/CodeGen/R600/set-dx10.ll | 60 +++++++++++++++++------------------
> test/CodeGen/R600/unsupported-cc.ll | 60 +++++++++++++++++++++++++----------
> test/CodeGen/R600/vselect.ll | 4 +--
> 10 files changed, 127 insertions(+), 105 deletions(-)
>
> diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
> index e30abc0..5778a8c 100644
> --- a/lib/Target/R600/AMDGPUInstructions.td
> +++ b/lib/Target/R600/AMDGPUInstructions.td
> @@ -43,12 +43,23 @@ def COND_EQ : PatLeaf <
> case ISD::SETEQ: return true;}}}]
> >;
>
> +def COND_OEQ : PatLeaf <
> + (cond),
> + [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
> +>;
> +
> def COND_NE : PatLeaf <
> (cond),
> [{switch(N->get()){{default: return false;
> case ISD::SETONE: case ISD::SETUNE:
> case ISD::SETNE: return true;}}}]
> >;
> +
> +def COND_UNE : PatLeaf <
> + (cond),
> + [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
> +>;
> +
> def COND_GT : PatLeaf <
> (cond),
> [{switch(N->get()){{default: return false;
> @@ -56,6 +67,11 @@ def COND_GT : PatLeaf <
> case ISD::SETGT: return true;}}}]
> >;
>
> +def COND_OGT : PatLeaf <
> + (cond),
> + [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
> +>;
> +
> def COND_GE : PatLeaf <
> (cond),
> [{switch(N->get()){{default: return false;
> @@ -63,6 +79,11 @@ def COND_GE : PatLeaf <
> case ISD::SETGE: return true;}}}]
> >;
>
> +def COND_OGE : PatLeaf <
> + (cond),
> + [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}]
> +>;
> +
> def COND_LT : PatLeaf <
> (cond),
> [{switch(N->get()){{default: return false;
> diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
> index 6a02bdb..a8e8a77 100644
> --- a/lib/Target/R600/R600ISelLowering.cpp
> +++ b/lib/Target/R600/R600ISelLowering.cpp
> @@ -38,10 +38,17 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
>
> computeRegisterProperties();
>
> - setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
> + // Set condition code actions
> + setCondCodeAction(ISD::SETO, MVT::f32, Expand);
> + setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
> setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
> + setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
> setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
> setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
> + setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
> + setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
> + setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
> + setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
> setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
> setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
>
> diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
> index 65ea04b..e92385d 100644
> --- a/lib/Target/R600/R600Instructions.td
> +++ b/lib/Target/R600/R600Instructions.td
> @@ -689,42 +689,42 @@ def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
> // XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
> def SETE : R600_2OP <
> 0x08, "SETE",
> - [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_EQ))]
> + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OEQ))]
> >;
>
> def SGT : R600_2OP <
> 0x09, "SETGT",
> - [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GT))]
> + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGT))]
> >;
>
> def SGE : R600_2OP <
> 0xA, "SETGE",
> - [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GE))]
> + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGE))]
> >;
>
> def SNE : R600_2OP <
> 0xB, "SETNE",
> - [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_NE))]
> + [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_UNE))]
> >;
>
> def SETE_DX10 : R600_2OP <
> 0xC, "SETE_DX10",
> - [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_EQ))]
> + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OEQ))]
> >;
>
> def SETGT_DX10 : R600_2OP <
> 0xD, "SETGT_DX10",
> - [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GT))]
> + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGT))]
> >;
>
> def SETGE_DX10 : R600_2OP <
> 0xE, "SETGE_DX10",
> - [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GE))]
> + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGE))]
> >;
>
> def SETNE_DX10 : R600_2OP <
> 0xF, "SETNE_DX10",
> - [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_NE))]
> + [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_UNE))]
> >;
>
> def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
> @@ -920,19 +920,19 @@ class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
>
> class CNDE_Common <bits<5> inst> : R600_3OP <
> inst, "CNDE",
> - [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_EQ))]
> + [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))]
> >;
>
> class CNDGT_Common <bits<5> inst> : R600_3OP <
> inst, "CNDGT",
> - [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GT))]
> + [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGT))]
> > {
> let Itinerary = VecALU;
> }
>
> class CNDGE_Common <bits<5> inst> : R600_3OP <
> inst, "CNDGE",
> - [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GE))]
> + [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGE))]
> > {
> let Itinerary = VecALU;
> }
> @@ -2324,38 +2324,6 @@ def KIL : Pat <
> (MASK_WRITE (KILLGT (f32 ZERO), $src0))
> >;
>
> -// The next two patterns are special cases for handling 'true if ordered' and
> -// 'true if unordered' conditionals. The assumption here is that the behavior of
> -// SETE and SNE conforms to the Direct3D 10 rules for floating point values
> -// described here:
> -// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit
> -// We assume that SETE returns false when one of the operands is NAN and
> -// SNE returns true when on of the operands is NAN
> -
> -//SETE - 'true if ordered'
> -def : Pat <
> - (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETO),
> - (SETE $src0, $src1)
> ->;
> -
> -//SETE_DX10 - 'true if ordered'
> -def : Pat <
> - (selectcc f32:$src0, f32:$src1, -1, 0, SETO),
> - (SETE_DX10 $src0, $src1)
> ->;
> -
> -//SNE - 'true if unordered'
> -def : Pat <
> - (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETUO),
> - (SNE $src0, $src1)
> ->;
> -
> -//SETNE_DX10 - 'true if ordered'
> -def : Pat <
> - (selectcc f32:$src0, f32:$src1, -1, 0, SETUO),
> - (SETNE_DX10 $src0, $src1)
> ->;
> -
> def : Extract_Element <f32, v4f32, 0, sub0>;
> def : Extract_Element <f32, v4f32, 1, sub1>;
> def : Extract_Element <f32, v4f32, 2, sub2>;
> diff --git a/test/CodeGen/R600/fmax.ll b/test/CodeGen/R600/fmax.ll
> index 8b704e5..be25c9c 100644
> --- a/test/CodeGen/R600/fmax.ll
> +++ b/test/CodeGen/R600/fmax.ll
> @@ -5,7 +5,7 @@
> define void @test() {
> %r0 = call float @llvm.R600.load.input(i32 0)
> %r1 = call float @llvm.R600.load.input(i32 1)
> - %r2 = fcmp uge float %r0, %r1
> + %r2 = fcmp oge float %r0, %r1
> %r3 = select i1 %r2, float %r0, float %r1
> call void @llvm.AMDGPU.store.output(float %r3, i32 0)
> ret void
> diff --git a/test/CodeGen/R600/kcache-fold.ll b/test/CodeGen/R600/kcache-fold.ll
> index 8bdb050..0baa3cd 100644
> --- a/test/CodeGen/R600/kcache-fold.ll
> +++ b/test/CodeGen/R600/kcache-fold.ll
> @@ -10,7 +10,7 @@ main_body:
> %3 = extractelement <4 x float> %2, i32 0
> %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
> %5 = extractelement <4 x float> %4, i32 0
> - %6 = fcmp ult float %1, 0.000000e+00
> + %6 = fcmp ogt float %1, 0.000000e+00
> %7 = select i1 %6, float %3, float %5
> %8 = load <4 x float> addrspace(8)* null
> %9 = extractelement <4 x float> %8, i32 1
> @@ -18,7 +18,7 @@ main_body:
> %11 = extractelement <4 x float> %10, i32 1
> %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
> %13 = extractelement <4 x float> %12, i32 1
> - %14 = fcmp ult float %9, 0.000000e+00
> + %14 = fcmp ogt float %9, 0.000000e+00
> %15 = select i1 %14, float %11, float %13
> %16 = load <4 x float> addrspace(8)* null
> %17 = extractelement <4 x float> %16, i32 2
> @@ -26,7 +26,7 @@ main_body:
> %19 = extractelement <4 x float> %18, i32 2
> %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
> %21 = extractelement <4 x float> %20, i32 2
> - %22 = fcmp ult float %17, 0.000000e+00
> + %22 = fcmp ogt float %17, 0.000000e+00
> %23 = select i1 %22, float %19, float %21
> %24 = load <4 x float> addrspace(8)* null
> %25 = extractelement <4 x float> %24, i32 3
> @@ -34,7 +34,7 @@ main_body:
> %27 = extractelement <4 x float> %26, i32 3
> %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
> %29 = extractelement <4 x float> %28, i32 3
> - %30 = fcmp ult float %25, 0.000000e+00
> + %30 = fcmp ogt float %25, 0.000000e+00
> %31 = select i1 %30, float %27, float %29
> %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
> %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
> @@ -58,7 +58,7 @@ main_body:
> %3 = extractelement <4 x float> %2, i32 0
> %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
> %5 = extractelement <4 x float> %4, i32 1
> - %6 = fcmp ult float %1, 0.000000e+00
> + %6 = fcmp ogt float %1, 0.000000e+00
> %7 = select i1 %6, float %3, float %5
> %8 = load <4 x float> addrspace(8)* null
> %9 = extractelement <4 x float> %8, i32 1
> @@ -66,7 +66,7 @@ main_body:
> %11 = extractelement <4 x float> %10, i32 0
> %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
> %13 = extractelement <4 x float> %12, i32 1
> - %14 = fcmp ult float %9, 0.000000e+00
> + %14 = fcmp ogt float %9, 0.000000e+00
> %15 = select i1 %14, float %11, float %13
> %16 = load <4 x float> addrspace(8)* null
> %17 = extractelement <4 x float> %16, i32 2
> @@ -74,7 +74,7 @@ main_body:
> %19 = extractelement <4 x float> %18, i32 3
> %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
> %21 = extractelement <4 x float> %20, i32 2
> - %22 = fcmp ult float %17, 0.000000e+00
> + %22 = fcmp ogt float %17, 0.000000e+00
> %23 = select i1 %22, float %19, float %21
> %24 = load <4 x float> addrspace(8)* null
> %25 = extractelement <4 x float> %24, i32 3
> @@ -82,7 +82,7 @@ main_body:
> %27 = extractelement <4 x float> %26, i32 3
> %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
> %29 = extractelement <4 x float> %28, i32 2
> - %30 = fcmp ult float %25, 0.000000e+00
> + %30 = fcmp ogt float %25, 0.000000e+00
> %31 = select i1 %30, float %27, float %29
> %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
> %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
> diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll
> index 6e0b744..6d9396c 100644
> --- a/test/CodeGen/R600/pv.ll
> +++ b/test/CodeGen/R600/pv.ll
> @@ -1,7 +1,7 @@
> ; RUN: llc < %s -march=r600 | FileCheck %s
>
> ;CHECK: DOT4 * T{{[0-9]\.W}} (MASKED)
> -;CHECK: CNDGE T{{[0-9].[XYZW]}}, PV.X
> +;CHECK: MAX T{{[0-9].[XYZW]}}, 0.0, PV.X
>
> define void @main() #0 {
> main_body:
> diff --git a/test/CodeGen/R600/selectcc-opt.ll b/test/CodeGen/R600/selectcc-opt.ll
> index 7e2d559..834c030 100644
> --- a/test/CodeGen/R600/selectcc-opt.ll
> +++ b/test/CodeGen/R600/selectcc-opt.ll
> @@ -6,7 +6,7 @@
>
> define void @test_a(i32 addrspace(1)* %out, float %in) {
> entry:
> - %0 = fcmp ult float %in, 0.000000e+00
> + %0 = fcmp olt float %in, 0.000000e+00
> %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
> %2 = fsub float -0.000000e+00, %1
> %3 = fptosi float %2 to i32
> @@ -34,7 +34,7 @@ ENDIF:
> ; CHECK-NEXT: ALU clause starting
> define void @test_b(i32 addrspace(1)* %out, float %in) {
> entry:
> - %0 = fcmp ult float %in, 0.0
> + %0 = fcmp olt float %in, 0.0
> %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
> %2 = fsub float -0.000000e+00, %1
> %3 = fptosi float %2 to i32
> diff --git a/test/CodeGen/R600/set-dx10.ll b/test/CodeGen/R600/set-dx10.ll
> index bdc2ff4..5c7d499 100644
> --- a/test/CodeGen/R600/set-dx10.ll
> +++ b/test/CodeGen/R600/set-dx10.ll
> @@ -30,13 +30,13 @@ entry:
> ret void
> }
>
> -; CHECK: @fcmp_ueq_select_fptosi
> +; CHECK: @fcmp_oeq_select_fptosi
> ; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
> ; CHECK-NEXT: LSHR
> ; CHECK-NEXT: 1084227584(5.000000e+00)
> -define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) {
> +define void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) {
> entry:
> - %0 = fcmp ueq float %in, 5.0
> + %0 = fcmp oeq float %in, 5.0
> %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
> %2 = fsub float -0.000000e+00, %1
> %3 = fptosi float %2 to i32
> @@ -44,25 +44,25 @@ entry:
> ret void
> }
>
> -; CHECK: @fcmp_ueq_select_i32
> +; CHECK: @fcmp_oeq_select_i32
> ; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
> ; CHECK-NEXT: LSHR
> ; CHECK-NEXT: 1084227584(5.000000e+00)
> -define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) {
> +define void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) {
> entry:
> - %0 = fcmp ueq float %in, 5.0
> + %0 = fcmp oeq float %in, 5.0
> %1 = select i1 %0, i32 -1, i32 0
> store i32 %1, i32 addrspace(1)* %out
> ret void
> }
>
> -; CHECK: @fcmp_ugt_select_fptosi
> +; CHECK: @fcmp_ogt_select_fptosi
> ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
> ; CHECK-NEXT: LSHR
> ; CHECK-NEXT: 1084227584(5.000000e+00)
> -define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) {
> +define void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) {
> entry:
> - %0 = fcmp ugt float %in, 5.0
> + %0 = fcmp ogt float %in, 5.0
> %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
> %2 = fsub float -0.000000e+00, %1
> %3 = fptosi float %2 to i32
> @@ -70,25 +70,25 @@ entry:
> ret void
> }
>
> -; CHECK: @fcmp_ugt_select_i32
> +; CHECK: @fcmp_ogt_select_i32
> ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
> ; CHECK-NEXT: LSHR
> ; CHECK-NEXT: 1084227584(5.000000e+00)
> -define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) {
> +define void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) {
> entry:
> - %0 = fcmp ugt float %in, 5.0
> + %0 = fcmp ogt float %in, 5.0
> %1 = select i1 %0, i32 -1, i32 0
> store i32 %1, i32 addrspace(1)* %out
> ret void
> }
>
> -; CHECK: @fcmp_uge_select_fptosi
> +; CHECK: @fcmp_oge_select_fptosi
> ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
> ; CHECK-NEXT: LSHR
> ; CHECK-NEXT: 1084227584(5.000000e+00)
> -define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) {
> +define void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) {
> entry:
> - %0 = fcmp uge float %in, 5.0
> + %0 = fcmp oge float %in, 5.0
> %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
> %2 = fsub float -0.000000e+00, %1
> %3 = fptosi float %2 to i32
> @@ -96,25 +96,25 @@ entry:
> ret void
> }
>
> -; CHECK: @fcmp_uge_select_i32
> +; CHECK: @fcmp_oge_select_i32
> ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
> ; CHECK-NEXT: LSHR
> ; CHECK-NEXT: 1084227584(5.000000e+00)
> -define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) {
> +define void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) {
> entry:
> - %0 = fcmp uge float %in, 5.0
> + %0 = fcmp oge float %in, 5.0
> %1 = select i1 %0, i32 -1, i32 0
> store i32 %1, i32 addrspace(1)* %out
> ret void
> }
>
> -; CHECK: @fcmp_ule_select_fptosi
> +; CHECK: @fcmp_ole_select_fptosi
> ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
> ; CHECK-NEXT: LSHR
> ; CHECK-NEXT: 1084227584(5.000000e+00)
> -define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) {
> +define void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) {
> entry:
> - %0 = fcmp ule float %in, 5.0
> + %0 = fcmp ole float %in, 5.0
> %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
> %2 = fsub float -0.000000e+00, %1
> %3 = fptosi float %2 to i32
> @@ -122,25 +122,25 @@ entry:
> ret void
> }
>
> -; CHECK: @fcmp_ule_select_i32
> +; CHECK: @fcmp_ole_select_i32
> ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
> ; CHECK-NEXT: LSHR
> ; CHECK-NEXT: 1084227584(5.000000e+00)
> -define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) {
> +define void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) {
> entry:
> - %0 = fcmp ule float %in, 5.0
> + %0 = fcmp ole float %in, 5.0
> %1 = select i1 %0, i32 -1, i32 0
> store i32 %1, i32 addrspace(1)* %out
> ret void
> }
>
> -; CHECK: @fcmp_ult_select_fptosi
> +; CHECK: @fcmp_olt_select_fptosi
> ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
> ; CHECK-NEXT: LSHR
> ; CHECK-NEXT: 1084227584(5.000000e+00)
> -define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) {
> +define void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) {
> entry:
> - %0 = fcmp ult float %in, 5.0
> + %0 = fcmp olt float %in, 5.0
> %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
> %2 = fsub float -0.000000e+00, %1
> %3 = fptosi float %2 to i32
> @@ -148,13 +148,13 @@ entry:
> ret void
> }
>
> -; CHECK: @fcmp_ult_select_i32
> +; CHECK: @fcmp_olt_select_i32
> ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
> ; CHECK-NEXT: LSHR
> ; CHECK-NEXT: 1084227584(5.000000e+00)
> -define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) {
> +define void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) {
> entry:
> - %0 = fcmp ult float %in, 5.0
> + %0 = fcmp olt float %in, 5.0
> %1 = select i1 %0, i32 -1, i32 0
> store i32 %1, i32 addrspace(1)* %out
> ret void
> diff --git a/test/CodeGen/R600/unsupported-cc.ll b/test/CodeGen/R600/unsupported-cc.ll
> index d3aa060..f986a02 100644
> --- a/test/CodeGen/R600/unsupported-cc.ll
> +++ b/test/CodeGen/R600/unsupported-cc.ll
> @@ -2,7 +2,7 @@
>
> ; These tests are for condition codes that are not supported by the hardware
>
> -; CHECK: @slt
> +; CHECK-LABEL: @slt
> ; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
> ; CHECK-NEXT: LSHR
> ; CHECK-NEXT: 5(7.006492e-45)
> @@ -14,7 +14,7 @@ entry:
> ret void
> }
>
> -; CHECK: @ult_i32
> +; CHECK-LABEL: @ult_i32
> ; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
> ; CHECK-NEXT: LSHR
> ; CHECK-NEXT: 5(7.006492e-45)
> @@ -26,10 +26,11 @@ entry:
> ret void
> }
>
> -; CHECK: @ult_float
> -; CHECK: SETGT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
> -; CHECK-NEXT: LSHR
> +; CHECK-LABEL: @ult_float
> +; CHECK: SETGE * T{{[0-9]}}.[[CHAN:[XYZW]]], KC0[2].Z, literal.x
> ; CHECK-NEXT: 1084227584(5.000000e+00)
> +; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0
> +; CHECK-NEXT: LSHR *
> define void @ult_float(float addrspace(1)* %out, float %in) {
> entry:
> %0 = fcmp ult float %in, 5.0
> @@ -38,10 +39,22 @@ entry:
> ret void
> }
>
> -; CHECK: @olt
> -; CHECK: SETGT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
> -; CHECK-NEXT: LSHR
> -;CHECK-NEXT: 1084227584(5.000000e+00)
> +; CHECK-LABEL: @ult_float_native
> +; CHECK: SETGE T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
> +; CHECK-NEXT: LSHR *
> +; CHECK-NEXT: 1084227584(5.000000e+00)
> +define void @ult_float_native(float addrspace(1)* %out, float %in) {
> +entry:
> + %0 = fcmp ult float %in, 5.0
> + %1 = select i1 %0, float 0.0, float 1.0
> + store float %1, float addrspace(1)* %out
> + ret void
> +}
> +
> +; CHECK-LABEL: @olt
> +; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
> +; CHECK-NEXT: LSHR *
> +; CHECK-NEXT: 1084227584(5.000000e+00)
> define void @olt(float addrspace(1)* %out, float %in) {
> entry:
> %0 = fcmp olt float %in, 5.0
> @@ -50,7 +63,7 @@ entry:
> ret void
> }
>
> -; CHECK: @sle
> +; CHECK-LABEL: @sle
> ; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
> ; CHECK-NEXT: LSHR
> ; CHECK-NEXT: 6(8.407791e-45)
> @@ -62,7 +75,7 @@ entry:
> ret void
> }
>
> -; CHECK: @ule_i32
> +; CHECK-LABEL: @ule_i32
> ; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
> ; CHECK-NEXT: LSHR
> ; CHECK-NEXT: 6(8.407791e-45)
> @@ -74,10 +87,11 @@ entry:
> ret void
> }
>
> -; CHECK: @ule_float
> -; CHECK: SETGE {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
> -; CHECK-NEXT: LSHR
> +; CHECK-LABEL: @ule_float
> +; CHECK: SETGT * T{{[0-9]}}.[[CHAN:[XYZW]]], KC0[2].Z, literal.x
> ; CHECK-NEXT: 1084227584(5.000000e+00)
> +; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0
> +; CHECK-NEXT: LSHR *
> define void @ule_float(float addrspace(1)* %out, float %in) {
> entry:
> %0 = fcmp ule float %in, 5.0
> @@ -86,9 +100,21 @@ entry:
> ret void
> }
>
> -; CHECK: @ole
> -; CHECK: SETGE {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
> -; CHECK-NEXT: LSHR
> +; CHECK-LABEL: @ule_float_native
> +; CHECK: SETGT T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
> +; CHECK-NEXT: LSHR *
> +; CHECK-NEXT: 1084227584(5.000000e+00)
> +define void @ule_float_native(float addrspace(1)* %out, float %in) {
> +entry:
> + %0 = fcmp ule float %in, 5.0
> + %1 = select i1 %0, float 0.0, float 1.0
> + store float %1, float addrspace(1)* %out
> + ret void
> +}
> +
> +; CHECK-LABEL: @ole
> +; CHECK: SETGE T{{[0-9]\.[XYZW]}}, literal.x, KC0[2].Z
> +; CHECK-NEXT: LSHR *
> ; CHECK-NEXT:1084227584(5.000000e+00)
> define void @ole(float addrspace(1)* %out, float %in) {
> entry:
> diff --git a/test/CodeGen/R600/vselect.ll b/test/CodeGen/R600/vselect.ll
> index 8e9c5b5..ee17e0f 100644
> --- a/test/CodeGen/R600/vselect.ll
> +++ b/test/CodeGen/R600/vselect.ll
> @@ -31,7 +31,7 @@ define void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrs
> entry:
> %0 = load <2 x float> addrspace(1)* %in0
> %1 = load <2 x float> addrspace(1)* %in1
> - %cmp = fcmp one <2 x float> %0, %1
> + %cmp = fcmp une <2 x float> %0, %1
> %result = select <2 x i1> %cmp, <2 x float> %0, <2 x float> %1
> store <2 x float> %result, <2 x float> addrspace(1)* %out
> ret void
> @@ -69,7 +69,7 @@ define void @test_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrs
> entry:
> %0 = load <4 x float> addrspace(1)* %in0
> %1 = load <4 x float> addrspace(1)* %in1
> - %cmp = fcmp one <4 x float> %0, %1
> + %cmp = fcmp une <4 x float> %0, %1
> %result = select <4 x i1> %cmp, <4 x float> %0, <4 x float> %1
> store <4 x float> %result, <4 x float> addrspace(1)* %out
> ret void
> --
> 1.7.11.4
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list