PATCH: SelectionDAG: More efficient legalization of unsupported SELECT_CC/SETCC condition codes + Fix NAN handling on R600

Tom Stellard tom at stellard.net
Wed Sep 18 08:32:53 PDT 2013


Ping.

On Thu, Sep 12, 2013 at 02:54:31PM -0700, Tom Stellard wrote:
> Hi,
> 
> The first three patches improve the legalization of SELECT_CC and SETCC
> nodes with illegal conditions.  The current code legalizes
> conditions by lowering to AND/OR opcodes, but for targets like R600
> which support a limited number of conditions (oeq, oge, ogt, and une)
> it is usually better to legalize by either swapping the arguments or
> inverting the condition and swapping the true / false values.
> 
> The last patch fixes the handling of NAN in comparison instructions for
> the R600 target.
> 
> Please Review.
> 
> -Tom

> From c6088b1cad0869d878613a1c4c71e774bd45874a Mon Sep 17 00:00:00 2001
> From: Tom Stellard <thomas.stellard at amd.com>
> Date: Wed, 11 Sep 2013 09:41:21 -0700
> Subject: [PATCH 1/4] SelectionDAG: Clean up LegalizeSetCCCondCode() function
> 
> Interpreting the results of this function is not very intuitive, so I
> cleaned it up to make it more clear whether or not a SETCC op was
> legalized and how it was legalized (either by swapping LHS and RHS or
> replacing with AND/OR).
> 
> This patch does change functionality in the LHS and RHS swapping case,
> but unfortunately there are no in-tree tests for this.  However, this
> patch is a prerequisite for R600 to take advantage of the LHS and RHS
> swapping, so tests will be added in subsequent commits.
> ---
>  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 77 +++++++++++++++++++++-----------
>  1 file changed, 51 insertions(+), 26 deletions(-)
> 
> diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> index a252796..c58b8fd 100644
> --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> @@ -95,7 +95,7 @@ private:
>                                       SDValue N1, SDValue N2,
>                                       ArrayRef<int> Mask) const;
>  
> -  void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
> +  bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
>                               SDLoc dl);
>  
>    SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
> @@ -1596,9 +1596,14 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
>  }
>  
>  /// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and
> -/// condition code CC on the current target. This routine expands SETCC with
> -/// illegal condition code into AND / OR of multiple SETCC values.
> -void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
> +/// condition code CC on the current target.
> +/// If the SETCC has been legalized using AND / OR, then the legalized node
> +/// will be stored in LHS and RHS and CC will be set to SDValue().
> +/// If the SETCC has been legalized by using getSetCCSwappedOperands(),
> +/// then the values of LHS and RHS will be swapped and CC will be set to the
> +/// new condition.
> +/// \returns true if the SetCC has been legalized, false if it hasn't.
> +bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
>                                                   SDValue &LHS, SDValue &RHS,
>                                                   SDValue &CC,
>                                                   SDLoc dl) {
> @@ -1659,10 +1664,9 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
>          // different manner of supporting expanding these cases.
>          llvm_unreachable("Don't know how to expand this condition!");
>        }
> -      LHS = DAG.getSetCC(dl, VT, RHS, LHS, InvCC);
> -      RHS = SDValue();
> -      CC = SDValue();
> -      return;
> +      std::swap(LHS, RHS);
> +      CC = DAG.getCondCode(InvCC);
> +      return true;
>      }
>  
>      SDValue SetCC1, SetCC2;
> @@ -1679,9 +1683,10 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
>      LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
>      RHS = SDValue();
>      CC  = SDValue();
> -    break;
> +    return true;
>    }
>    }
> +  return false;
>  }
>  
>  /// EmitStackConvert - Emit a store/load combination to the stack.  This stores
> @@ -3620,10 +3625,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
>      Tmp1 = Node->getOperand(0);
>      Tmp2 = Node->getOperand(1);
>      Tmp3 = Node->getOperand(2);
> -    LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl);
> +    bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2,
> +                                           Tmp3, dl);
> +
> +    if (Legalized) {
> +      // If we expanded the SETCC by swapping LHS and RHS, create a new SETCC
> +      // node.
> +      if (Tmp3.getNode())
> +        Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
> +                           Tmp1, Tmp2, Tmp3);
>  
> -    // If we expanded the SETCC into an AND/OR, return the new node
> -    if (Tmp2.getNode() == 0) {
>        Results.push_back(Tmp1);
>        break;
>      }
> @@ -3654,14 +3665,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
>      Tmp4 = Node->getOperand(3);   // False
>      SDValue CC = Node->getOperand(4);
>  
> -    LegalizeSetCCCondCode(getSetCCResultType(Tmp1.getValueType()),
> -                          Tmp1, Tmp2, CC, dl);
> +    bool Legalized = LegalizeSetCCCondCode(
> +        getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, dl);
>  
> -    assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!");
> -    Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
> -    CC = DAG.getCondCode(ISD::SETNE);
> -    Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
> -                       Tmp3, Tmp4, CC);
> +    assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
> +    // If we exapanded the SETCC by swapping LHS and RHS, create a new SELECT_CC
> +    // node and return it.
> +    if (CC.getNode()) {
> +      Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0),
> +                         Tmp1, Tmp2, Tmp3, Tmp4, CC);
> +    } else {
> +      Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
> +      CC = DAG.getCondCode(ISD::SETNE);
> +      Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
> +                         Tmp3, Tmp4, CC);
> +    }
>      Results.push_back(Tmp1);
>      break;
>    }
> @@ -3671,14 +3689,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
>      Tmp3 = Node->getOperand(3);              // RHS
>      Tmp4 = Node->getOperand(1);              // CC
>  
> -    LegalizeSetCCCondCode(getSetCCResultType(Tmp2.getValueType()),
> -                          Tmp2, Tmp3, Tmp4, dl);
> +    bool Legalized = LegalizeSetCCCondCode(getSetCCResultType(
> +        Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, dl);
> +    assert(Legalized && "Can't legalize BR_CC with legal condition!");
>  
> -    assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
> -    Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
> -    Tmp4 = DAG.getCondCode(ISD::SETNE);
> -    Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2,
> -                       Tmp3, Node->getOperand(4));
> +    // If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC
> +    // node.
> +    if (Tmp4.getNode()) {
> +      Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1,
> +                         Tmp4, Tmp2, Tmp3, Node->getOperand(4));
> +    } else {
> +      Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
> +      Tmp4 = DAG.getCondCode(ISD::SETNE);
> +      Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2,
> +                         Tmp3, Node->getOperand(4));
> +    }
>      Results.push_back(Tmp1);
>      break;
>    }
> -- 
> 1.7.11.4
> 

> From b591d35726cc34cba57e59a98e07b552f990e2ef Mon Sep 17 00:00:00 2001
> From: Tom Stellard <thomas.stellard at amd.com>
> Date: Tue, 10 Sep 2013 19:32:55 -0700
> Subject: [PATCH 2/4] SelectionDAG: Try to expand all condition codes using
>  getCCSwappedOperands()
> 
> This is useful for targets like R600, which only support GT, GE, NE, and EQ
> condition codes as it removes the need to handle unsupported condition
> codes in target specific code.
> 
> There are no tests with this commit, but R600 has been updated to take
> advantage of this new feature, so its existing selectcc tests are now
> testing the swapped operands path.
> ---
>  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp    | 19 +++----
>  lib/CodeGen/SelectionDAG/SelectionDAG.cpp   |  7 ++-
>  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 25 ++++++---
>  lib/Target/R600/R600ISelLowering.cpp        | 86 +++++++++++++++++++----------
>  lib/Target/R600/R600Instructions.td         | 48 ----------------
>  5 files changed, 91 insertions(+), 94 deletions(-)
> 
> diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> index c58b8fd..f6406b2 100644
> --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> @@ -1615,8 +1615,13 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
>      // Nothing to do.
>      break;
>    case TargetLowering::Expand: {
> +    ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
> +    if (TLI.isCondCodeLegal(InvCC, OpVT)) {
> +      std::swap(LHS, RHS);
> +      CC = DAG.getCondCode(InvCC);
> +      return true;
> +    }
>      ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
> -    ISD::CondCode InvCC = ISD::SETCC_INVALID;
>      unsigned Opc = 0;
>      switch (CCCode) {
>      default: llvm_unreachable("Don't know how to expand this condition!");
> @@ -1658,15 +1663,9 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
>      case ISD::SETLT:
>      case ISD::SETNE:
>      case ISD::SETEQ:
> -      InvCC = ISD::getSetCCSwappedOperands(CCCode);
> -      if (TLI.getCondCodeAction(InvCC, OpVT) == TargetLowering::Expand) {
> -        // We only support using the inverted operation and not a
> -        // different manner of supporting expanding these cases.
> -        llvm_unreachable("Don't know how to expand this condition!");
> -      }
> -      std::swap(LHS, RHS);
> -      CC = DAG.getCondCode(InvCC);
> -      return true;
> +      // We only support using the inverted operation, which is computed above
> +      // and not a different manner of supporting expanding these cases.
> +      llvm_unreachable("Don't know how to expand this condition!");
>      }
>  
>      SDValue SetCC1, SetCC2;
> diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> index 845b9a3..f7836d3 100644
> --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
> @@ -1645,7 +1645,12 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
>        }
>      } else {
>        // Ensure that the constant occurs on the RHS.
> -      return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond));
> +      ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond);
> +      MVT CompVT = N1.getValueType().getSimpleVT();
> +      if (!TM.getTargetLowering()->isCondCodeLegal(SwappedCond, CompVT))
> +        return SDValue();
> +
> +      return getSetCC(dl, VT, N2, N1, SwappedCond);
>      }
>    }
>  
> diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
> index f2199d7..2d70d7d 100644
> --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
> +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
> @@ -1089,8 +1089,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
>  
>    // Ensure that the constant occurs on the RHS, and fold constant
>    // comparisons.
> -  if (isa<ConstantSDNode>(N0.getNode()))
> -    return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
> +  ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
> +  if (isa<ConstantSDNode>(N0.getNode()) &&
> +      (DCI.isBeforeLegalizeOps() ||
> +       isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
> +    return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
>  
>    if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
>      const APInt &C1 = N1C->getAPIntValue();
> @@ -1329,7 +1332,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
>          ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
>          CC = ISD::getSetCCInverse(CC,
>                                    N0.getOperand(0).getValueType().isInteger());
> -        return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
> +        if (DCI.isBeforeLegalizeOps() ||
> +            isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
> +          return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
>        }
>  
>        if ((N0.getOpcode() == ISD::XOR ||
> @@ -1766,16 +1771,22 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
>        if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) {
>          if (ValueHasExactlyOneBitSet(N1, DAG)) {
>            Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
> -          SDValue Zero = DAG.getConstant(0, N1.getValueType());
> -          return DAG.getSetCC(dl, VT, N0, Zero, Cond);
> +          if (DCI.isBeforeLegalizeOps() ||
> +              isCondCodeLegal(Cond, N0.getSimpleValueType())) {
> +            SDValue Zero = DAG.getConstant(0, N1.getValueType());
> +            return DAG.getSetCC(dl, VT, N0, Zero, Cond);
> +          }
>          }
>        }
>      if (N1.getOpcode() == ISD::AND)
>        if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) {
>          if (ValueHasExactlyOneBitSet(N0, DAG)) {
>            Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
> -          SDValue Zero = DAG.getConstant(0, N0.getValueType());
> -          return DAG.getSetCC(dl, VT, N1, Zero, Cond);
> +          if (DCI.isBeforeLegalizeOps() ||
> +              isCondCodeLegal(Cond, N1.getSimpleValueType())) {
> +            SDValue Zero = DAG.getConstant(0, N0.getValueType());
> +            return DAG.getSetCC(dl, VT, N1, Zero, Cond);
> +          }
>          }
>        }
>    }
> diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
> index ff9ba52..778ee59 100644
> --- a/lib/Target/R600/R600ISelLowering.cpp
> +++ b/lib/Target/R600/R600ISelLowering.cpp
> @@ -38,6 +38,18 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
>  
>    computeRegisterProperties();
>  
> +  setCondCodeAction(ISD::SETLE,  MVT::f32, Expand);
> +  setCondCodeAction(ISD::SETLT,  MVT::f32, Expand);
> +  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
> +  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
> +  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
> +  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
> +
> +  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
> +  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
> +  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
> +  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
> +
>    setOperationAction(ISD::FCOS, MVT::f32, Custom);
>    setOperationAction(ISD::FSIN, MVT::f32, Custom);
>  
> @@ -841,16 +853,19 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
>    //
>    // SET* can match the following patterns:
>    //
> -  // select_cc f32, f32, -1,  0, cc_any
> -  // select_cc f32, f32, 1.0f, 0.0f, cc_any
> -  // select_cc i32, i32, -1,  0, cc_any
> +  // select_cc f32, f32, -1,  0, cc_supported
> +  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
> +  // select_cc i32, i32, -1,  0, cc_supported
>    //
>  
>    // Move hardware True/False values to the correct operand.
> -  if (isHWTrueValue(False) && isHWFalseValue(True)) {
> -    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
> +  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
> +  ISD::CondCode InverseCC =
> +     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
> +  if (isHWTrueValue(False) && isHWFalseValue(True) &&
> +      isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
>      std::swap(False, True);
> -    CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
> +    CC = DAG.getCondCode(InverseCC);
>    }
>  
>    if (isHWTrueValue(True) && isHWFalseValue(False) &&
> @@ -863,14 +878,34 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
>    //
>    // CND* can match the following patterns:
>    //
> -  // select_cc f32, 0.0, f32, f32, cc_any
> -  // select_cc f32, 0.0, i32, i32, cc_any
> -  // select_cc i32, 0,   f32, f32, cc_any
> -  // select_cc i32, 0,   i32, i32, cc_any
> +  // select_cc f32, 0.0, f32, f32, cc_supported
> +  // select_cc f32, 0.0, i32, i32, cc_supported
> +  // select_cc i32, 0,   f32, f32, cc_supported
> +  // select_cc i32, 0,   i32, i32, cc_supported
>    //
> -  if (isZero(LHS) || isZero(RHS)) {
> -    SDValue Cond = (isZero(LHS) ? RHS : LHS);
> -    SDValue Zero = (isZero(LHS) ? LHS : RHS);
> +
> +  // Try to move the zero value to the RHS
> +  if (isZero(LHS)) {
> +    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
> +    // Try swapping the operands
> +    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
> +    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
> +      std::swap(LHS, RHS);
> +      CC = DAG.getCondCode(CCSwapped);
> +    } else {
> +      // Try inverting the condition and then swapping the operands
> +      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
> +      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
> +      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
> +        std::swap(True, False);
> +        std::swap(LHS, RHS);
> +        CC = DAG.getCondCode(CCSwapped);
> +      }
> +    }
> +  }
> +  if (isZero(RHS)) {
> +    SDValue Cond = LHS;
> +    SDValue Zero = RHS;
>      ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
>      if (CompareVT != VT) {
>        // Bitcast True / False to the correct types.  This will end up being
> @@ -880,20 +915,11 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
>        True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
>        False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
>      }
> -    if (isZero(LHS)) {
> -      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
> -    }
>  
>      switch (CCOpcode) {
>      case ISD::SETONE:
>      case ISD::SETUNE:
>      case ISD::SETNE:
> -    case ISD::SETULE:
> -    case ISD::SETULT:
> -    case ISD::SETOLE:
> -    case ISD::SETOLT:
> -    case ISD::SETLE:
> -    case ISD::SETLT:
>        CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
>        Temp = True;
>        True = False;
> @@ -1567,14 +1593,18 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
>        ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
>        LHSCC = ISD::getSetCCInverse(LHSCC,
>                                    LHS.getOperand(0).getValueType().isInteger());
> -      return DAG.getSelectCC(SDLoc(N),
> -                             LHS.getOperand(0),
> -                             LHS.getOperand(1),
> -                             LHS.getOperand(2),
> -                             LHS.getOperand(3),
> -                             LHSCC);
> +      if (DCI.isBeforeLegalizeOps() ||
> +          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
> +        return DAG.getSelectCC(SDLoc(N),
> +                               LHS.getOperand(0),
> +                               LHS.getOperand(1),
> +                               LHS.getOperand(2),
> +                               LHS.getOperand(3),
> +                               LHSCC);
> +      break;
>      }
>      }
> +    return SDValue();
>    }
>  
>    case AMDGPUISD::EXPORT: {
> diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
> index 24bc6b0..65ea04b 100644
> --- a/lib/Target/R600/R600Instructions.td
> +++ b/lib/Target/R600/R600Instructions.td
> @@ -2324,54 +2324,6 @@ def KIL : Pat <
>    (MASK_WRITE (KILLGT (f32 ZERO), $src0))
>  >;
>  
> -// SGT Reverse args
> -def : Pat <
> -  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LT),
> -  (SGT $src1, $src0)
> ->;
> -
> -// SGE Reverse args
> -def : Pat <
> -  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LE),
> -  (SGE $src1, $src0)
> ->;
> -
> -// SETGT_DX10 reverse args
> -def : Pat <
> -  (selectcc f32:$src0, f32:$src1, -1, 0, COND_LT),
> -  (SETGT_DX10 $src1, $src0)
> ->;
> -
> -// SETGE_DX10 reverse args
> -def : Pat <
> -  (selectcc f32:$src0, f32:$src1, -1, 0, COND_LE),
> -  (SETGE_DX10 $src1, $src0)
> ->;
> -
> -// SETGT_INT reverse args
> -def : Pat <
> -  (selectcc i32:$src0, i32:$src1, -1, 0, SETLT),
> -  (SETGT_INT $src1, $src0)
> ->;
> -
> -// SETGE_INT reverse args
> -def : Pat <
> -  (selectcc i32:$src0, i32:$src1, -1, 0, SETLE),
> -  (SETGE_INT $src1, $src0)
> ->;
> -
> -// SETGT_UINT reverse args
> -def : Pat <
> -  (selectcc i32:$src0, i32:$src1, -1, 0, SETULT),
> -  (SETGT_UINT $src1, $src0)
> ->;
> -
> -// SETGE_UINT reverse args
> -def : Pat <
> -  (selectcc i32:$src0, i32:$src1, -1, 0, SETULE),
> -  (SETGE_UINT $src1, $src0)
> ->;
> -
>  // The next two patterns are special cases for handling 'true if ordered' and
>  // 'true if unordered' conditionals.  The assumption here is that the behavior of
>  // SETE and SNE conforms to the Direct3D 10 rules for floating point values
> -- 
> 1.7.11.4
> 

> From d62adcc8de93d4986535c2cd4ef5113a2afdc676 Mon Sep 17 00:00:00 2001
> From: Tom Stellard <thomas.stellard at amd.com>
> Date: Thu, 12 Sep 2013 08:18:29 -0700
> Subject: [PATCH 3/4] SelectionDAG: Improve legalization of SELECT_CC with
>  illegal condition codes
> 
> SelectionDAG will now attempt to invert an illegal condition in order to
> find a legal one and if that doesn't work, it will attempt to swap the
> operands using the inverted condition.
> 
> There are no new test cases for this, but a number of the existing R600
> tests hit this path.
> ---
>  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 50 +++++++++++++++++++++++---------
>  lib/Target/R600/R600ISelLowering.cpp     | 16 +++++++---
>  2 files changed, 49 insertions(+), 17 deletions(-)
> 
> diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> index f6406b2..e78caba 100644
> --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
> @@ -3664,20 +3664,44 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
>      Tmp4 = Node->getOperand(3);   // False
>      SDValue CC = Node->getOperand(4);
>  
> -    bool Legalized = LegalizeSetCCCondCode(
> -        getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, dl);
> -
> -    assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
> -    // If we exapanded the SETCC by swapping LHS and RHS, create a new SELECT_CC
> -    // node and return it.
> -    if (CC.getNode()) {
> -      Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0),
> -                         Tmp1, Tmp2, Tmp3, Tmp4, CC);
> +    bool Legalized = false;
> +    // Try to legalize by inverting the condition.  This is for targets that
> +    // might support an ordered version of a condition, but not the unordered
> +    // version (or vice versa).
> +    ISD::CondCode InvCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
> +                                               Tmp1.getValueType().isInteger());
> +    if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) {
> +      // Use the new condition code and swap true and false
> +      Legalized = true;
> +      Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC);
>      } else {
> -      Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
> -      CC = DAG.getCondCode(ISD::SETNE);
> -      Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
> -                         Tmp3, Tmp4, CC);
> +      // If the inverse is not legal, then try to swap the arguments using
> +      // the inverse condition code.
> +      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InvCC);
> +      if (TLI.isCondCodeLegal(SwapInvCC, Tmp1.getSimpleValueType())) {
> +        // The swapped inverse condition is legal, so swap true and false,
> +        // lhs and rhs.
> +        Legalized = true;
> +        Tmp1 = DAG.getSelectCC(dl, Tmp2, Tmp1, Tmp4, Tmp3, SwapInvCC);
> +      }
> +    }
> +
> +    if (!Legalized) {
> +      Legalized = LegalizeSetCCCondCode(
> +          getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, dl);
> +
> +      assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
> +      // If we expanded the SETCC by swapping LHS and RHS, create a new
> +      // SELECT_CC node.
> +      if (CC.getNode()) {
> +        Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0),
> +                           Tmp1, Tmp2, Tmp3, Tmp4, CC);
> +      } else {
> +        Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
> +        CC = DAG.getCondCode(ISD::SETNE);
> +        Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
> +                           Tmp3, Tmp4, CC);
> +      }
>      }
>      Results.push_back(Tmp1);
>      break;
> diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
> index 778ee59..6a02bdb 100644
> --- a/lib/Target/R600/R600ISelLowering.cpp
> +++ b/lib/Target/R600/R600ISelLowering.cpp
> @@ -862,10 +862,18 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
>    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
>    ISD::CondCode InverseCC =
>       ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
> -  if (isHWTrueValue(False) && isHWFalseValue(True) &&
> -      isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
> -    std::swap(False, True);
> -    CC = DAG.getCondCode(InverseCC);
> +  if (isHWTrueValue(False) && isHWFalseValue(True)) {
> +    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
> +      std::swap(False, True);
> +      CC = DAG.getCondCode(InverseCC);
> +    } else {
> +      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
> +      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
> +        std::swap(False, True);
> +        std::swap(LHS, RHS);
> +        CC = DAG.getCondCode(SwapInvCC);
> +      }
> +    }
>    }
>  
>    if (isHWTrueValue(True) && isHWFalseValue(False) &&
> -- 
> 1.7.11.4
> 

> From 859f3f6346bb8b5d7bfb181cca9f103ac18f116f Mon Sep 17 00:00:00 2001
> From: Tom Stellard <thomas.stellard at amd.com>
> Date: Thu, 12 Sep 2013 08:18:38 -0700
> Subject: [PATCH 4/4] R600: Fix handling of NAN in comparison instructions
> 
> We were completely ignoring the unordered/ordered attributes of condition
> codes and also incorrectly lowering seto and setuo.
> ---
>  lib/Target/R600/AMDGPUInstructions.td | 21 ++++++++++++
>  lib/Target/R600/R600ISelLowering.cpp  |  9 +++++-
>  lib/Target/R600/R600Instructions.td   | 54 +++++++------------------------
>  test/CodeGen/R600/fmax.ll             |  2 +-
>  test/CodeGen/R600/kcache-fold.ll      | 16 +++++-----
>  test/CodeGen/R600/pv.ll               |  2 +-
>  test/CodeGen/R600/selectcc-opt.ll     |  4 +--
>  test/CodeGen/R600/set-dx10.ll         | 60 +++++++++++++++++------------------
>  test/CodeGen/R600/unsupported-cc.ll   | 60 +++++++++++++++++++++++++----------
>  test/CodeGen/R600/vselect.ll          |  4 +--
>  10 files changed, 127 insertions(+), 105 deletions(-)
> 
> diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
> index e30abc0..5778a8c 100644
> --- a/lib/Target/R600/AMDGPUInstructions.td
> +++ b/lib/Target/R600/AMDGPUInstructions.td
> @@ -43,12 +43,23 @@ def COND_EQ : PatLeaf <
>                       case ISD::SETEQ: return true;}}}]
>  >;
>  
> +def COND_OEQ : PatLeaf <
> +  (cond),
> +  [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
> +>;
> +
>  def COND_NE : PatLeaf <
>    (cond),
>    [{switch(N->get()){{default: return false;
>                       case ISD::SETONE: case ISD::SETUNE:
>                       case ISD::SETNE: return true;}}}]
>  >;
> +
> +def COND_UNE : PatLeaf <
> +  (cond),
> +  [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
> +>;
> +
>  def COND_GT : PatLeaf <
>    (cond),
>    [{switch(N->get()){{default: return false;
> @@ -56,6 +67,11 @@ def COND_GT : PatLeaf <
>                       case ISD::SETGT: return true;}}}]
>  >;
>  
> +def COND_OGT : PatLeaf <
> +  (cond),
> +  [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
> +>;
> +
>  def COND_GE : PatLeaf <
>    (cond),
>    [{switch(N->get()){{default: return false;
> @@ -63,6 +79,11 @@ def COND_GE : PatLeaf <
>                       case ISD::SETGE: return true;}}}]
>  >;
>  
> +def COND_OGE : PatLeaf <
> +  (cond),
> +  [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}]
> +>;
> +
>  def COND_LT : PatLeaf <
>    (cond),
>    [{switch(N->get()){{default: return false;
> diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
> index 6a02bdb..a8e8a77 100644
> --- a/lib/Target/R600/R600ISelLowering.cpp
> +++ b/lib/Target/R600/R600ISelLowering.cpp
> @@ -38,10 +38,17 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
>  
>    computeRegisterProperties();
>  
> -  setCondCodeAction(ISD::SETLE,  MVT::f32, Expand);
> +  // Set condition code actions
> +  setCondCodeAction(ISD::SETO,   MVT::f32, Expand);
> +  setCondCodeAction(ISD::SETUO,  MVT::f32, Expand);
>    setCondCodeAction(ISD::SETLT,  MVT::f32, Expand);
> +  setCondCodeAction(ISD::SETLE,  MVT::f32, Expand);
>    setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
>    setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
> +  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
> +  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
> +  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
> +  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
>    setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
>    setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
>  
> diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
> index 65ea04b..e92385d 100644
> --- a/lib/Target/R600/R600Instructions.td
> +++ b/lib/Target/R600/R600Instructions.td
> @@ -689,42 +689,42 @@ def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
>  // XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
>  def SETE : R600_2OP <
>    0x08, "SETE",
> -  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_EQ))]
> +  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OEQ))]
>  >;
>  
>  def SGT : R600_2OP <
>    0x09, "SETGT",
> -  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GT))]
> +  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGT))]
>  >;
>  
>  def SGE : R600_2OP <
>    0xA, "SETGE",
> -  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GE))]
> +  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGE))]
>  >;
>  
>  def SNE : R600_2OP <
>    0xB, "SETNE",
> -  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_NE))]
> +  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_UNE))]
>  >;
>  
>  def SETE_DX10 : R600_2OP <
>    0xC, "SETE_DX10",
> -  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_EQ))]
> +  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OEQ))]
>  >;
>  
>  def SETGT_DX10 : R600_2OP <
>    0xD, "SETGT_DX10",
> -  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GT))]
> +  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGT))]
>  >;
>  
>  def SETGE_DX10 : R600_2OP <
>    0xE, "SETGE_DX10",
> -  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GE))]
> +  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGE))]
>  >;
>  
>  def SETNE_DX10 : R600_2OP <
>    0xF, "SETNE_DX10",
> -  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_NE))]
> +  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_UNE))]
>  >;
>  
>  def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
> @@ -920,19 +920,19 @@ class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
>  
>  class CNDE_Common <bits<5> inst> : R600_3OP <
>    inst, "CNDE",
> -  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_EQ))]
> +  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))]
>  >;
>  
>  class CNDGT_Common <bits<5> inst> : R600_3OP <
>    inst, "CNDGT",
> -  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GT))]
> +  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGT))]
>  > {
>    let Itinerary = VecALU;
>  }
>  
>  class CNDGE_Common <bits<5> inst> : R600_3OP <
>    inst, "CNDGE",
> -  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GE))]
> +  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGE))]
>  > {
>    let Itinerary = VecALU;
>  }
> @@ -2324,38 +2324,6 @@ def KIL : Pat <
>    (MASK_WRITE (KILLGT (f32 ZERO), $src0))
>  >;
>  
> -// The next two patterns are special cases for handling 'true if ordered' and
> -// 'true if unordered' conditionals.  The assumption here is that the behavior of
> -// SETE and SNE conforms to the Direct3D 10 rules for floating point values
> -// described here:
> -// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit
> -// We assume that  SETE returns false when one of the operands is NAN and
> -// SNE returns true when on of the operands is NAN
> -
> -//SETE - 'true if ordered'
> -def : Pat <
> -  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETO),
> -  (SETE $src0, $src1)
> ->;
> -
> -//SETE_DX10 - 'true if ordered'
> -def : Pat <
> -  (selectcc f32:$src0, f32:$src1, -1, 0, SETO),
> -  (SETE_DX10 $src0, $src1)
> ->;
> -
> -//SNE - 'true if unordered'
> -def : Pat <
> -  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETUO),
> -  (SNE $src0, $src1)
> ->;
> -
> -//SETNE_DX10 - 'true if ordered'
> -def : Pat <
> -  (selectcc f32:$src0, f32:$src1, -1, 0, SETUO),
> -  (SETNE_DX10 $src0, $src1)
> ->;
> -
>  def : Extract_Element <f32, v4f32, 0, sub0>;
>  def : Extract_Element <f32, v4f32, 1, sub1>;
>  def : Extract_Element <f32, v4f32, 2, sub2>;
> diff --git a/test/CodeGen/R600/fmax.ll b/test/CodeGen/R600/fmax.ll
> index 8b704e5..be25c9c 100644
> --- a/test/CodeGen/R600/fmax.ll
> +++ b/test/CodeGen/R600/fmax.ll
> @@ -5,7 +5,7 @@
>  define void @test() {
>     %r0 = call float @llvm.R600.load.input(i32 0)
>     %r1 = call float @llvm.R600.load.input(i32 1)
> -   %r2 = fcmp uge float %r0, %r1
> +   %r2 = fcmp oge float %r0, %r1
>     %r3 = select i1 %r2, float %r0, float %r1
>     call void @llvm.AMDGPU.store.output(float %r3, i32 0)
>     ret void
> diff --git a/test/CodeGen/R600/kcache-fold.ll b/test/CodeGen/R600/kcache-fold.ll
> index 8bdb050..0baa3cd 100644
> --- a/test/CodeGen/R600/kcache-fold.ll
> +++ b/test/CodeGen/R600/kcache-fold.ll
> @@ -10,7 +10,7 @@ main_body:
>    %3 = extractelement <4 x float> %2, i32 0
>    %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
>    %5 = extractelement <4 x float> %4, i32 0
> -  %6 = fcmp ult float %1, 0.000000e+00
> +  %6 = fcmp ogt float %1, 0.000000e+00
>    %7 = select i1 %6, float %3, float %5
>    %8 = load <4 x float> addrspace(8)* null
>    %9 = extractelement <4 x float> %8, i32 1
> @@ -18,7 +18,7 @@ main_body:
>    %11 = extractelement <4 x float> %10, i32 1
>    %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
>    %13 = extractelement <4 x float> %12, i32 1
> -  %14 = fcmp ult float %9, 0.000000e+00
> +  %14 = fcmp ogt float %9, 0.000000e+00
>    %15 = select i1 %14, float %11, float %13
>    %16 = load <4 x float> addrspace(8)* null
>    %17 = extractelement <4 x float> %16, i32 2
> @@ -26,7 +26,7 @@ main_body:
>    %19 = extractelement <4 x float> %18, i32 2
>    %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
>    %21 = extractelement <4 x float> %20, i32 2
> -  %22 = fcmp ult float %17, 0.000000e+00
> +  %22 = fcmp ogt float %17, 0.000000e+00
>    %23 = select i1 %22, float %19, float %21
>    %24 = load <4 x float> addrspace(8)* null
>    %25 = extractelement <4 x float> %24, i32 3
> @@ -34,7 +34,7 @@ main_body:
>    %27 = extractelement <4 x float> %26, i32 3
>    %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
>    %29 = extractelement <4 x float> %28, i32 3
> -  %30 = fcmp ult float %25, 0.000000e+00
> +  %30 = fcmp ogt float %25, 0.000000e+00
>    %31 = select i1 %30, float %27, float %29
>    %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
>    %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
> @@ -58,7 +58,7 @@ main_body:
>    %3 = extractelement <4 x float> %2, i32 0
>    %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
>    %5 = extractelement <4 x float> %4, i32 1
> -  %6 = fcmp ult float %1, 0.000000e+00
> +  %6 = fcmp ogt float %1, 0.000000e+00
>    %7 = select i1 %6, float %3, float %5
>    %8 = load <4 x float> addrspace(8)* null
>    %9 = extractelement <4 x float> %8, i32 1
> @@ -66,7 +66,7 @@ main_body:
>    %11 = extractelement <4 x float> %10, i32 0
>    %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
>    %13 = extractelement <4 x float> %12, i32 1
> -  %14 = fcmp ult float %9, 0.000000e+00
> +  %14 = fcmp ogt float %9, 0.000000e+00
>    %15 = select i1 %14, float %11, float %13
>    %16 = load <4 x float> addrspace(8)* null
>    %17 = extractelement <4 x float> %16, i32 2
> @@ -74,7 +74,7 @@ main_body:
>    %19 = extractelement <4 x float> %18, i32 3
>    %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
>    %21 = extractelement <4 x float> %20, i32 2
> -  %22 = fcmp ult float %17, 0.000000e+00
> +  %22 = fcmp ogt float %17, 0.000000e+00
>    %23 = select i1 %22, float %19, float %21
>    %24 = load <4 x float> addrspace(8)* null
>    %25 = extractelement <4 x float> %24, i32 3
> @@ -82,7 +82,7 @@ main_body:
>    %27 = extractelement <4 x float> %26, i32 3
>    %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
>    %29 = extractelement <4 x float> %28, i32 2
> -  %30 = fcmp ult float %25, 0.000000e+00
> +  %30 = fcmp ogt float %25, 0.000000e+00
>    %31 = select i1 %30, float %27, float %29
>    %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
>    %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
> diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll
> index 6e0b744..6d9396c 100644
> --- a/test/CodeGen/R600/pv.ll
> +++ b/test/CodeGen/R600/pv.ll
> @@ -1,7 +1,7 @@
>  ; RUN: llc < %s -march=r600 | FileCheck %s
>  
>  ;CHECK: DOT4 * T{{[0-9]\.W}} (MASKED)
> -;CHECK: CNDGE T{{[0-9].[XYZW]}}, PV.X
> +;CHECK: MAX T{{[0-9].[XYZW]}}, 0.0, PV.X
>  
>  define void @main() #0 {
>  main_body:
> diff --git a/test/CodeGen/R600/selectcc-opt.ll b/test/CodeGen/R600/selectcc-opt.ll
> index 7e2d559..834c030 100644
> --- a/test/CodeGen/R600/selectcc-opt.ll
> +++ b/test/CodeGen/R600/selectcc-opt.ll
> @@ -6,7 +6,7 @@
>  
>  define void @test_a(i32 addrspace(1)* %out, float %in) {
>  entry:
> -  %0 = fcmp ult float %in, 0.000000e+00
> +  %0 = fcmp olt float %in, 0.000000e+00
>    %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
>    %2 = fsub float -0.000000e+00, %1
>    %3 = fptosi float %2 to i32
> @@ -34,7 +34,7 @@ ENDIF:
>  ; CHECK-NEXT: ALU clause starting
>  define void @test_b(i32 addrspace(1)* %out, float %in) {
>  entry:
> -  %0 = fcmp ult float %in, 0.0
> +  %0 = fcmp olt float %in, 0.0
>    %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
>    %2 = fsub float -0.000000e+00, %1
>    %3 = fptosi float %2 to i32
> diff --git a/test/CodeGen/R600/set-dx10.ll b/test/CodeGen/R600/set-dx10.ll
> index bdc2ff4..5c7d499 100644
> --- a/test/CodeGen/R600/set-dx10.ll
> +++ b/test/CodeGen/R600/set-dx10.ll
> @@ -30,13 +30,13 @@ entry:
>    ret void
>  }
>  
> -; CHECK: @fcmp_ueq_select_fptosi
> +; CHECK: @fcmp_oeq_select_fptosi
>  ; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
>  ; CHECK-NEXT: LSHR
>  ; CHECK-NEXT: 1084227584(5.000000e+00)
> -define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) {
> +define void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) {
>  entry:
> -  %0 = fcmp ueq float %in, 5.0
> +  %0 = fcmp oeq float %in, 5.0
>    %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
>    %2 = fsub float -0.000000e+00, %1
>    %3 = fptosi float %2 to i32
> @@ -44,25 +44,25 @@ entry:
>    ret void
>  }
>  
> -; CHECK: @fcmp_ueq_select_i32
> +; CHECK: @fcmp_oeq_select_i32
>  ; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
>  ; CHECK-NEXT: LSHR
>  ; CHECK-NEXT: 1084227584(5.000000e+00)
> -define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) {
> +define void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) {
>  entry:
> -  %0 = fcmp ueq float %in, 5.0
> +  %0 = fcmp oeq float %in, 5.0
>    %1 = select i1 %0, i32 -1, i32 0
>    store i32 %1, i32 addrspace(1)* %out
>    ret void
>  }
>  
> -; CHECK: @fcmp_ugt_select_fptosi
> +; CHECK: @fcmp_ogt_select_fptosi
>  ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
>  ; CHECK-NEXT: LSHR
>  ; CHECK-NEXT: 1084227584(5.000000e+00)
> -define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) {
> +define void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) {
>  entry:
> -  %0 = fcmp ugt float %in, 5.0
> +  %0 = fcmp ogt float %in, 5.0
>    %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
>    %2 = fsub float -0.000000e+00, %1
>    %3 = fptosi float %2 to i32
> @@ -70,25 +70,25 @@ entry:
>    ret void
>  }
>  
> -; CHECK: @fcmp_ugt_select_i32
> +; CHECK: @fcmp_ogt_select_i32
>  ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
>  ; CHECK-NEXT: LSHR
>  ; CHECK-NEXT: 1084227584(5.000000e+00)
> -define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) {
> +define void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) {
>  entry:
> -  %0 = fcmp ugt float %in, 5.0
> +  %0 = fcmp ogt float %in, 5.0
>    %1 = select i1 %0, i32 -1, i32 0
>    store i32 %1, i32 addrspace(1)* %out
>    ret void
>  }
>  
> -; CHECK: @fcmp_uge_select_fptosi
> +; CHECK: @fcmp_oge_select_fptosi
>  ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
>  ; CHECK-NEXT: LSHR
>  ; CHECK-NEXT: 1084227584(5.000000e+00)
> -define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) {
> +define void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) {
>  entry:
> -  %0 = fcmp uge float %in, 5.0
> +  %0 = fcmp oge float %in, 5.0
>    %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
>    %2 = fsub float -0.000000e+00, %1
>    %3 = fptosi float %2 to i32
> @@ -96,25 +96,25 @@ entry:
>    ret void
>  }
>  
> -; CHECK: @fcmp_uge_select_i32
> +; CHECK: @fcmp_oge_select_i32
>  ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
>  ; CHECK-NEXT: LSHR
>  ; CHECK-NEXT: 1084227584(5.000000e+00)
> -define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) {
> +define void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) {
>  entry:
> -  %0 = fcmp uge float %in, 5.0
> +  %0 = fcmp oge float %in, 5.0
>    %1 = select i1 %0, i32 -1, i32 0
>    store i32 %1, i32 addrspace(1)* %out
>    ret void
>  }
>  
> -; CHECK: @fcmp_ule_select_fptosi
> +; CHECK: @fcmp_ole_select_fptosi
>  ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
>  ; CHECK-NEXT: LSHR
>  ; CHECK-NEXT: 1084227584(5.000000e+00)
> -define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) {
> +define void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) {
>  entry:
> -  %0 = fcmp ule float %in, 5.0
> +  %0 = fcmp ole float %in, 5.0
>    %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
>    %2 = fsub float -0.000000e+00, %1
>    %3 = fptosi float %2 to i32
> @@ -122,25 +122,25 @@ entry:
>    ret void
>  }
>  
> -; CHECK: @fcmp_ule_select_i32
> +; CHECK: @fcmp_ole_select_i32
>  ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
>  ; CHECK-NEXT: LSHR
>  ; CHECK-NEXT: 1084227584(5.000000e+00)
> -define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) {
> +define void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) {
>  entry:
> -  %0 = fcmp ule float %in, 5.0
> +  %0 = fcmp ole float %in, 5.0
>    %1 = select i1 %0, i32 -1, i32 0
>    store i32 %1, i32 addrspace(1)* %out
>    ret void
>  }
>  
> -; CHECK: @fcmp_ult_select_fptosi
> +; CHECK: @fcmp_olt_select_fptosi
>  ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
>  ; CHECK-NEXT: LSHR
>  ; CHECK-NEXT: 1084227584(5.000000e+00)
> -define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) {
> +define void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) {
>  entry:
> -  %0 = fcmp ult float %in, 5.0
> +  %0 = fcmp olt float %in, 5.0
>    %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
>    %2 = fsub float -0.000000e+00, %1
>    %3 = fptosi float %2 to i32
> @@ -148,13 +148,13 @@ entry:
>    ret void
>  }
>  
> -; CHECK: @fcmp_ult_select_i32
> +; CHECK: @fcmp_olt_select_i32
>  ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
>  ; CHECK-NEXT: LSHR
>  ; CHECK-NEXT: 1084227584(5.000000e+00)
> -define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) {
> +define void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) {
>  entry:
> -  %0 = fcmp ult float %in, 5.0
> +  %0 = fcmp olt float %in, 5.0
>    %1 = select i1 %0, i32 -1, i32 0
>    store i32 %1, i32 addrspace(1)* %out
>    ret void
> diff --git a/test/CodeGen/R600/unsupported-cc.ll b/test/CodeGen/R600/unsupported-cc.ll
> index d3aa060..f986a02 100644
> --- a/test/CodeGen/R600/unsupported-cc.ll
> +++ b/test/CodeGen/R600/unsupported-cc.ll
> @@ -2,7 +2,7 @@
>  
>  ; These tests are for condition codes that are not supported by the hardware
>  
> -; CHECK: @slt
> +; CHECK-LABEL: @slt
>  ; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
>  ; CHECK-NEXT: LSHR
>  ; CHECK-NEXT: 5(7.006492e-45)
> @@ -14,7 +14,7 @@ entry:
>    ret void
>  }
>  
> -; CHECK: @ult_i32
> +; CHECK-LABEL: @ult_i32
>  ; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
>  ; CHECK-NEXT: LSHR
>  ; CHECK-NEXT: 5(7.006492e-45)
> @@ -26,10 +26,11 @@ entry:
>    ret void
>  }
>  
> -; CHECK: @ult_float
> -; CHECK: SETGT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
> -; CHECK-NEXT: LSHR
> +; CHECK-LABEL: @ult_float
> +; CHECK: SETGE * T{{[0-9]}}.[[CHAN:[XYZW]]], KC0[2].Z, literal.x
>  ; CHECK-NEXT: 1084227584(5.000000e+00)
> +; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0
> +; CHECK-NEXT: LSHR *
>  define void @ult_float(float addrspace(1)* %out, float %in) {
>  entry:
>    %0 = fcmp ult float %in, 5.0
> @@ -38,10 +39,22 @@ entry:
>    ret void
>  }
>  
> -; CHECK: @olt
> -; CHECK: SETGT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
> -; CHECK-NEXT: LSHR
> -;CHECK-NEXT: 1084227584(5.000000e+00)
> +; CHECK-LABEL: @ult_float_native
> +; CHECK: SETGE T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
> +; CHECK-NEXT: LSHR *
> +; CHECK-NEXT: 1084227584(5.000000e+00)
> +define void @ult_float_native(float addrspace(1)* %out, float %in) {
> +entry:
> +  %0 = fcmp ult float %in, 5.0
> +  %1 = select i1 %0, float 0.0, float 1.0
> +  store float %1, float addrspace(1)* %out
> +  ret void
> +}
> +
> +; CHECK-LABEL: @olt
> +; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
> +; CHECK-NEXT: LSHR *
> +; CHECK-NEXT: 1084227584(5.000000e+00)
>  define void @olt(float addrspace(1)* %out, float %in) {
>  entry:
>    %0 = fcmp olt float %in, 5.0
> @@ -50,7 +63,7 @@ entry:
>    ret void
>  }
>  
> -; CHECK: @sle
> +; CHECK-LABEL: @sle
>  ; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
>  ; CHECK-NEXT: LSHR
>  ; CHECK-NEXT: 6(8.407791e-45)
> @@ -62,7 +75,7 @@ entry:
>    ret void
>  }
>  
> -; CHECK: @ule_i32
> +; CHECK-LABEL: @ule_i32
>  ; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
>  ; CHECK-NEXT: LSHR
>  ; CHECK-NEXT: 6(8.407791e-45)
> @@ -74,10 +87,11 @@ entry:
>    ret void
>  }
>  
> -; CHECK: @ule_float
> -; CHECK: SETGE {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
> -; CHECK-NEXT: LSHR
> +; CHECK-LABEL: @ule_float
> +; CHECK: SETGT * T{{[0-9]}}.[[CHAN:[XYZW]]], KC0[2].Z, literal.x
>  ; CHECK-NEXT: 1084227584(5.000000e+00)
> +; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0
> +; CHECK-NEXT: LSHR *
>  define void @ule_float(float addrspace(1)* %out, float %in) {
>  entry:
>    %0 = fcmp ule float %in, 5.0
> @@ -86,9 +100,21 @@ entry:
>    ret void
>  }
>  
> -; CHECK: @ole
> -; CHECK: SETGE {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
> -; CHECK-NEXT: LSHR
> +; CHECK-LABEL: @ule_float_native
> +; CHECK: SETGT T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
> +; CHECK-NEXT: LSHR *
> +; CHECK-NEXT: 1084227584(5.000000e+00)
> +define void @ule_float_native(float addrspace(1)* %out, float %in) {
> +entry:
> +  %0 = fcmp ule float %in, 5.0
> +  %1 = select i1 %0, float 0.0, float 1.0
> +  store float %1, float addrspace(1)* %out
> +  ret void
> +}
> +
> +; CHECK-LABEL: @ole
> +; CHECK: SETGE T{{[0-9]\.[XYZW]}}, literal.x, KC0[2].Z
> +; CHECK-NEXT: LSHR *
>  ; CHECK-NEXT:1084227584(5.000000e+00)
>  define void @ole(float addrspace(1)* %out, float %in) {
>  entry:
> diff --git a/test/CodeGen/R600/vselect.ll b/test/CodeGen/R600/vselect.ll
> index 8e9c5b5..ee17e0f 100644
> --- a/test/CodeGen/R600/vselect.ll
> +++ b/test/CodeGen/R600/vselect.ll
> @@ -31,7 +31,7 @@ define void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrs
>  entry:
>    %0 = load <2 x float> addrspace(1)* %in0
>    %1 = load <2 x float> addrspace(1)* %in1
> -  %cmp = fcmp one <2 x float> %0, %1
> +  %cmp = fcmp une <2 x float> %0, %1
>    %result = select <2 x i1> %cmp, <2 x float> %0, <2 x float> %1
>    store <2 x float> %result, <2 x float> addrspace(1)* %out
>    ret void
> @@ -69,7 +69,7 @@ define void @test_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrs
>  entry:
>    %0 = load <4 x float> addrspace(1)* %in0
>    %1 = load <4 x float> addrspace(1)* %in1
> -  %cmp = fcmp one <4 x float> %0, %1
> +  %cmp = fcmp une <4 x float> %0, %1
>    %result = select <4 x i1> %cmp, <4 x float> %0, <4 x float> %1
>    store <4 x float> %result, <4 x float> addrspace(1)* %out
>    ret void
> -- 
> 1.7.11.4
> 

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list