PATCH: SelectionDAG: More efficient legalization of unsupported SELECT_CC/SETCC condition codes + Fix NAN handling on R600

Tom Stellard tom at stellard.net
Thu Sep 12 14:54:31 PDT 2013


Hi,

The first three patches improve the legalization of SELECT_CC and SETCC
nodes with illegal conditions.  The current code legalizes
conditions by lowering to AND/OR opcodes, but for targets like R600
which support a limited number of conditions (oeq, oge, ogt, and une)
it is usually better to legalize by either swapping the arguments or
inverting the condition and swapping the true / false values.

The last patch fixes the handling of NAN in comparison instructions for
the R600 target.

Please Review.

-Tom
-------------- next part --------------
>From c6088b1cad0869d878613a1c4c71e774bd45874a Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard at amd.com>
Date: Wed, 11 Sep 2013 09:41:21 -0700
Subject: [PATCH 1/4] SelectionDAG: Clean up LegalizeSetCCCondCode() function

Interpreting the results of this function is not very intuitive, so I
cleaned it up to make it more clear whether or not a SETCC op was
legalized and how it was legalized (either by swapping LHS and RHS or
replacing with AND/OR).

This patch does change functionality in the LHS and RHS swapping case,
but unfortunately there are no in-tree tests for this.  However, this
patch is a prerequisite for R600 to take advantage of the LHS and RHS
swapping, so tests will be added in subsequent commits.
---
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 77 +++++++++++++++++++++-----------
 1 file changed, 51 insertions(+), 26 deletions(-)

diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index a252796..c58b8fd 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -95,7 +95,7 @@ private:
                                      SDValue N1, SDValue N2,
                                      ArrayRef<int> Mask) const;
 
-  void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
+  bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
                              SDLoc dl);
 
   SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
@@ -1596,9 +1596,14 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
 }
 
 /// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and
-/// condition code CC on the current target. This routine expands SETCC with
-/// illegal condition code into AND / OR of multiple SETCC values.
-void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
+/// condition code CC on the current target.
+/// If the SETCC has been legalized using AND / OR, then the legalized node
+/// will be stored in LHS, and RHS and CC will both be set to SDValue().
+/// If the SETCC has been legalized by using getSetCCSwappedOperands(),
+/// then the values of LHS and RHS will be swapped and CC will be set to the
+/// new condition.
+/// \returns true if the SetCC has been legalized, false if it hasn't.
+bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
                                                  SDValue &LHS, SDValue &RHS,
                                                  SDValue &CC,
                                                  SDLoc dl) {
@@ -1659,10 +1664,9 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
         // different manner of supporting expanding these cases.
         llvm_unreachable("Don't know how to expand this condition!");
       }
-      LHS = DAG.getSetCC(dl, VT, RHS, LHS, InvCC);
-      RHS = SDValue();
-      CC = SDValue();
-      return;
+      std::swap(LHS, RHS);
+      CC = DAG.getCondCode(InvCC);
+      return true;
     }
 
     SDValue SetCC1, SetCC2;
@@ -1679,9 +1683,10 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
     LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
     RHS = SDValue();
     CC  = SDValue();
-    break;
+    return true;
   }
   }
+  return false;
 }
 
 /// EmitStackConvert - Emit a store/load combination to the stack.  This stores
@@ -3620,10 +3625,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     Tmp1 = Node->getOperand(0);
     Tmp2 = Node->getOperand(1);
     Tmp3 = Node->getOperand(2);
-    LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl);
+    bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2,
+                                           Tmp3, dl);
+
+    if (Legalized) {
+      // If we expanded the SETCC by swapping LHS and RHS, create a new SETCC
+      // node.
+      if (Tmp3.getNode())
+        Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
+                           Tmp1, Tmp2, Tmp3);
 
-    // If we expanded the SETCC into an AND/OR, return the new node
-    if (Tmp2.getNode() == 0) {
       Results.push_back(Tmp1);
       break;
     }
@@ -3654,14 +3665,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     Tmp4 = Node->getOperand(3);   // False
     SDValue CC = Node->getOperand(4);
 
-    LegalizeSetCCCondCode(getSetCCResultType(Tmp1.getValueType()),
-                          Tmp1, Tmp2, CC, dl);
+    bool Legalized = LegalizeSetCCCondCode(
+        getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, dl);
 
-    assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!");
-    Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
-    CC = DAG.getCondCode(ISD::SETNE);
-    Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
-                       Tmp3, Tmp4, CC);
+    assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
+    // If we exapanded the SETCC by swapping LHS and RHS, create a new SELECT_CC
+    // node and return it.
+    if (CC.getNode()) {
+      Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0),
+                         Tmp1, Tmp2, Tmp3, Tmp4, CC);
+    } else {
+      Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
+      CC = DAG.getCondCode(ISD::SETNE);
+      Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
+                         Tmp3, Tmp4, CC);
+    }
     Results.push_back(Tmp1);
     break;
   }
@@ -3671,14 +3689,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     Tmp3 = Node->getOperand(3);              // RHS
     Tmp4 = Node->getOperand(1);              // CC
 
-    LegalizeSetCCCondCode(getSetCCResultType(Tmp2.getValueType()),
-                          Tmp2, Tmp3, Tmp4, dl);
+    bool Legalized = LegalizeSetCCCondCode(getSetCCResultType(
+        Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, dl);
+    assert(Legalized && "Can't legalize BR_CC with legal condition!");
 
-    assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
-    Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
-    Tmp4 = DAG.getCondCode(ISD::SETNE);
-    Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2,
-                       Tmp3, Node->getOperand(4));
+    // If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC
+    // node.
+    if (Tmp4.getNode()) {
+      Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1,
+                         Tmp4, Tmp2, Tmp3, Node->getOperand(4));
+    } else {
+      Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
+      Tmp4 = DAG.getCondCode(ISD::SETNE);
+      Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2,
+                         Tmp3, Node->getOperand(4));
+    }
     Results.push_back(Tmp1);
     break;
   }
-- 
1.7.11.4

-------------- next part --------------
>From b591d35726cc34cba57e59a98e07b552f990e2ef Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard at amd.com>
Date: Tue, 10 Sep 2013 19:32:55 -0700
Subject: [PATCH 2/4] SelectionDAG: Try to expand all condition codes using
 getSetCCSwappedOperands()

This is useful for targets like R600, which only support GT, GE, NE, and EQ
condition codes, as it removes the need to handle unsupported condition
codes in target specific code.

There are no tests with this commit, but R600 has been updated to take
advantage of this new feature, so its existing selectcc tests are now
testing the swapped operands path.
---
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp    | 19 +++----
 lib/CodeGen/SelectionDAG/SelectionDAG.cpp   |  7 ++-
 lib/CodeGen/SelectionDAG/TargetLowering.cpp | 25 ++++++---
 lib/Target/R600/R600ISelLowering.cpp        | 86 +++++++++++++++++++----------
 lib/Target/R600/R600Instructions.td         | 48 ----------------
 5 files changed, 91 insertions(+), 94 deletions(-)

diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index c58b8fd..f6406b2 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1615,8 +1615,13 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
     // Nothing to do.
     break;
   case TargetLowering::Expand: {
+    ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
+    if (TLI.isCondCodeLegal(InvCC, OpVT)) {
+      std::swap(LHS, RHS);
+      CC = DAG.getCondCode(InvCC);
+      return true;
+    }
     ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
-    ISD::CondCode InvCC = ISD::SETCC_INVALID;
     unsigned Opc = 0;
     switch (CCCode) {
     default: llvm_unreachable("Don't know how to expand this condition!");
@@ -1658,15 +1663,9 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
     case ISD::SETLT:
     case ISD::SETNE:
     case ISD::SETEQ:
-      InvCC = ISD::getSetCCSwappedOperands(CCCode);
-      if (TLI.getCondCodeAction(InvCC, OpVT) == TargetLowering::Expand) {
-        // We only support using the inverted operation and not a
-        // different manner of supporting expanding these cases.
-        llvm_unreachable("Don't know how to expand this condition!");
-      }
-      std::swap(LHS, RHS);
-      CC = DAG.getCondCode(InvCC);
-      return true;
+      // We only support using the inverted operation, which is computed above
+      // and not a different manner of supporting expanding these cases.
+      llvm_unreachable("Don't know how to expand this condition!");
     }
 
     SDValue SetCC1, SetCC2;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 845b9a3..f7836d3 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1645,7 +1645,12 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1,
       }
     } else {
       // Ensure that the constant occurs on the RHS.
-      return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond));
+      ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond);
+      MVT CompVT = N1.getValueType().getSimpleVT();
+      if (!TM.getTargetLowering()->isCondCodeLegal(SwappedCond, CompVT))
+        return SDValue();
+
+      return getSetCC(dl, VT, N2, N1, SwappedCond);
     }
   }
 
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index f2199d7..2d70d7d 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1089,8 +1089,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
 
   // Ensure that the constant occurs on the RHS, and fold constant
   // comparisons.
-  if (isa<ConstantSDNode>(N0.getNode()))
-    return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
+  ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
+  if (isa<ConstantSDNode>(N0.getNode()) &&
+      (DCI.isBeforeLegalizeOps() ||
+       isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
+    return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
 
   if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
     const APInt &C1 = N1C->getAPIntValue();
@@ -1329,7 +1332,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
         ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
         CC = ISD::getSetCCInverse(CC,
                                   N0.getOperand(0).getValueType().isInteger());
-        return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+        if (DCI.isBeforeLegalizeOps() ||
+            isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
+          return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
       }
 
       if ((N0.getOpcode() == ISD::XOR ||
@@ -1766,16 +1771,22 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) {
         if (ValueHasExactlyOneBitSet(N1, DAG)) {
           Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
-          SDValue Zero = DAG.getConstant(0, N1.getValueType());
-          return DAG.getSetCC(dl, VT, N0, Zero, Cond);
+          if (DCI.isBeforeLegalizeOps() ||
+              isCondCodeLegal(Cond, N0.getSimpleValueType())) {
+            SDValue Zero = DAG.getConstant(0, N1.getValueType());
+            return DAG.getSetCC(dl, VT, N0, Zero, Cond);
+          }
         }
       }
     if (N1.getOpcode() == ISD::AND)
       if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) {
         if (ValueHasExactlyOneBitSet(N0, DAG)) {
           Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
-          SDValue Zero = DAG.getConstant(0, N0.getValueType());
-          return DAG.getSetCC(dl, VT, N1, Zero, Cond);
+          if (DCI.isBeforeLegalizeOps() ||
+              isCondCodeLegal(Cond, N1.getSimpleValueType())) {
+            SDValue Zero = DAG.getConstant(0, N0.getValueType());
+            return DAG.getSetCC(dl, VT, N1, Zero, Cond);
+          }
         }
       }
   }
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index ff9ba52..778ee59 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -38,6 +38,18 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
 
   computeRegisterProperties();
 
+  setCondCodeAction(ISD::SETLE,  MVT::f32, Expand);
+  setCondCodeAction(ISD::SETLT,  MVT::f32, Expand);
+  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
+  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
+  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
+
+  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
+  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
+  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
+  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
+
   setOperationAction(ISD::FCOS, MVT::f32, Custom);
   setOperationAction(ISD::FSIN, MVT::f32, Custom);
 
@@ -841,16 +853,19 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
   //
   // SET* can match the following patterns:
   //
-  // select_cc f32, f32, -1,  0, cc_any
-  // select_cc f32, f32, 1.0f, 0.0f, cc_any
-  // select_cc i32, i32, -1,  0, cc_any
+  // select_cc f32, f32, -1,  0, cc_supported
+  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
+  // select_cc i32, i32, -1,  0, cc_supported
   //
 
   // Move hardware True/False values to the correct operand.
-  if (isHWTrueValue(False) && isHWFalseValue(True)) {
-    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+  ISD::CondCode InverseCC =
+     ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
+  if (isHWTrueValue(False) && isHWFalseValue(True) &&
+      isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
     std::swap(False, True);
-    CC = DAG.getCondCode(ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32));
+    CC = DAG.getCondCode(InverseCC);
   }
 
   if (isHWTrueValue(True) && isHWFalseValue(False) &&
@@ -863,14 +878,34 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
   //
   // CND* can match the following patterns:
   //
-  // select_cc f32, 0.0, f32, f32, cc_any
-  // select_cc f32, 0.0, i32, i32, cc_any
-  // select_cc i32, 0,   f32, f32, cc_any
-  // select_cc i32, 0,   i32, i32, cc_any
+  // select_cc f32, 0.0, f32, f32, cc_supported
+  // select_cc f32, 0.0, i32, i32, cc_supported
+  // select_cc i32, 0,   f32, f32, cc_supported
+  // select_cc i32, 0,   i32, i32, cc_supported
   //
-  if (isZero(LHS) || isZero(RHS)) {
-    SDValue Cond = (isZero(LHS) ? RHS : LHS);
-    SDValue Zero = (isZero(LHS) ? LHS : RHS);
+
+  // Try to move the zero value to the RHS
+  if (isZero(LHS)) {
+    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
+    // Try swapping the operands
+    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
+    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
+      std::swap(LHS, RHS);
+      CC = DAG.getCondCode(CCSwapped);
+    } else {
+      // Try inverting the condition and then swapping the operands
+      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
+      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
+      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
+        std::swap(True, False);
+        std::swap(LHS, RHS);
+        CC = DAG.getCondCode(CCSwapped);
+      }
+    }
+  }
+  if (isZero(RHS)) {
+    SDValue Cond = LHS;
+    SDValue Zero = RHS;
     ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
     if (CompareVT != VT) {
       // Bitcast True / False to the correct types.  This will end up being
@@ -880,20 +915,11 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
       True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
       False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
     }
-    if (isZero(LHS)) {
-      CCOpcode = ISD::getSetCCSwappedOperands(CCOpcode);
-    }
 
     switch (CCOpcode) {
     case ISD::SETONE:
     case ISD::SETUNE:
     case ISD::SETNE:
-    case ISD::SETULE:
-    case ISD::SETULT:
-    case ISD::SETOLE:
-    case ISD::SETOLT:
-    case ISD::SETLE:
-    case ISD::SETLT:
       CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
       Temp = True;
       True = False;
@@ -1567,14 +1593,18 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
       ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
       LHSCC = ISD::getSetCCInverse(LHSCC,
                                   LHS.getOperand(0).getValueType().isInteger());
-      return DAG.getSelectCC(SDLoc(N),
-                             LHS.getOperand(0),
-                             LHS.getOperand(1),
-                             LHS.getOperand(2),
-                             LHS.getOperand(3),
-                             LHSCC);
+      if (DCI.isBeforeLegalizeOps() ||
+          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
+        return DAG.getSelectCC(SDLoc(N),
+                               LHS.getOperand(0),
+                               LHS.getOperand(1),
+                               LHS.getOperand(2),
+                               LHS.getOperand(3),
+                               LHSCC);
+      break;
     }
     }
+    return SDValue();
   }
 
   case AMDGPUISD::EXPORT: {
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 24bc6b0..65ea04b 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -2324,54 +2324,6 @@ def KIL : Pat <
   (MASK_WRITE (KILLGT (f32 ZERO), $src0))
 >;
 
-// SGT Reverse args
-def : Pat <
-  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LT),
-  (SGT $src1, $src0)
->;
-
-// SGE Reverse args
-def : Pat <
-  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_LE),
-  (SGE $src1, $src0)
->;
-
-// SETGT_DX10 reverse args
-def : Pat <
-  (selectcc f32:$src0, f32:$src1, -1, 0, COND_LT),
-  (SETGT_DX10 $src1, $src0)
->;
-
-// SETGE_DX10 reverse args
-def : Pat <
-  (selectcc f32:$src0, f32:$src1, -1, 0, COND_LE),
-  (SETGE_DX10 $src1, $src0)
->;
-
-// SETGT_INT reverse args
-def : Pat <
-  (selectcc i32:$src0, i32:$src1, -1, 0, SETLT),
-  (SETGT_INT $src1, $src0)
->;
-
-// SETGE_INT reverse args
-def : Pat <
-  (selectcc i32:$src0, i32:$src1, -1, 0, SETLE),
-  (SETGE_INT $src1, $src0)
->;
-
-// SETGT_UINT reverse args
-def : Pat <
-  (selectcc i32:$src0, i32:$src1, -1, 0, SETULT),
-  (SETGT_UINT $src1, $src0)
->;
-
-// SETGE_UINT reverse args
-def : Pat <
-  (selectcc i32:$src0, i32:$src1, -1, 0, SETULE),
-  (SETGE_UINT $src1, $src0)
->;
-
 // The next two patterns are special cases for handling 'true if ordered' and
 // 'true if unordered' conditionals.  The assumption here is that the behavior of
 // SETE and SNE conforms to the Direct3D 10 rules for floating point values
-- 
1.7.11.4

-------------- next part --------------
>From d62adcc8de93d4986535c2cd4ef5113a2afdc676 Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard at amd.com>
Date: Thu, 12 Sep 2013 08:18:29 -0700
Subject: [PATCH 3/4] SelectionDAG: Improve legalization of SELECT_CC with
 illegal condition codes

SelectionDAG will now attempt to invert an illegal condition (swapping the
true and false values) in order to find a legal one, and if that doesn't
work, it will attempt to swap the operands using the inverted condition.

There are no new test cases for this, but a number of the existing R600
tests hit this path.
---
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 50 +++++++++++++++++++++++---------
 lib/Target/R600/R600ISelLowering.cpp     | 16 +++++++---
 2 files changed, 49 insertions(+), 17 deletions(-)

diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f6406b2..e78caba 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -3664,20 +3664,44 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     Tmp4 = Node->getOperand(3);   // False
     SDValue CC = Node->getOperand(4);
 
-    bool Legalized = LegalizeSetCCCondCode(
-        getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, dl);
-
-    assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
-    // If we exapanded the SETCC by swapping LHS and RHS, create a new SELECT_CC
-    // node and return it.
-    if (CC.getNode()) {
-      Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0),
-                         Tmp1, Tmp2, Tmp3, Tmp4, CC);
+    bool Legalized = false;
+    // Try to legalize by inverting the condition.  This is for targets that
+    // might support an ordered version of a condition, but not the unordered
+    // version (or vice versa).
+    ISD::CondCode InvCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+                                               Tmp1.getValueType().isInteger());
+    if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) {
+      // Use the new condition code and swap true and false
+      Legalized = true;
+      Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC);
     } else {
-      Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
-      CC = DAG.getCondCode(ISD::SETNE);
-      Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
-                         Tmp3, Tmp4, CC);
+      // If the inverse is not legal, then try to swap the arguments using
+      // the inverse condition code.
+      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InvCC);
+      if (TLI.isCondCodeLegal(SwapInvCC, Tmp1.getSimpleValueType())) {
+        // The swapped inverse condition is legal, so swap true and false,
+        // lhs and rhs.
+        Legalized = true;
+        Tmp1 = DAG.getSelectCC(dl, Tmp2, Tmp1, Tmp4, Tmp3, SwapInvCC);
+      }
+    }
+
+    if (!Legalized) {
+      Legalized = LegalizeSetCCCondCode(
+          getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, dl);
+
+      assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
+      // If we expanded the SETCC by swapping LHS and RHS, create a new
+      // SELECT_CC node.
+      if (CC.getNode()) {
+        Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0),
+                           Tmp1, Tmp2, Tmp3, Tmp4, CC);
+      } else {
+        Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
+        CC = DAG.getCondCode(ISD::SETNE);
+        Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
+                           Tmp3, Tmp4, CC);
+      }
     }
     Results.push_back(Tmp1);
     break;
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index 778ee59..6a02bdb 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -862,10 +862,18 @@ SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
   ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
   ISD::CondCode InverseCC =
      ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
-  if (isHWTrueValue(False) && isHWFalseValue(True) &&
-      isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
-    std::swap(False, True);
-    CC = DAG.getCondCode(InverseCC);
+  if (isHWTrueValue(False) && isHWFalseValue(True)) {
+    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
+      std::swap(False, True);
+      CC = DAG.getCondCode(InverseCC);
+    } else {
+      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
+      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
+        std::swap(False, True);
+        std::swap(LHS, RHS);
+        CC = DAG.getCondCode(SwapInvCC);
+      }
+    }
   }
 
   if (isHWTrueValue(True) && isHWFalseValue(False) &&
-- 
1.7.11.4

-------------- next part --------------
>From 859f3f6346bb8b5d7bfb181cca9f103ac18f116f Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard at amd.com>
Date: Thu, 12 Sep 2013 08:18:38 -0700
Subject: [PATCH 4/4] R600: Fix handling of NAN in comparison instructions

We were completely ignoring the unordered/ordered attributes of condition
codes and also incorrectly lowering seto and setuo.
---
 lib/Target/R600/AMDGPUInstructions.td | 21 ++++++++++++
 lib/Target/R600/R600ISelLowering.cpp  |  9 +++++-
 lib/Target/R600/R600Instructions.td   | 54 +++++++------------------------
 test/CodeGen/R600/fmax.ll             |  2 +-
 test/CodeGen/R600/kcache-fold.ll      | 16 +++++-----
 test/CodeGen/R600/pv.ll               |  2 +-
 test/CodeGen/R600/selectcc-opt.ll     |  4 +--
 test/CodeGen/R600/set-dx10.ll         | 60 +++++++++++++++++------------------
 test/CodeGen/R600/unsupported-cc.ll   | 60 +++++++++++++++++++++++++----------
 test/CodeGen/R600/vselect.ll          |  4 +--
 10 files changed, 127 insertions(+), 105 deletions(-)

diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
index e30abc0..5778a8c 100644
--- a/lib/Target/R600/AMDGPUInstructions.td
+++ b/lib/Target/R600/AMDGPUInstructions.td
@@ -43,12 +43,23 @@ def COND_EQ : PatLeaf <
                      case ISD::SETEQ: return true;}}}]
 >;
 
+def COND_OEQ : PatLeaf <
+  (cond),
+  [{return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;}]
+>;
+
 def COND_NE : PatLeaf <
   (cond),
   [{switch(N->get()){{default: return false;
                      case ISD::SETONE: case ISD::SETUNE:
                      case ISD::SETNE: return true;}}}]
 >;
+
+def COND_UNE : PatLeaf <
+  (cond),
+  [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
+>;
+
 def COND_GT : PatLeaf <
   (cond),
   [{switch(N->get()){{default: return false;
@@ -56,6 +67,11 @@ def COND_GT : PatLeaf <
                      case ISD::SETGT: return true;}}}]
 >;
 
+def COND_OGT : PatLeaf <
+  (cond),
+  [{return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;}]
+>;
+
 def COND_GE : PatLeaf <
   (cond),
   [{switch(N->get()){{default: return false;
@@ -63,6 +79,11 @@ def COND_GE : PatLeaf <
                      case ISD::SETGE: return true;}}}]
 >;
 
+def COND_OGE : PatLeaf <
+  (cond),
+  [{return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;}]
+>;
+
 def COND_LT : PatLeaf <
   (cond),
   [{switch(N->get()){{default: return false;
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index 6a02bdb..a8e8a77 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -38,10 +38,17 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
 
   computeRegisterProperties();
 
-  setCondCodeAction(ISD::SETLE,  MVT::f32, Expand);
+  // Set condition code actions
+  setCondCodeAction(ISD::SETO,   MVT::f32, Expand);
+  setCondCodeAction(ISD::SETUO,  MVT::f32, Expand);
   setCondCodeAction(ISD::SETLT,  MVT::f32, Expand);
+  setCondCodeAction(ISD::SETLE,  MVT::f32, Expand);
   setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
   setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
+  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
+  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
+  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
+  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
   setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
   setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
 
diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
index 65ea04b..e92385d 100644
--- a/lib/Target/R600/R600Instructions.td
+++ b/lib/Target/R600/R600Instructions.td
@@ -689,42 +689,42 @@ def MIN : R600_2OP_Helper <0x4, "MIN", AMDGPUfmin>;
 // XXX: Use the defs in TargetSelectionDAG.td instead of intrinsics.
 def SETE : R600_2OP <
   0x08, "SETE",
-  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_EQ))]
+  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OEQ))]
 >;
 
 def SGT : R600_2OP <
   0x09, "SETGT",
-  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GT))]
+  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGT))]
 >;
 
 def SGE : R600_2OP <
   0xA, "SETGE",
-  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_GE))]
+  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_OGE))]
 >;
 
 def SNE : R600_2OP <
   0xB, "SETNE",
-  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_NE))]
+  [(set f32:$dst, (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, COND_UNE))]
 >;
 
 def SETE_DX10 : R600_2OP <
   0xC, "SETE_DX10",
-  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_EQ))]
+  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OEQ))]
 >;
 
 def SETGT_DX10 : R600_2OP <
   0xD, "SETGT_DX10",
-  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GT))]
+  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGT))]
 >;
 
 def SETGE_DX10 : R600_2OP <
   0xE, "SETGE_DX10",
-  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_GE))]
+  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_OGE))]
 >;
 
 def SETNE_DX10 : R600_2OP <
   0xF, "SETNE_DX10",
-  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_NE))]
+  [(set i32:$dst, (selectcc f32:$src0, f32:$src1, -1, 0, COND_UNE))]
 >;
 
 def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>;
@@ -920,19 +920,19 @@ class MULADD_IEEE_Common <bits<5> inst> : R600_3OP <
 
 class CNDE_Common <bits<5> inst> : R600_3OP <
   inst, "CNDE",
-  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_EQ))]
+  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OEQ))]
 >;
 
 class CNDGT_Common <bits<5> inst> : R600_3OP <
   inst, "CNDGT",
-  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GT))]
+  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGT))]
 > {
   let Itinerary = VecALU;
 }
 
 class CNDGE_Common <bits<5> inst> : R600_3OP <
   inst, "CNDGE",
-  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_GE))]
+  [(set f32:$dst, (selectcc f32:$src0, FP_ZERO, f32:$src1, f32:$src2, COND_OGE))]
 > {
   let Itinerary = VecALU;
 }
@@ -2324,38 +2324,6 @@ def KIL : Pat <
   (MASK_WRITE (KILLGT (f32 ZERO), $src0))
 >;
 
-// The next two patterns are special cases for handling 'true if ordered' and
-// 'true if unordered' conditionals.  The assumption here is that the behavior of
-// SETE and SNE conforms to the Direct3D 10 rules for floating point values
-// described here:
-// http://msdn.microsoft.com/en-us/library/windows/desktop/cc308050.aspx#alpha_32_bit
-// We assume that  SETE returns false when one of the operands is NAN and
-// SNE returns true when on of the operands is NAN
-
-//SETE - 'true if ordered'
-def : Pat <
-  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETO),
-  (SETE $src0, $src1)
->;
-
-//SETE_DX10 - 'true if ordered'
-def : Pat <
-  (selectcc f32:$src0, f32:$src1, -1, 0, SETO),
-  (SETE_DX10 $src0, $src1)
->;
-
-//SNE - 'true if unordered'
-def : Pat <
-  (selectcc f32:$src0, f32:$src1, FP_ONE, FP_ZERO, SETUO),
-  (SNE $src0, $src1)
->;
-
-//SETNE_DX10 - 'true if ordered'
-def : Pat <
-  (selectcc f32:$src0, f32:$src1, -1, 0, SETUO),
-  (SETNE_DX10 $src0, $src1)
->;
-
 def : Extract_Element <f32, v4f32, 0, sub0>;
 def : Extract_Element <f32, v4f32, 1, sub1>;
 def : Extract_Element <f32, v4f32, 2, sub2>;
diff --git a/test/CodeGen/R600/fmax.ll b/test/CodeGen/R600/fmax.ll
index 8b704e5..be25c9c 100644
--- a/test/CodeGen/R600/fmax.ll
+++ b/test/CodeGen/R600/fmax.ll
@@ -5,7 +5,7 @@
 define void @test() {
    %r0 = call float @llvm.R600.load.input(i32 0)
    %r1 = call float @llvm.R600.load.input(i32 1)
-   %r2 = fcmp uge float %r0, %r1
+   %r2 = fcmp oge float %r0, %r1
    %r3 = select i1 %r2, float %r0, float %r1
    call void @llvm.AMDGPU.store.output(float %r3, i32 0)
    ret void
diff --git a/test/CodeGen/R600/kcache-fold.ll b/test/CodeGen/R600/kcache-fold.ll
index 8bdb050..0baa3cd 100644
--- a/test/CodeGen/R600/kcache-fold.ll
+++ b/test/CodeGen/R600/kcache-fold.ll
@@ -10,7 +10,7 @@ main_body:
   %3 = extractelement <4 x float> %2, i32 0
   %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %5 = extractelement <4 x float> %4, i32 0
-  %6 = fcmp ult float %1, 0.000000e+00
+  %6 = fcmp ogt float %1, 0.000000e+00
   %7 = select i1 %6, float %3, float %5
   %8 = load <4 x float> addrspace(8)* null
   %9 = extractelement <4 x float> %8, i32 1
@@ -18,7 +18,7 @@ main_body:
   %11 = extractelement <4 x float> %10, i32 1
   %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %13 = extractelement <4 x float> %12, i32 1
-  %14 = fcmp ult float %9, 0.000000e+00
+  %14 = fcmp ogt float %9, 0.000000e+00
   %15 = select i1 %14, float %11, float %13
   %16 = load <4 x float> addrspace(8)* null
   %17 = extractelement <4 x float> %16, i32 2
@@ -26,7 +26,7 @@ main_body:
   %19 = extractelement <4 x float> %18, i32 2
   %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %21 = extractelement <4 x float> %20, i32 2
-  %22 = fcmp ult float %17, 0.000000e+00
+  %22 = fcmp ogt float %17, 0.000000e+00
   %23 = select i1 %22, float %19, float %21
   %24 = load <4 x float> addrspace(8)* null
   %25 = extractelement <4 x float> %24, i32 3
@@ -34,7 +34,7 @@ main_body:
   %27 = extractelement <4 x float> %26, i32 3
   %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %29 = extractelement <4 x float> %28, i32 3
-  %30 = fcmp ult float %25, 0.000000e+00
+  %30 = fcmp ogt float %25, 0.000000e+00
   %31 = select i1 %30, float %27, float %29
   %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
   %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
@@ -58,7 +58,7 @@ main_body:
   %3 = extractelement <4 x float> %2, i32 0
   %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %5 = extractelement <4 x float> %4, i32 1
-  %6 = fcmp ult float %1, 0.000000e+00
+  %6 = fcmp ogt float %1, 0.000000e+00
   %7 = select i1 %6, float %3, float %5
   %8 = load <4 x float> addrspace(8)* null
   %9 = extractelement <4 x float> %8, i32 1
@@ -66,7 +66,7 @@ main_body:
   %11 = extractelement <4 x float> %10, i32 0
   %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %13 = extractelement <4 x float> %12, i32 1
-  %14 = fcmp ult float %9, 0.000000e+00
+  %14 = fcmp ogt float %9, 0.000000e+00
   %15 = select i1 %14, float %11, float %13
   %16 = load <4 x float> addrspace(8)* null
   %17 = extractelement <4 x float> %16, i32 2
@@ -74,7 +74,7 @@ main_body:
   %19 = extractelement <4 x float> %18, i32 3
   %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
   %21 = extractelement <4 x float> %20, i32 2
-  %22 = fcmp ult float %17, 0.000000e+00
+  %22 = fcmp ogt float %17, 0.000000e+00
   %23 = select i1 %22, float %19, float %21
   %24 = load <4 x float> addrspace(8)* null
   %25 = extractelement <4 x float> %24, i32 3
@@ -82,7 +82,7 @@ main_body:
   %27 = extractelement <4 x float> %26, i32 3
   %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
   %29 = extractelement <4 x float> %28, i32 2
-  %30 = fcmp ult float %25, 0.000000e+00
+  %30 = fcmp ogt float %25, 0.000000e+00
   %31 = select i1 %30, float %27, float %29
   %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
   %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
diff --git a/test/CodeGen/R600/pv.ll b/test/CodeGen/R600/pv.ll
index 6e0b744..6d9396c 100644
--- a/test/CodeGen/R600/pv.ll
+++ b/test/CodeGen/R600/pv.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -march=r600 | FileCheck %s
 
 ;CHECK: DOT4 * T{{[0-9]\.W}} (MASKED)
-;CHECK: CNDGE T{{[0-9].[XYZW]}}, PV.X
+;CHECK: MAX T{{[0-9].[XYZW]}}, 0.0, PV.X
 
 define void @main() #0 {
 main_body:
diff --git a/test/CodeGen/R600/selectcc-opt.ll b/test/CodeGen/R600/selectcc-opt.ll
index 7e2d559..834c030 100644
--- a/test/CodeGen/R600/selectcc-opt.ll
+++ b/test/CodeGen/R600/selectcc-opt.ll
@@ -6,7 +6,7 @@
 
 define void @test_a(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp ult float %in, 0.000000e+00
+  %0 = fcmp olt float %in, 0.000000e+00
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fsub float -0.000000e+00, %1
   %3 = fptosi float %2 to i32
@@ -34,7 +34,7 @@ ENDIF:
 ; CHECK-NEXT: ALU clause starting
 define void @test_b(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp ult float %in, 0.0
+  %0 = fcmp olt float %in, 0.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fsub float -0.000000e+00, %1
   %3 = fptosi float %2 to i32
diff --git a/test/CodeGen/R600/set-dx10.ll b/test/CodeGen/R600/set-dx10.ll
index bdc2ff4..5c7d499 100644
--- a/test/CodeGen/R600/set-dx10.ll
+++ b/test/CodeGen/R600/set-dx10.ll
@@ -30,13 +30,13 @@ entry:
   ret void
 }
 
-; CHECK: @fcmp_ueq_select_fptosi
+; CHECK: @fcmp_oeq_select_fptosi
 ; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ueq_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp ueq float %in, 5.0
+  %0 = fcmp oeq float %in, 5.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fsub float -0.000000e+00, %1
   %3 = fptosi float %2 to i32
@@ -44,25 +44,25 @@ entry:
   ret void
 }
 
-; CHECK: @fcmp_ueq_select_i32
+; CHECK: @fcmp_oeq_select_i32
 ; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ueq_select_i32(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp ueq float %in, 5.0
+  %0 = fcmp oeq float %in, 5.0
   %1 = select i1 %0, i32 -1, i32 0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
 
-; CHECK: @fcmp_ugt_select_fptosi
+; CHECK: @fcmp_ogt_select_fptosi
 ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ugt_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp ugt float %in, 5.0
+  %0 = fcmp ogt float %in, 5.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fsub float -0.000000e+00, %1
   %3 = fptosi float %2 to i32
@@ -70,25 +70,25 @@ entry:
   ret void
 }
 
-; CHECK: @fcmp_ugt_select_i32
+; CHECK: @fcmp_ogt_select_i32
 ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ugt_select_i32(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp ugt float %in, 5.0
+  %0 = fcmp ogt float %in, 5.0
   %1 = select i1 %0, i32 -1, i32 0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
 
-; CHECK: @fcmp_uge_select_fptosi
+; CHECK: @fcmp_oge_select_fptosi
 ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_uge_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp uge float %in, 5.0
+  %0 = fcmp oge float %in, 5.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fsub float -0.000000e+00, %1
   %3 = fptosi float %2 to i32
@@ -96,25 +96,25 @@ entry:
   ret void
 }
 
-; CHECK: @fcmp_uge_select_i32
+; CHECK: @fcmp_oge_select_i32
 ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_uge_select_i32(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp uge float %in, 5.0
+  %0 = fcmp oge float %in, 5.0
   %1 = select i1 %0, i32 -1, i32 0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
 
-; CHECK: @fcmp_ule_select_fptosi
+; CHECK: @fcmp_ole_select_fptosi
 ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ule_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp ule float %in, 5.0
+  %0 = fcmp ole float %in, 5.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fsub float -0.000000e+00, %1
   %3 = fptosi float %2 to i32
@@ -122,25 +122,25 @@ entry:
   ret void
 }
 
-; CHECK: @fcmp_ule_select_i32
+; CHECK: @fcmp_ole_select_i32
 ; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ule_select_i32(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp ule float %in, 5.0
+  %0 = fcmp ole float %in, 5.0
   %1 = select i1 %0, i32 -1, i32 0
   store i32 %1, i32 addrspace(1)* %out
   ret void
 }
 
-; CHECK: @fcmp_ult_select_fptosi
+; CHECK: @fcmp_olt_select_fptosi
 ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ult_select_fptosi(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp ult float %in, 5.0
+  %0 = fcmp olt float %in, 5.0
   %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00
   %2 = fsub float -0.000000e+00, %1
   %3 = fptosi float %2 to i32
@@ -148,13 +148,13 @@ entry:
   ret void
 }
 
-; CHECK: @fcmp_ult_select_i32
+; CHECK: @fcmp_olt_select_i32
 ; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 1084227584(5.000000e+00)
-define void @fcmp_ult_select_i32(i32 addrspace(1)* %out, float %in) {
+define void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) {
 entry:
-  %0 = fcmp ult float %in, 5.0
+  %0 = fcmp olt float %in, 5.0
   %1 = select i1 %0, i32 -1, i32 0
   store i32 %1, i32 addrspace(1)* %out
   ret void
diff --git a/test/CodeGen/R600/unsupported-cc.ll b/test/CodeGen/R600/unsupported-cc.ll
index d3aa060..f986a02 100644
--- a/test/CodeGen/R600/unsupported-cc.ll
+++ b/test/CodeGen/R600/unsupported-cc.ll
@@ -2,7 +2,7 @@
 
 ; These tests are for condition codes that are not supported by the hardware
 
-; CHECK: @slt
+; CHECK-LABEL: @slt
 ; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 5(7.006492e-45)
@@ -14,7 +14,7 @@ entry:
   ret void
 }
 
-; CHECK: @ult_i32
+; CHECK-LABEL: @ult_i32
 ; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 5(7.006492e-45)
@@ -26,10 +26,11 @@ entry:
   ret void
 }
 
-; CHECK: @ult_float
-; CHECK: SETGT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
+; CHECK-LABEL: @ult_float
+; CHECK: SETGE * T{{[0-9]}}.[[CHAN:[XYZW]]], KC0[2].Z, literal.x
 ; CHECK-NEXT: 1084227584(5.000000e+00)
+; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0
+; CHECK-NEXT: LSHR *
 define void @ult_float(float addrspace(1)* %out, float %in) {
 entry:
   %0 = fcmp ult float %in, 5.0
@@ -38,10 +39,22 @@ entry:
   ret void
 }
 
-; CHECK: @olt
-; CHECK: SETGT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
-;CHECK-NEXT: 1084227584(5.000000e+00)
+; CHECK-LABEL: @ult_float_native
+; CHECK: SETGE T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
+; CHECK-NEXT: LSHR *
+; CHECK-NEXT: 1084227584(5.000000e+00)
+define void @ult_float_native(float addrspace(1)* %out, float %in) {
+entry:
+  %0 = fcmp ult float %in, 5.0
+  %1 = select i1 %0, float 0.0, float 1.0
+  store float %1, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: @olt
+; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK-NEXT: LSHR *
+; CHECK-NEXT: 1084227584(5.000000e+00)
 define void @olt(float addrspace(1)* %out, float %in) {
 entry:
   %0 = fcmp olt float %in, 5.0
@@ -50,7 +63,7 @@ entry:
   ret void
 }
 
-; CHECK: @sle
+; CHECK-LABEL: @sle
 ; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 6(8.407791e-45)
@@ -62,7 +75,7 @@ entry:
   ret void
 }
 
-; CHECK: @ule_i32
+; CHECK-LABEL: @ule_i32
 ; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
 ; CHECK-NEXT: LSHR
 ; CHECK-NEXT: 6(8.407791e-45)
@@ -74,10 +87,11 @@ entry:
   ret void
 }
 
-; CHECK: @ule_float
-; CHECK: SETGE {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
+; CHECK-LABEL: @ule_float
+; CHECK: SETGT * T{{[0-9]}}.[[CHAN:[XYZW]]], KC0[2].Z, literal.x
 ; CHECK-NEXT: 1084227584(5.000000e+00)
+; CHECK-NEXT: SETE T{{[0-9]\.[XYZW]}}, PV.[[CHAN]], 0.0
+; CHECK-NEXT: LSHR *
 define void @ule_float(float addrspace(1)* %out, float %in) {
 entry:
   %0 = fcmp ule float %in, 5.0
@@ -86,9 +100,21 @@ entry:
   ret void
 }
 
-; CHECK: @ole
-; CHECK: SETGE {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
-; CHECK-NEXT: LSHR
+; CHECK-LABEL: @ule_float_native
+; CHECK: SETGT T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
+; CHECK-NEXT: LSHR *
+; CHECK-NEXT: 1084227584(5.000000e+00)
+define void @ule_float_native(float addrspace(1)* %out, float %in) {
+entry:
+  %0 = fcmp ule float %in, 5.0
+  %1 = select i1 %0, float 0.0, float 1.0
+  store float %1, float addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: @ole
+; CHECK: SETGE T{{[0-9]\.[XYZW]}}, literal.x, KC0[2].Z
+; CHECK-NEXT: LSHR *
 ; CHECK-NEXT:1084227584(5.000000e+00)
 define void @ole(float addrspace(1)* %out, float %in) {
 entry:
diff --git a/test/CodeGen/R600/vselect.ll b/test/CodeGen/R600/vselect.ll
index 8e9c5b5..ee17e0f 100644
--- a/test/CodeGen/R600/vselect.ll
+++ b/test/CodeGen/R600/vselect.ll
@@ -31,7 +31,7 @@ define void @test_select_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrs
 entry:
   %0 = load <2 x float> addrspace(1)* %in0
   %1 = load <2 x float> addrspace(1)* %in1
-  %cmp = fcmp one <2 x float> %0, %1
+  %cmp = fcmp une <2 x float> %0, %1
   %result = select <2 x i1> %cmp, <2 x float> %0, <2 x float> %1
   store <2 x float> %result, <2 x float> addrspace(1)* %out
   ret void
@@ -69,7 +69,7 @@ define void @test_select_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrs
 entry:
   %0 = load <4 x float> addrspace(1)* %in0
   %1 = load <4 x float> addrspace(1)* %in1
-  %cmp = fcmp one <4 x float> %0, %1
+  %cmp = fcmp une <4 x float> %0, %1
   %result = select <4 x i1> %cmp, <4 x float> %0, <4 x float> %1
   store <4 x float> %result, <4 x float> addrspace(1)* %out
   ret void
-- 
1.7.11.4



More information about the llvm-commits mailing list