[llvm] r214714 - [PowerPC] Fix and improve vector comparisons

Mon Aug 4 06:13:57 PDT 2014

Author: uweigand
Date: Mon Aug  4 08:13:57 2014
New Revision: 214714

URL: http://llvm.org/viewvc/llvm-project?rev=214714&view=rev
Log:
[PowerPC] Fix and improve vector comparisons

This patch refactors code generation of vector comparisons.

This fixes a wrong code-gen bug for ISD::SETGE for floating-point types,
and improves generated code for vector comparisons in general.

Specifically, the patch moves all logic deciding how to implement vector
comparisons into getVCmpInst, which gets two extra boolean outputs
indicating to its caller whether its needs to swap the input operands
and/or negate the result of the comparison.  Apart from implementing
these two modifications as directed by getVCmpInst, there is no need
to ever implement vector comparisons in any other manner; in particular,
there is never a need to perform two separate comparisons (e.g. one for
equal and one for greater-than, as code used to do before this patch).

Reviewed by Bill Schmidt.


Modified:
    llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/trunk/test/CodeGen/PowerPC/vec_cmp.ll

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp?rev=214714&r1=214713&r2=214714&view=diff
==============================================================================

--- llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelDAGToDAG.cpp Mon Aug  4 08:13:57 2014
@@ -663,94 +663,105 @@ static unsigned getCRIdxForSetCC(ISD::Co
 // getVCmpInst: return the vector compare instruction for the specified
 // vector type and condition code. Since this is for altivec specific code,
 // only support the altivec types (v16i8, v8i16, v4i32, and v4f32).
-static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC,
-                                bool HasVSX) {
-  switch (CC) {
-    case ISD::SETEQ:
-    case ISD::SETUEQ:
-    case ISD::SETNE:
-    case ISD::SETUNE:
-      if (VecVT == MVT::v16i8)
-        return PPC::VCMPEQUB;
-      else if (VecVT == MVT::v8i16)
-        return PPC::VCMPEQUH;
-      else if (VecVT == MVT::v4i32)
-        return PPC::VCMPEQUW;
-      // v4f32 != v4f32 could be translate to unordered not equal
-      else if (VecVT == MVT::v4f32)
-        return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
-      else if (VecVT == MVT::v2f64)
-        return PPC::XVCMPEQDP;
-      break;
-    case ISD::SETLT:
-    case ISD::SETGT:
-    case ISD::SETLE:
-    case ISD::SETGE:
-      if (VecVT == MVT::v16i8)
-        return PPC::VCMPGTSB;
-      else if (VecVT == MVT::v8i16)
-        return PPC::VCMPGTSH;
-      else if (VecVT == MVT::v4i32)
-        return PPC::VCMPGTSW;
-      else if (VecVT == MVT::v4f32)
-        return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
-      else if (VecVT == MVT::v2f64)
-        return PPC::XVCMPGTDP;
-      break;
-    case ISD::SETULT:
-    case ISD::SETUGT:
-    case ISD::SETUGE:
-    case ISD::SETULE:
-      if (VecVT == MVT::v16i8)
-        return PPC::VCMPGTUB;
-      else if (VecVT == MVT::v8i16)
-        return PPC::VCMPGTUH;
-      else if (VecVT == MVT::v4i32)
-        return PPC::VCMPGTUW;
-      break;
-    case ISD::SETOEQ:
-      if (VecVT == MVT::v4f32)
-        return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
-      else if (VecVT == MVT::v2f64)
-        return PPC::XVCMPEQDP;
-      break;
-    case ISD::SETOLT:
-    case ISD::SETOGT:
-    case ISD::SETOLE:
-      if (VecVT == MVT::v4f32)
-        return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
-      else if (VecVT == MVT::v2f64)
-        return PPC::XVCMPGTDP;
-      break;
-    case ISD::SETOGE:
-      if (VecVT == MVT::v4f32)
-        return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
-      else if (VecVT == MVT::v2f64)
-        return PPC::XVCMPGEDP;
-      break;
-    default:
-      break;
-  }
-  llvm_unreachable("Invalid integer vector compare condition");
-}
+static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
+                                bool HasVSX, bool &Swap, bool &Negate) {
+  Swap = false;
+  Negate = false;
 
-// getVCmpEQInst: return the equal compare instruction for the specified vector
-// type. Since this is for altivec specific code, only support the altivec
-// types (v16i8, v8i16, v4i32, and v4f32).
-static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT, bool HasVSX) {
-  switch (VecVT) {
-    case MVT::v16i8:
-      return PPC::VCMPEQUB;
-    case MVT::v8i16:
-      return PPC::VCMPEQUH;
-    case MVT::v4i32:
-      return PPC::VCMPEQUW;
-    case MVT::v4f32:
-      return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
-    case MVT::v2f64:
-      return PPC::XVCMPEQDP;
-    default:
-      llvm_unreachable("Invalid integer vector compare condition");
+  if (VecVT.isFloatingPoint()) {
+    /* Handle some cases by swapping input operands.  */
+    switch (CC) {
+      case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
+      case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
+      case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
+      case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
+      case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
+      case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
+      default: break;
+    }
+    /* Handle some cases by negating the result.  */
+    switch (CC) {
+      case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
+      case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
+      case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
+      case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
+      default: break;
+    }
+    /* We have instructions implementing the remaining cases.  */
+    switch (CC) {
+      case ISD::SETEQ:
+      case ISD::SETOEQ:
+        if (VecVT == MVT::v4f32)
+          return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
+        else if (VecVT == MVT::v2f64)
+          return PPC::XVCMPEQDP;
+        break;
+      case ISD::SETGT:
+      case ISD::SETOGT:
+        if (VecVT == MVT::v4f32)
+          return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
+        else if (VecVT == MVT::v2f64)
+          return PPC::XVCMPGTDP;
+        break;
+      case ISD::SETGE:
+      case ISD::SETOGE:
+        if (VecVT == MVT::v4f32)
+          return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
+        else if (VecVT == MVT::v2f64)
+          return PPC::XVCMPGEDP;
+        break;
+      default:
+        break;
+    }
+    llvm_unreachable("Invalid floating-point vector compare condition");
+  } else {
+    /* Handle some cases by swapping input operands.  */
+    switch (CC) {
+      case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
+      case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
+      case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
+      case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
+      default: break;
+    }
+    /* Handle some cases by negating the result.  */
+    switch (CC) {
+      case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
+      case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
+      case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
+      case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
+      default: break;
+    }
+    /* We have instructions implementing the remaining cases.  */
+    switch (CC) {
+      case ISD::SETEQ:
+      case ISD::SETUEQ:
+        if (VecVT == MVT::v16i8)
+          return PPC::VCMPEQUB;
+        else if (VecVT == MVT::v8i16)
+          return PPC::VCMPEQUH;
+        else if (VecVT == MVT::v4i32)
+          return PPC::VCMPEQUW;
+        break;
+      case ISD::SETGT:
+        if (VecVT == MVT::v16i8)
+          return PPC::VCMPGTSB;
+        else if (VecVT == MVT::v8i16)
+          return PPC::VCMPGTSH;
+        else if (VecVT == MVT::v4i32)
+          return PPC::VCMPGTSW;
+        break;
+      case ISD::SETUGT:
+        if (VecVT == MVT::v16i8)
+          return PPC::VCMPGTUB;
+        else if (VecVT == MVT::v8i16)
+          return PPC::VCMPGTUH;
+        else if (VecVT == MVT::v4i32)
+          return PPC::VCMPGTUW;
+        break;
+      default:
+        break;
+    }
+    llvm_unreachable("Invalid integer vector compare condition");
   }
 }
 
@@ -842,60 +853,20 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDN
   // vector compare operations return the same type as the operands.
   if (LHS.getValueType().isVector()) {
     EVT VecVT = LHS.getValueType();
-    MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy;
-    unsigned int VCmpInst = getVCmpInst(VT, CC, PPCSubTarget->hasVSX());
-
-    switch (CC) {
-      case ISD::SETEQ:
-      case ISD::SETOEQ:
-      case ISD::SETUEQ:
-        return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
-      case ISD::SETNE:
-      case ISD::SETONE:
-      case ISD::SETUNE: {
-        SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
-        return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR :
-                                                               PPC::VNOR,
-                                    VecVT, VCmp, VCmp);
-      } 
-      case ISD::SETLT:
-      case ISD::SETOLT:
-      case ISD::SETULT:
-        return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, RHS, LHS);
-      case ISD::SETGT:
-      case ISD::SETOGT:
-      case ISD::SETUGT:
-        return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
-      case ISD::SETGE:
-      case ISD::SETOGE:
-      case ISD::SETUGE: {
-        // Small optimization: Altivec provides a 'Vector Compare Greater Than
-        // or Equal To' instruction (vcmpgefp), so in this case there is no
-        // need for extra logic for the equal compare.
-        if (VecVT.getSimpleVT().isFloatingPoint()) {
-          return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
-        } else {
-          SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
-          unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget->hasVSX());
-          SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
-          return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLOR :
-                                                                 PPC::VOR,
-                                      VecVT, VCmpGT, VCmpEQ);
-        }
-      }
-      case ISD::SETLE:
-      case ISD::SETOLE:
-      case ISD::SETULE: {
-        SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0);
-        unsigned int VCmpEQInst = getVCmpEQInst(VT, PPCSubTarget->hasVSX());
-        SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0);
-        return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLOR :
-                                                               PPC::VOR,
-                                    VecVT, VCmpLE, VCmpEQ);
-      }
-      default:
-        llvm_unreachable("Invalid vector compare type: should be expanded by legalize");
+    bool Swap, Negate;
+    unsigned int VCmpInst = getVCmpInst(VecVT.getSimpleVT(), CC,
+                                        PPCSubTarget->hasVSX(), Swap, Negate);
+    if (Swap)
+      std::swap(LHS, RHS);
+
+    if (Negate) {
+      SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0);
+      return CurDAG->SelectNodeTo(N, PPCSubTarget->hasVSX() ? PPC::XXLNOR :
+                                                              PPC::VNOR,
+                                  VecVT, VCmp, VCmp);
     }
+
+    return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS);
   }
 
   if (PPCSubTarget->useCRBits())

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=214714&r1=214713&r2=214714&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Mon Aug  4 08:13:57 2014
@@ -526,11 +526,6 @@ PPCTargetLowering::PPCTargetLowering(PPC
     // Altivec does not contain unordered floating-point compare instructions
     setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
     setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
-    setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand);
-    setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
-    setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
-    setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
-
     setCondCodeAction(ISD::SETO,   MVT::v4f32, Expand);
     setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
 
@@ -561,11 +556,6 @@ PPCTargetLowering::PPCTargetLowering(PPC
       // Share the Altivec comparison restrictions.
       setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
       setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
-      setCondCodeAction(ISD::SETUGT, MVT::v2f64, Expand);
-      setCondCodeAction(ISD::SETUGE, MVT::v2f64, Expand);
-      setCondCodeAction(ISD::SETULT, MVT::v2f64, Expand);
-      setCondCodeAction(ISD::SETULE, MVT::v2f64, Expand);
-
       setCondCodeAction(ISD::SETO,   MVT::v2f64, Expand);
       setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
 

Modified: llvm/trunk/test/CodeGen/PowerPC/vec_cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/vec_cmp.ll?rev=214714&r1=214713&r2=214714&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/vec_cmp.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/vec_cmp.ll Mon Aug  4 08:13:57 2014
@@ -63,9 +63,8 @@ entry:
   ret <16 x i8> %sext
 }
 ; CHECK-LABEL:      v16si8_cmp_le:
-; CHECK:      vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtsb [[RCMPLE:[0-9]+]], 3, 2
-; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
+; CHECK:      vcmpgtsb [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 define <16 x i8> @v16ui8_cmp_le(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
 entry:
@@ -74,9 +73,8 @@ entry:
   ret <16 x i8> %sext
 }
 ; CHECK-LABEL:      v16ui8_cmp_le:
-; CHECK:      vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtub [[RCMPLE:[0-9]+]], 3, 2
-; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
+; CHECK:      vcmpgtub [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 define <16 x i8> @v16si8_cmp_lt(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
 entry:
@@ -121,9 +119,8 @@ entry:
   ret <16 x i8> %sext
 }
 ; CHECK-LABEL:      v16si8_cmp_ge:
-; CHECK:      vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtsb [[RCMPGT:[0-9]+]], 2, 3
-; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
+; CHECK:      vcmpgtsb [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 define <16 x i8> @v16ui8_cmp_ge(<16 x i8> %x, <16 x i8> %y) nounwind readnone {
 entry:
@@ -132,9 +129,8 @@ entry:
   ret <16 x i8> %sext
 }
 ; CHECK-LABEL:      v16ui8_cmp_ge:
-; CHECK:      vcmpequb [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtub [[RCMPGT:[0-9]+]], 2, 3
-; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
+; CHECK:      vcmpgtub [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 
 define <32 x i8> @v32si8_cmp(<32 x i8> %x, <32 x i8> %y) nounwind readnone {
@@ -193,9 +189,8 @@ entry:
   ret <8 x i16> %sext
 }
 ; CHECK-LABEL:      v8si16_cmp_le:
-; CHECK:      vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtsh [[RCMPLE:[0-9]+]], 3, 2
-; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
+; CHECK:      vcmpgtsh [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 define <8 x i16> @v8ui16_cmp_le(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
 entry:
@@ -204,9 +199,8 @@ entry:
   ret <8 x i16> %sext
 }
 ; CHECK-LABEL:      v8ui16_cmp_le:
-; CHECK:      vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtuh [[RCMPLE:[0-9]+]], 3, 2
-; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
+; CHECK:      vcmpgtuh [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 define <8 x i16> @v8si16_cmp_lt(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
 entry:
@@ -251,9 +245,8 @@ entry:
   ret <8 x i16> %sext
 }
 ; CHECK-LABEL:      v8si16_cmp_ge:
-; CHECK:      vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtsh [[RCMPGT:[0-9]+]], 2, 3
-; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
+; CHECK:      vcmpgtsh [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 define <8 x i16> @v8ui16_cmp_ge(<8 x i16> %x, <8 x i16> %y) nounwind readnone {
 entry:
@@ -262,9 +255,8 @@ entry:
   ret <8 x i16> %sext
 }
 ; CHECK-LABEL:      v8ui16_cmp_ge:
-; CHECK:      vcmpequh [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtuh [[RCMPGT:[0-9]+]], 2, 3
-; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
+; CHECK:      vcmpgtuh [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 
 define <16 x i16> @v16si16_cmp(<16 x i16> %x, <16 x i16> %y) nounwind readnone {
@@ -326,9 +318,8 @@ entry:
   ret <4 x i32> %sext
 }
 ; CHECK-LABEL:      v4si32_cmp_le:
-; CHECK:      vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtsw [[RCMPLE:[0-9]+]], 3, 2
-; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
+; CHECK:      vcmpgtsw [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 define <4 x i32> @v4ui32_cmp_le(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
 entry:
@@ -337,9 +328,8 @@ entry:
   ret <4 x i32> %sext
 }
 ; CHECK-LABEL:      v4ui32_cmp_le:
-; CHECK:      vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtuw [[RCMPLE:[0-9]+]], 3, 2
-; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
+; CHECK:      vcmpgtuw [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 define <4 x i32> @v4si32_cmp_lt(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
 entry:
@@ -384,9 +374,8 @@ entry:
   ret <4 x i32> %sext
 }
 ; CHECK-LABEL:      v4si32_cmp_ge:
-; CHECK:      vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtsw [[RCMPGT:[0-9]+]], 2, 3
-; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
+; CHECK:      vcmpgtsw [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 define <4 x i32> @v4ui32_cmp_ge(<4 x i32> %x, <4 x i32> %y) nounwind readnone {
 entry:
@@ -395,9 +384,8 @@ entry:
   ret <4 x i32> %sext
 }
 ; CHECK-LABEL:      v4ui32_cmp_ge:
-; CHECK:      vcmpequw [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtuw [[RCMPGT:[0-9]+]], 2, 3
-; CHECK-NEXT: vor      2, [[RCMPGT]], [[RCMPEQ]]
+; CHECK:      vcmpgtuw [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
 
 
 define <8 x i32> @v8si32_cmp(<8 x i32> %x, <8 x i32> %y) nounwind readnone {
@@ -480,9 +468,7 @@ entry:
   ret <4 x float> %0
 }
 ; CHECK-LABEL:      v4f32_cmp_le:
-; CHECK:      vcmpeqfp [[RCMPEQ:[0-9]+]], 2, 3
-; CHECK-NEXT: vcmpgtfp [[RCMPLE:[0-9]+]], 3, 2
-; CHECK-NEXT: vor      2, [[RCMPLE]], [[RCMPEQ]]
+; CHECK: vcmpgefp 2, 3, 2
 
 define <4 x float> @v4f32_cmp_lt(<4 x float> %x, <4 x float> %y) nounwind readnone {
 entry:
@@ -514,6 +500,50 @@ entry:
 ; CHECK-LABEL: v4f32_cmp_gt:
 ; CHECK: vcmpgtfp 2, 2, 3
 
+define <4 x float> @v4f32_cmp_ule(<4 x float> %x, <4 x float> %y) nounwind readnone {
+entry:
+  %cmp = fcmp ule <4 x float> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %0 = bitcast <4 x i32> %sext to <4 x float>
+  ret <4 x float> %0
+}
+; CHECK-LABEL: v4f32_cmp_ule:
+; CHECK:      vcmpgtfp [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
+
+define <4 x float> @v4f32_cmp_ult(<4 x float> %x, <4 x float> %y) nounwind readnone {
+entry:
+  %cmp = fcmp ult <4 x float> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %0 = bitcast <4 x i32> %sext to <4 x float>
+  ret <4 x float> %0
+}
+; CHECK-LABEL: v4f32_cmp_ult:
+; CHECK:      vcmpgefp [[RET:[0-9]+]], 2, 3
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
+
+define <4 x float> @v4f32_cmp_uge(<4 x float> %x, <4 x float> %y) nounwind readnone {
+entry:
+  %cmp = fcmp uge <4 x float> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %0 = bitcast <4 x i32> %sext to <4 x float>
+  ret <4 x float> %0
+}
+; CHECK-LABEL: v4f32_cmp_uge:
+; CHECK:      vcmpgtfp [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
+
+define <4 x float> @v4f32_cmp_ugt(<4 x float> %x, <4 x float> %y) nounwind readnone {
+entry:
+  %cmp = fcmp ugt <4 x float> %x, %y
+  %sext = sext <4 x i1> %cmp to <4 x i32>
+  %0 = bitcast <4 x i32> %sext to <4 x float>
+  ret <4 x float> %0
+}
+; CHECK-LABEL: v4f32_cmp_ugt:
+; CHECK:      vcmpgefp [[RET:[0-9]+]], 3, 2
+; CHECK-NEXT: vnor     2, [[RET]], [[RET]]
+
 
 define <8 x float> @v8f32_cmp(<8 x float> %x, <8 x float> %y) nounwind readnone {
 entry: