[llvm] 0278c9c - [VE] Change the way to lower select

Fri Oct 14 16:49:45 PDT 2022

Author: Kazushi (Jam) Marukawa
Date: 2022-10-15T08:49:36+09:00
New Revision: 0278c9ceb6c2475f1b1213fcc4ffaf667c6b7731

URL: https://github.com/llvm/llvm-project/commit/0278c9ceb6c2475f1b1213fcc4ffaf667c6b7731
DIFF: https://github.com/llvm/llvm-project/commit/0278c9ceb6c2475f1b1213fcc4ffaf667c6b7731.diff

LOG: [VE] Change the way to lower select

Change to use VEISD::CMOV in combineSelect for better optimization.
Also support VEISD::CMOV in combineTRUNCATE to optimize truncate.
Merge the functions that handle condition codes into VE.h, and add basic
CMOV patterns to VEInstrInfo.td.  Update the regression tests as well.

Reviewed By: efocht

Differential Revision: https://reviews.llvm.org/D135878

Added: 
    

Modified: 
    llvm/lib/Target/VE/VE.h
    llvm/lib/Target/VE/VEISelDAGToDAG.cpp
    llvm/lib/Target/VE/VEISelLowering.cpp
    llvm/lib/Target/VE/VEISelLowering.h
    llvm/lib/Target/VE/VEInstrInfo.td
    llvm/test/CodeGen/VE/Scalar/select.ll
    llvm/test/CodeGen/VE/Scalar/select_cc.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/VE/VE.h b/llvm/lib/Target/VE/VE.h
index 2794d1458be71..4f7ec91682d2f 100644

--- a/llvm/lib/Target/VE/VE.h
+++ b/llvm/lib/Target/VE/VE.h
@@ -16,6 +16,8 @@
 
 #include "MCTargetDesc/VEMCTargetDesc.h"
 #include "llvm/ADT/StringSwitch.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetMachine.h"
 
@@ -145,6 +147,10 @@ inline static VECC::CondCode stringToVEFCondCode(StringRef S) {
       .Default(VECC::UNKNOWN);
 }
 
+inline static bool isIntVECondCode(VECC::CondCode CC) {
+  return CC < VECC::CC_AF; // Assumes integer codes precede CC_AF in the enum.
+}
+
 inline static unsigned VECondCodeToVal(VECC::CondCode CC) {
   switch (CC) {
   case VECC::CC_IG:
@@ -196,6 +202,80 @@ inline static unsigned VECondCodeToVal(VECC::CondCode CC) {
   }
 }
 
+/// Map a DAG integer condition code to the matching VE ICC condition code.
+inline static VECC::CondCode intCondCode2Icc(ISD::CondCode CC) {
+  switch (CC) {
+  default: // Codes without a case below are not expected here.
+    llvm_unreachable("Unknown integer condition code!");
+  case ISD::SETEQ:
+    return VECC::CC_IEQ;
+  case ISD::SETNE:
+    return VECC::CC_INE;
+  case ISD::SETLT:
+    return VECC::CC_IL;
+  case ISD::SETGT:
+    return VECC::CC_IG;
+  case ISD::SETLE:
+    return VECC::CC_ILE;
+  case ISD::SETGE:
+    return VECC::CC_IGE;
+  case ISD::SETULT: // Unsigned predicates reuse the signed VE codes.
+    return VECC::CC_IL;
+  case ISD::SETULE:
+    return VECC::CC_ILE;
+  case ISD::SETUGT:
+    return VECC::CC_IG;
+  case ISD::SETUGE:
+    return VECC::CC_IGE;
+  }
+}
+
+/// Map a DAG floating point condition code to the matching VE FCC code.
+inline static VECC::CondCode fpCondCode2Fcc(ISD::CondCode CC) {
+  switch (CC) {
+  default: // Codes without a case below are not expected here.
+    llvm_unreachable("Unknown fp condition code!");
+  case ISD::SETFALSE:
+    return VECC::CC_AF;
+  case ISD::SETEQ: // Plain and ordered forms share one VE code.
+  case ISD::SETOEQ:
+    return VECC::CC_EQ;
+  case ISD::SETNE:
+  case ISD::SETONE:
+    return VECC::CC_NE;
+  case ISD::SETLT:
+  case ISD::SETOLT:
+    return VECC::CC_L;
+  case ISD::SETGT:
+  case ISD::SETOGT:
+    return VECC::CC_G;
+  case ISD::SETLE:
+  case ISD::SETOLE:
+    return VECC::CC_LE;
+  case ISD::SETGE:
+  case ISD::SETOGE:
+    return VECC::CC_GE;
+  case ISD::SETO:
+    return VECC::CC_NUM;
+  case ISD::SETUO:
+    return VECC::CC_NAN;
+  case ISD::SETUEQ: // Unordered forms map to the *NAN variants.
+    return VECC::CC_EQNAN;
+  case ISD::SETUNE:
+    return VECC::CC_NENAN;
+  case ISD::SETULT:
+    return VECC::CC_LNAN;
+  case ISD::SETUGT:
+    return VECC::CC_GNAN;
+  case ISD::SETULE:
+    return VECC::CC_LENAN;
+  case ISD::SETUGE:
+    return VECC::CC_GENAN;
+  case ISD::SETTRUE:
+    return VECC::CC_AT;
+  }
+}
+
 inline static VECC::CondCode VEValToCondCode(unsigned Val, bool IsInteger) {
   if (IsInteger) {
     switch (Val) {
@@ -320,6 +400,22 @@ inline static VERD::RoundingMode VEValToRD(unsigned Val) {
   llvm_unreachable("Invalid branch predicates");
 }
 
+/// getImmVal - Return the sign-extended value of integer constant N.
+inline static uint64_t getImmVal(const ConstantSDNode *N) {
+  return N->getSExtValue();
+}
+
+/// getFpImmVal - Return the bit pattern of FP constant N as an immediate.
+inline static uint64_t getFpImmVal(const ConstantFPSDNode *N) {
+  const APInt &Imm = N->getValueAPF().bitcastToAPInt();
+  uint64_t Val = Imm.getZExtValue();
+  if (Imm.getBitWidth() == 32) {
+    // A 32-bit float immediate is placed in the upper 32 bits on VE.
+    Val <<= 32;
+  }
+  return Val;
+}
+
 // MImm - Special immediate value of sequential bit stream of 0 or 1.
 //   See VEInstrInfo.td for details.
 inline static bool isMImmVal(uint64_t Val) {

diff  --git a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
index 15fd25747ecef..0301b46b2b513 100644
--- a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
+++ b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
@@ -20,100 +20,6 @@
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
-//===----------------------------------------------------------------------===//
-// Instruction Selector Implementation
-//===----------------------------------------------------------------------===//
-
-/// Convert a DAG integer condition code to a VE ICC condition.
-inline static VECC::CondCode intCondCode2Icc(ISD::CondCode CC) {
-  switch (CC) {
-  default:
-    llvm_unreachable("Unknown integer condition code!");
-  case ISD::SETEQ:
-    return VECC::CC_IEQ;
-  case ISD::SETNE:
-    return VECC::CC_INE;
-  case ISD::SETLT:
-    return VECC::CC_IL;
-  case ISD::SETGT:
-    return VECC::CC_IG;
-  case ISD::SETLE:
-    return VECC::CC_ILE;
-  case ISD::SETGE:
-    return VECC::CC_IGE;
-  case ISD::SETULT:
-    return VECC::CC_IL;
-  case ISD::SETULE:
-    return VECC::CC_ILE;
-  case ISD::SETUGT:
-    return VECC::CC_IG;
-  case ISD::SETUGE:
-    return VECC::CC_IGE;
-  }
-}
-
-/// Convert a DAG floating point condition code to a VE FCC condition.
-inline static VECC::CondCode fpCondCode2Fcc(ISD::CondCode CC) {
-  switch (CC) {
-  default:
-    llvm_unreachable("Unknown fp condition code!");
-  case ISD::SETFALSE:
-    return VECC::CC_AF;
-  case ISD::SETEQ:
-  case ISD::SETOEQ:
-    return VECC::CC_EQ;
-  case ISD::SETNE:
-  case ISD::SETONE:
-    return VECC::CC_NE;
-  case ISD::SETLT:
-  case ISD::SETOLT:
-    return VECC::CC_L;
-  case ISD::SETGT:
-  case ISD::SETOGT:
-    return VECC::CC_G;
-  case ISD::SETLE:
-  case ISD::SETOLE:
-    return VECC::CC_LE;
-  case ISD::SETGE:
-  case ISD::SETOGE:
-    return VECC::CC_GE;
-  case ISD::SETO:
-    return VECC::CC_NUM;
-  case ISD::SETUO:
-    return VECC::CC_NAN;
-  case ISD::SETUEQ:
-    return VECC::CC_EQNAN;
-  case ISD::SETUNE:
-    return VECC::CC_NENAN;
-  case ISD::SETULT:
-    return VECC::CC_LNAN;
-  case ISD::SETUGT:
-    return VECC::CC_GNAN;
-  case ISD::SETULE:
-    return VECC::CC_LENAN;
-  case ISD::SETUGE:
-    return VECC::CC_GENAN;
-  case ISD::SETTRUE:
-    return VECC::CC_AT;
-  }
-}
-
-/// getImmVal - get immediate representation of integer value
-inline static uint64_t getImmVal(const ConstantSDNode *N) {
-  return N->getSExtValue();
-}
-
-/// getFpImmVal - get immediate representation of floating point value
-inline static uint64_t getFpImmVal(const ConstantFPSDNode *N) {
-  const APInt &Imm = N->getValueAPF().bitcastToAPInt();
-  uint64_t Val = Imm.getZExtValue();
-  if (Imm.getBitWidth() == 32) {
-    // Immediate value of float place places at higher bits on VE.
-    Val <<= 32;
-  }
-  return Val;
-}
-
 //===--------------------------------------------------------------------===//
 /// VEDAGToDAGISel - VE specific code to select VE machine
 /// instructions for SelectionDAG operations.

diff  --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index 32de4c61ce331..4fe6affe40345 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -914,6 +914,7 @@ VETargetLowering::VETargetLowering(const TargetMachine &TM,
 
   // We have target-specific dag combine patterns for the following nodes:
   setTargetDAGCombine(ISD::TRUNCATE);
+  setTargetDAGCombine(ISD::SELECT);
 
   // Set function alignment to 16 bytes
   setMinFunctionAlignment(Align(16));
@@ -931,6 +932,7 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch ((VEISD::NodeType)Opcode) {
   case VEISD::FIRST_NUMBER:
     break;
+    TARGET_NODE_CASE(CMOV)
     TARGET_NODE_CASE(CALL)
     TARGET_NODE_CASE(EH_SJLJ_LONGJMP)
     TARGET_NODE_CASE(EH_SJLJ_SETJMP)
@@ -2685,6 +2687,69 @@ VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   }
 }
 
+static bool isMImm(SDValue V) {
+  EVT VT = V.getValueType();
+  if (VT.isVector())
+    return false; // Only scalar values are considered.
+
+  if (VT.isInteger()) {
+    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V))
+      return isMImmVal(getImmVal(C));
+  } else if (VT.isFloatingPoint()) {
+    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(V)) {
+      if (VT == MVT::f32) {
+        // The f32 bits sit in the upper half, so test only those 32 bits.
+        return isMImm32Val(getFpImmVal(C) >> 32);
+      } else if (VT == MVT::f64) {
+        return isMImmVal(getFpImmVal(C));
+      }
+    }
+  }
+  return false; // Not a constant, or a type not handled above.
+}
+
+SDValue VETargetLowering::combineSelect(SDNode *N,
+                                        DAGCombinerInfo &DCI) const {
+  assert(N->getOpcode() == ISD::SELECT &&
+         "Should be called with a SELECT node");
+  ISD::CondCode CC = ISD::CondCode::SETNE; // Inverted below if clauses swap.
+  SDValue Cond = N->getOperand(0);
+  SDValue True = N->getOperand(1);
+  SDValue False = N->getOperand(2);
+
+  // Only scalar SELECT nodes are handled here.
+  EVT VT = N->getValueType(0);
+  if (VT.isVector())
+    return SDValue();
+
+  // Perform combineSelect only after the DAG has been legalized.
+  if (!DCI.isAfterLegalizeDAG())
+    return SDValue();
+
+  EVT VT0 = Cond.getValueType();
+  if (isMImm(True)) {
+    // VE's cmov accepts an MImm in the True clause, so nothing to do.
+  } else if (isMImm(False)) {
+    // VE's cmov accepts an MImm only in the True clause, so swap the True and
+    // False clauses when False is an MImm, and invert the condition code.
+    std::swap(True, False);
+    CC = getSetCCInverse(CC, VT0);
+  }
+
+  SDLoc DL(N);
+  SelectionDAG &DAG = DCI.DAG;
+  VECC::CondCode VECCVal;
+  if (VT0.isFloatingPoint()) {
+    VECCVal = fpCondCode2Fcc(CC);
+  } else {
+    VECCVal = intCondCode2Icc(CC);
+  }
+  SDValue Ops[] = {Cond, True, False,
+                   DAG.getConstant(VECCVal, DL, MVT::i32)};
+  return DAG.getNode(VEISD::CMOV, DL, VT, Ops);
+}
+
+static bool isI32InsnAllUses(const SDNode *User, const SDNode *N);
 static bool isI32Insn(const SDNode *User, const SDNode *N) {
   switch (User->getOpcode()) {
   default:
@@ -2717,6 +2782,17 @@ static bool isI32Insn(const SDNode *User, const SDNode *N) {
     if (User->getOperand(2).getNode() != N &&
         User->getOperand(3).getNode() != N)
       return true;
+    return isI32InsnAllUses(User, N);
+  case VEISD::CMOV:
+    // CMOV in (cmov (trunc ...), true, false, int-comparison) is safe.
+    // However, trunc in true or false clauses is not safe.
+    if (User->getOperand(1).getNode() != N &&
+        User->getOperand(2).getNode() != N &&
+        isa<ConstantSDNode>(User->getOperand(3))) {
+      VECC::CondCode VECCVal = static_cast<VECC::CondCode>(
+          cast<ConstantSDNode>(User->getOperand(3))->getZExtValue());
+      return isIntVECondCode(VECCVal);
+    }
     [[fallthrough]];
   case ISD::AND:
   case ISD::OR:
@@ -2725,33 +2801,39 @@ static bool isI32Insn(const SDNode *User, const SDNode *N) {
   case ISD::CopyToReg:
     // Check all use of selections, bit operations, and copies.  If all of them
     // are safe, optimize truncate to extract_subreg.
-    for (const SDNode *U : User->uses()) {
-      switch (U->getOpcode()) {
-      default:
-        // If the use is an instruction which treats the source operand as i32,
-        // it is safe to avoid truncate here.
-        if (isI32Insn(U, N))
-          continue;
-        break;
-      case ISD::ANY_EXTEND:
-      case ISD::SIGN_EXTEND:
-      case ISD::ZERO_EXTEND: {
-        // Special optimizations to the combination of ext and trunc.
-        // (ext ... (select ... (trunc ...))) is safe to avoid truncate here
-        // since this truncate instruction clears higher 32 bits which is filled
-        // by one of ext instructions later.
-        assert(N->getValueType(0) == MVT::i32 &&
-               "find truncate to not i32 integer");
-        if (User->getOpcode() == ISD::SELECT_CC ||
-            User->getOpcode() == ISD::SELECT)
-          continue;
-        break;
-      }
-      }
-      return false;
+    return isI32InsnAllUses(User, N);
+  }
+}
+
+static bool isI32InsnAllUses(const SDNode *User, const SDNode *N) {
+  // Check all use of User node.  If all of them are safe, optimize
+  // truncate to extract_subreg.
+  for (const SDNode *U : User->uses()) {
+    switch (U->getOpcode()) {
+    default:
+      // If the use is an instruction which treats the source operand as i32,
+      // it is safe to avoid truncate here.
+      if (isI32Insn(U, N))
+        continue;
+      break;
+    case ISD::ANY_EXTEND:
+    case ISD::SIGN_EXTEND:
+    case ISD::ZERO_EXTEND: {
+      // Special optimizations to the combination of ext and trunc.
+      // (ext ... (select ... (trunc ...))) is safe to avoid truncate here
+      // since this truncate instruction clears higher 32 bits which is filled
+      // by one of ext instructions later.
+      assert(N->getValueType(0) == MVT::i32 &&
+             "find truncate to not i32 integer");
+      if (User->getOpcode() == ISD::SELECT_CC ||
+          User->getOpcode() == ISD::SELECT || User->getOpcode() == VEISD::CMOV)
+        continue;
+      break;
     }
-    return true;
+    }
+    return false;
   }
+  return true;
 }
 
 // Optimize TRUNCATE in DAG combining.  Optimizing it in CUSTOM lower is
@@ -2801,6 +2883,8 @@ SDValue VETargetLowering::PerformDAGCombine(SDNode *N,
   switch (N->getOpcode()) {
   default:
     break;
+  case ISD::SELECT:
+    return combineSelect(N, DCI);
   case ISD::TRUNCATE:
     return combineTRUNCATE(N, DCI);
   }

diff  --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index b9a29e4362d64..6b8c37dc109d9 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -24,6 +24,8 @@ namespace VEISD {
 enum NodeType : unsigned {
   FIRST_NUMBER = ISD::BUILTIN_OP_END,
 
+  CMOV, // Select between two values using the result of comparison.
+
   CALL,                   // A call instruction.
   EH_SJLJ_LONGJMP,        // SjLj exception handling longjmp.
   EH_SJLJ_SETJMP,         // SjLj exception handling setjmp.
@@ -200,6 +202,7 @@ class VETargetLowering : public TargetLowering {
   /// Custom DAGCombine {
   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 
+  SDValue combineSelect(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
   /// } Custom DAGCombine
 

diff  --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index d8eb65185a702..83c4fb1c43271 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -446,6 +446,11 @@ def retflag       : SDNode<"VEISD::RET_FLAG", SDTNone,
 
 def getGOT        : Operand<iPTR>;
 
+// res = cmov cmp, t, f, cond
+def SDT_Cmov      : SDTypeProfile<1, 4, [SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>,
+                                  SDTCisVT<4, i32>]>;
+def cmov          : SDNode<"VEISD::CMOV", SDT_Cmov>;
+
 def VEeh_sjlj_setjmp: SDNode<"VEISD::EH_SJLJ_SETJMP",
                              SDTypeProfile<1, 1, [SDTCisInt<0>,
                                                   SDTCisPtrTy<1>]>,
@@ -715,21 +720,31 @@ multiclass RRSWPm<string opcStr, bits<8>opc,
 //   e.g. CMOVL, CMOVW, CMOVD, and etc.
 let Constraints = "$sx = $sd", DisableEncoding = "$sd", hasSideEffects = 0,
     cfw = ? in
-multiclass RRCMOVm<string opcStr, bits<8>opc, RegisterClass RC> {
+multiclass RRCMOVm<string opcStr, bits<8>opc, RegisterClass RC, ValueType Ty,
+                   SDPatternOperator OpNode = null_frag,
+                   Operand immOp = simm7> {
   def rr : RR<opc, (outs I64:$sx), (ins CCOp:$cfw, RC:$sy, I64:$sz, I64:$sd),
-              !strconcat(opcStr, " $sx, $sz, $sy")>;
+              !strconcat(opcStr, " $sx, $sz, $sy"),
+              [(set i64:$sx, (OpNode Ty:$sy, i64:$sz, i64:$sd,
+                                     (i32 CCOp:$cfw)))]>;
   let cy = 0 in
   def ir : RR<opc, (outs I64:$sx),
-              (ins CCOp:$cfw, simm7:$sy, I64:$sz, I64:$sd),
-              !strconcat(opcStr, " $sx, $sz, $sy")>;
+              (ins CCOp:$cfw, immOp:$sy, I64:$sz, I64:$sd),
+              !strconcat(opcStr, " $sx, $sz, $sy"),
+              [(set i64:$sx, (OpNode (Ty immOp:$sy), i64:$sz, i64:$sd,
+                                     (i32 CCOp:$cfw)))]>;
   let cz = 0 in
   def rm : RR<opc, (outs I64:$sx),
               (ins CCOp:$cfw, RC:$sy, mimm:$sz, I64:$sd),
-              !strconcat(opcStr, " $sx, $sz, $sy")>;
+              !strconcat(opcStr, " $sx, $sz, $sy"),
+              [(set i64:$sx, (OpNode Ty:$sy, (i64 mimm:$sz), i64:$sd,
+                                     (i32 CCOp:$cfw)))]>;
   let cy = 0, cz = 0 in
   def im : RR<opc, (outs I64:$sx),
-              (ins CCOp:$cfw, simm7:$sy, mimm:$sz, I64:$sd),
-              !strconcat(opcStr, " $sx, $sz, $sy")>;
+              (ins CCOp:$cfw, immOp:$sy, mimm:$sz, I64:$sd),
+              !strconcat(opcStr, " $sx, $sz, $sy"),
+              [(set i64:$sx, (OpNode (Ty immOp:$sy), (i64 mimm:$sz), i64:$sd,
+                                     (i32 CCOp:$cfw)))]>;
 }
 
 // Multiclass for floating point conversion instructions.
@@ -1307,10 +1322,16 @@ def : Pat<(i32 (bswap (i32 mimm:$src))),
           (EXTRACT_SUBREG (BSWPmi (MIMM $src), 1), sub_i32)>;
 
 // Section 8.5.11 - CMOV (Conditional Move)
-let cw = 0, cw2 = 0 in defm CMOVL : RRCMOVm<"cmov.l.${cfw}", 0x3B, I64>;
-let cw = 1, cw2 = 0 in defm CMOVW : RRCMOVm<"cmov.w.${cfw}", 0x3B, I32>;
-let cw = 0, cw2 = 1 in defm CMOVD : RRCMOVm<"cmov.d.${cfw}", 0x3B, I64>;
-let cw = 1, cw2 = 1 in defm CMOVS : RRCMOVm<"cmov.s.${cfw}", 0x3B, F32>;
+let isReMaterializable = 1 in {
+let cw = 0, cw2 = 0 in
+defm CMOVL : RRCMOVm<"cmov.l.${cfw}", 0x3B, I64, i64, cmov>;
+let cw = 1, cw2 = 0 in
+defm CMOVW : RRCMOVm<"cmov.w.${cfw}", 0x3B, I32, i32, cmov>;
+let cw = 0, cw2 = 1 in
+defm CMOVD : RRCMOVm<"cmov.d.${cfw}", 0x3B, I64, f64, cmov, simm7fp>;
+let cw = 1, cw2 = 1 in
+defm CMOVS : RRCMOVm<"cmov.s.${cfw}", 0x3B, F32, f32, cmov, simm7fp>;
+}
 def : MnemonicAlias<"cmov.l", "cmov.l.at">;
 def : MnemonicAlias<"cmov.w", "cmov.w.at">;
 def : MnemonicAlias<"cmov.d", "cmov.d.at">;
@@ -2182,41 +2203,80 @@ def : Pat<(f128 (selectcc f64:$l, f64:$r, f128:$t, f128:$f, cond:$cond)),
 def : Pat<(f128 (selectcc f128:$l, f128:$r, f128:$t, f128:$f, cond:$cond)),
           (cmov128rr<CMOVDrr> (fcond2cc $cond), (FCMPQrr $l, $r), $t, $f)>;
 
-// Generic SELECT pattern matches
-// Use cmov.w for all cases since %pred holds i32.
-//
-//   CMOV.w.ne %res, %tval, %tmp  ; set tval if %tmp is true
-
-def : Pat<(i32 (select i32:$pred, i32:$t, i32:$f)),
-          (cmov32rr<CMOVWrr, sub_i32> CC_INE, $pred, $t, $f)>;
-def : Pat<(i32 (select i32:$pred, (i32 mimm:$t), i32:$f)),
-          (cmov32rm<CMOVWrm, sub_i32> CC_INE, $pred, $t, $f)>;
-def : Pat<(i32 (select i32:$pred, i32:$t, (i32 mimm:$f))),
-          (cmov32rm<CMOVWrm, sub_i32> CC_IEQ, $pred, $f, $t)>;
-
-def : Pat<(i64 (select i32:$pred, i64:$t, i64:$f)),
-          (cmovrr<CMOVWrr> CC_INE, $pred, $t, $f)>;
-def : Pat<(i64 (select i32:$pred, (i64 mimm:$t), i64:$f)),
-          (cmovrm<CMOVWrm, MIMM> CC_INE, $pred, $t, $f)>;
-def : Pat<(i64 (select i32:$pred, i64:$t, (i64 mimm:$f))),
-          (cmovrm<CMOVWrm, MIMM> CC_IEQ, $pred, $f, $t)>;
-
-def : Pat<(f32 (select i32:$pred, f32:$t, f32:$f)),
-          (cmov32rr<CMOVWrr, sub_f32> CC_INE, $pred, $t, $f)>;
-def : Pat<(f32 (select i32:$pred, (f32 mimmfp:$t), f32:$f)),
-          (cmov32rm<CMOVWrm, sub_f32, MIMMFP> CC_INE, $pred, $t, $f)>;
-def : Pat<(f32 (select i32:$pred, f32:$t, (f32 mimmfp:$f))),
-          (cmov32rm<CMOVWrm, sub_f32, MIMMFP> CC_IEQ, $pred, $f, $t)>;
-
-def : Pat<(f64 (select i32:$pred, f64:$t, f64:$f)),
-          (cmovrr<CMOVWrr> CC_INE, $pred, $t, $f)>;
-def : Pat<(f64 (select i32:$pred, (f64 mimmfp:$t), f64:$f)),
-          (cmovrm<CMOVWrm, MIMMFP> CC_INE, $pred, $t, $f)>;
-def : Pat<(f64 (select i32:$pred, f64:$t, (f64 mimmfp:$f))),
-          (cmovrm<CMOVWrm, MIMMFP> CC_IEQ, $pred, $f, $t)>;
-
-def : Pat<(f128 (select i32:$pred, f128:$t, f128:$f)),
-          (cmov128rr<CMOVWrr> CC_INE, $pred, $t, $f)>;
+// Generic CMOV pattern matches
+//   CMOV accepts i64 $t, $f, and result.  So, we extend it to support
+//   i32/f32/f64/f128 $t, $f, and result.
+
+// CMOV for i32
+multiclass CMOVI32m<ValueType TY, string Insn> {
+  def : Pat<(i32 (cmov TY:$cmp, i32:$t, i32:$f, (i32 CCOp:$cond))),
+            (EXTRACT_SUBREG
+                (!cast<Instruction>(Insn#"rr") (CCOP $cond), $cmp,
+                           (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_i32),
+                           (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
+                sub_i32)>;
+  def : Pat<(i32 (cmov TY:$cmp, (i32 mimm:$t), i32:$f, (i32 CCOp:$cond))),
+            (EXTRACT_SUBREG
+                (!cast<Instruction>(Insn#"rm") (CCOP $cond), $cmp,
+                           (MIMM $t),
+                           (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_i32)),
+                sub_i32)>;
+}
+defm : CMOVI32m<i64, "CMOVL">;
+defm : CMOVI32m<i32, "CMOVW">;
+defm : CMOVI32m<f64, "CMOVD">;
+defm : CMOVI32m<f32, "CMOVS">;
+
+// CMOV for f32
+multiclass CMOVF32m<ValueType TY, string Insn> {
+  def : Pat<(f32 (cmov TY:$cmp, f32:$t, f32:$f, (i32 CCOp:$cond))),
+            (EXTRACT_SUBREG
+                (!cast<Instruction>(Insn#"rr")
+                    (CCOP $cond), $cmp,
+                    (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $t, sub_f32),
+                    (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_f32)),
+                sub_f32)>;
+  def : Pat<(f32 (cmov TY:$cmp, (f32 mimmfp:$t), f32:$f, (i32 CCOp:$cond))),
+            (EXTRACT_SUBREG
+                (!cast<Instruction>(Insn#"rm")
+                    (CCOP $cond), $cmp, (MIMMFP $t),
+                    (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $f, sub_f32)),
+                sub_f32)>;
+}
+defm : CMOVF32m<i64, "CMOVL">;
+defm : CMOVF32m<i32, "CMOVW">;
+defm : CMOVF32m<f64, "CMOVD">;
+defm : CMOVF32m<f32, "CMOVS">;
+
+// CMOV for f64
+multiclass CMOVF64m<ValueType TY, string Insn> {
+  def : Pat<(f64 (cmov TY:$cmp, f64:$t, f64:$f, (i32 CCOp:$cond))),
+            (!cast<Instruction>(Insn#"rr") (CCOP $cond), $cmp, $t, $f)>;
+  def : Pat<(f64 (cmov TY:$cmp, (f64 mimmfp:$t), f64:$f, (i32 CCOp:$cond))),
+            (!cast<Instruction>(Insn#"rm") (CCOP $cond), $cmp, (MIMMFP $t),
+                                           $f)>;
+}
+defm : CMOVF64m<i64, "CMOVL">;
+defm : CMOVF64m<i32, "CMOVW">;
+defm : CMOVF64m<f64, "CMOVD">;
+defm : CMOVF64m<f32, "CMOVS">;
+
+// CMOV for f128
+multiclass CMOVF128m<ValueType TY, string Insn> {
+  def : Pat<(f128 (cmov TY:$cmp, f128:$t, f128:$f, (i32 CCOp:$cond))),
+            (INSERT_SUBREG
+              (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
+                (!cast<Instruction>(Insn#"rr") (CCOP $cond), $cmp,
+                  (EXTRACT_SUBREG $t, sub_odd),
+                  (EXTRACT_SUBREG $f, sub_odd)), sub_odd),
+              (!cast<Instruction>(Insn#"rr") (CCOP $cond), $cmp,
+                (EXTRACT_SUBREG $t, sub_even),
+                (EXTRACT_SUBREG $f, sub_even)), sub_even)>;
+}
+defm : CMOVF128m<i64, "CMOVL">;
+defm : CMOVF128m<i32, "CMOVW">;
+defm : CMOVF128m<f64, "CMOVD">;
+defm : CMOVF128m<f32, "CMOVS">;
 
 // bitconvert
 def : Pat<(f64 (bitconvert i64:$src)), (COPY_TO_REGCLASS $src, I64)>;

diff  --git a/llvm/test/CodeGen/VE/Scalar/select.ll b/llvm/test/CodeGen/VE/Scalar/select.ll
index eeb3f036b7dc4..184513a3f820b 100644
--- a/llvm/test/CodeGen/VE/Scalar/select.ll
+++ b/llvm/test/CodeGen/VE/Scalar/select.ll
@@ -119,7 +119,6 @@ define zeroext i32 @select_u32_var(i1 zeroext %0, i32 zeroext %1, i32 zeroext %2
 define i64 @select_i64_var(i1 zeroext %0, i64 %1, i64 %2) {
 ; CHECK-LABEL: select_i64_var:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.ne %s2, %s1, %s0
 ; CHECK-NEXT:    or %s0, 0, %s2
 ; CHECK-NEXT:    b.l.t (, %s10)
@@ -131,7 +130,6 @@ define i64 @select_i64_var(i1 zeroext %0, i64 %1, i64 %2) {
 define i64 @select_u64_var(i1 zeroext %0, i64 %1, i64 %2) {
 ; CHECK-LABEL: select_u64_var:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.ne %s2, %s1, %s0
 ; CHECK-NEXT:    or %s0, 0, %s2
 ; CHECK-NEXT:    b.l.t (, %s10)
@@ -143,7 +141,6 @@ define i64 @select_u64_var(i1 zeroext %0, i64 %1, i64 %2) {
 define i128 @select_i128_var(i1 zeroext %0, i128 %1, i128 %2) {
 ; CHECK-LABEL: select_i128_var:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.ne %s3, %s1, %s0
 ; CHECK-NEXT:    cmov.w.ne %s4, %s2, %s0
 ; CHECK-NEXT:    or %s0, 0, %s3
@@ -157,7 +154,6 @@ define i128 @select_i128_var(i1 zeroext %0, i128 %1, i128 %2) {
 define i128 @select_u128_var(i1 zeroext %0, i128 %1, i128 %2) {
 ; CHECK-LABEL: select_u128_var:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.ne %s3, %s1, %s0
 ; CHECK-NEXT:    cmov.w.ne %s4, %s2, %s0
 ; CHECK-NEXT:    or %s0, 0, %s3
@@ -171,7 +167,6 @@ define i128 @select_u128_var(i1 zeroext %0, i128 %1, i128 %2) {
 define float @select_float_var(i1 zeroext %0, float %1, float %2) {
 ; CHECK-LABEL: select_float_var:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.ne %s2, %s1, %s0
 ; CHECK-NEXT:    or %s0, 0, %s2
 ; CHECK-NEXT:    b.l.t (, %s10)
@@ -183,7 +178,6 @@ define float @select_float_var(i1 zeroext %0, float %1, float %2) {
 define double @select_double_var(i1 zeroext %0, double %1, double %2) {
 ; CHECK-LABEL: select_double_var:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.ne %s2, %s1, %s0
 ; CHECK-NEXT:    or %s0, 0, %s2
 ; CHECK-NEXT:    b.l.t (, %s10)
@@ -195,7 +189,6 @@ define double @select_double_var(i1 zeroext %0, double %1, double %2) {
 define fp128 @select_quad_var(i1 zeroext %0, fp128 %1, fp128 %2) {
 ; CHECK-LABEL: select_quad_var:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.ne %s4, %s2, %s0
 ; CHECK-NEXT:    cmov.w.ne %s5, %s3, %s0
 ; CHECK-NEXT:    or %s0, 0, %s4
@@ -285,7 +278,6 @@ define zeroext i32 @select_u32_mimm(i1 zeroext %0, i32 zeroext %1) {
 define i64 @select_i64_mimm(i1 zeroext %0, i64 %1) {
 ; CHECK-LABEL: select_i64_mimm:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.ne %s1, (48)0, %s0
 ; CHECK-NEXT:    or %s0, 0, %s1
 ; CHECK-NEXT:    b.l.t (, %s10)
@@ -297,7 +289,6 @@ define i64 @select_i64_mimm(i1 zeroext %0, i64 %1) {
 define i64 @select_u64_mimm(i1 zeroext %0, i64 %1) {
 ; CHECK-LABEL: select_u64_mimm:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.ne %s1, (48)0, %s0
 ; CHECK-NEXT:    or %s0, 0, %s1
 ; CHECK-NEXT:    b.l.t (, %s10)
@@ -309,7 +300,6 @@ define i64 @select_u64_mimm(i1 zeroext %0, i64 %1) {
 define i128 @select_i128_mimm(i1 zeroext %0, i128 %1) {
 ; CHECK-LABEL: select_i128_mimm:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.ne %s1, (48)0, %s0
 ; CHECK-NEXT:    cmov.w.ne %s2, (0)1, %s0
 ; CHECK-NEXT:    or %s0, 0, %s1
@@ -323,7 +313,6 @@ define i128 @select_i128_mimm(i1 zeroext %0, i128 %1) {
 define i128 @select_u128_mimm(i1 zeroext %0, i128 %1) {
 ; CHECK-LABEL: select_u128_mimm:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.ne %s1, (48)0, %s0
 ; CHECK-NEXT:    cmov.w.ne %s2, (0)1, %s0
 ; CHECK-NEXT:    or %s0, 0, %s1
@@ -337,7 +326,6 @@ define i128 @select_u128_mimm(i1 zeroext %0, i128 %1) {
 define float @select_float_mimm(i1 zeroext %0, float %1) {
 ; CHECK-LABEL: select_float_mimm:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.ne %s1, (2)1, %s0
 ; CHECK-NEXT:    or %s0, 0, %s1
 ; CHECK-NEXT:    b.l.t (, %s10)
@@ -349,7 +337,6 @@ define float @select_float_mimm(i1 zeroext %0, float %1) {
 define double @select_double_mimm(i1 zeroext %0, double %1) {
 ; CHECK-LABEL: select_double_mimm:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.ne %s1, (2)1, %s0
 ; CHECK-NEXT:    or %s0, 0, %s1
 ; CHECK-NEXT:    b.l.t (, %s10)
@@ -366,7 +353,6 @@ define fp128 @select_quad_mimm(i1 zeroext %0, fp128 %1) {
 ; CHECK-NEXT:    lea.sl %s1, .LCPI{{[0-9]+}}_0 at hi(, %s1)
 ; CHECK-NEXT:    ld %s4, 8(, %s1)
 ; CHECK-NEXT:    ld %s5, (, %s1)
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.ne %s2, %s4, %s0
 ; CHECK-NEXT:    cmov.w.ne %s3, %s5, %s0
 ; CHECK-NEXT:    or %s0, 0, %s2
@@ -458,7 +444,6 @@ define zeroext i32 @select_mimm_u32(i1 zeroext %0, i32 zeroext %1) {
 define i64 @select_mimm_i64(i1 zeroext %0, i64 %1) {
 ; CHECK-LABEL: select_mimm_i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.eq %s1, (48)0, %s0
 ; CHECK-NEXT:    or %s0, 0, %s1
 ; CHECK-NEXT:    b.l.t (, %s10)
@@ -470,7 +455,6 @@ define i64 @select_mimm_i64(i1 zeroext %0, i64 %1) {
 define i64 @select_mimm_u64(i1 zeroext %0, i64 %1) {
 ; CHECK-LABEL: select_mimm_u64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.eq %s1, (48)0, %s0
 ; CHECK-NEXT:    or %s0, 0, %s1
 ; CHECK-NEXT:    b.l.t (, %s10)
@@ -482,7 +466,6 @@ define i64 @select_mimm_u64(i1 zeroext %0, i64 %1) {
 define i128 @select_mimm_i128(i1 zeroext %0, i128 %1) {
 ; CHECK-LABEL: select_mimm_i128:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.eq %s1, (48)0, %s0
 ; CHECK-NEXT:    cmov.w.eq %s2, (0)1, %s0
 ; CHECK-NEXT:    or %s0, 0, %s1
@@ -496,7 +479,6 @@ define i128 @select_mimm_i128(i1 zeroext %0, i128 %1) {
 define i128 @select_mimm_u128(i1 zeroext %0, i128 %1) {
 ; CHECK-LABEL: select_mimm_u128:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.eq %s1, (48)0, %s0
 ; CHECK-NEXT:    cmov.w.eq %s2, (0)1, %s0
 ; CHECK-NEXT:    or %s0, 0, %s1
@@ -510,7 +492,6 @@ define i128 @select_mimm_u128(i1 zeroext %0, i128 %1) {
 define float @select_mimm_float(i1 zeroext %0, float %1) {
 ; CHECK-LABEL: select_mimm_float:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.eq %s1, (2)1, %s0
 ; CHECK-NEXT:    or %s0, 0, %s1
 ; CHECK-NEXT:    b.l.t (, %s10)
@@ -522,7 +503,6 @@ define float @select_mimm_float(i1 zeroext %0, float %1) {
 define double @select_mimm_double(i1 zeroext %0, double %1) {
 ; CHECK-LABEL: select_mimm_double:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.eq %s1, (2)1, %s0
 ; CHECK-NEXT:    or %s0, 0, %s1
 ; CHECK-NEXT:    b.l.t (, %s10)
@@ -539,7 +519,6 @@ define fp128 @select_mimm_quad(i1 zeroext %0, fp128 %1) {
 ; CHECK-NEXT:    lea.sl %s1, .LCPI{{[0-9]+}}_0 at hi(, %s1)
 ; CHECK-NEXT:    ld %s4, 8(, %s1)
 ; CHECK-NEXT:    ld %s5, (, %s1)
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.ne %s4, %s2, %s0
 ; CHECK-NEXT:    cmov.w.ne %s5, %s3, %s0
 ; CHECK-NEXT:    or %s0, 0, %s4

diff  --git a/llvm/test/CodeGen/VE/Scalar/select_cc.ll b/llvm/test/CodeGen/VE/Scalar/select_cc.ll
index 86c17bc798d48..d8ea7f1fe7153 100644
--- a/llvm/test/CodeGen/VE/Scalar/select_cc.ll
+++ b/llvm/test/CodeGen/VE/Scalar/select_cc.ll
@@ -1326,7 +1326,6 @@ define i64 @select_cc_i1_i64(i1 zeroext %0, i1 zeroext %1, i64 %2, i64 %3) {
 ; CHECK-LABEL: select_cc_i1_i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xor %s0, %s0, %s1
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.ne %s2, %s3, %s0
 ; CHECK-NEXT:    or %s0, 0, %s2
 ; CHECK-NEXT:    b.l.t (, %s10)
@@ -1515,7 +1514,6 @@ define i64 @select_cc_i1_u64(i1 zeroext %0, i1 zeroext %1, i64 %2, i64 %3) {
 ; CHECK-LABEL: select_cc_i1_u64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xor %s0, %s0, %s1
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.ne %s2, %s3, %s0
 ; CHECK-NEXT:    or %s0, 0, %s2
 ; CHECK-NEXT:    b.l.t (, %s10)
@@ -1704,7 +1702,6 @@ define i128 @select_cc_i1_i128(i1 zeroext %0, i1 zeroext %1, i128 %2, i128 %3) {
 ; CHECK-LABEL: select_cc_i1_i128:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xor %s0, %s0, %s1
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.ne %s2, %s4, %s0
 ; CHECK-NEXT:    cmov.w.ne %s3, %s5, %s0
 ; CHECK-NEXT:    or %s0, 0, %s2
@@ -1921,7 +1918,6 @@ define i128 @select_cc_i1_u128(i1 zeroext %0, i1 zeroext %1, i128 %2, i128 %3) {
 ; CHECK-LABEL: select_cc_i1_u128:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xor %s0, %s0, %s1
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.ne %s2, %s4, %s0
 ; CHECK-NEXT:    cmov.w.ne %s3, %s5, %s0
 ; CHECK-NEXT:    or %s0, 0, %s2
@@ -2138,7 +2134,6 @@ define float @select_cc_i1_float(i1 zeroext %0, i1 zeroext %1, float %2, float %
 ; CHECK-LABEL: select_cc_i1_float:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xor %s0, %s0, %s1
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.ne %s2, %s3, %s0
 ; CHECK-NEXT:    or %s0, 0, %s2
 ; CHECK-NEXT:    b.l.t (, %s10)
@@ -2327,7 +2322,6 @@ define double @select_cc_i1_double(i1 zeroext %0, i1 zeroext %1, double %2, doub
 ; CHECK-LABEL: select_cc_i1_double:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xor %s0, %s0, %s1
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.ne %s2, %s3, %s0
 ; CHECK-NEXT:    or %s0, 0, %s2
 ; CHECK-NEXT:    b.l.t (, %s10)
@@ -2516,7 +2510,6 @@ define fp128 @select_cc_i1_quad(i1 zeroext %0, i1 zeroext %1, fp128 %2, fp128 %3
 ; CHECK-LABEL: select_cc_i1_quad:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xor %s0, %s0, %s1
-; CHECK-NEXT:    and %s0, %s0, (32)0
 ; CHECK-NEXT:    cmov.w.ne %s2, %s4, %s0
 ; CHECK-NEXT:    cmov.w.ne %s3, %s5, %s0
 ; CHECK-NEXT:    or %s0, 0, %s2