[llvm] b44c50d - [NVPTX] Rework and cleanup FTZ ISel (#146410)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 9 11:16:53 PDT 2025


Author: Alex MacLean
Date: 2025-07-09T11:16:48-07:00
New Revision: b44c50d41626b7b81da7cdfb2292a0b58fcc838f

URL: https://github.com/llvm/llvm-project/commit/b44c50d41626b7b81da7cdfb2292a0b58fcc838f
DIFF: https://github.com/llvm/llvm-project/commit/b44c50d41626b7b81da7cdfb2292a0b58fcc838f.diff

LOG: [NVPTX] Rework and cleanup FTZ ISel (#146410)

This change cleans up DAG-to-DAG instruction selection around FTZ and
SETP comparison mode. These changes largely do not impact functionality,
though support for `{sin,cos}.approx.ftz.f32` is added.

Added: 
    

Modified: 
    llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
    llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
    llvm/lib/Target/NVPTX/NVPTX.h
    llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
    llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
    llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
    llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
    llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
    llvm/test/CodeGen/NVPTX/atomics-sm70.ll
    llvm/test/CodeGen/NVPTX/atomics-sm90.ll
    llvm/test/CodeGen/NVPTX/atomics.ll
    llvm/test/CodeGen/NVPTX/bf16-instructions.ll
    llvm/test/CodeGen/NVPTX/branch-fold.mir
    llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
    llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll
    llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll
    llvm/test/CodeGen/NVPTX/cmpxchg.ll
    llvm/test/CodeGen/NVPTX/compare-int.ll
    llvm/test/CodeGen/NVPTX/distributed-shared-cluster.ll
    llvm/test/CodeGen/NVPTX/extractelement.ll
    llvm/test/CodeGen/NVPTX/f16-instructions.ll
    llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
    llvm/test/CodeGen/NVPTX/fast-math.ll
    llvm/test/CodeGen/NVPTX/i1-select.ll
    llvm/test/CodeGen/NVPTX/i128.ll
    llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
    llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
    llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll
    llvm/test/CodeGen/NVPTX/inline-asm-b128-test3.ll
    llvm/test/CodeGen/NVPTX/jump-table.ll
    llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll
    llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll
    llvm/test/CodeGen/NVPTX/math-intrins.ll
    llvm/test/CodeGen/NVPTX/sext-setcc.ll
    llvm/test/CodeGen/NVPTX/tid-range.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index 28f6968ee6caf..443db4391a523 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -154,73 +154,114 @@ void NVPTXInstPrinter::printCvtMode(const MCInst *MI, int OpNum, raw_ostream &O,
   llvm_unreachable("Invalid conversion modifier");
 }
 
+void NVPTXInstPrinter::printFTZFlag(const MCInst *MI, int OpNum,
+                                    raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(OpNum);
+  const int Imm = MO.getImm();
+  if (Imm)
+    O << ".ftz";
+}
+
 void NVPTXInstPrinter::printCmpMode(const MCInst *MI, int OpNum, raw_ostream &O,
                                     StringRef Modifier) {
   const MCOperand &MO = MI->getOperand(OpNum);
   int64_t Imm = MO.getImm();
 
-  if (Modifier == "ftz") {
-    // FTZ flag
-    if (Imm & NVPTX::PTXCmpMode::FTZ_FLAG)
-      O << ".ftz";
-    return;
-  } else if (Modifier == "base") {
-    switch (Imm & NVPTX::PTXCmpMode::BASE_MASK) {
+  if (Modifier == "FCmp") {
+    switch (Imm) {
     default:
       return;
     case NVPTX::PTXCmpMode::EQ:
-      O << ".eq";
+      O << "eq";
       return;
     case NVPTX::PTXCmpMode::NE:
-      O << ".ne";
+      O << "ne";
       return;
     case NVPTX::PTXCmpMode::LT:
-      O << ".lt";
+      O << "lt";
       return;
     case NVPTX::PTXCmpMode::LE:
-      O << ".le";
+      O << "le";
       return;
     case NVPTX::PTXCmpMode::GT:
-      O << ".gt";
+      O << "gt";
       return;
     case NVPTX::PTXCmpMode::GE:
-      O << ".ge";
-      return;
-    case NVPTX::PTXCmpMode::LO:
-      O << ".lo";
-      return;
-    case NVPTX::PTXCmpMode::LS:
-      O << ".ls";
-      return;
-    case NVPTX::PTXCmpMode::HI:
-      O << ".hi";
-      return;
-    case NVPTX::PTXCmpMode::HS:
-      O << ".hs";
+      O << "ge";
       return;
     case NVPTX::PTXCmpMode::EQU:
-      O << ".equ";
+      O << "equ";
       return;
     case NVPTX::PTXCmpMode::NEU:
-      O << ".neu";
+      O << "neu";
       return;
     case NVPTX::PTXCmpMode::LTU:
-      O << ".ltu";
+      O << "ltu";
       return;
     case NVPTX::PTXCmpMode::LEU:
-      O << ".leu";
+      O << "leu";
       return;
     case NVPTX::PTXCmpMode::GTU:
-      O << ".gtu";
+      O << "gtu";
       return;
     case NVPTX::PTXCmpMode::GEU:
-      O << ".geu";
+      O << "geu";
       return;
     case NVPTX::PTXCmpMode::NUM:
-      O << ".num";
+      O << "num";
       return;
     case NVPTX::PTXCmpMode::NotANumber:
-      O << ".nan";
+      O << "nan";
+      return;
+    }
+  }
+  if (Modifier == "ICmp") {
+    switch (Imm) {
+    default:
+      llvm_unreachable("Invalid ICmp mode");
+    case NVPTX::PTXCmpMode::EQ:
+      O << "eq";
+      return;
+    case NVPTX::PTXCmpMode::NE:
+      O << "ne";
+      return;
+    case NVPTX::PTXCmpMode::LT:
+    case NVPTX::PTXCmpMode::LTU:
+      O << "lt";
+      return;
+    case NVPTX::PTXCmpMode::LE:
+    case NVPTX::PTXCmpMode::LEU:
+      O << "le";
+      return;
+    case NVPTX::PTXCmpMode::GT:
+    case NVPTX::PTXCmpMode::GTU:
+      O << "gt";
+      return;
+    case NVPTX::PTXCmpMode::GE:
+    case NVPTX::PTXCmpMode::GEU:
+      O << "ge";
+      return;
+    }
+  }
+  if (Modifier == "IType") {
+    switch (Imm) {
+    default:
+      llvm_unreachable("Invalid IType");
+    case NVPTX::PTXCmpMode::EQ:
+    case NVPTX::PTXCmpMode::NE:
+      O << "b";
+      return;
+    case NVPTX::PTXCmpMode::LT:
+    case NVPTX::PTXCmpMode::LE:
+    case NVPTX::PTXCmpMode::GT:
+    case NVPTX::PTXCmpMode::GE:
+      O << "s";
+      return;
+    case NVPTX::PTXCmpMode::LTU:
+    case NVPTX::PTXCmpMode::LEU:
+    case NVPTX::PTXCmpMode::GTU:
+    case NVPTX::PTXCmpMode::GEU:
+      O << "u";
       return;
     }
   }

diff  --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
index 6189284e8a58c..193c436939f66 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
@@ -54,6 +54,7 @@ class NVPTXInstPrinter : public MCInstPrinter {
   void printCTAGroup(const MCInst *MI, int OpNum, raw_ostream &O);
   void printCallOperand(const MCInst *MI, int OpNum, raw_ostream &O,
                         StringRef Modifier = {});
+  void printFTZFlag(const MCInst *MI, int OpNum, raw_ostream &O);
 };
 
 }

diff  --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index b7fd7090299a9..15997bc3878d8 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -14,12 +14,12 @@
 #ifndef LLVM_LIB_TARGET_NVPTX_NVPTX_H
 #define LLVM_LIB_TARGET_NVPTX_NVPTX_H
 
+#include "llvm/CodeGen/ISDOpcodes.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Target/TargetMachine.h"
-
 namespace llvm {
 class FunctionPass;
 class MachineFunctionPass;
@@ -224,10 +224,6 @@ enum CmpMode {
   LE,
   GT,
   GE,
-  LO,
-  LS,
-  HI,
-  HS,
   EQU,
   NEU,
   LTU,
@@ -237,9 +233,6 @@ enum CmpMode {
   NUM,
   // NAN is a MACRO
   NotANumber,
-
-  BASE_MASK = 0xFF,
-  FTZ_FLAG = 0x100
 };
 }
 

diff  --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 5631342ecc13e..429d52fb6f230 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -363,23 +363,29 @@ bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
 
 // Map ISD:CONDCODE value to appropriate CmpMode expected by
 // NVPTXInstPrinter::printCmpMode()
-static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
+SDValue NVPTXDAGToDAGISel::getPTXCmpMode(const CondCodeSDNode &CondCode) {
   using NVPTX::PTXCmpMode::CmpMode;
-  unsigned PTXCmpMode = [](ISD::CondCode CC) {
+  const unsigned PTXCmpMode = [](ISD::CondCode CC) {
     switch (CC) {
     default:
       llvm_unreachable("Unexpected condition code.");
     case ISD::SETOEQ:
+    case ISD::SETEQ:
       return CmpMode::EQ;
     case ISD::SETOGT:
+    case ISD::SETGT:
       return CmpMode::GT;
     case ISD::SETOGE:
+    case ISD::SETGE:
       return CmpMode::GE;
     case ISD::SETOLT:
+    case ISD::SETLT:
       return CmpMode::LT;
     case ISD::SETOLE:
+    case ISD::SETLE:
       return CmpMode::LE;
     case ISD::SETONE:
+    case ISD::SETNE:
       return CmpMode::NE;
     case ISD::SETO:
       return CmpMode::NUM;
@@ -397,45 +403,29 @@ static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
       return CmpMode::LEU;
     case ISD::SETUNE:
       return CmpMode::NEU;
-    case ISD::SETEQ:
-      return CmpMode::EQ;
-    case ISD::SETGT:
-      return CmpMode::GT;
-    case ISD::SETGE:
-      return CmpMode::GE;
-    case ISD::SETLT:
-      return CmpMode::LT;
-    case ISD::SETLE:
-      return CmpMode::LE;
-    case ISD::SETNE:
-      return CmpMode::NE;
     }
   }(CondCode.get());
-
-  if (FTZ)
-    PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;
-
-  return PTXCmpMode;
+  return CurDAG->getTargetConstant(PTXCmpMode, SDLoc(), MVT::i32);
 }
 
 bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
-  unsigned PTXCmpMode =
-      getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
+  SDValue PTXCmpMode = getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)));
   SDLoc DL(N);
   SDNode *SetP = CurDAG->getMachineNode(
-      NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
-      N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
+      NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1,
+      {N->getOperand(0), N->getOperand(1), PTXCmpMode,
+       CurDAG->getTargetConstant(useF32FTZ() ? 1 : 0, DL, MVT::i1)});
   ReplaceNode(N, SetP);
   return true;
 }
 
 bool NVPTXDAGToDAGISel::SelectSETP_BF16X2(SDNode *N) {
-  unsigned PTXCmpMode =
-      getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
+  SDValue PTXCmpMode = getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)));
   SDLoc DL(N);
   SDNode *SetP = CurDAG->getMachineNode(
-      NVPTX::SETP_bf16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
-      N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
+      NVPTX::SETP_bf16x2rr, DL, MVT::i1, MVT::i1,
+      {N->getOperand(0), N->getOperand(1), PTXCmpMode,
+       CurDAG->getTargetConstant(useF32FTZ() ? 1 : 0, DL, MVT::i1)});
   ReplaceNode(N, SetP);
   return true;
 }
@@ -1953,7 +1943,7 @@ bool NVPTXDAGToDAGISel::tryBF16ArithToFMA(SDNode *N) {
     llvm_unreachable("Unexpected opcode");
   };
 
-  int Opcode = IsVec ? NVPTX::BFMA16x2rrr : NVPTX::BFMA16rrr;
+  int Opcode = IsVec ? NVPTX::FMA_BF16x2rrr : NVPTX::FMA_BF16rrr;
   MachineSDNode *FMA = CurDAG->getMachineNode(Opcode, DL, VT, Operands);
   ReplaceNode(N, FMA);
   return true;

diff  --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 0e4dec1adca67..b314c4ccefe8b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -104,12 +104,11 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
   }
 
   bool SelectADDR(SDValue Addr, SDValue &Base, SDValue &Offset);
+  SDValue getPTXCmpMode(const CondCodeSDNode &CondCode);
   SDValue selectPossiblyImm(SDValue V);
 
   bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
 
-  static unsigned GetConvertOpcode(MVT DestTy, MVT SrcTy, LoadSDNode *N);
-
   // Returns the Memory Order and Scope that the PTX memory instruction should
   // use, and inserts appropriate fence instruction before the memory
   // instruction, if needed to implement the instructions memory order. Required

diff  --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 441ddeeb7d667..dcdebb81e3c86 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -68,48 +68,28 @@ def CvtMode : Operand<i32> {
   let PrintMethod = "printCvtMode";
 }
 
+// FTZ flag
+
+def FTZ : PatLeaf<(i1 1)>;
+def NoFTZ : PatLeaf<(i1 0)>;
+
+def getFTZFlag : SDNodeXForm<imm, [{
+  (void)N;
+  return CurDAG->getTargetConstant(useF32FTZ() ? 1 : 0, SDLoc(), MVT::i1);
+}]>;
+
+def FTZFlag : OperandWithDefaultOps<i1, (ops (getFTZFlag (i1 0)))> {
+  let PrintMethod = "printFTZFlag";
+}
+
 // Compare modes
 // These must match the enum in NVPTX.h
-def CmpEQ   : PatLeaf<(i32 0)>;
-def CmpNE   : PatLeaf<(i32 1)>;
-def CmpLT   : PatLeaf<(i32 2)>;
-def CmpLE   : PatLeaf<(i32 3)>;
-def CmpGT   : PatLeaf<(i32 4)>;
-def CmpGE   : PatLeaf<(i32 5)>;
-def CmpLO   : PatLeaf<(i32 6)>;
-def CmpLS   : PatLeaf<(i32 7)>;
-def CmpHI   : PatLeaf<(i32 8)>;
-def CmpHS   : PatLeaf<(i32 9)>;
-def CmpEQU  : PatLeaf<(i32 10)>;
-def CmpNEU  : PatLeaf<(i32 11)>;
-def CmpLTU  : PatLeaf<(i32 12)>;
-def CmpLEU  : PatLeaf<(i32 13)>;
-def CmpGTU  : PatLeaf<(i32 14)>;
-def CmpGEU  : PatLeaf<(i32 15)>;
-def CmpNUM  : PatLeaf<(i32 16)>;
-def CmpNAN  : PatLeaf<(i32 17)>;
-
-def CmpEQ_FTZ   : PatLeaf<(i32 0x100)>;
-def CmpNE_FTZ   : PatLeaf<(i32 0x101)>;
-def CmpLT_FTZ   : PatLeaf<(i32 0x102)>;
-def CmpLE_FTZ   : PatLeaf<(i32 0x103)>;
-def CmpGT_FTZ   : PatLeaf<(i32 0x104)>;
-def CmpGE_FTZ   : PatLeaf<(i32 0x105)>;
-def CmpEQU_FTZ  : PatLeaf<(i32 0x10A)>;
-def CmpNEU_FTZ  : PatLeaf<(i32 0x10B)>;
-def CmpLTU_FTZ  : PatLeaf<(i32 0x10C)>;
-def CmpLEU_FTZ  : PatLeaf<(i32 0x10D)>;
-def CmpGTU_FTZ  : PatLeaf<(i32 0x10E)>;
-def CmpGEU_FTZ  : PatLeaf<(i32 0x10F)>;
-def CmpNUM_FTZ  : PatLeaf<(i32 0x110)>;
-def CmpNAN_FTZ  : PatLeaf<(i32 0x111)>;
+def CmpEQ : PatLeaf<(i32 0)>;
+def CmpNE : PatLeaf<(i32 1)>;
 
 def CmpMode : Operand<i32> {
   let PrintMethod = "printCmpMode";
 }
-def VecElement : Operand<i32> {
-  let PrintMethod = "printVecElement";
-}
 
 // PRMT modes
 // These must match the enum in NVPTX.h
@@ -152,8 +132,6 @@ def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">;
 def hasDotInstructions : Predicate<"Subtarget->hasDotInstructions()">;
 def hasTcgen05Instructions : Predicate<"Subtarget->hasTcgen05Instructions()">;
 
-def True : Predicate<"true">;
-
 class hasPTX<int version>: Predicate<"Subtarget->getPTXVersion() >= " # version>;
 class hasSM<int version>: Predicate<"Subtarget->getSmVersion() >= " # version>;
 
@@ -198,7 +176,7 @@ def RI64 : Operand<Any>;
 
 // Utility class to wrap up information about a register and DAG type for more
 // convenient iteration and parameterization
-class RegTyInfo<ValueType ty, NVPTXRegClass rc, Operand imm, SDNode imm_node,
+class RegTyInfo<ValueType ty, NVPTXRegClass rc, string ptx_type, Operand imm, SDNode imm_node,
                 bit supports_imm = 1> {
   ValueType Ty = ty;
   NVPTXRegClass RC = rc;
@@ -206,20 +184,21 @@ class RegTyInfo<ValueType ty, NVPTXRegClass rc, Operand imm, SDNode imm_node,
   SDNode ImmNode = imm_node;
   bit SupportsImm = supports_imm;
   int Size = ty.Size;
+  string PtxType = ptx_type;
 }
 
-def I1RT     : RegTyInfo<i1,  B1,  i1imm,  imm>;
-def I16RT    : RegTyInfo<i16, B16, i16imm, imm>;
-def I32RT    : RegTyInfo<i32, B32, i32imm, imm>;
-def I64RT    : RegTyInfo<i64, B64, i64imm, imm>;
+def I1RT     : RegTyInfo<i1,  B1,  "pred", i1imm,  imm>;
+def I16RT    : RegTyInfo<i16, B16, "b16",  i16imm, imm>;
+def I32RT    : RegTyInfo<i32, B32, "b32",  i32imm, imm>;
+def I64RT    : RegTyInfo<i64, B64, "b64",  i64imm, imm>;
 
-def F32RT    : RegTyInfo<f32, B32, f32imm, fpimm>;
-def F64RT    : RegTyInfo<f64, B64, f64imm, fpimm>;
-def F16RT    : RegTyInfo<f16, B16, f16imm, fpimm, supports_imm = 0>;
-def BF16RT   : RegTyInfo<bf16, B16, bf16imm, fpimm, supports_imm = 0>;
+def F32RT    : RegTyInfo<f32,  B32, "f32",  f32imm,  fpimm>;
+def F64RT    : RegTyInfo<f64,  B64, "f64",  f64imm,  fpimm>;
+def F16RT    : RegTyInfo<f16,  B16, "f16",  f16imm,  fpimm, supports_imm = 0>;
+def BF16RT   : RegTyInfo<bf16, B16, "bf16", bf16imm, fpimm, supports_imm = 0>;
 
-def F16X2RT  : RegTyInfo<v2f16, B32, ?, ?, supports_imm = 0>;
-def BF16X2RT : RegTyInfo<v2bf16, B32, ?, ?, supports_imm = 0>;
+def F16X2RT  : RegTyInfo<v2f16, B32, "f16x2", ?, ?, supports_imm = 0>;
+def BF16X2RT : RegTyInfo<v2bf16, B32, "bf16x2", ?, ?, supports_imm = 0>;
 
 
 // This class provides a basic wrapper around an NVPTXInst that abstracts the
@@ -321,76 +300,57 @@ multiclass ADD_SUB_INT_CARRY<string op_str, SDNode op_node, bit commutative> {
 // Also defines ftz (flush subnormal inputs and results to sign-preserving
 // zero) variants for fp32 functions.
 multiclass FMINIMUMMAXIMUM<string OpcStr, bit NaN, SDNode OpNode> {
+  defvar nan_str = !if(NaN, ".NaN", "");
   if !not(NaN) then {
-   def f64rr :
+   def _f64_rr :
      BasicNVPTXInst<(outs B64:$dst),
                (ins B64:$a, B64:$b),
                OpcStr # ".f64",
                [(set f64:$dst, (OpNode f64:$a, f64:$b))]>;
-   def f64ri :
+   def _f64_ri :
      BasicNVPTXInst<(outs B64:$dst),
                (ins B64:$a, f64imm:$b),
                OpcStr # ".f64",
                [(set f64:$dst, (OpNode f64:$a, fpimm:$b))]>;
   }
-   def f32rr_ftz :
-     BasicNVPTXInst<(outs B32:$dst),
+   def _f32_rr :
+     BasicFlagsNVPTXInst<(outs B32:$dst),
                (ins B32:$a, B32:$b),
-               OpcStr # ".ftz.f32",
-               [(set f32:$dst, (OpNode f32:$a, f32:$b))]>,
-               Requires<[doF32FTZ]>;
-   def f32ri_ftz :
-     BasicNVPTXInst<(outs B32:$dst),
-               (ins B32:$a, f32imm:$b),
-               OpcStr # ".ftz.f32",
-               [(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>,
-               Requires<[doF32FTZ]>;
-   def f32rr :
-     BasicNVPTXInst<(outs B32:$dst),
-               (ins B32:$a, B32:$b),
-               OpcStr # ".f32",
+               (ins FTZFlag:$ftz),
+               OpcStr # "$ftz" # nan_str # ".f32",
                [(set f32:$dst, (OpNode f32:$a, f32:$b))]>;
-   def f32ri :
-     BasicNVPTXInst<(outs B32:$dst),
+   def _f32_ri :
+     BasicFlagsNVPTXInst<(outs B32:$dst),
                (ins B32:$a, f32imm:$b),
-               OpcStr # ".f32",
+               (ins FTZFlag:$ftz),
+               OpcStr # "$ftz" # nan_str # ".f32",
                [(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>;
 
-   def f16rr_ftz :
-     BasicNVPTXInst<(outs B16:$dst),
-               (ins B16:$a, B16:$b),
-               OpcStr # ".ftz.f16",
-               [(set f16:$dst, (OpNode f16:$a, f16:$b))]>,
-               Requires<[useFP16Math, doF32FTZ]>;
-   def f16rr :
-     BasicNVPTXInst<(outs B16:$dst),
+   def _f16_rr :
+     BasicFlagsNVPTXInst<(outs B16:$dst),
                (ins B16:$a, B16:$b),
-               OpcStr # ".f16",
+               (ins FTZFlag:$ftz),
+               OpcStr # "$ftz" # nan_str # ".f16",
                [(set f16:$dst, (OpNode f16:$a, f16:$b))]>,
-               Requires<[useFP16Math, hasSM<80>, hasPTX<70>]>;
+               Requires<[useFP16Math]>;
 
-   def f16x2rr_ftz :
-     BasicNVPTXInst<(outs B32:$dst),
-               (ins B32:$a, B32:$b),
-               OpcStr # ".ftz.f16x2",
-               [(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>,
-               Requires<[useFP16Math, hasSM<80>, hasPTX<70>, doF32FTZ]>;
-   def f16x2rr :
-     BasicNVPTXInst<(outs B32:$dst),
+   def _f16x2_rr :
+     BasicFlagsNVPTXInst<(outs B32:$dst),
                (ins B32:$a, B32:$b),
-               OpcStr # ".f16x2",
+               (ins FTZFlag:$ftz),
+               OpcStr # "$ftz" # nan_str # ".f16x2",
                [(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>,
                Requires<[useFP16Math, hasSM<80>, hasPTX<70>]>;
-   def bf16rr :
+   def _bf16_rr :
      BasicNVPTXInst<(outs B16:$dst),
                (ins B16:$a, B16:$b),
-               OpcStr # ".bf16",
+               OpcStr # nan_str # ".bf16",
                [(set bf16:$dst, (OpNode bf16:$a, bf16:$b))]>,
                Requires<[hasBF16Math, hasSM<80>, hasPTX<70>]>;
-   def bf16x2rr :
+   def _bf16x2_rr :
      BasicNVPTXInst<(outs B32:$dst),
                (ins B32:$a, B32:$b),
-               OpcStr # ".bf16x2",
+               OpcStr # nan_str # ".bf16x2",
                [(set v2bf16:$dst, (OpNode v2bf16:$a, v2bf16:$b))]>,
                Requires<[hasBF16Math, hasSM<80>, hasPTX<70>]>;
 }
@@ -415,52 +375,31 @@ multiclass F3<string op_str, SDPatternOperator op_pat> {
               (ins B64:$a, f64imm:$b),
               op_str # ".f64",
               [(set f64:$dst, (op_pat f64:$a, fpimm:$b))]>;
-  def f32rr_ftz :
-    BasicNVPTXInst<(outs B32:$dst),
-              (ins B32:$a, B32:$b),
-              op_str # ".ftz.f32",
-              [(set f32:$dst, (op_pat f32:$a, f32:$b))]>,
-              Requires<[doF32FTZ]>;
-  def f32ri_ftz :
-    BasicNVPTXInst<(outs B32:$dst),
-              (ins B32:$a, f32imm:$b),
-              op_str # ".ftz.f32",
-              [(set f32:$dst, (op_pat f32:$a, fpimm:$b))]>,
-              Requires<[doF32FTZ]>;
   def f32rr :
-    BasicNVPTXInst<(outs B32:$dst),
+    BasicFlagsNVPTXInst<(outs B32:$dst),
               (ins B32:$a, B32:$b),
-              op_str # ".f32",
+              (ins FTZFlag:$ftz),
+              op_str # "$ftz.f32",
               [(set f32:$dst, (op_pat f32:$a, f32:$b))]>;
   def f32ri :
-    BasicNVPTXInst<(outs B32:$dst),
+    BasicFlagsNVPTXInst<(outs B32:$dst),
               (ins B32:$a, f32imm:$b),
-              op_str # ".f32",
+              (ins FTZFlag:$ftz),
+              op_str # "$ftz.f32",
               [(set f32:$dst, (op_pat f32:$a, fpimm:$b))]>;
 
-  def f16rr_ftz :
-    BasicNVPTXInst<(outs B16:$dst),
-              (ins B16:$a, B16:$b),
-              op_str # ".ftz.f16",
-              [(set f16:$dst, (op_pat f16:$a, f16:$b))]>,
-              Requires<[useFP16Math, doF32FTZ]>;
   def f16rr :
-    BasicNVPTXInst<(outs B16:$dst),
+    BasicFlagsNVPTXInst<(outs B16:$dst),
               (ins B16:$a, B16:$b),
-              op_str # ".f16",
+              (ins FTZFlag:$ftz),
+              op_str # "$ftz.f16",
               [(set f16:$dst, (op_pat f16:$a, f16:$b))]>,
               Requires<[useFP16Math]>;
-
-  def f16x2rr_ftz :
-    BasicNVPTXInst<(outs B32:$dst),
-              (ins B32:$a, B32:$b),
-              op_str # ".ftz.f16x2",
-              [(set v2f16:$dst, (op_pat v2f16:$a, v2f16:$b))]>,
-              Requires<[useFP16Math, doF32FTZ]>;
   def f16x2rr :
-    BasicNVPTXInst<(outs B32:$dst),
+    BasicFlagsNVPTXInst<(outs B32:$dst),
               (ins B32:$a, B32:$b),
-              op_str # ".f16x2",
+              (ins FTZFlag:$ftz),
+              op_str # "$ftz.f16x2",
               [(set v2f16:$dst, (op_pat v2f16:$a, v2f16:$b))]>,
               Requires<[useFP16Math]>;
   def bf16rr :
@@ -493,15 +432,12 @@ multiclass F3_fma_component<string op_str, SDNode op_node> {
 // instructions: <OpcStr>.f64, <OpcStr>.f32, and <OpcStr>.ftz.f32 (flush
 // subnormal inputs and results to zero).
 multiclass F2<string OpcStr, SDNode OpNode> {
-   def f64 :     BasicNVPTXInst<(outs B64:$dst), (ins B64:$a),
+   def f64 : BasicNVPTXInst<(outs B64:$dst), (ins B64:$a),
                            OpcStr # ".f64",
                            [(set f64:$dst, (OpNode f64:$a))]>;
-   def f32_ftz : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a),
-                           OpcStr # ".ftz.f32",
-                           [(set f32:$dst, (OpNode f32:$a))]>,
-                           Requires<[doF32FTZ]>;
-   def f32 :     BasicNVPTXInst<(outs B32:$dst), (ins B32:$a),
-                           OpcStr # ".f32",
+   def f32 : BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$a),
+                           (ins FTZFlag:$ftz),
+                           OpcStr # "$ftz.f32",
                            [(set f32:$dst, (OpNode f32:$a))]>;
 }
 
@@ -514,37 +450,19 @@ multiclass F2_Support_Half<string OpcStr, SDNode OpNode> {
                            OpcStr # ".bf16x2",
                            [(set v2bf16:$dst, (OpNode v2bf16:$a))]>,
                            Requires<[hasSM<80>, hasPTX<70>]>;
-   def f16_ftz :   BasicNVPTXInst<(outs B16:$dst), (ins B16:$a),
-                           OpcStr # ".ftz.f16",
-                           [(set f16:$dst, (OpNode f16:$a))]>,
-                           Requires<[hasSM<53>, hasPTX<65>, doF32FTZ]>;
-   def f16x2_ftz : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a),
-                           OpcStr # ".ftz.f16x2",
-                           [(set v2f16:$dst, (OpNode v2f16:$a))]>,
-                           Requires<[hasSM<53>, hasPTX<65>, doF32FTZ]>;
-   def f16 :       BasicNVPTXInst<(outs B16:$dst), (ins B16:$a),
-                           OpcStr # ".f16",
+   def f16 :       BasicFlagsNVPTXInst<(outs B16:$dst), (ins B16:$a),
+                           (ins FTZFlag:$ftz),
+                           OpcStr # "$ftz.f16",
                            [(set f16:$dst, (OpNode f16:$a))]>,
                            Requires<[hasSM<53>, hasPTX<65>]>;
-   def f16x2 :     BasicNVPTXInst<(outs B32:$dst), (ins B32:$a),
-                           OpcStr # ".f16x2",
+   def f16x2 :     BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$a),
+                           (ins FTZFlag:$ftz),
+                           OpcStr # "$ftz.f16x2",
                            [(set v2f16:$dst, (OpNode v2f16:$a))]>,
                            Requires<[hasSM<53>, hasPTX<65>]>;
 
 }
 
-// Variant where only .ftz.bf16 is supported.
-multiclass F2_Support_Half_BF<string OpcStr, SDNode OpNode> {
-   def bf16_ftz :  BasicNVPTXInst<(outs B16:$dst), (ins B16:$a),
-                           OpcStr # ".ftz.bf16",
-                           [(set bf16:$dst, (OpNode bf16:$a))]>,
-                           Requires<[hasSM<90>, hasPTX<78>]>;
-   def bf16x2_ftz: BasicNVPTXInst<(outs B32:$dst), (ins B32:$a),
-                           OpcStr # ".ftz.bf16x2",
-                           [(set v2bf16:$dst, (OpNode v2bf16:$a))]>,
-                           Requires<[hasSM<90>, hasPTX<78>]>;
-}
-
 //===----------------------------------------------------------------------===//
 // NVPTX Instructions.
 //===----------------------------------------------------------------------===//
@@ -992,48 +910,38 @@ def SHL2MUL16 : SDNodeXForm<imm, [{
 }]>;
 
 // Convert "sign/zero-extend, then shift left by an immediate" to mul.wide.
-def : Pat<(shl (sext i32:$a), (i32 IntConst_0_30:$b)),
-          (MULWIDES64Imm $a, (SHL2MUL32 $b))>,
-      Requires<[doMulWide]>;
-def : Pat<(shl (zext i32:$a), (i32 IntConst_0_30:$b)),
-          (MULWIDEU64Imm $a, (SHL2MUL32 $b))>,
-      Requires<[doMulWide]>;
-
-def : Pat<(shl (sext i16:$a), (i16 IntConst_0_14:$b)),
-          (MULWIDES32Imm $a, (SHL2MUL16 $b))>,
-      Requires<[doMulWide]>;
-def : Pat<(shl (zext i16:$a), (i16 IntConst_0_14:$b)),
-          (MULWIDEU32Imm $a, (SHL2MUL16 $b))>,
-      Requires<[doMulWide]>;
-
-// Convert "sign/zero-extend then multiply" to mul.wide.
-def : Pat<(mul (sext i32:$a), (sext i32:$b)),
-          (MULWIDES64 $a, $b)>,
-      Requires<[doMulWide]>;
-def : Pat<(mul (sext i32:$a), (i64 SInt32Const:$b)),
-          (MULWIDES64Imm64 $a, (i64 SInt32Const:$b))>,
-      Requires<[doMulWide]>;
-
-def : Pat<(mul (zext i32:$a), (zext i32:$b)),
-          (MULWIDEU64 $a, $b)>,
-      Requires<[doMulWide]>;
-def : Pat<(mul (zext i32:$a), (i64 UInt32Const:$b)),
-          (MULWIDEU64Imm64 $a, (i64 UInt32Const:$b))>,
-      Requires<[doMulWide]>;
-
-def : Pat<(mul (sext i16:$a), (sext i16:$b)),
-          (MULWIDES32 $a, $b)>,
-      Requires<[doMulWide]>;
-def : Pat<(mul (sext i16:$a), (i32 SInt16Const:$b)),
-          (MULWIDES32Imm32 $a, (i32 SInt16Const:$b))>,
-      Requires<[doMulWide]>;
-
-def : Pat<(mul (zext i16:$a), (zext i16:$b)),
-          (MULWIDEU32 $a, $b)>,
-      Requires<[doMulWide]>;
-def : Pat<(mul (zext i16:$a), (i32 UInt16Const:$b)),
-          (MULWIDEU32Imm32 $a, (i32 UInt16Const:$b))>,
-      Requires<[doMulWide]>;
+let Predicates = [doMulWide] in {
+  def : Pat<(shl (sext i32:$a), (i32 IntConst_0_30:$b)),
+            (MULWIDES64Imm $a, (SHL2MUL32 $b))>;
+  def : Pat<(shl (zext i32:$a), (i32 IntConst_0_30:$b)),
+            (MULWIDEU64Imm $a, (SHL2MUL32 $b))>;
+
+  def : Pat<(shl (sext i16:$a), (i16 IntConst_0_14:$b)),
+            (MULWIDES32Imm $a, (SHL2MUL16 $b))>;
+  def : Pat<(shl (zext i16:$a), (i16 IntConst_0_14:$b)),
+            (MULWIDEU32Imm $a, (SHL2MUL16 $b))>;
+
+  // Convert "sign/zero-extend then multiply" to mul.wide.
+  def : Pat<(mul (sext i32:$a), (sext i32:$b)),
+            (MULWIDES64 $a, $b)>;
+  def : Pat<(mul (sext i32:$a), (i64 SInt32Const:$b)),
+            (MULWIDES64Imm64 $a, (i64 SInt32Const:$b))>;
+
+  def : Pat<(mul (zext i32:$a), (zext i32:$b)),
+            (MULWIDEU64 $a, $b)>;
+  def : Pat<(mul (zext i32:$a), (i64 UInt32Const:$b)),
+            (MULWIDEU64Imm64 $a, (i64 UInt32Const:$b))>;
+
+  def : Pat<(mul (sext i16:$a), (sext i16:$b)),
+            (MULWIDES32 $a, $b)>;
+  def : Pat<(mul (sext i16:$a), (i32 SInt16Const:$b)),
+            (MULWIDES32Imm32 $a, (i32 SInt16Const:$b))>;
+
+  def : Pat<(mul (zext i16:$a), (zext i16:$b)),
+            (MULWIDEU32 $a, $b)>;
+  def : Pat<(mul (zext i16:$a), (i32 UInt16Const:$b)),
+            (MULWIDEU32Imm32 $a, (i32 UInt16Const:$b))>;
+}
 
 //
 // Integer multiply-add
@@ -1101,10 +1009,10 @@ defm FADD : F3_fma_component<"add", fadd>;
 defm FSUB : F3_fma_component<"sub", fsub>;
 defm FMUL : F3_fma_component<"mul", fmul>;
 
-defm FMIN : FMINIMUMMAXIMUM<"min", /* NaN */ false, fminnum>;
-defm FMAX : FMINIMUMMAXIMUM<"max", /* NaN */ false, fmaxnum>;
-defm FMINNAN : FMINIMUMMAXIMUM<"min.NaN", /* NaN */ true, fminimum>;
-defm FMAXNAN : FMINIMUMMAXIMUM<"max.NaN", /* NaN */ true, fmaximum>;
+defm MIN : FMINIMUMMAXIMUM<"min", /* NaN */ false, fminnum>;
+defm MAX : FMINIMUMMAXIMUM<"max", /* NaN */ false, fmaxnum>;
+defm MIN_NAN : FMINIMUMMAXIMUM<"min", /* NaN */ true, fminimum>;
+defm MAX_NAN : FMINIMUMMAXIMUM<"max", /* NaN */ true, fmaximum>;
 
 defm FABS  : F2<"abs", fabs>;
 defm FNEG  : F2<"neg", fneg>;
@@ -1113,36 +1021,43 @@ defm FNEG_H: F2_Support_Half<"neg", fneg>;
 
 defm FSQRT : F2<"sqrt.rn", fsqrt>;
 
-defm FEXP2_H: F2_Support_Half_BF<"ex2.approx", fexp2>;
-
 //
 // F16 NEG
 //
-class FNEG_F16_F16X2<string OpcStr, ValueType T, RegisterClass RC, Predicate Pred> :
-      BasicNVPTXInst<(outs RC:$dst), (ins RC:$src),
-                OpcStr,
-                [(set T:$dst, (fneg T:$src))]>,
-                Requires<[useFP16Math, hasPTX<60>, hasSM<53>, Pred]>;
-def FNEG16_ftz   : FNEG_F16_F16X2<"neg.ftz.f16", f16, B16, doF32FTZ>;
-def FNEG16       : FNEG_F16_F16X2<"neg.f16", f16, B16, True>;
-def FNEG16x2_ftz : FNEG_F16_F16X2<"neg.ftz.f16x2", v2f16, B32, doF32FTZ>;
-def FNEG16x2     : FNEG_F16_F16X2<"neg.f16x2", v2f16, B32, True>;
+class FNEG16<RegTyInfo t> :
+      BasicFlagsNVPTXInst<(outs t.RC:$dst), (ins t.RC:$src), (ins FTZFlag:$ftz),
+                "neg$ftz." # t.PtxType,
+                [(set t.Ty:$dst, (fneg t.Ty:$src))]>;
+
+let Predicates = [useFP16Math, hasPTX<60>, hasSM<53>] in {
+  def NEG_F16    : FNEG16<F16RT>;
+  def NEG_F16x2  : FNEG16<F16X2RT>;
+}
+let Predicates = [hasBF16Math, hasPTX<70>, hasSM<80>] in {
+  def NEG_BF16   : FNEG16<BF16RT>;
+  def NEG_BF16x2 : FNEG16<BF16X2RT>;
+}
 
 //
-// BF16 NEG
+// EX2
 //
 
-class FNEG_BF16_F16X2<string OpcStr, ValueType T, RegisterClass RC, Predicate Pred> :
-      BasicNVPTXInst<(outs RC:$dst), (ins RC:$src),
-                OpcStr,
-                [(set T:$dst, (fneg T:$src))]>,
-                Requires<[hasBF16Math, hasPTX<70>, hasSM<80>, Pred]>;
-def BFNEG16_ftz   : FNEG_BF16_F16X2<"neg.ftz.bf16", bf16, B16, doF32FTZ>;
-def BFNEG16       : FNEG_BF16_F16X2<"neg.bf16", bf16, B16, True>;
-def BFNEG16x2_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16x2", v2bf16, B32, doF32FTZ>;
-def BFNEG16x2     : FNEG_BF16_F16X2<"neg.bf16x2", v2bf16, B32, True>;
+class FEXP2Inst<RegTyInfo t, dag flags, string flag_str> :
+      BasicFlagsNVPTXInst<(outs t.RC:$dst), (ins t.RC:$src),
+                flags, "ex2.approx" # flag_str # "." # t.PtxType,
+                [(set t.Ty:$dst, (fexp2 t.Ty:$src))]>;
+
+def EX2_APPROX_f32 : FEXP2Inst<F32RT, (ins FTZFlag:$ftz), "$ftz">;
+
+let Predicates = [useFP16Math, hasPTX<70>, hasSM<75>] in {
+  def EX2_APPROX_f16 : FEXP2Inst<F16RT, (ins), "">;
+  def EX2_APPROX_f16x2 : FEXP2Inst<F16X2RT, (ins), "">;
+}
+let Predicates = [hasPTX<78>, hasSM<90>] in {
+  def EX2_APPROX_bf16 : FEXP2Inst<BF16RT, (ins), ".ftz">;
+  def EX2_APPROX_bf16x2 : FEXP2Inst<BF16X2RT, (ins), ".ftz">;
+}
 
-//
 // F64 division
 //
 def FRCP64r :
@@ -1176,42 +1091,27 @@ def fdiv_approx : PatFrag<(ops node:$a, node:$b),
 }]>;
 
 
-def FRCP32_approx_r_ftz :
-  BasicNVPTXInst<(outs B32:$dst),
-                 (ins B32:$b),
-                 "rcp.approx.ftz.f32",
-                 [(set f32:$dst, (fdiv_approx f32imm_1, f32:$b))]>,
-                 Requires<[doF32FTZ]>;
 def FRCP32_approx_r :
-  BasicNVPTXInst<(outs B32:$dst),
+  BasicFlagsNVPTXInst<(outs B32:$dst),
                  (ins B32:$b),
-                 "rcp.approx.f32",
+                 (ins FTZFlag:$ftz), // FTZ is an operand, so one def replaces the former _ftz / non-ftz pair
+                 "rcp.approx$ftz.f32",
                  [(set f32:$dst, (fdiv_approx f32imm_1, f32:$b))]>;
 
 //
 // F32 Approximate division
 //
-def FDIV32approxrr_ftz :
-  BasicNVPTXInst<(outs B32:$dst),
+def FDIV32_approx_rr : // approximate f32 division, register / register operands
+  BasicFlagsNVPTXInst<(outs B32:$dst),
                  (ins B32:$a, B32:$b),
-                 "div.approx.ftz.f32",
-                 [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>,
-                 Requires<[doF32FTZ]>;
-def FDIV32approxri_ftz :
-  BasicNVPTXInst<(outs B32:$dst),
-                 (ins B32:$a, f32imm:$b),
-                 "div.approx.ftz.f32",
-                 [(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>,
-                 Requires<[doF32FTZ]>;
-def FDIV32approxrr :
-  BasicNVPTXInst<(outs B32:$dst),
-                 (ins B32:$a, B32:$b),
-                 "div.approx.f32",
+                 (ins FTZFlag:$ftz),
+                 "div.approx$ftz.f32",
                  [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>;
-def FDIV32approxri :
-  BasicNVPTXInst<(outs B32:$dst),
+def FDIV32_approx_ri : // same operation with $b as an f32 immediate
+  BasicFlagsNVPTXInst<(outs B32:$dst),
                  (ins B32:$a, f32imm:$b),
-                 "div.approx.f32",
+                 (ins FTZFlag:$ftz),
+                 "div.approx$ftz.f32",
                  [(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>;
 //
 // F32 Semi-accurate reciprocal
@@ -1225,37 +1125,23 @@ def fdiv_full : PatFrag<(ops node:$a, node:$b),
 }]>;
 
 
-def : Pat<(fdiv_full f32imm_1, f32:$b),
-          (FRCP32_approx_r_ftz $b)>,
-      Requires<[doF32FTZ]>;
-
 def : Pat<(fdiv_full f32imm_1, f32:$b),
           (FRCP32_approx_r $b)>;
 
 //
 // F32 Semi-accurate division
 //
-def FDIV32rr_ftz :
-  BasicNVPTXInst<(outs B32:$dst),
-                 (ins B32:$a, B32:$b),
-                 "div.full.ftz.f32",
-                 [(set f32:$dst, (fdiv_full f32:$a, f32:$b))]>,
-                 Requires<[doF32FTZ]>;
-def FDIV32ri_ftz :
-  BasicNVPTXInst<(outs B32:$dst),
-                 (ins B32:$a, f32imm:$b),
-                 "div.full.ftz.f32",
-                 [(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>,
-                 Requires<[doF32FTZ]>;
 def FDIV32rr :
-  BasicNVPTXInst<(outs B32:$dst),
+  BasicFlagsNVPTXInst<(outs B32:$dst),
                  (ins B32:$a, B32:$b),
-                 "div.full.f32",
+                 (ins FTZFlag:$ftz), // FTZ operand added; the mnemonic below prints it through $ftz
+                 "div.full$ftz.f32",
                  [(set f32:$dst, (fdiv_full f32:$a, f32:$b))]>;
 def FDIV32ri :
-  BasicNVPTXInst<(outs B32:$dst),
+  BasicFlagsNVPTXInst<(outs B32:$dst),
                  (ins B32:$a, f32imm:$b),
-                 "div.full.f32",
+                 (ins FTZFlag:$ftz), // same flag operand on the register / immediate form
+                 "div.full$ftz.f32",
                  [(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>;
 //
 // F32 Accurate reciprocal
@@ -1266,86 +1152,73 @@ def fdiv_ftz : PatFrag<(ops node:$a, node:$b),
   return getDivF32Level(N) == NVPTX::DivPrecisionLevel::IEEE754;
 }]>;
 
-def FRCP32r_prec_ftz :
-  BasicNVPTXInst<(outs B32:$dst),
-                 (ins B32:$b),
-                 "rcp.rn.ftz.f32",
-                 [(set f32:$dst, (fdiv_ftz f32imm_1, f32:$b))]>,
-                 Requires<[doF32FTZ]>;
 def FRCP32r_prec :
-  BasicNVPTXInst<(outs B32:$dst),
+  BasicFlagsNVPTXInst<(outs B32:$dst),
                  (ins B32:$b),
-                 "rcp.rn.f32",
-                 [(set f32:$dst, (fdiv f32imm_1, f32:$b))]>;
+                 (ins FTZFlag:$ftz),
+                 "rcp.rn$ftz.f32",
+                 [(set f32:$dst, (fdiv_ftz f32imm_1, f32:$b))]>; // built-in pattern is now the fdiv_ftz fragment; plain fdiv is matched by a separate Pat passing NoFTZ
 //
 // F32 Accurate division
 //
-def FDIV32rr_prec_ftz :
-  BasicNVPTXInst<(outs B32:$dst),
-                 (ins B32:$a, B32:$b),
-                 "div.rn.ftz.f32",
-                 [(set f32:$dst, (fdiv_ftz f32:$a, f32:$b))]>,
-                 Requires<[doF32FTZ]>;
-def FDIV32ri_prec_ftz :
-  BasicNVPTXInst<(outs B32:$dst),
-                 (ins B32:$a, f32imm:$b),
-                 "div.rn.ftz.f32",
-                 [(set f32:$dst, (fdiv_ftz f32:$a, fpimm:$b))]>,
-                 Requires<[doF32FTZ]>;
 def FDIV32rr_prec :
-  BasicNVPTXInst<(outs B32:$dst),
+  BasicFlagsNVPTXInst<(outs B32:$dst),
                  (ins B32:$a, B32:$b),
-                 "div.rn.f32",
-                 [(set f32:$dst, (fdiv f32:$a, f32:$b))]>;
+                 (ins FTZFlag:$ftz),
+                 "div.rn$ftz.f32",
+                 [(set f32:$dst, (fdiv_ftz f32:$a, f32:$b))]>; // matches the fdiv_ftz fragment; plain fdiv is handled by a standalone Pat
 def FDIV32ri_prec :
-  BasicNVPTXInst<(outs B32:$dst),
+  BasicFlagsNVPTXInst<(outs B32:$dst),
                  (ins B32:$a, f32imm:$b),
-                 "div.rn.f32",
-                 [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>;
+                 (ins FTZFlag:$ftz),
+                 "div.rn$ftz.f32",
+                 [(set f32:$dst, (fdiv_ftz f32:$a, fpimm:$b))]>; // register / immediate counterpart
+
+def : Pat<(fdiv f32imm_1, f32:$b), (FRCP32r_prec $b, NoFTZ)>; // plain fdiv of 1 by $b: rcp.rn with the FTZ operand given explicitly as NoFTZ
+def : Pat<(fdiv f32:$a, f32:$b), (FDIV32rr_prec $a, $b, NoFTZ)>; // plain fdiv, register operands: div.rn with NoFTZ
+def : Pat<(fdiv f32:$a, fpimm:$b), (FDIV32ri_prec $a, fpimm:$b, NoFTZ)>; // plain fdiv, immediate divisor: div.rn with NoFTZ
 
 //
 // FMA
 //
 
-multiclass FMA<string asmstr, RegTyInfo t, list<Predicate> Preds = []> {
-  def rrr : BasicNVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b, t.RC:$c),
-                      asmstr,
-                      [(set t.Ty:$dst, (fma t.Ty:$a, t.Ty:$b, t.Ty:$c))]>,
-                      Requires<Preds>;
-
-  if t.SupportsImm then {
-    def rri : BasicNVPTXInst<(outs t.RC:$dst),
-                        (ins t.RC:$a, t.RC:$b, t.Imm:$c),
-                        asmstr,
-                        [(set t.Ty:$dst, (fma t.Ty:$a, t.Ty:$b, fpimm:$c))]>,
-                        Requires<Preds>;
-    def rir : BasicNVPTXInst<(outs t.RC:$dst),
-                        (ins t.RC:$a, t.Imm:$b, t.RC:$c),
-                        asmstr,
-                        [(set t.Ty:$dst, (fma t.Ty:$a, fpimm:$b, t.Ty:$c))]>,
-                        Requires<Preds>;
-    def rii : BasicNVPTXInst<(outs t.RC:$dst),
-                        (ins t.RC:$a, t.Imm:$b, t.Imm:$c),
-                        asmstr,
-                        [(set t.Ty:$dst, (fma t.Ty:$a, fpimm:$b, fpimm:$c))]>,
-                        Requires<Preds>;
-    def iir : BasicNVPTXInst<(outs t.RC:$dst),
-                        (ins t.Imm:$a, t.Imm:$b, t.RC:$c),
-                        asmstr,
-                        [(set t.Ty:$dst, (fma fpimm:$a, fpimm:$b, t.Ty:$c))]>,
-                        Requires<Preds>;
+multiclass FMA<RegTyInfo t, bit allow_ftz = true, list<Predicate> preds = []> { // fma.rn defs for type `t`; `preds` gate every def in the multiclass
+  defvar flag_str = !if(allow_ftz, "$ftz", ""); // asm placeholder, present only when an FTZ operand exists
+  defvar flag_ops = !if(allow_ftz, (ins FTZFlag:$ftz), (ins)); // the matching flag operand list (empty otherwise)
+  defvar op_str = "fma.rn" # flag_str # "." # t.PtxType;
+
+  let Predicates = preds in {
+    def rrr : BasicFlagsNVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b, t.RC:$c),
+                        flag_ops, op_str,
+                        [(set t.Ty:$dst, (fma t.Ty:$a, t.Ty:$b, t.Ty:$c))]>;
+
+    if t.SupportsImm then { // immediate variants; each suffix letter is r (register) or i (immediate) for $a, $b, $c in order
+      def rri : BasicFlagsNVPTXInst<(outs t.RC:$dst),
+                          (ins t.RC:$a, t.RC:$b, t.Imm:$c),
+                          flag_ops, op_str,
+                          [(set t.Ty:$dst, (fma t.Ty:$a, t.Ty:$b, fpimm:$c))]>;
+      def rir : BasicFlagsNVPTXInst<(outs t.RC:$dst),
+                          (ins t.RC:$a, t.Imm:$b, t.RC:$c),
+                          flag_ops, op_str,
+                          [(set t.Ty:$dst, (fma t.Ty:$a, fpimm:$b, t.Ty:$c))]>;
+      def rii : BasicFlagsNVPTXInst<(outs t.RC:$dst),
+                          (ins t.RC:$a, t.Imm:$b, t.Imm:$c),
+                          flag_ops, op_str,
+                          [(set t.Ty:$dst, (fma t.Ty:$a, fpimm:$b, fpimm:$c))]>;
+      def iir : BasicFlagsNVPTXInst<(outs t.RC:$dst),
+                          (ins t.Imm:$a, t.Imm:$b, t.RC:$c),
+                          flag_ops, op_str,
+                          [(set t.Ty:$dst, (fma fpimm:$a, fpimm:$b, t.Ty:$c))]>;
+    }
   }
 }
 
-defm FMA16_ftz    : FMA<"fma.rn.ftz.f16", F16RT, [useFP16Math, doF32FTZ]>;
-defm FMA16        : FMA<"fma.rn.f16", F16RT, [useFP16Math]>;
-defm FMA16x2_ftz  : FMA<"fma.rn.ftz.f16x2", F16X2RT, [useFP16Math, doF32FTZ]>;
-defm FMA16x2      : FMA<"fma.rn.f16x2", F16X2RT, [useFP16Math]>;
-defm BFMA16       : FMA<"fma.rn.bf16", BF16RT, [hasBF16Math]>;
-defm BFMA16x2     : FMA<"fma.rn.bf16x2", BF16X2RT, [hasBF16Math]>;
-defm FMA32_ftz    : FMA<"fma.rn.ftz.f32", F32RT, [doF32FTZ]>;
-defm FMA32        : FMA<"fma.rn.f32", F32RT>;
-defm FMA64        : FMA<"fma.rn.f64", F64RT>;
+defm FMA_F16    : FMA<F16RT,    allow_ftz = true, preds = [useFP16Math]>; // replaces the separate FMA16 / FMA16_ftz instantiations
+defm FMA_F16x2  : FMA<F16X2RT,  allow_ftz = true, preds = [useFP16Math]>;
+defm FMA_BF16   : FMA<BF16RT,   allow_ftz = false, preds = [hasBF16Math]>; // bf16 forms get no FTZ operand
+defm FMA_BF16x2 : FMA<BF16X2RT, allow_ftz = false, preds = [hasBF16Math]>;
+defm FMA_F32    : FMA<F32RT,    allow_ftz = true>; // no extra predicates
+defm FMA_F64    : FMA<F64RT,    allow_ftz = false>; // f64 form gets no FTZ operand
 
 // sin/cos
 
@@ -1355,11 +1228,13 @@ class UnaryOpAllowsApproxFn<SDPatternOperator operator>
   return allowUnsafeFPMath() || N->getFlags().hasApproximateFuncs();
 }]>;
 
-def SINF:  BasicNVPTXInst<(outs B32:$dst), (ins B32:$src),
-                      "sin.approx.f32",
+def SIN_APPROX_f32 : // matched only when UnaryOpAllowsApproxFn accepts the fsin node
+  BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$src), (ins FTZFlag:$ftz),
+                      "sin.approx$ftz.f32",
                       [(set f32:$dst, (UnaryOpAllowsApproxFn<fsin> f32:$src))]>;
-def COSF:  BasicNVPTXInst<(outs B32:$dst), (ins B32:$src),
-                      "cos.approx.f32",
+def COS_APPROX_f32 : // cosine counterpart, with the same FTZ operand
+  BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$src), (ins FTZFlag:$ftz),
+                      "cos.approx$ftz.f32",
                       [(set f32:$dst, (UnaryOpAllowsApproxFn<fcos> f32:$src))]>;
 
 //-----------------------------------
@@ -1636,53 +1511,80 @@ def : Pat<(i16 (sext_inreg (trunc (srl i64:$s,  (i32 imm:$o))), i8)),
 
 // FIXME: This doesn't cover versions of set and setp that combine with a
 // boolean predicate, e.g. setp.eq.and.b16.
+def cond2cc : SDNodeXForm<cond, [{ // transform applied to a matched `cond` operand inside output patterns
+  return getPTXCmpMode(*N); // the result is passed as the comparison-mode argument of the SETP output patterns
+}]>;
 
-let hasSideEffects = false in {
-  multiclass SETP<string TypeStr, RegisterClass RC, Operand ImmCls> {
+multiclass FSETP<RegTyInfo t, bit allow_ftz = true> { // floating-point setp defs plus their setcc selection patterns for type `t`
+  defvar ftz_str = !if(allow_ftz, "$ftz", "");
+  defvar op_str = "setp.${cmp:FCmp}" # ftz_str # "." # t.PtxType; // the comparison name is printed from $cmp using the "FCmp" modifier
+  defvar flags = !con((ins CmpMode:$cmp), !if(allow_ftz, (ins  FTZFlag:$ftz), (ins))); // CmpMode always; FTZFlag appended only when allowed
+  let hasSideEffects = false in {
     def rr :
-      BasicFlagsNVPTXInst<(outs B1:$dst), (ins RC:$a, RC:$b), (ins CmpMode:$cmp),
-                "setp${cmp:base}${cmp:ftz}." # TypeStr>;
+      BasicFlagsNVPTXInst<(outs B1:$dst), (ins t.RC:$a, t.RC:$b),
+                          flags, op_str>;
+    // ri / ir forms are defined only for types that have an immediate operand class
+    if t.SupportsImm then {
+      def ri :
+        BasicFlagsNVPTXInst<(outs B1:$dst), (ins t.RC:$a, t.Imm:$b),
+                            flags, op_str>;
+      def ir :
+        BasicFlagsNVPTXInst<(outs B1:$dst), (ins t.Imm:$a, t.RC:$b),
+                            flags, op_str>;
+    }
+  }
+  def : Pat<(i1 (setcc t.Ty:$a, t.Ty:$b, cond:$cc)), // one generic setcc pattern per operand form; cond2cc converts the condition code
+            (!cast<NVPTXInst>(NAME # "rr") $a, $b, (cond2cc $cc))>; // no FTZ argument is given here (presumably FTZFlag supplies a default; confirm at its definition)
+  if t.SupportsImm then {
+    def : Pat<(i1 (setcc t.Ty:$a, fpimm:$b, cond:$cc)),
+              (!cast<NVPTXInst>(NAME # "ri") $a, fpimm:$b, (cond2cc $cc))>;
+    def : Pat<(i1 (setcc fpimm:$a, t.Ty:$b, cond:$cc)),
+              (!cast<NVPTXInst>(NAME # "ir") fpimm:$a, $b, (cond2cc $cc))>;
+  }
+}
+
+multiclass ISETP<RegTyInfo t> { // integer setp defs (rr / ri / ir) plus their setcc selection patterns for type `t`
+  defvar op_str = "setp.${cmp:ICmp}.${cmp:IType}" # t.Size; // comparison name and type prefix are both printed from $cmp (IType presumably picks s/u/b; confirm in NVPTXInstPrinter); t.Size appends the width
+  let hasSideEffects = false in {
+    def rr :
+      BasicFlagsNVPTXInst<(outs B1:$dst), (ins t.RC:$a, t.RC:$b),
+                          (ins CmpMode:$cmp), op_str>;
     def ri :
-      BasicFlagsNVPTXInst<(outs B1:$dst), (ins RC:$a, ImmCls:$b), (ins CmpMode:$cmp),
-                "setp${cmp:base}${cmp:ftz}." # TypeStr>;
+      BasicFlagsNVPTXInst<(outs B1:$dst), (ins t.RC:$a, t.Imm:$b),
+                          (ins CmpMode:$cmp), op_str>;
     def ir :
-      BasicFlagsNVPTXInst<(outs B1:$dst), (ins ImmCls:$a, RC:$b), (ins CmpMode:$cmp),
-                "setp${cmp:base}${cmp:ftz}." # TypeStr>;
+      BasicFlagsNVPTXInst<(outs B1:$dst), (ins t.Imm:$a, t.RC:$b),
+                          (ins CmpMode:$cmp), op_str>;
   }
+  def : Pat<(i1 (setcc t.Ty:$a, t.Ty:$b, cond:$cc)), // generic setcc pattern; cond2cc converts the condition code
+            (!cast<NVPTXInst>(NAME # "rr") $a, $b, (cond2cc $cc))>;
+  def : Pat<(i1 (setcc t.Ty:$a, imm:$b, cond:$cc)),
+            (!cast<NVPTXInst>(NAME # "ri") $a, imm:$b, (cond2cc $cc))>;
+  def : Pat<(i1 (setcc imm:$a, t.Ty:$b, cond:$cc)),
+            (!cast<NVPTXInst>(NAME # "ir") imm:$a, $b, (cond2cc $cc))>;
 }
 
-defm SETP_b16 : SETP<"b16", B16, i16imm>;
-defm SETP_s16 : SETP<"s16", B16, i16imm>;
-defm SETP_u16 : SETP<"u16", B16, i16imm>;
-defm SETP_b32 : SETP<"b32", B32, i32imm>;
-defm SETP_s32 : SETP<"s32", B32, i32imm>;
-defm SETP_u32 : SETP<"u32", B32, i32imm>;
-defm SETP_b64 : SETP<"b64", B64, i64imm>;
-defm SETP_s64 : SETP<"s64", B64, i64imm>;
-defm SETP_u64 : SETP<"u64", B64, i64imm>;
-defm SETP_f32 : SETP<"f32", B32, f32imm>;
-defm SETP_f64 : SETP<"f64", B64, f64imm>;
-def SETP_f16rr :
-      BasicFlagsNVPTXInst<(outs B1:$dst),
-                (ins B16:$a, B16:$b), (ins CmpMode:$cmp),
-                "setp${cmp:base}${cmp:ftz}.f16">,
-                Requires<[useFP16Math]>;
+defm SETP_i16 : ISETP<I16RT>; // one integer family per width, replacing the former b/s/u defs of that width
+defm SETP_i32 : ISETP<I32RT>;
+defm SETP_i64 : ISETP<I64RT>;
+
+defm SETP_f32 : FSETP<F32RT>;
+defm SETP_f64 : FSETP<F64RT, allow_ftz = false>; // f64 compares get no FTZ operand
+let Predicates = [useFP16Math] in
+  defm SETP_f16 : FSETP<F16RT>;
+let Predicates = [hasBF16Math, hasPTX<78>, hasSM<90>] in
+  defm SETP_bf16 : FSETP<BF16RT>;
 
 def SETP_f16x2rr :
       BasicFlagsNVPTXInst<(outs B1:$p, B1:$q),
-                (ins B32:$a, B32:$b), (ins CmpMode:$cmp),
-                "setp${cmp:base}${cmp:ftz}.f16x2">,
+                (ins B32:$a, B32:$b), (ins CmpMode:$cmp, FTZFlag:$ftz), // FTZ is its own operand instead of part of the compare mode
+                "setp.${cmp:FCmp}$ftz.f16x2">,
                 Requires<[useFP16Math]>;
-def SETP_bf16rr :
-      BasicFlagsNVPTXInst<(outs B1:$dst),
-                (ins B16:$a, B16:$b), (ins CmpMode:$cmp),
-                "setp${cmp:base}${cmp:ftz}.bf16">,
-                Requires<[hasBF16Math, hasPTX<78>, hasSM<90>]>;
 
 def SETP_bf16x2rr :
       BasicFlagsNVPTXInst<(outs B1:$p, B1:$q),
-                (ins B32:$a, B32:$b), (ins CmpMode:$cmp),
-                "setp${cmp:base}${cmp:ftz}.bf16x2">,
+                (ins B32:$a, B32:$b), (ins CmpMode:$cmp, FTZFlag:$ftz), // FTZ is its own operand instead of part of the compare mode
+                "setp.${cmp:FCmp}$ftz.bf16x2">,
                 Requires<[hasBF16Math, hasPTX<78>, hasSM<90>]>;
 
 //-----------------------------------
@@ -1786,209 +1688,36 @@ def : Pat<(i64 frameindex:$fi), (LEA_ADDRi64 (to_tframeindex $fi), 0)>;
 // Comparison and Selection
 //-----------------------------------
 
-multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
-                       Instruction setp_16rr,
-                       Instruction setp_16ri,
-                       Instruction setp_16ir,
-                       Instruction setp_32rr,
-                       Instruction setp_32ri,
-                       Instruction setp_32ir,
-                       Instruction setp_64rr,
-                       Instruction setp_64ri,
-                       Instruction setp_64ir> {
-  // i16 -> pred
-  def : Pat<(i1 (OpNode i16:$a, i16:$b)),
-            (setp_16rr $a, $b, Mode)>;
-  def : Pat<(i1 (OpNode i16:$a, imm:$b)),
-            (setp_16ri $a, imm:$b, Mode)>;
-  def : Pat<(i1 (OpNode imm:$a, i16:$b)),
-            (setp_16ir imm:$a, $b, Mode)>;
-  // i32 -> pred
-  def : Pat<(i1 (OpNode i32:$a, i32:$b)),
-            (setp_32rr $a, $b, Mode)>;
-  def : Pat<(i1 (OpNode i32:$a, imm:$b)),
-            (setp_32ri $a, imm:$b, Mode)>;
-  def : Pat<(i1 (OpNode imm:$a, i32:$b)),
-            (setp_32ir imm:$a, $b, Mode)>;
-  // i64 -> pred
-  def : Pat<(i1 (OpNode i64:$a, i64:$b)),
-            (setp_64rr $a, $b, Mode)>;
-  def : Pat<(i1 (OpNode i64:$a, imm:$b)),
-            (setp_64ri $a, imm:$b, Mode)>;
-  def : Pat<(i1 (OpNode imm:$a, i64:$b)),
-            (setp_64ir imm:$a, $b, Mode)>;
-}
-
-multiclass ISET_FORMAT_SIGNED<PatFrag OpNode, PatLeaf Mode>
-  : ISET_FORMAT<OpNode, Mode,
-                SETP_s16rr, SETP_s16ri, SETP_s16ir,
-                SETP_s32rr, SETP_s32ri, SETP_s32ir,
-                SETP_s64rr, SETP_s64ri, SETP_s64ir> {
-  // TableGen doesn't like empty multiclasses.
-  def : PatLeaf<(i32 0)>;
-}
-
-multiclass ISET_FORMAT_UNSIGNED<PatFrag OpNode, PatLeaf Mode>
-  : ISET_FORMAT<OpNode, Mode,
-                SETP_u16rr, SETP_u16ri, SETP_u16ir,
-                SETP_u32rr, SETP_u32ri, SETP_u32ir,
-                SETP_u64rr, SETP_u64ri, SETP_u64ir> {
-  // TableGen doesn't like empty multiclasses.
-  def : PatLeaf<(i32 0)>;
-}
-
-defm : ISET_FORMAT_SIGNED<setgt, CmpGT>;
-defm : ISET_FORMAT_SIGNED<setlt, CmpLT>;
-defm : ISET_FORMAT_SIGNED<setge, CmpGE>;
-defm : ISET_FORMAT_SIGNED<setle, CmpLE>;
-defm : ISET_FORMAT_SIGNED<seteq, CmpEQ>;
-defm : ISET_FORMAT_SIGNED<setne, CmpNE>;
-defm : ISET_FORMAT_UNSIGNED<setugt, CmpGT>;
-defm : ISET_FORMAT_UNSIGNED<setult, CmpLT>;
-defm : ISET_FORMAT_UNSIGNED<setuge, CmpGE>;
-defm : ISET_FORMAT_UNSIGNED<setule, CmpLE>;
-defm : ISET_FORMAT_UNSIGNED<setueq, CmpEQ>;
-defm : ISET_FORMAT_UNSIGNED<setune, CmpNE>;
+def cond_signed : PatLeaf<(cond), [{ // matches condition codes for which isSignedIntSetCC returns true
+  return isSignedIntSetCC(N->get());
+}]>;
+
+def cond_not_signed : PatLeaf<(cond), [{ // exact complement of cond_signed
+  return !isSignedIntSetCC(N->get());
+}]>;
 
 // comparisons of i8 extracted with BFE as i32
 // It's faster to do comparison directly on i32 extracted by BFE,
 // instead of the long conversion and sign extending.
-def: Pat<(setgt (i16 (sext_inreg (i16 (trunc (bfe B32:$a, B32:$oa, 8))), i8)),
-                (i16 (sext_inreg (i16 (trunc (bfe B32:$b, B32:$ob, 8))), i8))),
-         (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpGT)>;
-def: Pat<(setgt (i16 (sext_inreg (trunc (bfe B32:$a, imm:$oa, 8)), i8)),
-                (i16 (sext_inreg (trunc (bfe B32:$b, imm:$ob, 8)), i8))),
-         (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpGT)>;
-def: Pat<(setge (i16 (sext_inreg (i16 (trunc (bfe B32:$a, B32:$oa, 8))), i8)),
-                (i16 (sext_inreg (i16 (trunc (bfe B32:$b, B32:$ob, 8))), i8))),
-         (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpGE)>;
-def: Pat<(setge (i16 (sext_inreg (trunc (bfe B32:$a, imm:$oa, 8)), i8)),
-                (i16 (sext_inreg (trunc (bfe B32:$b, imm:$ob, 8)), i8))),
-         (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpGE)>;
-def: Pat<(setlt (i16 (sext_inreg (i16 (trunc (bfe B32:$a, B32:$oa, 8))), i8)),
-                (i16 (sext_inreg (i16 (trunc (bfe B32:$b, B32:$ob, 8))), i8))),
-         (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpLT)>;
-def: Pat<(setlt (i16 (sext_inreg (trunc (bfe B32:$a, imm:$oa, 8)), i8)),
-                (i16 (sext_inreg (trunc (bfe B32:$b, imm:$ob, 8)), i8))),
-         (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpLT)>;
-def: Pat<(setle (i16 (sext_inreg (i16 (trunc (bfe B32:$a, B32:$oa, 8))), i8)),
-                (i16 (sext_inreg (i16 (trunc (bfe B32:$b, B32:$ob, 8))), i8))),
-         (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpLE)>;
-def: Pat<(setle (i16 (sext_inreg (trunc (bfe B32:$a, imm:$oa, 8)), i8)),
-                (i16 (sext_inreg (trunc (bfe B32:$b, imm:$ob, 8)), i8))),
-         (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpLE)>;
-
-def: Pat<(setugt (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)),
-                 (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))),
-         (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpHI)>;
-def: Pat<(setugt (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)),
-                 (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))),
-         (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpHI)>;
-def: Pat<(setuge (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)),
-                 (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))),
-         (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpHS)>;
-def: Pat<(setuge (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)),
-                 (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))),
-         (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpHS)>;
-def: Pat<(setult (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)),
-                 (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))),
-         (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpLO)>;
-def: Pat<(setult (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)),
-                 (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))),
-         (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpLO)>;
-def: Pat<(setule (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)),
-                 (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))),
-         (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpLS)>;
-def: Pat<(setule (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)),
-                 (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))),
-         (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpLS)>;
-def: Pat<(seteq (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)),
-                 (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))),
-         (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpEQ)>;
-def: Pat<(seteq (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)),
-                 (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))),
-         (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpEQ)>;
-def: Pat<(setne (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)),
-                 (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))),
-         (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpNE)>;
-def: Pat<(setne (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)),
-                 (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))),
-         (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpNE)>;
-
-// i1 compare -> i32
-def : Pat<(i32 (setne i1:$a, i1:$b)),
-          (SELP_b32ii -1, 0, (XORb1rr $a, $b))>;
-def : Pat<(i32 (setne i1:$a, i1:$b)),
-          (SELP_b32ii 0, -1, (XORb1rr $a, $b))>;
-
-
-
-multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
-  // f16 -> pred
-  def : Pat<(i1 (OpNode f16:$a, f16:$b)),
-            (SETP_f16rr $a, $b, ModeFTZ)>,
-        Requires<[useFP16Math, doF32FTZ]>;
-  def : Pat<(i1 (OpNode f16:$a, f16:$b)),
-            (SETP_f16rr $a, $b, Mode)>,
-        Requires<[useFP16Math]>;
-
-  // bf16 -> pred
-  def : Pat<(i1 (OpNode bf16:$a, bf16:$b)),
-            (SETP_bf16rr $a, $b, ModeFTZ)>,
-        Requires<[hasBF16Math, doF32FTZ]>;
-  def : Pat<(i1 (OpNode bf16:$a, bf16:$b)),
-            (SETP_bf16rr $a, $b, Mode)>,
-        Requires<[hasBF16Math]>;
-
-  // f32 -> pred
-  def : Pat<(i1 (OpNode f32:$a, f32:$b)),
-            (SETP_f32rr $a, $b, ModeFTZ)>,
-        Requires<[doF32FTZ]>;
-  def : Pat<(i1 (OpNode f32:$a, f32:$b)),
-            (SETP_f32rr $a, $b, Mode)>;
-  def : Pat<(i1 (OpNode f32:$a, fpimm:$b)),
-            (SETP_f32ri $a, fpimm:$b, ModeFTZ)>,
-        Requires<[doF32FTZ]>;
-  def : Pat<(i1 (OpNode f32:$a, fpimm:$b)),
-            (SETP_f32ri $a, fpimm:$b, Mode)>;
-  def : Pat<(i1 (OpNode fpimm:$a, f32:$b)),
-            (SETP_f32ir fpimm:$a, $b, ModeFTZ)>,
-        Requires<[doF32FTZ]>;
-  def : Pat<(i1 (OpNode fpimm:$a, f32:$b)),
-            (SETP_f32ir fpimm:$a, $b, Mode)>;
-
-  // f64 -> pred
-  def : Pat<(i1 (OpNode f64:$a, f64:$b)),
-            (SETP_f64rr $a, $b, Mode)>;
-  def : Pat<(i1 (OpNode f64:$a, fpimm:$b)),
-            (SETP_f64ri $a, fpimm:$b, Mode)>;
-  def : Pat<(i1 (OpNode fpimm:$a, f64:$b)),
-            (SETP_f64ir fpimm:$a, $b, Mode)>;
-}
-
-defm FSetOGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>;
-defm FSetOLT : FSET_FORMAT<setolt, CmpLT, CmpLT_FTZ>;
-defm FSetOGE : FSET_FORMAT<setoge, CmpGE, CmpGE_FTZ>;
-defm FSetOLE : FSET_FORMAT<setole, CmpLE, CmpLE_FTZ>;
-defm FSetOEQ : FSET_FORMAT<setoeq, CmpEQ, CmpEQ_FTZ>;
-defm FSetONE : FSET_FORMAT<setone, CmpNE, CmpNE_FTZ>;
-
-defm FSetUGT : FSET_FORMAT<setugt, CmpGTU, CmpGTU_FTZ>;
-defm FSetULT : FSET_FORMAT<setult, CmpLTU, CmpLTU_FTZ>;
-defm FSetUGE : FSET_FORMAT<setuge, CmpGEU, CmpGEU_FTZ>;
-defm FSetULE : FSET_FORMAT<setule, CmpLEU, CmpLEU_FTZ>;
-defm FSetUEQ : FSET_FORMAT<setueq, CmpEQU, CmpEQU_FTZ>;
-defm FSetUNE : FSET_FORMAT<setune, CmpNEU, CmpNEU_FTZ>;
-
-defm FSetGT : FSET_FORMAT<setgt, CmpGT, CmpGT_FTZ>;
-defm FSetLT : FSET_FORMAT<setlt, CmpLT, CmpLT_FTZ>;
-defm FSetGE : FSET_FORMAT<setge, CmpGE, CmpGE_FTZ>;
-defm FSetLE : FSET_FORMAT<setle, CmpLE, CmpLE_FTZ>;
-defm FSetEQ : FSET_FORMAT<seteq, CmpEQ, CmpEQ_FTZ>;
-defm FSetNE : FSET_FORMAT<setne, CmpNE, CmpNE_FTZ>;
-
-defm FSetNUM : FSET_FORMAT<seto, CmpNUM, CmpNUM_FTZ>;
-defm FSetNAN : FSET_FORMAT<setuo, CmpNAN, CmpNAN_FTZ>;
+def: Pat<(setcc (i16 (sext_inreg (i16 (trunc (bfe B32:$a, B32:$oa, 8))), i8)),
+                (i16 (sext_inreg (i16 (trunc (bfe B32:$b, B32:$ob, 8))), i8)),
+                cond_signed:$cc), // sign-extended bytes: only signed conditions are accepted; both sides are re-extracted with BFE_S32
+         (SETP_i32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), (cond2cc $cc))>; // register-offset form
+
+def: Pat<(setcc (i16 (sext_inreg (trunc (bfe B32:$a, imm:$oa, 8)), i8)),
+                (i16 (sext_inreg (trunc (bfe B32:$b, imm:$ob, 8)), i8)),
+                cond_signed:$cc),
+         (SETP_i32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), (cond2cc $cc))>; // immediate-offset form
+
+def: Pat<(setcc (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)), // zero-extended bytes (masked with 255), register-offset form
+                (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255)),
+                cond_not_signed:$cc), // non-signed conditions (unsigned and eq/ne), matching the immediate-offset form and the set* patterns this replaces
+         (SETP_i32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), (cond2cc $cc))>;
+
+def: Pat<(setcc (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)), // zero-extended bytes (masked with 255), immediate-offset form
+                (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255)),
+                cond_not_signed:$cc), // accepts every condition that isSignedIntSetCC rejects
+         (SETP_i32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), (cond2cc $cc))>;
 
 def SDTDeclareArrayParam :
   SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
@@ -2382,25 +2111,25 @@ def : Pat<(f64 (uint_to_fp i64:$a)), (CVT_f64_u64 $a, CvtRN)>;
 
 
 // f16 -> sint
-def : Pat<(i1  (fp_to_sint f16:$a)), (SETP_b16ri $a, 0, CmpEQ)>;
+def : Pat<(i1  (fp_to_sint f16:$a)), (SETP_i16ri $a, 0, CmpEQ)>; // i1 result: integer setp of the 16-bit register against 0. NOTE(review): CmpEQ is true when the bits are zero; confirm this is intended
 def : Pat<(i16 (fp_to_sint f16:$a)), (CVT_s16_f16 $a, CvtRZI)>;
 def : Pat<(i32 (fp_to_sint f16:$a)), (CVT_s32_f16 $a, CvtRZI)>;
 def : Pat<(i64 (fp_to_sint f16:$a)), (CVT_s64_f16 $a, CvtRZI)>;
 
 // f16 -> uint
-def : Pat<(i1  (fp_to_uint f16:$a)), (SETP_b16ri $a, 0, CmpEQ)>;
+def : Pat<(i1  (fp_to_uint f16:$a)), (SETP_i16ri $a, 0, CmpEQ)>; // same i1 lowering as the signed case
 def : Pat<(i16 (fp_to_uint f16:$a)), (CVT_u16_f16 $a, CvtRZI)>;
 def : Pat<(i32 (fp_to_uint f16:$a)), (CVT_u32_f16 $a, CvtRZI)>;
 def : Pat<(i64 (fp_to_uint f16:$a)), (CVT_u64_f16 $a, CvtRZI)>;
 
 // bf16 -> sint
-def : Pat<(i1  (fp_to_sint bf16:$a)), (SETP_b16ri $a, 0, CmpEQ)>;
+def : Pat<(i1  (fp_to_sint bf16:$a)), (SETP_i16ri $a, 0, CmpEQ)>; // bf16 uses the same 16-bit integer compare
 def : Pat<(i16 (fp_to_sint bf16:$a)), (CVT_s16_bf16 $a, CvtRZI)>;
 def : Pat<(i32 (fp_to_sint bf16:$a)), (CVT_s32_bf16 $a, CvtRZI)>;
 def : Pat<(i64 (fp_to_sint bf16:$a)), (CVT_s64_bf16 $a, CvtRZI)>;
 
 // bf16 -> uint
-def : Pat<(i1 (fp_to_uint bf16:$a)),  (SETP_b16ri $a, 0, CmpEQ)>;
+def : Pat<(i1 (fp_to_uint bf16:$a)),  (SETP_i16ri $a, 0, CmpEQ)>; // same i1 lowering as the signed case
 def : Pat<(i16 (fp_to_uint bf16:$a)), (CVT_u16_bf16 $a, CvtRZI)>;
 def : Pat<(i32 (fp_to_uint bf16:$a)), (CVT_u32_bf16 $a, CvtRZI)>;
 def : Pat<(i64 (fp_to_uint bf16:$a)), (CVT_u64_bf16 $a, CvtRZI)>;
@@ -2410,7 +2139,7 @@ let Predicates = [doF32FTZ] in {
   def : Pat<(i32 (fp_to_sint f32:$a)), (CVT_s32_f32 $a, CvtRZI_FTZ)>;
   def : Pat<(i64 (fp_to_sint f32:$a)), (CVT_s64_f32 $a, CvtRZI_FTZ)>;
 }
-def : Pat<(i1  (fp_to_sint f32:$a)), (SETP_b32ri $a, 0, CmpEQ)>;
+def : Pat<(i1  (fp_to_sint f32:$a)), (SETP_i32ri $a, 0, CmpEQ)>; // i1 result: integer setp of the 32-bit register against 0 (was SETP_b32ri)
 def : Pat<(i16 (fp_to_sint f32:$a)), (CVT_s16_f32 $a, CvtRZI)>;
 def : Pat<(i32 (fp_to_sint f32:$a)), (CVT_s32_f32 $a, CvtRZI)>;
 def : Pat<(i64 (fp_to_sint f32:$a)), (CVT_s64_f32 $a, CvtRZI)>;
@@ -2421,19 +2150,19 @@ let Predicates = [doF32FTZ] in {
   def : Pat<(i32 (fp_to_uint f32:$a)), (CVT_u32_f32 $a, CvtRZI_FTZ)>;
   def : Pat<(i64 (fp_to_uint f32:$a)), (CVT_u64_f32 $a, CvtRZI_FTZ)>;
 }
-def : Pat<(i1  (fp_to_uint f32:$a)), (SETP_b32ri $a, 0, CmpEQ)>;
+def : Pat<(i1  (fp_to_uint f32:$a)), (SETP_i32ri $a, 0, CmpEQ)>; // i1 result: integer setp of the 32-bit register against 0 (was SETP_b32ri)
 def : Pat<(i16 (fp_to_uint f32:$a)), (CVT_u16_f32 $a, CvtRZI)>;
 def : Pat<(i32 (fp_to_uint f32:$a)), (CVT_u32_f32 $a, CvtRZI)>;
 def : Pat<(i64 (fp_to_uint f32:$a)), (CVT_u64_f32 $a, CvtRZI)>;
 
 // f64 -> sint
-def : Pat<(i1  (fp_to_sint f64:$a)), (SETP_b64ri $a, 0, CmpEQ)>;
+def : Pat<(i1  (fp_to_sint f64:$a)), (SETP_i64ri $a, 0, CmpEQ)>; // i1 result: integer setp of the 64-bit register against 0 (was SETP_b64ri)
 def : Pat<(i16 (fp_to_sint f64:$a)), (CVT_s16_f64 $a, CvtRZI)>;
 def : Pat<(i32 (fp_to_sint f64:$a)), (CVT_s32_f64 $a, CvtRZI)>;
 def : Pat<(i64 (fp_to_sint f64:$a)), (CVT_s64_f64 $a, CvtRZI)>;
 
 // f64 -> uint
-def : Pat<(i1  (fp_to_uint f64:$a)), (SETP_b64ri $a, 0, CmpEQ)>;
+def : Pat<(i1  (fp_to_uint f64:$a)), (SETP_i64ri $a, 0, CmpEQ)>; // same i1 lowering as the signed case
 def : Pat<(i16 (fp_to_uint f64:$a)), (CVT_u16_f64 $a, CvtRZI)>;
 def : Pat<(i32 (fp_to_uint f64:$a)), (CVT_u32_f64 $a, CvtRZI)>;
 def : Pat<(i64 (fp_to_uint f64:$a)), (CVT_u64_f64 $a, CvtRZI)>;
@@ -2478,14 +2207,14 @@ def : Pat<(i64 (anyext i32:$a)), (CVT_u64_u32 $a, CvtNONE)>;
 // truncate i64
 def : Pat<(i32 (trunc i64:$a)), (CVT_u32_u64 $a, CvtNONE)>;
 def : Pat<(i16 (trunc i64:$a)), (CVT_u16_u64 $a, CvtNONE)>;
-def : Pat<(i1  (trunc i64:$a)), (SETP_b64ri (ANDb64ri $a, 1), 0, CmpNE)>;
+def : Pat<(i1  (trunc i64:$a)), (SETP_i64ri (ANDb64ri $a, 1), 0, CmpNE)>; // i1 trunc: mask bit 0, then test the masked value != 0
 
 // truncate i32
 def : Pat<(i16 (trunc i32:$a)), (CVT_u16_u32 $a, CvtNONE)>;
-def : Pat<(i1  (trunc i32:$a)), (SETP_b32ri (ANDb32ri $a, 1), 0, CmpNE)>;
+def : Pat<(i1  (trunc i32:$a)), (SETP_i32ri (ANDb32ri $a, 1), 0, CmpNE)>; // same mask-and-test at 32 bits
 
 // truncate i16
-def : Pat<(i1 (trunc i16:$a)), (SETP_b16ri (ANDb16ri $a, 1), 0, CmpNE)>;
+def : Pat<(i1 (trunc i16:$a)), (SETP_i16ri (ANDb16ri $a, 1), 0, CmpNE)>; // same mask-and-test at 16 bits
 
 // sext_inreg
 def : Pat<(sext_inreg i16:$a, i8), (CVT_INREG_s16_s8 $a)>;
@@ -2734,7 +2463,7 @@ let isTerminator=1 in {
 }
 
 def : Pat<(brcond i32:$a, bb:$target),
-          (CBranch (SETP_u32ri $a, 0, CmpNE), bb:$target)>;
+          (CBranch (SETP_i32ri $a, 0, CmpNE), bb:$target)>; // the branch predicate is $a != 0
 
 // SelectionDAGBuilder::visitSWitchCase() will invert the condition of a
 // conditional branch if the target block is the next block so that the code
@@ -2964,31 +2693,18 @@ def NVPTX_fmaxnum_nsz : PatFrag<(ops node:$a, node:$b),
   return N->getFlags().hasNoSignedZeros() || TM.Options.NoSignedZerosFPMath;
 }]>;
 
-class NVPTXInst_rrr<RegisterClass RC, string Instruction, list<Predicate> Preds>
-  : BasicNVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c), Instruction>,
-    Requires<Preds>;
-
-def FMARELU_F16 : NVPTXInst_rrr<B16, "fma.rn.relu.f16", [useFP16Math, hasPTX<70>, hasSM<80>]>;
-def FMARELU_F16_FTZ : NVPTXInst_rrr<B16, "fma.rn.ftz.relu.f16", [useFP16Math, hasPTX<70>, hasSM<80>]>;
-def FMARELU_BF16 : NVPTXInst_rrr<B16, "fma.rn.relu.bf16", [hasBF16Math, hasPTX<70>, hasSM<80>]>;
-def FMARELU_F16X2 : NVPTXInst_rrr<B32, "fma.rn.relu.f16x2", [useFP16Math, hasPTX<70>, hasSM<80>]>;
-def FMARELU_F16X2_FTZ : NVPTXInst_rrr<B32, "fma.rn.ftz.relu.f16x2", [useFP16Math, hasPTX<70>, hasSM<80>]>;
-def FMARELU_BF16X2 : NVPTXInst_rrr<B32, "fma.rn.relu.bf16x2", [hasBF16Math, hasPTX<70>, hasSM<80>]>;
-
-// FTZ
-def : Pat<(f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan f16:$a, f16:$b, f16:$c), fpimm_any_zero)),
-  (FMARELU_F16_FTZ $a, $b, $c)>,
-  Requires<[doF32FTZ]>;
-def : Pat<(v2f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan v2f16:$a, v2f16:$b, v2f16:$c), fpimm_positive_zero_v2f16)),
-  (FMARELU_F16X2_FTZ $a, $b, $c)>,
-  Requires<[doF32FTZ]>;
-
-// NO FTZ
-def : Pat<(f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan f16:$a, f16:$b, f16:$c), fpimm_any_zero)),
-  (FMARELU_F16 $a, $b, $c)>;
-def : Pat<(bf16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan bf16:$a, bf16:$b, bf16:$c), fpimm_any_zero)),
-  (FMARELU_BF16 $a, $b, $c)>;
-def : Pat<(v2f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan v2f16:$a, v2f16:$b, v2f16:$c), fpimm_positive_zero_v2f16)),
-  (FMARELU_F16X2 $a, $b, $c)>;
-def : Pat<(v2bf16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan v2bf16:$a, v2bf16:$b, v2bf16:$c), fpimm_positive_zero_v2bf16)),
-  (FMARELU_BF16X2 $a, $b, $c)>;
+class FMARELUInst<RegTyInfo t, bit allow_ftz, PatFrag zero_pat>
+  : BasicFlagsNVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b, t.RC:$c),
+                   !if(allow_ftz, (ins FTZFlag:$ftz), (ins)),
+                   "fma.rn" # !if(allow_ftz, "$ftz", "") # ".relu." # t.PtxType,
+                   [(set t.Ty:$dst, (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan t.Ty:$a, t.Ty:$b, t.Ty:$c), zero_pat))]>;
+
+let Predicates = [useFP16Math, hasPTX<70>, hasSM<80>] in {
+  def FMARELU_F16 : FMARELUInst<F16RT, true, fpimm_any_zero>;
+  def FMARELU_F16X2 : FMARELUInst<F16X2RT, true, fpimm_positive_zero_v2f16>;
+}
+
+let Predicates = [hasBF16Math, hasPTX<70>, hasSM<80>] in {
+  def FMARELU_BF16 : FMARELUInst<BF16RT, false, fpimm_any_zero>;
+  def FMARELU_BF16X2 : FMARELUInst<BF16X2RT, false, fpimm_positive_zero_v2bf16>;
+}
\ No newline at end of file

diff  --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index cc1fd027d8515..d840324ce8238 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1006,12 +1006,14 @@ def INT_PM_EVENT_MASK : BasicNVPTXInst<(outs),
 // Min Max
 //
 
-def INT_NVVM_FMIN_F : F_MATH_2<"min.f32", B32, B32, B32, int_nvvm_fmin_f>;
-def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32", B32, B32, B32, int_nvvm_fmin_ftz_f>;
-def INT_NVVM_FMIN_NAN_F : F_MATH_2<"min.NaN.f32", B32, B32, B32, int_nvvm_fmin_nan_f,
-  [hasPTX<70>, hasSM<80>]>;
-def INT_NVVM_FMIN_FTZ_NAN_F : F_MATH_2<"min.ftz.NaN.f32", B32, B32, B32, int_nvvm_fmin_ftz_nan_f,
-  [hasPTX<70>, hasSM<80>]>;
+def : Pat<(int_nvvm_fmin_f f32:$a, f32:$b), (MIN_f32_rr $a, $b, NoFTZ)>;
+def : Pat<(int_nvvm_fmin_ftz_f f32:$a, f32:$b), (MIN_f32_rr $a, $b, FTZ)>;
+
+let Predicates = [hasPTX<70>, hasSM<80>] in {
+  def : Pat<(int_nvvm_fmin_nan_f f32:$a, f32:$b), (MIN_NAN_f32_rr $a, $b, NoFTZ)>;
+  def : Pat<(int_nvvm_fmin_ftz_nan_f f32:$a, f32:$b), (MIN_NAN_f32_rr $a, $b, FTZ)>;
+}
+
 def INT_NVVM_FMIN_XORSIGN_ABS_F :
  F_MATH_2<"min.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmin_xorsign_abs_f,
     [hasPTX<72>, hasSM<86>]>;
@@ -1025,12 +1027,15 @@ def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F :
   F_MATH_2<"min.ftz.NaN.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmin_ftz_nan_xorsign_abs_f,
     [hasPTX<72>, hasSM<86>]>;
 
-def INT_NVVM_FMAX_F : F_MATH_2<"max.f32", B32, B32, B32, int_nvvm_fmax_f>;
-def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32", B32, B32, B32, int_nvvm_fmax_ftz_f>;
-def INT_NVVM_FMAX_NAN_F : F_MATH_2<"max.NaN.f32", B32, B32, B32, int_nvvm_fmax_nan_f,
-  [hasPTX<70>, hasSM<80>]>;
-def INT_NVVM_FMAX_FTZ_NAN_F : F_MATH_2<"max.ftz.NaN.f32", B32, B32, B32, int_nvvm_fmax_ftz_nan_f,
-  [hasPTX<70>, hasSM<80>]>;
+
+def : Pat<(int_nvvm_fmax_f f32:$a, f32:$b), (MAX_f32_rr $a, $b, NoFTZ)>;
+def : Pat<(int_nvvm_fmax_ftz_f f32:$a, f32:$b), (MAX_f32_rr $a, $b, FTZ)>;
+
+let Predicates = [hasPTX<70>, hasSM<80>] in {
+  def : Pat<(int_nvvm_fmax_nan_f f32:$a, f32:$b), (MAX_NAN_f32_rr $a, $b, NoFTZ)>;
+  def : Pat<(int_nvvm_fmax_ftz_nan_f f32:$a, f32:$b), (MAX_NAN_f32_rr $a, $b, FTZ)>;
+}
+
 def INT_NVVM_FMAX_XORSIGN_ABS_F :
   F_MATH_2<"max.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmax_xorsign_abs_f,
     [hasPTX<72>, hasSM<86>]>;
@@ -1044,8 +1049,8 @@ def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F :
   F_MATH_2<"max.ftz.NaN.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmax_ftz_nan_xorsign_abs_f,
     [hasPTX<72>, hasSM<86>]>;
 
-def INT_NVVM_FMIN_D : F_MATH_2<"min.f64", B64, B64, B64, int_nvvm_fmin_d>;
-def INT_NVVM_FMAX_D : F_MATH_2<"max.f64", B64, B64, B64, int_nvvm_fmax_d>;
+def : Pat<(int_nvvm_fmin_d f64:$a, f64:$b), (MIN_f64_rr $a, $b)>;
+def : Pat<(int_nvvm_fmax_d f64:$a, f64:$b), (MAX_f64_rr $a, $b)>;
 
 //
 // Min Max f16, f16x2, bf16, bf16x2
@@ -1181,17 +1186,10 @@ def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64", B64, B64, B64, int_nvvm_div_rz_d>
 def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64", B64, B64, B64, int_nvvm_div_rm_d>;
 def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64", B64, B64, B64, int_nvvm_div_rp_d>;
 
-def : Pat<(int_nvvm_div_full f32:$a, f32:$b),
-          (FDIV32rr $a, $b)>;
-
-def : Pat<(int_nvvm_div_full f32:$a, fpimm:$b),
-          (FDIV32ri $a, f32imm:$b)>;
-
-def : Pat<(int_nvvm_div_full_ftz f32:$a, f32:$b),
-          (FDIV32rr_ftz $a, $b)>;
-
-def : Pat<(int_nvvm_div_full_ftz f32:$a, fpimm:$b),
-          (FDIV32ri_ftz $a, f32imm:$b)>;
+def : Pat<(int_nvvm_div_full f32:$a, f32:$b), (FDIV32rr $a, $b, NoFTZ)>;
+def : Pat<(int_nvvm_div_full f32:$a, fpimm:$b), (FDIV32ri $a, f32imm:$b, NoFTZ)>;
+def : Pat<(int_nvvm_div_full_ftz f32:$a, f32:$b), (FDIV32rr $a, $b, FTZ)>;
+def : Pat<(int_nvvm_div_full_ftz f32:$a, fpimm:$b), (FDIV32ri $a, f32imm:$b, FTZ)>;
 
 //
 // Sad
@@ -1288,48 +1286,36 @@ def : Pat<(int_nvvm_saturate_d f64:$a),     (CVT_f64_f64 $a, CvtSAT)>;
 // Exp2  Log2
 //
 
-def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32",
-  F32RT, F32RT, int_nvvm_ex2_approx_ftz_f>;
-def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32",
-  F32RT, F32RT, int_nvvm_ex2_approx_f>;
-def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64",
-  F64RT, F64RT, int_nvvm_ex2_approx_d>;
-
-def INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16",
-  F16RT, F16RT, int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>;
-def INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2",
-  F16X2RT, F16X2RT, int_nvvm_ex2_approx_f16x2, [hasPTX<70>, hasSM<75>]>;
-
-def : Pat<(fexp2 f32:$a), (INT_NVVM_EX2_APPROX_FTZ_F $a)>, Requires<[doF32FTZ]>;
-def : Pat<(fexp2 f32:$a), (INT_NVVM_EX2_APPROX_F $a)>;
-def : Pat<(fexp2 f16:$a), (INT_NVVM_EX2_APPROX_F16 $a)>, Requires<[useFP16Math]>;
-def : Pat<(fexp2 v2f16:$a), (INT_NVVM_EX2_APPROX_F16X2 $a)>, Requires<[useFP16Math]>;
-
-def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32",
-  F32RT, F32RT, int_nvvm_lg2_approx_ftz_f>;
-def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32",
-  F32RT, F32RT, int_nvvm_lg2_approx_f>;
-def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64",
-  F64RT, F64RT, int_nvvm_lg2_approx_d>;
-
-def : Pat<(flog2 f32:$a), (INT_NVVM_LG2_APPROX_FTZ_F $a)>,
-          Requires<[doF32FTZ]>;
-def : Pat<(flog2 f32:$a), (INT_NVVM_LG2_APPROX_F $a)>,
-          Requires<[doNoF32FTZ]>;
+def : Pat<(int_nvvm_ex2_approx_ftz_f f32:$a), (EX2_APPROX_f32 $a, FTZ)>;
+def : Pat<(int_nvvm_ex2_approx_f f32:$a), (EX2_APPROX_f32 $a, NoFTZ)>;
+
+let Predicates = [hasPTX<70>, hasSM<75>] in {
+  def : Pat<(int_nvvm_ex2_approx_f16 f16:$a), (EX2_APPROX_f16 $a)>;
+  def : Pat<(int_nvvm_ex2_approx_f16x2 v2f16:$a), (EX2_APPROX_f16x2 $a)>;
+}
+
+def LG2_APPROX_f32 :
+  BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$src), (ins FTZFlag:$ftz),
+    "lg2.approx$ftz.f32",
+    [(set f32:$dst, (flog2 f32:$src))]>;
+
+def LG2_APPROX_f64 :
+  BasicNVPTXInst<(outs B64:$dst), (ins B64:$src),
+    "lg2.approx.f64",
+    [(set f64:$dst, (flog2 f64:$src))]>;
+
+def : Pat<(int_nvvm_lg2_approx_ftz_f f32:$a), (LG2_APPROX_f32 $a, FTZ)>;
+def : Pat<(int_nvvm_lg2_approx_f f32:$a), (LG2_APPROX_f32 $a, NoFTZ)>;
+def : Pat<(int_nvvm_lg2_approx_d f64:$a), (LG2_APPROX_f64 $a)>;
 
 //
 // Sin  Cos
 //
 
-def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32",
-  F32RT, F32RT, int_nvvm_sin_approx_ftz_f>;
-def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32",
-  F32RT, F32RT, int_nvvm_sin_approx_f>;
-
-def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32",
-  F32RT, F32RT, int_nvvm_cos_approx_ftz_f>;
-def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32",
-  F32RT, F32RT, int_nvvm_cos_approx_f>;
+def : Pat<(int_nvvm_sin_approx_ftz_f f32:$a), (SIN_APPROX_f32 $a, FTZ)>;
+def : Pat<(int_nvvm_sin_approx_f f32:$a), (SIN_APPROX_f32 $a, NoFTZ)>;
+def : Pat<(int_nvvm_cos_approx_ftz_f f32:$a), (COS_APPROX_f32 $a, FTZ)>;
+def : Pat<(int_nvvm_cos_approx_f f32:$a), (COS_APPROX_f32 $a, NoFTZ)>;
 
 //
 // Fma
@@ -1478,31 +1464,30 @@ def : Pat<(fsqrt_approx f32:$a), (INT_NVVM_SQRT_APPROX_F $a)>;
 // Rsqrt
 //
 
-def INT_NVVM_RSQRT_APPROX_FTZ_F
-  : F_MATH_1<"rsqrt.approx.ftz.f32", F32RT, F32RT, int_nvvm_rsqrt_approx_ftz_f>;
-def INT_NVVM_RSQRT_APPROX_FTZ_D
-  : F_MATH_1<"rsqrt.approx.ftz.f64", F64RT, F64RT, int_nvvm_rsqrt_approx_ftz_d>;
+foreach t = [F32RT, F64RT] in {
+  def RSQRT_APPROX_ # t.Ty :
+    BasicFlagsNVPTXInst<(outs t.RC:$dst),
+                        (ins t.RC:$a), (ins FTZFlag:$ftz),
+                        "rsqrt.approx$ftz.f" # t.Size>;
+}
+
+def : Pat<(int_nvvm_rsqrt_approx_ftz_f f32:$a), (RSQRT_APPROX_f32 $a, FTZ)>;
+def : Pat<(int_nvvm_rsqrt_approx_ftz_d f64:$a), (RSQRT_APPROX_f64 $a, FTZ)>;
+def : Pat<(int_nvvm_rsqrt_approx_f f32:$a), (RSQRT_APPROX_f32 $a, NoFTZ)>;
+def : Pat<(int_nvvm_rsqrt_approx_d f64:$a), (RSQRT_APPROX_f64 $a, NoFTZ)>;
 
-def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32",
-  F32RT, F32RT, int_nvvm_rsqrt_approx_f>;
-def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64",
-  F64RT, F64RT, int_nvvm_rsqrt_approx_d>;
 
 // 1.0f / sqrt_approx -> rsqrt_approx
-def: Pat<(fdiv f32imm_1, (int_nvvm_sqrt_approx_f f32:$a)),
-         (INT_NVVM_RSQRT_APPROX_F $a)>,
-         Requires<[doRsqrtOpt]>;
-def: Pat<(fdiv f32imm_1, (int_nvvm_sqrt_approx_ftz_f f32:$a)),
-         (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>,
-         Requires<[doRsqrtOpt]>;
-
-// same for int_nvvm_sqrt_f when non-precision sqrt is requested
-def: Pat<(fdiv f32imm_1, (fsqrt_approx f32:$a)),
-         (INT_NVVM_RSQRT_APPROX_F $a)>,
-         Requires<[doRsqrtOpt, doNoF32FTZ]>;
-def: Pat<(fdiv f32imm_1, (fsqrt_approx f32:$a)),
-         (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>,
-         Requires<[doRsqrtOpt, doF32FTZ]>;
+let Predicates = [doRsqrtOpt] in {
+  def : Pat<(fdiv f32imm_1, (int_nvvm_sqrt_approx_f f32:$a)),
+          (RSQRT_APPROX_f32 $a, NoFTZ)>;
+  def : Pat<(fdiv f32imm_1, (int_nvvm_sqrt_approx_ftz_f f32:$a)),
+          (RSQRT_APPROX_f32 $a, FTZ)>;
+
+  // same for int_nvvm_sqrt_f when non-precision sqrt is requested
+  def : Pat<(fdiv f32imm_1, (fsqrt_approx f32:$a)),
+          (RSQRT_APPROX_f32 $a)>;
+}
 //
 // Add
 //

diff  --git a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
index 61c3ba2ee54ab..e3b072549bc04 100644
--- a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
+++ b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
@@ -48,8 +48,8 @@ body: |
     %4 = CVT_f32_f64 %3, 5
     %5 = CVT_f32_s32 %2, 5
   ; CHECK: %6:b32 = FADD_rnf32ri %5, float 6.250000e+00
-    %6 = FADD_rnf32ri %5, float 6.250000e+00
-    %7 = FMUL_rnf32rr %6, %4
+    %6 = FADD_rnf32ri %5, float 6.250000e+00, 0
+    %7 = FMUL_rnf32rr %6, %4, 0
     ST_i32 %7, 0, 0, 101, 32, &func_retval0, 0 :: (store (s32), addrspace 101)
     Return
 ...
@@ -74,8 +74,8 @@ body: |
     %4 = CVT_f32_f64 %3, 5
     %5 = CVT_f32_s32 %2, 5
   ; CHECK: %6:b32 = FADD_rnf32ri %5, float 0x7FF8000000000000
-    %6 = FADD_rnf32ri %5, float 0x7FF8000000000000
-    %7 = FMUL_rnf32rr %6, %4
+    %6 = FADD_rnf32ri %5, float 0x7FF8000000000000, 0
+    %7 = FMUL_rnf32rr %6, %4, 0
     ST_i32 %7, 0, 0, 101, 32, &func_retval0, 0 :: (store (s32), addrspace 101)
     Return
 ...

diff  --git a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
index 22a7177650ee2..94f49b01e6ea6 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
@@ -71,7 +71,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
 ; CHECKPTX62-NEXT:    and.b32 %r31, %r54, %r3;
 ; CHECKPTX62-NEXT:    or.b32 %r32, %r31, %r30;
 ; CHECKPTX62-NEXT:    atom.cas.b32 %r6, [%r1], %r54, %r32;
-; CHECKPTX62-NEXT:    setp.ne.s32 %p1, %r6, %r54;
+; CHECKPTX62-NEXT:    setp.ne.b32 %p1, %r6, %r54;
 ; CHECKPTX62-NEXT:    mov.b32 %r54, %r6;
 ; CHECKPTX62-NEXT:    @%p1 bra $L__BB0_1;
 ; CHECKPTX62-NEXT:  // %bb.2: // %atomicrmw.end44
@@ -87,7 +87,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
 ; CHECKPTX62-NEXT:    and.b32 %r36, %r55, %r3;
 ; CHECKPTX62-NEXT:    or.b32 %r37, %r36, %r35;
 ; CHECKPTX62-NEXT:    atom.cas.b32 %r9, [%r1], %r55, %r37;
-; CHECKPTX62-NEXT:    setp.ne.s32 %p2, %r9, %r55;
+; CHECKPTX62-NEXT:    setp.ne.b32 %p2, %r9, %r55;
 ; CHECKPTX62-NEXT:    mov.b32 %r55, %r9;
 ; CHECKPTX62-NEXT:    @%p2 bra $L__BB0_3;
 ; CHECKPTX62-NEXT:  // %bb.4: // %atomicrmw.end26
@@ -108,7 +108,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
 ; CHECKPTX62-NEXT:    and.b32 %r44, %r56, %r12;
 ; CHECKPTX62-NEXT:    or.b32 %r45, %r44, %r43;
 ; CHECKPTX62-NEXT:    atom.global.cas.b32 %r15, [%r10], %r56, %r45;
-; CHECKPTX62-NEXT:    setp.ne.s32 %p3, %r15, %r56;
+; CHECKPTX62-NEXT:    setp.ne.b32 %p3, %r15, %r56;
 ; CHECKPTX62-NEXT:    mov.b32 %r56, %r15;
 ; CHECKPTX62-NEXT:    @%p3 bra $L__BB0_5;
 ; CHECKPTX62-NEXT:  // %bb.6: // %atomicrmw.end8
@@ -129,7 +129,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
 ; CHECKPTX62-NEXT:    and.b32 %r52, %r57, %r18;
 ; CHECKPTX62-NEXT:    or.b32 %r53, %r52, %r51;
 ; CHECKPTX62-NEXT:    atom.shared.cas.b32 %r21, [%r16], %r57, %r53;
-; CHECKPTX62-NEXT:    setp.ne.s32 %p4, %r21, %r57;
+; CHECKPTX62-NEXT:    setp.ne.b32 %p4, %r21, %r57;
 ; CHECKPTX62-NEXT:    mov.b32 %r57, %r21;
 ; CHECKPTX62-NEXT:    @%p4 bra $L__BB0_7;
 ; CHECKPTX62-NEXT:  // %bb.8: // %atomicrmw.end

diff  --git a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
index b5a4f94611453..b21bd16d55c2c 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
@@ -72,7 +72,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
 ; CHECKPTX71-NEXT:    and.b32 %r31, %r54, %r3;
 ; CHECKPTX71-NEXT:    or.b32 %r32, %r31, %r30;
 ; CHECKPTX71-NEXT:    atom.relaxed.cas.b32 %r6, [%r1], %r54, %r32;
-; CHECKPTX71-NEXT:    setp.ne.s32 %p1, %r6, %r54;
+; CHECKPTX71-NEXT:    setp.ne.b32 %p1, %r6, %r54;
 ; CHECKPTX71-NEXT:    mov.b32 %r54, %r6;
 ; CHECKPTX71-NEXT:    @%p1 bra $L__BB0_1;
 ; CHECKPTX71-NEXT:  // %bb.2: // %atomicrmw.end44
@@ -88,7 +88,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
 ; CHECKPTX71-NEXT:    and.b32 %r36, %r55, %r3;
 ; CHECKPTX71-NEXT:    or.b32 %r37, %r36, %r35;
 ; CHECKPTX71-NEXT:    atom.relaxed.cas.b32 %r9, [%r1], %r55, %r37;
-; CHECKPTX71-NEXT:    setp.ne.s32 %p2, %r9, %r55;
+; CHECKPTX71-NEXT:    setp.ne.b32 %p2, %r9, %r55;
 ; CHECKPTX71-NEXT:    mov.b32 %r55, %r9;
 ; CHECKPTX71-NEXT:    @%p2 bra $L__BB0_3;
 ; CHECKPTX71-NEXT:  // %bb.4: // %atomicrmw.end26
@@ -110,7 +110,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
 ; CHECKPTX71-NEXT:    and.b32 %r44, %r56, %r12;
 ; CHECKPTX71-NEXT:    or.b32 %r45, %r44, %r43;
 ; CHECKPTX71-NEXT:    atom.relaxed.global.cas.b32 %r15, [%r10], %r56, %r45;
-; CHECKPTX71-NEXT:    setp.ne.s32 %p3, %r15, %r56;
+; CHECKPTX71-NEXT:    setp.ne.b32 %p3, %r15, %r56;
 ; CHECKPTX71-NEXT:    mov.b32 %r56, %r15;
 ; CHECKPTX71-NEXT:    @%p3 bra $L__BB0_5;
 ; CHECKPTX71-NEXT:  // %bb.6: // %atomicrmw.end8
@@ -132,7 +132,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
 ; CHECKPTX71-NEXT:    and.b32 %r52, %r57, %r18;
 ; CHECKPTX71-NEXT:    or.b32 %r53, %r52, %r51;
 ; CHECKPTX71-NEXT:    atom.relaxed.shared.cas.b32 %r21, [%r16], %r57, %r53;
-; CHECKPTX71-NEXT:    setp.ne.s32 %p4, %r21, %r57;
+; CHECKPTX71-NEXT:    setp.ne.b32 %p4, %r21, %r57;
 ; CHECKPTX71-NEXT:    mov.b32 %r57, %r21;
 ; CHECKPTX71-NEXT:    @%p4 bra $L__BB0_7;
 ; CHECKPTX71-NEXT:  // %bb.8: // %atomicrmw.end

diff  --git a/llvm/test/CodeGen/NVPTX/atomics.ll b/llvm/test/CodeGen/NVPTX/atomics.ll
index 55ce3dfc46539..04a58cf22cfc5 100644
--- a/llvm/test/CodeGen/NVPTX/atomics.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics.ll
@@ -453,7 +453,7 @@ define half @atomicrmw_add_f16_generic(ptr %addr, half %val) {
 ; CHECK-NEXT:    or.b32 %r17, %r16, %r15;
 ; CHECK-NEXT:    membar.sys;
 ; CHECK-NEXT:    atom.cas.b32 %r5, [%rd1], %r19, %r17;
-; CHECK-NEXT:    setp.ne.s32 %p1, %r5, %r19;
+; CHECK-NEXT:    setp.ne.b32 %p1, %r5, %r19;
 ; CHECK-NEXT:    mov.b32 %r19, %r5;
 ; CHECK-NEXT:    @%p1 bra $L__BB24_1;
 ; CHECK-NEXT:  // %bb.2: // %atomicrmw.end

diff  --git a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
index f59f51c3c57d3..a386e4292777b 100644
--- a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
@@ -1339,9 +1339,9 @@ define bfloat @test_maximum(bfloat %a, bfloat %b) {
 ; SM70-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
 ; SM70-NEXT:    setp.nan.f32 %p2, %r4, %r2;
 ; SM70-NEXT:    selp.b16 %rs4, 0x7FC0, %rs3, %p2;
-; SM70-NEXT:    setp.eq.s16 %p3, %rs1, 0;
+; SM70-NEXT:    setp.eq.b16 %p3, %rs1, 0;
 ; SM70-NEXT:    selp.b16 %rs5, %rs1, %rs4, %p3;
-; SM70-NEXT:    setp.eq.s16 %p4, %rs2, 0;
+; SM70-NEXT:    setp.eq.b16 %p4, %rs2, 0;
 ; SM70-NEXT:    selp.b16 %rs6, %rs2, %rs5, %p4;
 ; SM70-NEXT:    cvt.u32.u16 %r5, %rs4;
 ; SM70-NEXT:    shl.b32 %r6, %r5, 16;
@@ -1462,9 +1462,9 @@ define <2 x bfloat> @test_maximum_v2(<2 x bfloat> %a, <2 x bfloat> %b) {
 ; SM70-NEXT:    selp.b16 %rs5, %rs2, %rs4, %p1;
 ; SM70-NEXT:    setp.nan.f32 %p2, %r4, %r2;
 ; SM70-NEXT:    selp.b16 %rs6, 0x7FC0, %rs5, %p2;
-; SM70-NEXT:    setp.eq.s16 %p3, %rs2, 0;
+; SM70-NEXT:    setp.eq.b16 %p3, %rs2, 0;
 ; SM70-NEXT:    selp.b16 %rs7, %rs2, %rs6, %p3;
-; SM70-NEXT:    setp.eq.s16 %p4, %rs4, 0;
+; SM70-NEXT:    setp.eq.b16 %p4, %rs4, 0;
 ; SM70-NEXT:    selp.b16 %rs8, %rs4, %rs7, %p4;
 ; SM70-NEXT:    cvt.u32.u16 %r5, %rs6;
 ; SM70-NEXT:    shl.b32 %r6, %r5, 16;
@@ -1478,9 +1478,9 @@ define <2 x bfloat> @test_maximum_v2(<2 x bfloat> %a, <2 x bfloat> %b) {
 ; SM70-NEXT:    selp.b16 %rs10, %rs1, %rs3, %p6;
 ; SM70-NEXT:    setp.nan.f32 %p7, %r10, %r8;
 ; SM70-NEXT:    selp.b16 %rs11, 0x7FC0, %rs10, %p7;
-; SM70-NEXT:    setp.eq.s16 %p8, %rs1, 0;
+; SM70-NEXT:    setp.eq.b16 %p8, %rs1, 0;
 ; SM70-NEXT:    selp.b16 %rs12, %rs1, %rs11, %p8;
-; SM70-NEXT:    setp.eq.s16 %p9, %rs3, 0;
+; SM70-NEXT:    setp.eq.b16 %p9, %rs3, 0;
 ; SM70-NEXT:    selp.b16 %rs13, %rs3, %rs12, %p9;
 ; SM70-NEXT:    cvt.u32.u16 %r11, %rs11;
 ; SM70-NEXT:    shl.b32 %r12, %r11, 16;

diff  --git a/llvm/test/CodeGen/NVPTX/branch-fold.mir b/llvm/test/CodeGen/NVPTX/branch-fold.mir
index 4d80d52de8da8..ca6f49feff052 100644
--- a/llvm/test/CodeGen/NVPTX/branch-fold.mir
+++ b/llvm/test/CodeGen/NVPTX/branch-fold.mir
@@ -58,7 +58,7 @@ body:             |
   ; CHECK-NEXT:   successors: %bb.2(0x7c000000), %bb.3(0x04000000)
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   [[ADDi64ri:%[0-9]+]]:b64 = ADDi64ri [[ADDi64ri]], 1
-  ; CHECK-NEXT:   [[SETP_s64ri:%[0-9]+]]:b1 = SETP_s64ri [[ADDi64ri]], 1, 2
+  ; CHECK-NEXT:   [[SETP_s64ri:%[0-9]+]]:b1 = SETP_i64ri [[ADDi64ri]], 1, 2
   ; CHECK-NEXT:   CBranch [[SETP_s64ri]], %bb.2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.3.bb4:
@@ -77,7 +77,7 @@ body:             |
     successors: %bb.2(0x7c000000), %bb.3(0x04000000)
 
     %5:b64 = ADDi64ri %5, 1
-    %4:b1 = SETP_s64ri %5, 1, 2
+    %4:b1 = SETP_i64ri %5, 1, 2
     CBranch %4, %bb.2
 
   bb.3.bb4:

diff  --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
index c99860cc5cc1b..9f900c961d2ed 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
@@ -32,12 +32,12 @@ define i8 @monotonic_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB0_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB0_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB0_1;
 ; SM60-NEXT:  $L__BB0_3: // %partword.cmpxchg.end
@@ -77,12 +77,12 @@ define i8 @monotonic_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB1_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB1_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB1_1;
 ; SM60-NEXT:  $L__BB1_3: // %partword.cmpxchg.end
@@ -122,12 +122,12 @@ define i8 @monotonic_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %ne
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB2_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB2_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB2_1;
 ; SM60-NEXT:  $L__BB2_3: // %partword.cmpxchg.end
@@ -167,12 +167,12 @@ define i8 @monotonic_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB3_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB3_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB3_1;
 ; SM60-NEXT:  $L__BB3_3: // %partword.cmpxchg.end
@@ -213,12 +213,12 @@ define i8 @monotonic_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB4_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB4_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB4_1;
 ; SM60-NEXT:  $L__BB4_3: // %partword.cmpxchg.end
@@ -259,12 +259,12 @@ define i8 @monotonic_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB5_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB5_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB5_1;
 ; SM60-NEXT:  $L__BB5_3: // %partword.cmpxchg.end
@@ -306,12 +306,12 @@ define i8 @monotonic_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB6_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB6_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB6_1;
 ; SM60-NEXT:  $L__BB6_3: // %partword.cmpxchg.end
@@ -353,12 +353,12 @@ define i8 @monotonic_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB7_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB7_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB7_1;
 ; SM60-NEXT:  $L__BB7_3: // %partword.cmpxchg.end
@@ -400,12 +400,12 @@ define i8 @monotonic_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB8_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB8_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB8_1;
 ; SM60-NEXT:  $L__BB8_3: // %partword.cmpxchg.end
@@ -446,12 +446,12 @@ define i8 @acquire_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB9_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB9_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB9_1;
 ; SM60-NEXT:  $L__BB9_3: // %partword.cmpxchg.end
@@ -492,12 +492,12 @@ define i8 @acquire_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB10_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB10_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB10_1;
 ; SM60-NEXT:  $L__BB10_3: // %partword.cmpxchg.end
@@ -538,12 +538,12 @@ define i8 @acquire_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB11_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB11_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB11_1;
 ; SM60-NEXT:  $L__BB11_3: // %partword.cmpxchg.end
@@ -584,12 +584,12 @@ define i8 @acquire_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB12_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB12_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB12_1;
 ; SM60-NEXT:  $L__BB12_3: // %partword.cmpxchg.end
@@ -630,12 +630,12 @@ define i8 @acquire_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB13_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB13_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB13_1;
 ; SM60-NEXT:  $L__BB13_3: // %partword.cmpxchg.end
@@ -676,12 +676,12 @@ define i8 @acquire_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB14_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB14_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB14_1;
 ; SM60-NEXT:  $L__BB14_3: // %partword.cmpxchg.end
@@ -723,12 +723,12 @@ define i8 @acquire_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB15_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB15_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB15_1;
 ; SM60-NEXT:  $L__BB15_3: // %partword.cmpxchg.end
@@ -770,12 +770,12 @@ define i8 @acquire_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB16_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB16_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB16_1;
 ; SM60-NEXT:  $L__BB16_3: // %partword.cmpxchg.end
@@ -817,12 +817,12 @@ define i8 @acquire_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB17_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB17_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB17_1;
 ; SM60-NEXT:  $L__BB17_3: // %partword.cmpxchg.end
@@ -864,12 +864,12 @@ define i8 @release_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB18_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB18_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB18_1;
 ; SM60-NEXT:  $L__BB18_3: // %partword.cmpxchg.end
@@ -910,12 +910,12 @@ define i8 @release_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB19_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB19_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB19_1;
 ; SM60-NEXT:  $L__BB19_3: // %partword.cmpxchg.end
@@ -956,12 +956,12 @@ define i8 @release_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB20_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB20_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB20_1;
 ; SM60-NEXT:  $L__BB20_3: // %partword.cmpxchg.end
@@ -1002,12 +1002,12 @@ define i8 @release_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB21_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB21_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB21_1;
 ; SM60-NEXT:  $L__BB21_3: // %partword.cmpxchg.end
@@ -1049,12 +1049,12 @@ define i8 @release_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB22_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB22_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB22_1;
 ; SM60-NEXT:  $L__BB22_3: // %partword.cmpxchg.end
@@ -1096,12 +1096,12 @@ define i8 @release_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB23_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB23_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB23_1;
 ; SM60-NEXT:  $L__BB23_3: // %partword.cmpxchg.end
@@ -1143,12 +1143,12 @@ define i8 @release_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB24_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB24_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB24_1;
 ; SM60-NEXT:  $L__BB24_3: // %partword.cmpxchg.end
@@ -1190,12 +1190,12 @@ define i8 @release_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB25_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB25_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB25_1;
 ; SM60-NEXT:  $L__BB25_3: // %partword.cmpxchg.end
@@ -1237,12 +1237,12 @@ define i8 @release_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB26_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB26_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB26_1;
 ; SM60-NEXT:  $L__BB26_3: // %partword.cmpxchg.end
@@ -1284,12 +1284,12 @@ define i8 @acq_rel_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB27_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB27_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB27_1;
 ; SM60-NEXT:  $L__BB27_3: // %partword.cmpxchg.end
@@ -1331,12 +1331,12 @@ define i8 @acq_rel_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB28_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB28_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB28_1;
 ; SM60-NEXT:  $L__BB28_3: // %partword.cmpxchg.end
@@ -1378,12 +1378,12 @@ define i8 @acq_rel_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB29_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB29_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB29_1;
 ; SM60-NEXT:  $L__BB29_3: // %partword.cmpxchg.end
@@ -1425,12 +1425,12 @@ define i8 @acq_rel_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB30_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB30_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB30_1;
 ; SM60-NEXT:  $L__BB30_3: // %partword.cmpxchg.end
@@ -1472,12 +1472,12 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB31_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB31_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB31_1;
 ; SM60-NEXT:  $L__BB31_3: // %partword.cmpxchg.end
@@ -1519,12 +1519,12 @@ define i8 @acq_rel_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB32_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB32_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB32_1;
 ; SM60-NEXT:  $L__BB32_3: // %partword.cmpxchg.end
@@ -1566,12 +1566,12 @@ define i8 @acq_rel_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB33_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB33_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB33_1;
 ; SM60-NEXT:  $L__BB33_3: // %partword.cmpxchg.end
@@ -1613,12 +1613,12 @@ define i8 @acq_rel_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB34_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB34_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB34_1;
 ; SM60-NEXT:  $L__BB34_3: // %partword.cmpxchg.end
@@ -1660,12 +1660,12 @@ define i8 @acq_rel_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB35_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB35_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB35_1;
 ; SM60-NEXT:  $L__BB35_3: // %partword.cmpxchg.end
@@ -1707,12 +1707,12 @@ define i8 @seq_cst_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB36_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB36_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB36_1;
 ; SM60-NEXT:  $L__BB36_3: // %partword.cmpxchg.end
@@ -1754,12 +1754,12 @@ define i8 @seq_cst_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB37_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB37_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB37_1;
 ; SM60-NEXT:  $L__BB37_3: // %partword.cmpxchg.end
@@ -1801,12 +1801,12 @@ define i8 @seq_cst_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB38_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB38_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB38_1;
 ; SM60-NEXT:  $L__BB38_3: // %partword.cmpxchg.end
@@ -1848,12 +1848,12 @@ define i8 @seq_cst_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB39_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB39_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB39_1;
 ; SM60-NEXT:  $L__BB39_3: // %partword.cmpxchg.end
@@ -1895,12 +1895,12 @@ define i8 @seq_cst_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB40_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB40_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB40_1;
 ; SM60-NEXT:  $L__BB40_3: // %partword.cmpxchg.end
@@ -1942,12 +1942,12 @@ define i8 @seq_cst_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB41_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB41_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB41_1;
 ; SM60-NEXT:  $L__BB41_3: // %partword.cmpxchg.end
@@ -1989,12 +1989,12 @@ define i8 @seq_cst_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB42_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB42_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB42_1;
 ; SM60-NEXT:  $L__BB42_3: // %partword.cmpxchg.end
@@ -2036,12 +2036,12 @@ define i8 @seq_cst_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB43_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB43_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB43_1;
 ; SM60-NEXT:  $L__BB43_3: // %partword.cmpxchg.end
@@ -2083,12 +2083,12 @@ define i8 @seq_cst_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM60-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM60-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM60-NEXT:    @%p1 bra $L__BB44_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB44_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM60-NEXT:    mov.b32 %r20, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB44_1;
 ; SM60-NEXT:  $L__BB44_3: // %partword.cmpxchg.end
@@ -2128,12 +2128,12 @@ define i16 @monotonic_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB45_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB45_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB45_1;
 ; SM60-NEXT:  $L__BB45_3: // %partword.cmpxchg.end
@@ -2172,12 +2172,12 @@ define i16 @monotonic_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB46_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB46_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB46_1;
 ; SM60-NEXT:  $L__BB46_3: // %partword.cmpxchg.end
@@ -2216,12 +2216,12 @@ define i16 @monotonic_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB47_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB47_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB47_1;
 ; SM60-NEXT:  $L__BB47_3: // %partword.cmpxchg.end
@@ -2260,12 +2260,12 @@ define i16 @monotonic_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB48_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB48_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB48_1;
 ; SM60-NEXT:  $L__BB48_3: // %partword.cmpxchg.end
@@ -2305,12 +2305,12 @@ define i16 @monotonic_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB49_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB49_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB49_1;
 ; SM60-NEXT:  $L__BB49_3: // %partword.cmpxchg.end
@@ -2350,12 +2350,12 @@ define i16 @monotonic_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB50_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB50_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB50_1;
 ; SM60-NEXT:  $L__BB50_3: // %partword.cmpxchg.end
@@ -2396,12 +2396,12 @@ define i16 @monotonic_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB51_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB51_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB51_1;
 ; SM60-NEXT:  $L__BB51_3: // %partword.cmpxchg.end
@@ -2442,12 +2442,12 @@ define i16 @monotonic_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB52_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB52_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB52_1;
 ; SM60-NEXT:  $L__BB52_3: // %partword.cmpxchg.end
@@ -2488,12 +2488,12 @@ define i16 @monotonic_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB53_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB53_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB53_1;
 ; SM60-NEXT:  $L__BB53_3: // %partword.cmpxchg.end
@@ -2533,12 +2533,12 @@ define i16 @acquire_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB54_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB54_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB54_1;
 ; SM60-NEXT:  $L__BB54_3: // %partword.cmpxchg.end
@@ -2578,12 +2578,12 @@ define i16 @acquire_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB55_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB55_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB55_1;
 ; SM60-NEXT:  $L__BB55_3: // %partword.cmpxchg.end
@@ -2623,12 +2623,12 @@ define i16 @acquire_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB56_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB56_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB56_1;
 ; SM60-NEXT:  $L__BB56_3: // %partword.cmpxchg.end
@@ -2668,12 +2668,12 @@ define i16 @acquire_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB57_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB57_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB57_1;
 ; SM60-NEXT:  $L__BB57_3: // %partword.cmpxchg.end
@@ -2713,12 +2713,12 @@ define i16 @acquire_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB58_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB58_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB58_1;
 ; SM60-NEXT:  $L__BB58_3: // %partword.cmpxchg.end
@@ -2758,12 +2758,12 @@ define i16 @acquire_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB59_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB59_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB59_1;
 ; SM60-NEXT:  $L__BB59_3: // %partword.cmpxchg.end
@@ -2804,12 +2804,12 @@ define i16 @acquire_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB60_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB60_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB60_1;
 ; SM60-NEXT:  $L__BB60_3: // %partword.cmpxchg.end
@@ -2850,12 +2850,12 @@ define i16 @acquire_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB61_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB61_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB61_1;
 ; SM60-NEXT:  $L__BB61_3: // %partword.cmpxchg.end
@@ -2896,12 +2896,12 @@ define i16 @acquire_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB62_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB62_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB62_1;
 ; SM60-NEXT:  $L__BB62_3: // %partword.cmpxchg.end
@@ -2942,12 +2942,12 @@ define i16 @release_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB63_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB63_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB63_1;
 ; SM60-NEXT:  $L__BB63_3: // %partword.cmpxchg.end
@@ -2987,12 +2987,12 @@ define i16 @release_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB64_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB64_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB64_1;
 ; SM60-NEXT:  $L__BB64_3: // %partword.cmpxchg.end
@@ -3032,12 +3032,12 @@ define i16 @release_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB65_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB65_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB65_1;
 ; SM60-NEXT:  $L__BB65_3: // %partword.cmpxchg.end
@@ -3077,12 +3077,12 @@ define i16 @release_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB66_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB66_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB66_1;
 ; SM60-NEXT:  $L__BB66_3: // %partword.cmpxchg.end
@@ -3123,12 +3123,12 @@ define i16 @release_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB67_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB67_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB67_1;
 ; SM60-NEXT:  $L__BB67_3: // %partword.cmpxchg.end
@@ -3169,12 +3169,12 @@ define i16 @release_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB68_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB68_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB68_1;
 ; SM60-NEXT:  $L__BB68_3: // %partword.cmpxchg.end
@@ -3215,12 +3215,12 @@ define i16 @release_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB69_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB69_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB69_1;
 ; SM60-NEXT:  $L__BB69_3: // %partword.cmpxchg.end
@@ -3261,12 +3261,12 @@ define i16 @release_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB70_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB70_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB70_1;
 ; SM60-NEXT:  $L__BB70_3: // %partword.cmpxchg.end
@@ -3307,12 +3307,12 @@ define i16 @release_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB71_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB71_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB71_1;
 ; SM60-NEXT:  $L__BB71_3: // %partword.cmpxchg.end
@@ -3353,12 +3353,12 @@ define i16 @acq_rel_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB72_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB72_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB72_1;
 ; SM60-NEXT:  $L__BB72_3: // %partword.cmpxchg.end
@@ -3399,12 +3399,12 @@ define i16 @acq_rel_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB73_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB73_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB73_1;
 ; SM60-NEXT:  $L__BB73_3: // %partword.cmpxchg.end
@@ -3445,12 +3445,12 @@ define i16 @acq_rel_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB74_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB74_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB74_1;
 ; SM60-NEXT:  $L__BB74_3: // %partword.cmpxchg.end
@@ -3491,12 +3491,12 @@ define i16 @acq_rel_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB75_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB75_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB75_1;
 ; SM60-NEXT:  $L__BB75_3: // %partword.cmpxchg.end
@@ -3537,12 +3537,12 @@ define i16 @acq_rel_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB76_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB76_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB76_1;
 ; SM60-NEXT:  $L__BB76_3: // %partword.cmpxchg.end
@@ -3583,12 +3583,12 @@ define i16 @acq_rel_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB77_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB77_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB77_1;
 ; SM60-NEXT:  $L__BB77_3: // %partword.cmpxchg.end
@@ -3629,12 +3629,12 @@ define i16 @acq_rel_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB78_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB78_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB78_1;
 ; SM60-NEXT:  $L__BB78_3: // %partword.cmpxchg.end
@@ -3675,12 +3675,12 @@ define i16 @acq_rel_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB79_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB79_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB79_1;
 ; SM60-NEXT:  $L__BB79_3: // %partword.cmpxchg.end
@@ -3721,12 +3721,12 @@ define i16 @acq_rel_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB80_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB80_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB80_1;
 ; SM60-NEXT:  $L__BB80_3: // %partword.cmpxchg.end
@@ -3767,12 +3767,12 @@ define i16 @seq_cst_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB81_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB81_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB81_1;
 ; SM60-NEXT:  $L__BB81_3: // %partword.cmpxchg.end
@@ -3813,12 +3813,12 @@ define i16 @seq_cst_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB82_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB82_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB82_1;
 ; SM60-NEXT:  $L__BB82_3: // %partword.cmpxchg.end
@@ -3859,12 +3859,12 @@ define i16 @seq_cst_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB83_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB83_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB83_1;
 ; SM60-NEXT:  $L__BB83_3: // %partword.cmpxchg.end
@@ -3905,12 +3905,12 @@ define i16 @seq_cst_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB84_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB84_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB84_1;
 ; SM60-NEXT:  $L__BB84_3: // %partword.cmpxchg.end
@@ -3951,12 +3951,12 @@ define i16 @seq_cst_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB85_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB85_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB85_1;
 ; SM60-NEXT:  $L__BB85_3: // %partword.cmpxchg.end
@@ -3997,12 +3997,12 @@ define i16 @seq_cst_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB86_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB86_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB86_1;
 ; SM60-NEXT:  $L__BB86_3: // %partword.cmpxchg.end
@@ -4043,12 +4043,12 @@ define i16 @seq_cst_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB87_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB87_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB87_1;
 ; SM60-NEXT:  $L__BB87_3: // %partword.cmpxchg.end
@@ -4089,12 +4089,12 @@ define i16 @seq_cst_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB88_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB88_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB88_1;
 ; SM60-NEXT:  $L__BB88_3: // %partword.cmpxchg.end
@@ -4135,12 +4135,12 @@ define i16 @seq_cst_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM60-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM60-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM60-NEXT:    atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM60-NEXT:    @%p1 bra $L__BB89_3;
 ; SM60-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM60-NEXT:    // in Loop: Header=BB89_1 Depth=1
 ; SM60-NEXT:    and.b32 %r8, %r7, %r2;
-; SM60-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM60-NEXT:    mov.b32 %r19, %r8;
 ; SM60-NEXT:    @%p2 bra $L__BB89_1;
 ; SM60-NEXT:  $L__BB89_3: // %partword.cmpxchg.end

diff  --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll
index 68de517f65bb9..28b258dc2a868 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll
@@ -32,12 +32,12 @@ define i8 @monotonic_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB0_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB0_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB0_1;
 ; SM70-NEXT:  $L__BB0_3: // %partword.cmpxchg.end
@@ -77,12 +77,12 @@ define i8 @monotonic_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB1_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB1_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB1_1;
 ; SM70-NEXT:  $L__BB1_3: // %partword.cmpxchg.end
@@ -122,12 +122,12 @@ define i8 @monotonic_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %ne
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB2_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB2_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB2_1;
 ; SM70-NEXT:  $L__BB2_3: // %partword.cmpxchg.end
@@ -167,12 +167,12 @@ define i8 @monotonic_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB3_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB3_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB3_1;
 ; SM70-NEXT:  $L__BB3_3: // %partword.cmpxchg.end
@@ -213,12 +213,12 @@ define i8 @monotonic_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB4_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB4_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB4_1;
 ; SM70-NEXT:  $L__BB4_3: // %partword.cmpxchg.end
@@ -259,12 +259,12 @@ define i8 @monotonic_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB5_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB5_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB5_1;
 ; SM70-NEXT:  $L__BB5_3: // %partword.cmpxchg.end
@@ -306,12 +306,12 @@ define i8 @monotonic_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB6_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB6_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB6_1;
 ; SM70-NEXT:  $L__BB6_3: // %partword.cmpxchg.end
@@ -353,12 +353,12 @@ define i8 @monotonic_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB7_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB7_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB7_1;
 ; SM70-NEXT:  $L__BB7_3: // %partword.cmpxchg.end
@@ -400,12 +400,12 @@ define i8 @monotonic_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB8_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB8_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB8_1;
 ; SM70-NEXT:  $L__BB8_3: // %partword.cmpxchg.end
@@ -446,12 +446,12 @@ define i8 @acquire_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB9_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB9_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB9_1;
 ; SM70-NEXT:  $L__BB9_3: // %partword.cmpxchg.end
@@ -492,12 +492,12 @@ define i8 @acquire_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB10_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB10_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB10_1;
 ; SM70-NEXT:  $L__BB10_3: // %partword.cmpxchg.end
@@ -538,12 +538,12 @@ define i8 @acquire_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB11_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB11_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB11_1;
 ; SM70-NEXT:  $L__BB11_3: // %partword.cmpxchg.end
@@ -584,12 +584,12 @@ define i8 @acquire_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB12_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB12_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB12_1;
 ; SM70-NEXT:  $L__BB12_3: // %partword.cmpxchg.end
@@ -630,12 +630,12 @@ define i8 @acquire_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB13_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB13_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB13_1;
 ; SM70-NEXT:  $L__BB13_3: // %partword.cmpxchg.end
@@ -676,12 +676,12 @@ define i8 @acquire_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB14_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB14_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB14_1;
 ; SM70-NEXT:  $L__BB14_3: // %partword.cmpxchg.end
@@ -723,12 +723,12 @@ define i8 @acquire_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB15_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB15_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB15_1;
 ; SM70-NEXT:  $L__BB15_3: // %partword.cmpxchg.end
@@ -770,12 +770,12 @@ define i8 @acquire_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB16_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB16_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB16_1;
 ; SM70-NEXT:  $L__BB16_3: // %partword.cmpxchg.end
@@ -817,12 +817,12 @@ define i8 @acquire_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB17_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB17_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB17_1;
 ; SM70-NEXT:  $L__BB17_3: // %partword.cmpxchg.end
@@ -864,12 +864,12 @@ define i8 @release_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB18_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB18_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB18_1;
 ; SM70-NEXT:  $L__BB18_3: // %partword.cmpxchg.end
@@ -910,12 +910,12 @@ define i8 @release_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB19_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB19_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB19_1;
 ; SM70-NEXT:  $L__BB19_3: // %partword.cmpxchg.end
@@ -956,12 +956,12 @@ define i8 @release_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB20_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB20_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB20_1;
 ; SM70-NEXT:  $L__BB20_3: // %partword.cmpxchg.end
@@ -1002,12 +1002,12 @@ define i8 @release_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB21_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB21_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB21_1;
 ; SM70-NEXT:  $L__BB21_3: // %partword.cmpxchg.end
@@ -1049,12 +1049,12 @@ define i8 @release_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB22_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB22_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB22_1;
 ; SM70-NEXT:  $L__BB22_3: // %partword.cmpxchg.end
@@ -1096,12 +1096,12 @@ define i8 @release_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB23_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB23_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB23_1;
 ; SM70-NEXT:  $L__BB23_3: // %partword.cmpxchg.end
@@ -1143,12 +1143,12 @@ define i8 @release_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB24_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB24_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB24_1;
 ; SM70-NEXT:  $L__BB24_3: // %partword.cmpxchg.end
@@ -1190,12 +1190,12 @@ define i8 @release_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB25_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB25_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB25_1;
 ; SM70-NEXT:  $L__BB25_3: // %partword.cmpxchg.end
@@ -1237,12 +1237,12 @@ define i8 @release_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB26_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB26_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB26_1;
 ; SM70-NEXT:  $L__BB26_3: // %partword.cmpxchg.end
@@ -1284,12 +1284,12 @@ define i8 @acq_rel_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB27_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB27_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB27_1;
 ; SM70-NEXT:  $L__BB27_3: // %partword.cmpxchg.end
@@ -1331,12 +1331,12 @@ define i8 @acq_rel_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB28_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB28_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB28_1;
 ; SM70-NEXT:  $L__BB28_3: // %partword.cmpxchg.end
@@ -1378,12 +1378,12 @@ define i8 @acq_rel_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB29_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB29_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB29_1;
 ; SM70-NEXT:  $L__BB29_3: // %partword.cmpxchg.end
@@ -1425,12 +1425,12 @@ define i8 @acq_rel_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB30_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB30_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB30_1;
 ; SM70-NEXT:  $L__BB30_3: // %partword.cmpxchg.end
@@ -1472,12 +1472,12 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB31_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB31_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB31_1;
 ; SM70-NEXT:  $L__BB31_3: // %partword.cmpxchg.end
@@ -1519,12 +1519,12 @@ define i8 @acq_rel_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB32_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB32_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB32_1;
 ; SM70-NEXT:  $L__BB32_3: // %partword.cmpxchg.end
@@ -1566,12 +1566,12 @@ define i8 @acq_rel_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB33_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB33_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB33_1;
 ; SM70-NEXT:  $L__BB33_3: // %partword.cmpxchg.end
@@ -1613,12 +1613,12 @@ define i8 @acq_rel_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB34_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB34_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB34_1;
 ; SM70-NEXT:  $L__BB34_3: // %partword.cmpxchg.end
@@ -1660,12 +1660,12 @@ define i8 @acq_rel_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB35_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB35_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB35_1;
 ; SM70-NEXT:  $L__BB35_3: // %partword.cmpxchg.end
@@ -1707,12 +1707,12 @@ define i8 @seq_cst_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB36_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB36_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB36_1;
 ; SM70-NEXT:  $L__BB36_3: // %partword.cmpxchg.end
@@ -1754,12 +1754,12 @@ define i8 @seq_cst_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB37_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB37_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB37_1;
 ; SM70-NEXT:  $L__BB37_3: // %partword.cmpxchg.end
@@ -1801,12 +1801,12 @@ define i8 @seq_cst_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB38_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB38_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB38_1;
 ; SM70-NEXT:  $L__BB38_3: // %partword.cmpxchg.end
@@ -1848,12 +1848,12 @@ define i8 @seq_cst_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB39_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB39_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB39_1;
 ; SM70-NEXT:  $L__BB39_3: // %partword.cmpxchg.end
@@ -1895,12 +1895,12 @@ define i8 @seq_cst_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB40_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB40_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB40_1;
 ; SM70-NEXT:  $L__BB40_3: // %partword.cmpxchg.end
@@ -1942,12 +1942,12 @@ define i8 @seq_cst_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB41_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB41_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB41_1;
 ; SM70-NEXT:  $L__BB41_3: // %partword.cmpxchg.end
@@ -1989,12 +1989,12 @@ define i8 @seq_cst_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB42_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB42_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB42_1;
 ; SM70-NEXT:  $L__BB42_3: // %partword.cmpxchg.end
@@ -2036,12 +2036,12 @@ define i8 @seq_cst_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB43_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB43_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB43_1;
 ; SM70-NEXT:  $L__BB43_3: // %partword.cmpxchg.end
@@ -2083,12 +2083,12 @@ define i8 @seq_cst_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB44_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB44_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB44_1;
 ; SM70-NEXT:  $L__BB44_3: // %partword.cmpxchg.end
@@ -2128,12 +2128,12 @@ define i16 @monotonic_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB45_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB45_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB45_1;
 ; SM70-NEXT:  $L__BB45_3: // %partword.cmpxchg.end
@@ -2172,12 +2172,12 @@ define i16 @monotonic_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB46_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB46_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB46_1;
 ; SM70-NEXT:  $L__BB46_3: // %partword.cmpxchg.end
@@ -2216,12 +2216,12 @@ define i16 @monotonic_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB47_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB47_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB47_1;
 ; SM70-NEXT:  $L__BB47_3: // %partword.cmpxchg.end
@@ -2260,12 +2260,12 @@ define i16 @monotonic_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB48_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB48_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB48_1;
 ; SM70-NEXT:  $L__BB48_3: // %partword.cmpxchg.end
@@ -2305,12 +2305,12 @@ define i16 @monotonic_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB49_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB49_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB49_1;
 ; SM70-NEXT:  $L__BB49_3: // %partword.cmpxchg.end
@@ -2350,12 +2350,12 @@ define i16 @monotonic_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB50_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB50_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB50_1;
 ; SM70-NEXT:  $L__BB50_3: // %partword.cmpxchg.end
@@ -2396,12 +2396,12 @@ define i16 @monotonic_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB51_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB51_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB51_1;
 ; SM70-NEXT:  $L__BB51_3: // %partword.cmpxchg.end
@@ -2442,12 +2442,12 @@ define i16 @monotonic_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB52_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB52_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB52_1;
 ; SM70-NEXT:  $L__BB52_3: // %partword.cmpxchg.end
@@ -2488,12 +2488,12 @@ define i16 @monotonic_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB53_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB53_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB53_1;
 ; SM70-NEXT:  $L__BB53_3: // %partword.cmpxchg.end
@@ -2533,12 +2533,12 @@ define i16 @acquire_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB54_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB54_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB54_1;
 ; SM70-NEXT:  $L__BB54_3: // %partword.cmpxchg.end
@@ -2578,12 +2578,12 @@ define i16 @acquire_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB55_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB55_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB55_1;
 ; SM70-NEXT:  $L__BB55_3: // %partword.cmpxchg.end
@@ -2623,12 +2623,12 @@ define i16 @acquire_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB56_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB56_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB56_1;
 ; SM70-NEXT:  $L__BB56_3: // %partword.cmpxchg.end
@@ -2668,12 +2668,12 @@ define i16 @acquire_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB57_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB57_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB57_1;
 ; SM70-NEXT:  $L__BB57_3: // %partword.cmpxchg.end
@@ -2713,12 +2713,12 @@ define i16 @acquire_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB58_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB58_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB58_1;
 ; SM70-NEXT:  $L__BB58_3: // %partword.cmpxchg.end
@@ -2758,12 +2758,12 @@ define i16 @acquire_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB59_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB59_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB59_1;
 ; SM70-NEXT:  $L__BB59_3: // %partword.cmpxchg.end
@@ -2804,12 +2804,12 @@ define i16 @acquire_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB60_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB60_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB60_1;
 ; SM70-NEXT:  $L__BB60_3: // %partword.cmpxchg.end
@@ -2850,12 +2850,12 @@ define i16 @acquire_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB61_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB61_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB61_1;
 ; SM70-NEXT:  $L__BB61_3: // %partword.cmpxchg.end
@@ -2896,12 +2896,12 @@ define i16 @acquire_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB62_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB62_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB62_1;
 ; SM70-NEXT:  $L__BB62_3: // %partword.cmpxchg.end
@@ -2942,12 +2942,12 @@ define i16 @release_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB63_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB63_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB63_1;
 ; SM70-NEXT:  $L__BB63_3: // %partword.cmpxchg.end
@@ -2987,12 +2987,12 @@ define i16 @release_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB64_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB64_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB64_1;
 ; SM70-NEXT:  $L__BB64_3: // %partword.cmpxchg.end
@@ -3032,12 +3032,12 @@ define i16 @release_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB65_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB65_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB65_1;
 ; SM70-NEXT:  $L__BB65_3: // %partword.cmpxchg.end
@@ -3077,12 +3077,12 @@ define i16 @release_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB66_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB66_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB66_1;
 ; SM70-NEXT:  $L__BB66_3: // %partword.cmpxchg.end
@@ -3123,12 +3123,12 @@ define i16 @release_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB67_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB67_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB67_1;
 ; SM70-NEXT:  $L__BB67_3: // %partword.cmpxchg.end
@@ -3169,12 +3169,12 @@ define i16 @release_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB68_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB68_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB68_1;
 ; SM70-NEXT:  $L__BB68_3: // %partword.cmpxchg.end
@@ -3215,12 +3215,12 @@ define i16 @release_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB69_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB69_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB69_1;
 ; SM70-NEXT:  $L__BB69_3: // %partword.cmpxchg.end
@@ -3261,12 +3261,12 @@ define i16 @release_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB70_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB70_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB70_1;
 ; SM70-NEXT:  $L__BB70_3: // %partword.cmpxchg.end
@@ -3307,12 +3307,12 @@ define i16 @release_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB71_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB71_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB71_1;
 ; SM70-NEXT:  $L__BB71_3: // %partword.cmpxchg.end
@@ -3353,12 +3353,12 @@ define i16 @acq_rel_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB72_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB72_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB72_1;
 ; SM70-NEXT:  $L__BB72_3: // %partword.cmpxchg.end
@@ -3399,12 +3399,12 @@ define i16 @acq_rel_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB73_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB73_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB73_1;
 ; SM70-NEXT:  $L__BB73_3: // %partword.cmpxchg.end
@@ -3445,12 +3445,12 @@ define i16 @acq_rel_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB74_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB74_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB74_1;
 ; SM70-NEXT:  $L__BB74_3: // %partword.cmpxchg.end
@@ -3491,12 +3491,12 @@ define i16 @acq_rel_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB75_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB75_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB75_1;
 ; SM70-NEXT:  $L__BB75_3: // %partword.cmpxchg.end
@@ -3537,12 +3537,12 @@ define i16 @acq_rel_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB76_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB76_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB76_1;
 ; SM70-NEXT:  $L__BB76_3: // %partword.cmpxchg.end
@@ -3583,12 +3583,12 @@ define i16 @acq_rel_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB77_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB77_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB77_1;
 ; SM70-NEXT:  $L__BB77_3: // %partword.cmpxchg.end
@@ -3629,12 +3629,12 @@ define i16 @acq_rel_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB78_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB78_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB78_1;
 ; SM70-NEXT:  $L__BB78_3: // %partword.cmpxchg.end
@@ -3675,12 +3675,12 @@ define i16 @acq_rel_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB79_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB79_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB79_1;
 ; SM70-NEXT:  $L__BB79_3: // %partword.cmpxchg.end
@@ -3721,12 +3721,12 @@ define i16 @acq_rel_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB80_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB80_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB80_1;
 ; SM70-NEXT:  $L__BB80_3: // %partword.cmpxchg.end
@@ -3767,12 +3767,12 @@ define i16 @seq_cst_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB81_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB81_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB81_1;
 ; SM70-NEXT:  $L__BB81_3: // %partword.cmpxchg.end
@@ -3813,12 +3813,12 @@ define i16 @seq_cst_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB82_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB82_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB82_1;
 ; SM70-NEXT:  $L__BB82_3: // %partword.cmpxchg.end
@@ -3859,12 +3859,12 @@ define i16 @seq_cst_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB83_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB83_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB83_1;
 ; SM70-NEXT:  $L__BB83_3: // %partword.cmpxchg.end
@@ -3905,12 +3905,12 @@ define i16 @seq_cst_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB84_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB84_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB84_1;
 ; SM70-NEXT:  $L__BB84_3: // %partword.cmpxchg.end
@@ -3951,12 +3951,12 @@ define i16 @seq_cst_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB85_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB85_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB85_1;
 ; SM70-NEXT:  $L__BB85_3: // %partword.cmpxchg.end
@@ -3997,12 +3997,12 @@ define i16 @seq_cst_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB86_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB86_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB86_1;
 ; SM70-NEXT:  $L__BB86_3: // %partword.cmpxchg.end
@@ -4043,12 +4043,12 @@ define i16 @seq_cst_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB87_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB87_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB87_1;
 ; SM70-NEXT:  $L__BB87_3: // %partword.cmpxchg.end
@@ -4089,12 +4089,12 @@ define i16 @seq_cst_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB88_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB88_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB88_1;
 ; SM70-NEXT:  $L__BB88_3: // %partword.cmpxchg.end
@@ -4135,12 +4135,12 @@ define i16 @seq_cst_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB89_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB89_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB89_1;
 ; SM70-NEXT:  $L__BB89_3: // %partword.cmpxchg.end

diff  --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll
index e20f988577282..368fe3f036c9e 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll
@@ -32,12 +32,12 @@ define i8 @monotonic_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB0_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB0_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB0_1;
 ; SM90-NEXT:  $L__BB0_3: // %partword.cmpxchg.end
@@ -77,12 +77,12 @@ define i8 @monotonic_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB1_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB1_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB1_1;
 ; SM90-NEXT:  $L__BB1_3: // %partword.cmpxchg.end
@@ -122,12 +122,12 @@ define i8 @monotonic_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %ne
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB2_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB2_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB2_1;
 ; SM90-NEXT:  $L__BB2_3: // %partword.cmpxchg.end
@@ -167,12 +167,12 @@ define i8 @monotonic_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB3_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB3_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB3_1;
 ; SM90-NEXT:  $L__BB3_3: // %partword.cmpxchg.end
@@ -213,12 +213,12 @@ define i8 @monotonic_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB4_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB4_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB4_1;
 ; SM90-NEXT:  $L__BB4_3: // %partword.cmpxchg.end
@@ -259,12 +259,12 @@ define i8 @monotonic_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB5_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB5_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB5_1;
 ; SM90-NEXT:  $L__BB5_3: // %partword.cmpxchg.end
@@ -306,12 +306,12 @@ define i8 @monotonic_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB6_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB6_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB6_1;
 ; SM90-NEXT:  $L__BB6_3: // %partword.cmpxchg.end
@@ -353,12 +353,12 @@ define i8 @monotonic_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB7_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB7_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB7_1;
 ; SM90-NEXT:  $L__BB7_3: // %partword.cmpxchg.end
@@ -400,12 +400,12 @@ define i8 @monotonic_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB8_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB8_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB8_1;
 ; SM90-NEXT:  $L__BB8_3: // %partword.cmpxchg.end
@@ -446,12 +446,12 @@ define i8 @acquire_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB9_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB9_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB9_1;
 ; SM90-NEXT:  $L__BB9_3: // %partword.cmpxchg.end
@@ -492,12 +492,12 @@ define i8 @acquire_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB10_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB10_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB10_1;
 ; SM90-NEXT:  $L__BB10_3: // %partword.cmpxchg.end
@@ -538,12 +538,12 @@ define i8 @acquire_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB11_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB11_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB11_1;
 ; SM90-NEXT:  $L__BB11_3: // %partword.cmpxchg.end
@@ -584,12 +584,12 @@ define i8 @acquire_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB12_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB12_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB12_1;
 ; SM90-NEXT:  $L__BB12_3: // %partword.cmpxchg.end
@@ -630,12 +630,12 @@ define i8 @acquire_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB13_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB13_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB13_1;
 ; SM90-NEXT:  $L__BB13_3: // %partword.cmpxchg.end
@@ -676,12 +676,12 @@ define i8 @acquire_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB14_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB14_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB14_1;
 ; SM90-NEXT:  $L__BB14_3: // %partword.cmpxchg.end
@@ -723,12 +723,12 @@ define i8 @acquire_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB15_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB15_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB15_1;
 ; SM90-NEXT:  $L__BB15_3: // %partword.cmpxchg.end
@@ -770,12 +770,12 @@ define i8 @acquire_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB16_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB16_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB16_1;
 ; SM90-NEXT:  $L__BB16_3: // %partword.cmpxchg.end
@@ -817,12 +817,12 @@ define i8 @acquire_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB17_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB17_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB17_1;
 ; SM90-NEXT:  $L__BB17_3: // %partword.cmpxchg.end
@@ -864,12 +864,12 @@ define i8 @release_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB18_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB18_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB18_1;
 ; SM90-NEXT:  $L__BB18_3: // %partword.cmpxchg.end
@@ -910,12 +910,12 @@ define i8 @release_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB19_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB19_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB19_1;
 ; SM90-NEXT:  $L__BB19_3: // %partword.cmpxchg.end
@@ -956,12 +956,12 @@ define i8 @release_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB20_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB20_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB20_1;
 ; SM90-NEXT:  $L__BB20_3: // %partword.cmpxchg.end
@@ -1002,12 +1002,12 @@ define i8 @release_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB21_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB21_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB21_1;
 ; SM90-NEXT:  $L__BB21_3: // %partword.cmpxchg.end
@@ -1049,12 +1049,12 @@ define i8 @release_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB22_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB22_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB22_1;
 ; SM90-NEXT:  $L__BB22_3: // %partword.cmpxchg.end
@@ -1096,12 +1096,12 @@ define i8 @release_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB23_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB23_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB23_1;
 ; SM90-NEXT:  $L__BB23_3: // %partword.cmpxchg.end
@@ -1143,12 +1143,12 @@ define i8 @release_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB24_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB24_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB24_1;
 ; SM90-NEXT:  $L__BB24_3: // %partword.cmpxchg.end
@@ -1190,12 +1190,12 @@ define i8 @release_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB25_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB25_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB25_1;
 ; SM90-NEXT:  $L__BB25_3: // %partword.cmpxchg.end
@@ -1237,12 +1237,12 @@ define i8 @release_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB26_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB26_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB26_1;
 ; SM90-NEXT:  $L__BB26_3: // %partword.cmpxchg.end
@@ -1284,12 +1284,12 @@ define i8 @acq_rel_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB27_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB27_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB27_1;
 ; SM90-NEXT:  $L__BB27_3: // %partword.cmpxchg.end
@@ -1331,12 +1331,12 @@ define i8 @acq_rel_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB28_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB28_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB28_1;
 ; SM90-NEXT:  $L__BB28_3: // %partword.cmpxchg.end
@@ -1378,12 +1378,12 @@ define i8 @acq_rel_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB29_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB29_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB29_1;
 ; SM90-NEXT:  $L__BB29_3: // %partword.cmpxchg.end
@@ -1425,12 +1425,12 @@ define i8 @acq_rel_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB30_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB30_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB30_1;
 ; SM90-NEXT:  $L__BB30_3: // %partword.cmpxchg.end
@@ -1472,12 +1472,12 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB31_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB31_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB31_1;
 ; SM90-NEXT:  $L__BB31_3: // %partword.cmpxchg.end
@@ -1519,12 +1519,12 @@ define i8 @acq_rel_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB32_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB32_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB32_1;
 ; SM90-NEXT:  $L__BB32_3: // %partword.cmpxchg.end
@@ -1566,12 +1566,12 @@ define i8 @acq_rel_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB33_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB33_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB33_1;
 ; SM90-NEXT:  $L__BB33_3: // %partword.cmpxchg.end
@@ -1613,12 +1613,12 @@ define i8 @acq_rel_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB34_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB34_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB34_1;
 ; SM90-NEXT:  $L__BB34_3: // %partword.cmpxchg.end
@@ -1660,12 +1660,12 @@ define i8 @acq_rel_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB35_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB35_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB35_1;
 ; SM90-NEXT:  $L__BB35_3: // %partword.cmpxchg.end
@@ -1707,12 +1707,12 @@ define i8 @seq_cst_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB36_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB36_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB36_1;
 ; SM90-NEXT:  $L__BB36_3: // %partword.cmpxchg.end
@@ -1754,12 +1754,12 @@ define i8 @seq_cst_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB37_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB37_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB37_1;
 ; SM90-NEXT:  $L__BB37_3: // %partword.cmpxchg.end
@@ -1801,12 +1801,12 @@ define i8 @seq_cst_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB38_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB38_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB38_1;
 ; SM90-NEXT:  $L__BB38_3: // %partword.cmpxchg.end
@@ -1848,12 +1848,12 @@ define i8 @seq_cst_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB39_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB39_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB39_1;
 ; SM90-NEXT:  $L__BB39_3: // %partword.cmpxchg.end
@@ -1895,12 +1895,12 @@ define i8 @seq_cst_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB40_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB40_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB40_1;
 ; SM90-NEXT:  $L__BB40_3: // %partword.cmpxchg.end
@@ -1942,12 +1942,12 @@ define i8 @seq_cst_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB41_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB41_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB41_1;
 ; SM90-NEXT:  $L__BB41_3: // %partword.cmpxchg.end
@@ -1989,12 +1989,12 @@ define i8 @seq_cst_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB42_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB42_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB42_1;
 ; SM90-NEXT:  $L__BB42_3: // %partword.cmpxchg.end
@@ -2036,12 +2036,12 @@ define i8 @seq_cst_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB43_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB43_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB43_1;
 ; SM90-NEXT:  $L__BB43_3: // %partword.cmpxchg.end
@@ -2083,12 +2083,12 @@ define i8 @seq_cst_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
 ; SM90-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM90-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM90-NEXT:    @%p1 bra $L__BB44_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB44_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM90-NEXT:    mov.b32 %r20, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB44_1;
 ; SM90-NEXT:  $L__BB44_3: // %partword.cmpxchg.end
@@ -2128,12 +2128,12 @@ define i16 @monotonic_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB45_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB45_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB45_1;
 ; SM90-NEXT:  $L__BB45_3: // %partword.cmpxchg.end
@@ -2172,12 +2172,12 @@ define i16 @monotonic_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB46_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB46_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB46_1;
 ; SM90-NEXT:  $L__BB46_3: // %partword.cmpxchg.end
@@ -2216,12 +2216,12 @@ define i16 @monotonic_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB47_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB47_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB47_1;
 ; SM90-NEXT:  $L__BB47_3: // %partword.cmpxchg.end
@@ -2260,12 +2260,12 @@ define i16 @monotonic_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB48_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB48_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB48_1;
 ; SM90-NEXT:  $L__BB48_3: // %partword.cmpxchg.end
@@ -2305,12 +2305,12 @@ define i16 @monotonic_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB49_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB49_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB49_1;
 ; SM90-NEXT:  $L__BB49_3: // %partword.cmpxchg.end
@@ -2350,12 +2350,12 @@ define i16 @monotonic_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB50_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB50_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB50_1;
 ; SM90-NEXT:  $L__BB50_3: // %partword.cmpxchg.end
@@ -2396,12 +2396,12 @@ define i16 @monotonic_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB51_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB51_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB51_1;
 ; SM90-NEXT:  $L__BB51_3: // %partword.cmpxchg.end
@@ -2442,12 +2442,12 @@ define i16 @monotonic_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB52_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB52_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB52_1;
 ; SM90-NEXT:  $L__BB52_3: // %partword.cmpxchg.end
@@ -2488,12 +2488,12 @@ define i16 @monotonic_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB53_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB53_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB53_1;
 ; SM90-NEXT:  $L__BB53_3: // %partword.cmpxchg.end
@@ -2533,12 +2533,12 @@ define i16 @acquire_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB54_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB54_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB54_1;
 ; SM90-NEXT:  $L__BB54_3: // %partword.cmpxchg.end
@@ -2578,12 +2578,12 @@ define i16 @acquire_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB55_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB55_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB55_1;
 ; SM90-NEXT:  $L__BB55_3: // %partword.cmpxchg.end
@@ -2623,12 +2623,12 @@ define i16 @acquire_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB56_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB56_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB56_1;
 ; SM90-NEXT:  $L__BB56_3: // %partword.cmpxchg.end
@@ -2668,12 +2668,12 @@ define i16 @acquire_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB57_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB57_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB57_1;
 ; SM90-NEXT:  $L__BB57_3: // %partword.cmpxchg.end
@@ -2713,12 +2713,12 @@ define i16 @acquire_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB58_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB58_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB58_1;
 ; SM90-NEXT:  $L__BB58_3: // %partword.cmpxchg.end
@@ -2758,12 +2758,12 @@ define i16 @acquire_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB59_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB59_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB59_1;
 ; SM90-NEXT:  $L__BB59_3: // %partword.cmpxchg.end
@@ -2804,12 +2804,12 @@ define i16 @acquire_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB60_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB60_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB60_1;
 ; SM90-NEXT:  $L__BB60_3: // %partword.cmpxchg.end
@@ -2850,12 +2850,12 @@ define i16 @acquire_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB61_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB61_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB61_1;
 ; SM90-NEXT:  $L__BB61_3: // %partword.cmpxchg.end
@@ -2896,12 +2896,12 @@ define i16 @acquire_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB62_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB62_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB62_1;
 ; SM90-NEXT:  $L__BB62_3: // %partword.cmpxchg.end
@@ -2942,12 +2942,12 @@ define i16 @release_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB63_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB63_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB63_1;
 ; SM90-NEXT:  $L__BB63_3: // %partword.cmpxchg.end
@@ -2987,12 +2987,12 @@ define i16 @release_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB64_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB64_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB64_1;
 ; SM90-NEXT:  $L__BB64_3: // %partword.cmpxchg.end
@@ -3032,12 +3032,12 @@ define i16 @release_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB65_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB65_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB65_1;
 ; SM90-NEXT:  $L__BB65_3: // %partword.cmpxchg.end
@@ -3077,12 +3077,12 @@ define i16 @release_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB66_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB66_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB66_1;
 ; SM90-NEXT:  $L__BB66_3: // %partword.cmpxchg.end
@@ -3123,12 +3123,12 @@ define i16 @release_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB67_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB67_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB67_1;
 ; SM90-NEXT:  $L__BB67_3: // %partword.cmpxchg.end
@@ -3169,12 +3169,12 @@ define i16 @release_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB68_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB68_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB68_1;
 ; SM90-NEXT:  $L__BB68_3: // %partword.cmpxchg.end
@@ -3215,12 +3215,12 @@ define i16 @release_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB69_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB69_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB69_1;
 ; SM90-NEXT:  $L__BB69_3: // %partword.cmpxchg.end
@@ -3261,12 +3261,12 @@ define i16 @release_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB70_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB70_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB70_1;
 ; SM90-NEXT:  $L__BB70_3: // %partword.cmpxchg.end
@@ -3307,12 +3307,12 @@ define i16 @release_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB71_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB71_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB71_1;
 ; SM90-NEXT:  $L__BB71_3: // %partword.cmpxchg.end
@@ -3353,12 +3353,12 @@ define i16 @acq_rel_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB72_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB72_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB72_1;
 ; SM90-NEXT:  $L__BB72_3: // %partword.cmpxchg.end
@@ -3399,12 +3399,12 @@ define i16 @acq_rel_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB73_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB73_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB73_1;
 ; SM90-NEXT:  $L__BB73_3: // %partword.cmpxchg.end
@@ -3445,12 +3445,12 @@ define i16 @acq_rel_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB74_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB74_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB74_1;
 ; SM90-NEXT:  $L__BB74_3: // %partword.cmpxchg.end
@@ -3491,12 +3491,12 @@ define i16 @acq_rel_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB75_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB75_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB75_1;
 ; SM90-NEXT:  $L__BB75_3: // %partword.cmpxchg.end
@@ -3537,12 +3537,12 @@ define i16 @acq_rel_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB76_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB76_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB76_1;
 ; SM90-NEXT:  $L__BB76_3: // %partword.cmpxchg.end
@@ -3583,12 +3583,12 @@ define i16 @acq_rel_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB77_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB77_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB77_1;
 ; SM90-NEXT:  $L__BB77_3: // %partword.cmpxchg.end
@@ -3629,12 +3629,12 @@ define i16 @acq_rel_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB78_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB78_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB78_1;
 ; SM90-NEXT:  $L__BB78_3: // %partword.cmpxchg.end
@@ -3675,12 +3675,12 @@ define i16 @acq_rel_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB79_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB79_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB79_1;
 ; SM90-NEXT:  $L__BB79_3: // %partword.cmpxchg.end
@@ -3721,12 +3721,12 @@ define i16 @acq_rel_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB80_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB80_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB80_1;
 ; SM90-NEXT:  $L__BB80_3: // %partword.cmpxchg.end
@@ -3767,12 +3767,12 @@ define i16 @seq_cst_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB81_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB81_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB81_1;
 ; SM90-NEXT:  $L__BB81_3: // %partword.cmpxchg.end
@@ -3813,12 +3813,12 @@ define i16 @seq_cst_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB82_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB82_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB82_1;
 ; SM90-NEXT:  $L__BB82_3: // %partword.cmpxchg.end
@@ -3859,12 +3859,12 @@ define i16 @seq_cst_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB83_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB83_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB83_1;
 ; SM90-NEXT:  $L__BB83_3: // %partword.cmpxchg.end
@@ -3905,12 +3905,12 @@ define i16 @seq_cst_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB84_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB84_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB84_1;
 ; SM90-NEXT:  $L__BB84_3: // %partword.cmpxchg.end
@@ -3951,12 +3951,12 @@ define i16 @seq_cst_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB85_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB85_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB85_1;
 ; SM90-NEXT:  $L__BB85_3: // %partword.cmpxchg.end
@@ -3997,12 +3997,12 @@ define i16 @seq_cst_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB86_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB86_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB86_1;
 ; SM90-NEXT:  $L__BB86_3: // %partword.cmpxchg.end
@@ -4043,12 +4043,12 @@ define i16 @seq_cst_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB87_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB87_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB87_1;
 ; SM90-NEXT:  $L__BB87_3: // %partword.cmpxchg.end
@@ -4089,12 +4089,12 @@ define i16 @seq_cst_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB88_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB88_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB88_1;
 ; SM90-NEXT:  $L__BB88_3: // %partword.cmpxchg.end
@@ -4135,12 +4135,12 @@ define i16 @seq_cst_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
 ; SM90-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM90-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM90-NEXT:    atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM90-NEXT:    @%p1 bra $L__BB89_3;
 ; SM90-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM90-NEXT:    // in Loop: Header=BB89_1 Depth=1
 ; SM90-NEXT:    and.b32 %r8, %r7, %r2;
-; SM90-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM90-NEXT:    mov.b32 %r19, %r8;
 ; SM90-NEXT:    @%p2 bra $L__BB89_1;
 ; SM90-NEXT:  $L__BB89_3: // %partword.cmpxchg.end

diff  --git a/llvm/test/CodeGen/NVPTX/cmpxchg.ll b/llvm/test/CodeGen/NVPTX/cmpxchg.ll
index 85414a2ab04e8..25b4c74086dc1 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg.ll
@@ -39,12 +39,12 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
 ; SM30-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM30-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM30-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM30-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM30-NEXT:    @%p1 bra $L__BB0_3;
 ; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM30-NEXT:    // in Loop: Header=BB0_1 Depth=1
 ; SM30-NEXT:    and.b32 %r8, %r7, %r2;
-; SM30-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM30-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM30-NEXT:    mov.b32 %r20, %r8;
 ; SM30-NEXT:    @%p2 bra $L__BB0_1;
 ; SM30-NEXT:  $L__BB0_3: // %partword.cmpxchg.end
@@ -80,12 +80,12 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB0_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB0_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB0_1;
 ; SM70-NEXT:  $L__BB0_3: // %partword.cmpxchg.end
@@ -165,12 +165,12 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
 ; SM30-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM30-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM30-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM30-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM30-NEXT:    @%p1 bra $L__BB1_3;
 ; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM30-NEXT:    // in Loop: Header=BB1_1 Depth=1
 ; SM30-NEXT:    and.b32 %r8, %r7, %r2;
-; SM30-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM30-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM30-NEXT:    mov.b32 %r20, %r8;
 ; SM30-NEXT:    @%p2 bra $L__BB1_1;
 ; SM30-NEXT:  $L__BB1_3: // %partword.cmpxchg.end
@@ -207,12 +207,12 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB1_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB1_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB1_1;
 ; SM70-NEXT:  $L__BB1_3: // %partword.cmpxchg.end
@@ -295,12 +295,12 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
 ; SM30-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM30-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM30-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM30-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM30-NEXT:    @%p1 bra $L__BB2_3;
 ; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM30-NEXT:    // in Loop: Header=BB2_1 Depth=1
 ; SM30-NEXT:    and.b32 %r8, %r7, %r2;
-; SM30-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM30-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM30-NEXT:    mov.b32 %r20, %r8;
 ; SM30-NEXT:    @%p2 bra $L__BB2_1;
 ; SM30-NEXT:  $L__BB2_3: // %partword.cmpxchg.end
@@ -337,12 +337,12 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB2_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB2_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB2_1;
 ; SM70-NEXT:  $L__BB2_3: // %partword.cmpxchg.end
@@ -424,12 +424,12 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
 ; SM30-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM30-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM30-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM30-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM30-NEXT:    @%p1 bra $L__BB3_3;
 ; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM30-NEXT:    // in Loop: Header=BB3_1 Depth=1
 ; SM30-NEXT:    and.b32 %r8, %r7, %r2;
-; SM30-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM30-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM30-NEXT:    mov.b32 %r20, %r8;
 ; SM30-NEXT:    @%p2 bra $L__BB3_1;
 ; SM30-NEXT:  $L__BB3_3: // %partword.cmpxchg.end
@@ -467,12 +467,12 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB3_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB3_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB3_1;
 ; SM70-NEXT:  $L__BB3_3: // %partword.cmpxchg.end
@@ -556,12 +556,12 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
 ; SM30-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM30-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM30-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM30-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM30-NEXT:    @%p1 bra $L__BB4_3;
 ; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM30-NEXT:    // in Loop: Header=BB4_1 Depth=1
 ; SM30-NEXT:    and.b32 %r8, %r7, %r2;
-; SM30-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM30-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM30-NEXT:    mov.b32 %r20, %r8;
 ; SM30-NEXT:    @%p2 bra $L__BB4_1;
 ; SM30-NEXT:  $L__BB4_3: // %partword.cmpxchg.end
@@ -599,12 +599,12 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
 ; SM70-NEXT:    or.b32 %r17, %r20, %r3;
 ; SM70-NEXT:    or.b32 %r18, %r20, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r18;
 ; SM70-NEXT:    @%p1 bra $L__BB4_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB4_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r20, %r8;
 ; SM70-NEXT:    mov.b32 %r20, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB4_1;
 ; SM70-NEXT:  $L__BB4_3: // %partword.cmpxchg.end
@@ -687,12 +687,12 @@ define i16 @relaxed_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
 ; SM30-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM30-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM30-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM30-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM30-NEXT:    @%p1 bra $L__BB5_3;
 ; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM30-NEXT:    // in Loop: Header=BB5_1 Depth=1
 ; SM30-NEXT:    and.b32 %r8, %r7, %r2;
-; SM30-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM30-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM30-NEXT:    mov.b32 %r19, %r8;
 ; SM30-NEXT:    @%p2 bra $L__BB5_1;
 ; SM30-NEXT:  $L__BB5_3: // %partword.cmpxchg.end
@@ -727,12 +727,12 @@ define i16 @relaxed_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB5_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB5_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB5_1;
 ; SM70-NEXT:  $L__BB5_3: // %partword.cmpxchg.end
@@ -810,12 +810,12 @@ define i16 @acquire_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
 ; SM30-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM30-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM30-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM30-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM30-NEXT:    @%p1 bra $L__BB6_3;
 ; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM30-NEXT:    // in Loop: Header=BB6_1 Depth=1
 ; SM30-NEXT:    and.b32 %r8, %r7, %r2;
-; SM30-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM30-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM30-NEXT:    mov.b32 %r19, %r8;
 ; SM30-NEXT:    @%p2 bra $L__BB6_1;
 ; SM30-NEXT:  $L__BB6_3: // %partword.cmpxchg.end
@@ -851,12 +851,12 @@ define i16 @acquire_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB6_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB6_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB6_1;
 ; SM70-NEXT:  $L__BB6_3: // %partword.cmpxchg.end
@@ -937,12 +937,12 @@ define i16 @release_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
 ; SM30-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM30-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM30-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM30-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM30-NEXT:    @%p1 bra $L__BB7_3;
 ; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM30-NEXT:    // in Loop: Header=BB7_1 Depth=1
 ; SM30-NEXT:    and.b32 %r8, %r7, %r2;
-; SM30-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM30-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM30-NEXT:    mov.b32 %r19, %r8;
 ; SM30-NEXT:    @%p2 bra $L__BB7_1;
 ; SM30-NEXT:  $L__BB7_3: // %partword.cmpxchg.end
@@ -978,12 +978,12 @@ define i16 @release_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB7_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB7_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB7_1;
 ; SM70-NEXT:  $L__BB7_3: // %partword.cmpxchg.end
@@ -1063,12 +1063,12 @@ define i16 @acq_rel_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
 ; SM30-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM30-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM30-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM30-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM30-NEXT:    @%p1 bra $L__BB8_3;
 ; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM30-NEXT:    // in Loop: Header=BB8_1 Depth=1
 ; SM30-NEXT:    and.b32 %r8, %r7, %r2;
-; SM30-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM30-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM30-NEXT:    mov.b32 %r19, %r8;
 ; SM30-NEXT:    @%p2 bra $L__BB8_1;
 ; SM30-NEXT:  $L__BB8_3: // %partword.cmpxchg.end
@@ -1105,12 +1105,12 @@ define i16 @acq_rel_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB8_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB8_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB8_1;
 ; SM70-NEXT:  $L__BB8_3: // %partword.cmpxchg.end
@@ -1193,12 +1193,12 @@ define i16 @seq_cst_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
 ; SM30-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM30-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM30-NEXT:    atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM30-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM30-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM30-NEXT:    @%p1 bra $L__BB9_3;
 ; SM30-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM30-NEXT:    // in Loop: Header=BB9_1 Depth=1
 ; SM30-NEXT:    and.b32 %r8, %r7, %r2;
-; SM30-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM30-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM30-NEXT:    mov.b32 %r19, %r8;
 ; SM30-NEXT:    @%p2 bra $L__BB9_1;
 ; SM30-NEXT:  $L__BB9_3: // %partword.cmpxchg.end
@@ -1235,12 +1235,12 @@ define i16 @seq_cst_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
 ; SM70-NEXT:    or.b32 %r16, %r19, %r3;
 ; SM70-NEXT:    or.b32 %r17, %r19, %r4;
 ; SM70-NEXT:    atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT:    setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT:    setp.eq.b32 %p1, %r7, %r17;
 ; SM70-NEXT:    @%p1 bra $L__BB9_3;
 ; SM70-NEXT:  // %bb.2: // %partword.cmpxchg.failure
 ; SM70-NEXT:    // in Loop: Header=BB9_1 Depth=1
 ; SM70-NEXT:    and.b32 %r8, %r7, %r2;
-; SM70-NEXT:    setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT:    setp.ne.b32 %p2, %r19, %r8;
 ; SM70-NEXT:    mov.b32 %r19, %r8;
 ; SM70-NEXT:    @%p2 bra $L__BB9_1;
 ; SM70-NEXT:  $L__BB9_3: // %partword.cmpxchg.end

diff  --git a/llvm/test/CodeGen/NVPTX/compare-int.ll b/llvm/test/CodeGen/NVPTX/compare-int.ll
index ee86fe97ef781..b44ae47d623bd 100644
--- a/llvm/test/CodeGen/NVPTX/compare-int.ll
+++ b/llvm/test/CodeGen/NVPTX/compare-int.ll
@@ -11,7 +11,7 @@
 ;;; i64
 
 define i64 @icmp_eq_i64(i64 %a, i64 %b) {
-; CHECK: setp.eq.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
+; CHECK: setp.eq.b64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
 ; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp eq i64 %a, %b
@@ -20,7 +20,7 @@ define i64 @icmp_eq_i64(i64 %a, i64 %b) {
 }
 
 define i64 @icmp_ne_i64(i64 %a, i64 %b) {
-; CHECK: setp.ne.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
+; CHECK: setp.ne.b64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
 ; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp ne i64 %a, %b
@@ -103,7 +103,7 @@ define i64 @icmp_sle_i64(i64 %a, i64 %b) {
 ;;; i32
 
 define i32 @icmp_eq_i32(i32 %a, i32 %b) {
-; CHECK: setp.eq.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: setp.eq.b32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
 ; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp eq i32 %a, %b
@@ -112,7 +112,7 @@ define i32 @icmp_eq_i32(i32 %a, i32 %b) {
 }
 
 define i32 @icmp_ne_i32(i32 %a, i32 %b) {
-; CHECK: setp.ne.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: setp.ne.b32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
 ; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp ne i32 %a, %b
@@ -196,7 +196,7 @@ define i32 @icmp_sle_i32(i32 %a, i32 %b) {
 ;;; i16
 
 define i16 @icmp_eq_i16(i16 %a, i16 %b) {
-; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: setp.eq.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
 ; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp eq i16 %a, %b
@@ -205,7 +205,7 @@ define i16 @icmp_eq_i16(i16 %a, i16 %b) {
 }
 
 define i16 @icmp_ne_i16(i16 %a, i16 %b) {
-; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: setp.ne.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
 ; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp ne i16 %a, %b
@@ -290,7 +290,7 @@ define i16 @icmp_sle_i16(i16 %a, i16 %b) {
 
 define i8 @icmp_eq_i8(i8 %a, i8 %b) {
 ; Comparison happens in 16-bit
-; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: setp.eq.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
 ; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp eq i8 %a, %b
@@ -300,7 +300,7 @@ define i8 @icmp_eq_i8(i8 %a, i8 %b) {
 
 define i8 @icmp_ne_i8(i8 %a, i8 %b) {
 ; Comparison happens in 16-bit
-; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: setp.ne.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
 ; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
 ; CHECK: ret
   %cmp = icmp ne i8 %a, %b

diff  --git a/llvm/test/CodeGen/NVPTX/distributed-shared-cluster.ll b/llvm/test/CodeGen/NVPTX/distributed-shared-cluster.ll
index 193cf674ecdfc..a1020e68e1bae 100644
--- a/llvm/test/CodeGen/NVPTX/distributed-shared-cluster.ll
+++ b/llvm/test/CodeGen/NVPTX/distributed-shared-cluster.ll
@@ -210,12 +210,12 @@ define void @test_distributed_shared_cluster_cmpxchg(ptr addrspace(7) %dsmem_ptr
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    or.b32 %r39, %r48, %r3;
 ; CHECK-NEXT:    atom.relaxed.shared::cluster.cas.b32 %r6, [%rd1], %r39, %r48;
-; CHECK-NEXT:    setp.eq.s32 %p1, %r6, %r39;
+; CHECK-NEXT:    setp.eq.b32 %p1, %r6, %r39;
 ; CHECK-NEXT:    @%p1 bra $L__BB4_3;
 ; CHECK-NEXT:  // %bb.2: // %partword.cmpxchg.failure32
 ; CHECK-NEXT:    // in Loop: Header=BB4_1 Depth=1
 ; CHECK-NEXT:    and.b32 %r7, %r6, %r2;
-; CHECK-NEXT:    setp.ne.s32 %p2, %r48, %r7;
+; CHECK-NEXT:    setp.ne.b32 %p2, %r48, %r7;
 ; CHECK-NEXT:    mov.b32 %r48, %r7;
 ; CHECK-NEXT:    @%p2 bra $L__BB4_1;
 ; CHECK-NEXT:  $L__BB4_3: // %partword.cmpxchg.end31
@@ -225,12 +225,12 @@ define void @test_distributed_shared_cluster_cmpxchg(ptr addrspace(7) %dsmem_ptr
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    or.b32 %r41, %r49, %r3;
 ; CHECK-NEXT:    atom.relaxed.shared::cluster.cas.b32 %r10, [%rd1], %r41, %r49;
-; CHECK-NEXT:    setp.eq.s32 %p3, %r10, %r41;
+; CHECK-NEXT:    setp.eq.b32 %p3, %r10, %r41;
 ; CHECK-NEXT:    @%p3 bra $L__BB4_6;
 ; CHECK-NEXT:  // %bb.5: // %partword.cmpxchg.failure22
 ; CHECK-NEXT:    // in Loop: Header=BB4_4 Depth=1
 ; CHECK-NEXT:    and.b32 %r11, %r10, %r2;
-; CHECK-NEXT:    setp.ne.s32 %p4, %r49, %r11;
+; CHECK-NEXT:    setp.ne.b32 %p4, %r49, %r11;
 ; CHECK-NEXT:    mov.b32 %r49, %r11;
 ; CHECK-NEXT:    @%p4 bra $L__BB4_4;
 ; CHECK-NEXT:  $L__BB4_6: // %partword.cmpxchg.end21
@@ -242,12 +242,12 @@ define void @test_distributed_shared_cluster_cmpxchg(ptr addrspace(7) %dsmem_ptr
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    or.b32 %r43, %r50, %r3;
 ; CHECK-NEXT:    atom.relaxed.shared::cluster.cas.b32 %r14, [%rd1], %r43, %r50;
-; CHECK-NEXT:    setp.eq.s32 %p5, %r14, %r43;
+; CHECK-NEXT:    setp.eq.b32 %p5, %r14, %r43;
 ; CHECK-NEXT:    @%p5 bra $L__BB4_9;
 ; CHECK-NEXT:  // %bb.8: // %partword.cmpxchg.failure12
 ; CHECK-NEXT:    // in Loop: Header=BB4_7 Depth=1
 ; CHECK-NEXT:    and.b32 %r15, %r14, %r2;
-; CHECK-NEXT:    setp.ne.s32 %p6, %r50, %r15;
+; CHECK-NEXT:    setp.ne.b32 %p6, %r50, %r15;
 ; CHECK-NEXT:    mov.b32 %r50, %r15;
 ; CHECK-NEXT:    @%p6 bra $L__BB4_7;
 ; CHECK-NEXT:  $L__BB4_9: // %partword.cmpxchg.end11
@@ -258,12 +258,12 @@ define void @test_distributed_shared_cluster_cmpxchg(ptr addrspace(7) %dsmem_ptr
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    or.b32 %r45, %r51, %r3;
 ; CHECK-NEXT:    atom.relaxed.shared::cluster.cas.b32 %r18, [%rd1], %r45, %r51;
-; CHECK-NEXT:    setp.eq.s32 %p7, %r18, %r45;
+; CHECK-NEXT:    setp.eq.b32 %p7, %r18, %r45;
 ; CHECK-NEXT:    @%p7 bra $L__BB4_12;
 ; CHECK-NEXT:  // %bb.11: // %partword.cmpxchg.failure2
 ; CHECK-NEXT:    // in Loop: Header=BB4_10 Depth=1
 ; CHECK-NEXT:    and.b32 %r19, %r18, %r2;
-; CHECK-NEXT:    setp.ne.s32 %p8, %r51, %r19;
+; CHECK-NEXT:    setp.ne.b32 %p8, %r51, %r19;
 ; CHECK-NEXT:    mov.b32 %r51, %r19;
 ; CHECK-NEXT:    @%p8 bra $L__BB4_10;
 ; CHECK-NEXT:  $L__BB4_12: // %partword.cmpxchg.end1
@@ -275,12 +275,12 @@ define void @test_distributed_shared_cluster_cmpxchg(ptr addrspace(7) %dsmem_ptr
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    or.b32 %r47, %r52, %r3;
 ; CHECK-NEXT:    atom.relaxed.shared::cluster.cas.b32 %r22, [%rd1], %r47, %r52;
-; CHECK-NEXT:    setp.eq.s32 %p9, %r22, %r47;
+; CHECK-NEXT:    setp.eq.b32 %p9, %r22, %r47;
 ; CHECK-NEXT:    @%p9 bra $L__BB4_15;
 ; CHECK-NEXT:  // %bb.14: // %partword.cmpxchg.failure
 ; CHECK-NEXT:    // in Loop: Header=BB4_13 Depth=1
 ; CHECK-NEXT:    and.b32 %r23, %r22, %r2;
-; CHECK-NEXT:    setp.ne.s32 %p10, %r52, %r23;
+; CHECK-NEXT:    setp.ne.b32 %p10, %r52, %r23;
 ; CHECK-NEXT:    mov.b32 %r52, %r23;
 ; CHECK-NEXT:    @%p10 bra $L__BB4_13;
 ; CHECK-NEXT:  $L__BB4_15: // %partword.cmpxchg.end

diff  --git a/llvm/test/CodeGen/NVPTX/extractelement.ll b/llvm/test/CodeGen/NVPTX/extractelement.ll
index f37777ab954e2..e04732ebad66b 100644
--- a/llvm/test/CodeGen/NVPTX/extractelement.ll
+++ b/llvm/test/CodeGen/NVPTX/extractelement.ll
@@ -40,7 +40,7 @@ define i1  @test_v2i8_load(ptr %a) {
 ; CHECK-NEXT:    ld.v2.b8 {%rs1, %rs2}, [%rd1];
 ; CHECK-NEXT:    or.b16 %rs5, %rs1, %rs2;
 ; CHECK-NEXT:    and.b16 %rs6, %rs5, 255;
-; CHECK-NEXT:    setp.eq.s16 %p1, %rs6, 0;
+; CHECK-NEXT:    setp.eq.b16 %p1, %rs6, 0;
 ; CHECK-NEXT:    selp.b32 %r1, -1, 0, %p1;
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r1;
 ; CHECK-NEXT:    ret;

diff  --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
index 13f1c2f30b830..2b7e4184670c7 100644
--- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
@@ -880,8 +880,9 @@ define half @test_sqrt(half %a) #0 {
 ; CHECK-LABEL: test_sin(
 ; CHECK:      ld.param.b16    [[A:%rs[0-9]+]], [test_sin_param_0];
 ; CHECK-NOFTZ:      cvt.f32.f16     [[AF:%r[0-9]+]], [[A]];
-; CHECK-F16-FTZ:      cvt.ftz.f32.f16     [[AF:%r[0-9]+]], [[A]];
-; CHECK:      sin.approx.f32  [[RF:%r[0-9]+]], [[AF]];
+; CHECK-F16-FTZ:    cvt.ftz.f32.f16     [[AF:%r[0-9]+]], [[A]];
+; CHECK-NOF16:      sin.approx.f32  [[RF:%r[0-9]+]], [[AF]];
+; CHECK-F16-FTZ:    sin.approx.ftz.f32  [[RF:%r[0-9]+]], [[AF]];
 ; CHECK:      cvt.rn.f16.f32  [[R:%rs[0-9]+]], [[RF]];
 ; CHECK:      st.param.b16    [func_retval0], [[R]];
 ; CHECK:      ret;
@@ -893,8 +894,9 @@ define half @test_sin(half %a) #0 #1 {
 ; CHECK-LABEL: test_cos(
 ; CHECK:      ld.param.b16    [[A:%rs[0-9]+]], [test_cos_param_0];
 ; CHECK-NOFTZ:      cvt.f32.f16     [[AF:%r[0-9]+]], [[A]];
-; CHECK-F16-FTZ:      cvt.ftz.f32.f16     [[AF:%r[0-9]+]], [[A]];
-; CHECK:      cos.approx.f32  [[RF:%r[0-9]+]], [[AF]];
+; CHECK-F16-FTZ:    cvt.ftz.f32.f16     [[AF:%r[0-9]+]], [[A]];
+; CHECK-NOF16:      cos.approx.f32  [[RF:%r[0-9]+]], [[AF]];
+; CHECK-F16-FTZ:    cos.approx.ftz.f32  [[RF:%r[0-9]+]], [[AF]];
 ; CHECK:      cvt.rn.f16.f32  [[R:%rs[0-9]+]], [[RF]];
 ; CHECK:      st.param.b16    [func_retval0], [[R]];
 ; CHECK:      ret;

diff  --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
index 43a605f2b34d7..093bc20547b85 100644
--- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -83,7 +83,7 @@ define half @test_extract_i(<2 x half> %a, i64 %idx) #0 {
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b64 %rd1, [test_extract_i_param_1];
 ; CHECK-NEXT:    ld.param.b32 %r1, [test_extract_i_param_0];
-; CHECK-NEXT:    setp.eq.s64 %p1, %rd1, 0;
+; CHECK-NEXT:    setp.eq.b64 %p1, %rd1, 0;
 ; CHECK-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
 ; CHECK-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
 ; CHECK-NEXT:    st.param.b16 [func_retval0], %rs3;

diff  --git a/llvm/test/CodeGen/NVPTX/fast-math.ll b/llvm/test/CodeGen/NVPTX/fast-math.ll
index bc48d242f88fd..5eda3a1e2dda1 100644
--- a/llvm/test/CodeGen/NVPTX/fast-math.ll
+++ b/llvm/test/CodeGen/NVPTX/fast-math.ll
@@ -291,6 +291,34 @@ define float @fcos_approx(float %a) #0 {
   ret float %r
 }
 
+define float @fsin_approx_ftz(float %a) #0 #1 {
+; CHECK-LABEL: fsin_approx_ftz(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [fsin_approx_ftz_param_0];
+; CHECK-NEXT:    sin.approx.ftz.f32 %r2, %r1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT:    ret;
+  %r = tail call float @llvm.sin.f32(float %a)
+  ret float %r
+}
+
+define float @fcos_approx_ftz(float %a) #0 #1 {
+; CHECK-LABEL: fcos_approx_ftz(
+; CHECK:       {
+; CHECK-NEXT:    .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT:  // %bb.0:
+; CHECK-NEXT:    ld.param.b32 %r1, [fcos_approx_ftz_param_0];
+; CHECK-NEXT:    cos.approx.ftz.f32 %r2, %r1;
+; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT:    ret;
+  %r = tail call float @llvm.cos.f32(float %a)
+  ret float %r
+}
+
 define float @repeated_div_recip_allowed(i1 %pred, float %a, float %b, float %divisor) {
 ; CHECK-LABEL: repeated_div_recip_allowed(
 ; CHECK:       {

diff  --git a/llvm/test/CodeGen/NVPTX/i1-select.ll b/llvm/test/CodeGen/NVPTX/i1-select.ll
index 562c746200d87..f1adc3489c0d9 100644
--- a/llvm/test/CodeGen/NVPTX/i1-select.ll
+++ b/llvm/test/CodeGen/NVPTX/i1-select.ll
@@ -72,11 +72,11 @@ define i32 @test_select_i1_basic(i32 %v1, i32 %v2, i32 %v3, i32 %true, i32 %fals
 ; CHECK-NEXT:    ld.param.b32 %r1, [test_select_i1_basic_param_0];
 ; CHECK-NEXT:    ld.param.b32 %r2, [test_select_i1_basic_param_1];
 ; CHECK-NEXT:    or.b32 %r4, %r1, %r2;
-; CHECK-NEXT:    setp.ne.s32 %p1, %r1, 0;
+; CHECK-NEXT:    setp.ne.b32 %p1, %r1, 0;
 ; CHECK-NEXT:    ld.param.b32 %r5, [test_select_i1_basic_param_2];
-; CHECK-NEXT:    setp.eq.s32 %p2, %r5, 0;
+; CHECK-NEXT:    setp.eq.b32 %p2, %r5, 0;
 ; CHECK-NEXT:    ld.param.b32 %r7, [test_select_i1_basic_param_3];
-; CHECK-NEXT:    setp.eq.s32 %p3, %r4, 0;
+; CHECK-NEXT:    setp.eq.b32 %p3, %r4, 0;
 ; CHECK-NEXT:    ld.param.b32 %r8, [test_select_i1_basic_param_4];
 ; CHECK-NEXT:    selp.b32 %r9, %r7, %r8, %p2;
 ; CHECK-NEXT:    selp.b32 %r10, %r9, %r8, %p1;
@@ -99,12 +99,12 @@ define i32 @test_select_i1_basic_folding(i32 %v1, i32 %v2, i32 %v3, i32 %true, i
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.param.b32 %r1, [test_select_i1_basic_folding_param_0];
-; CHECK-NEXT:    setp.eq.s32 %p1, %r1, 0;
+; CHECK-NEXT:    setp.eq.b32 %p1, %r1, 0;
 ; CHECK-NEXT:    ld.param.b32 %r3, [test_select_i1_basic_folding_param_1];
-; CHECK-NEXT:    setp.ne.s32 %p2, %r3, 0;
-; CHECK-NEXT:    setp.eq.s32 %p3, %r3, 0;
+; CHECK-NEXT:    setp.ne.b32 %p2, %r3, 0;
+; CHECK-NEXT:    setp.eq.b32 %p3, %r3, 0;
 ; CHECK-NEXT:    ld.param.b32 %r5, [test_select_i1_basic_folding_param_2];
-; CHECK-NEXT:    setp.eq.s32 %p4, %r5, 0;
+; CHECK-NEXT:    setp.eq.b32 %p4, %r5, 0;
 ; CHECK-NEXT:    ld.param.b32 %r6, [test_select_i1_basic_folding_param_3];
 ; CHECK-NEXT:    xor.pred %p6, %p1, %p3;
 ; CHECK-NEXT:    ld.param.b32 %r7, [test_select_i1_basic_folding_param_4];

diff  --git a/llvm/test/CodeGen/NVPTX/i128.ll b/llvm/test/CodeGen/NVPTX/i128.ll
index 49dbc5c385dc0..f2211eb1c0b8e 100644
--- a/llvm/test/CodeGen/NVPTX/i128.ll
+++ b/llvm/test/CodeGen/NVPTX/i128.ll
@@ -24,18 +24,18 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    selp.b64 %rd6, %rd54, %rd50, %p2;
 ; CHECK-NEXT:    selp.b64 %rd5, %rd53, %rd49, %p2;
 ; CHECK-NEXT:    or.b64 %rd55, %rd5, %rd6;
-; CHECK-NEXT:    setp.eq.s64 %p3, %rd55, 0;
+; CHECK-NEXT:    setp.eq.b64 %p3, %rd55, 0;
 ; CHECK-NEXT:    or.b64 %rd56, %rd3, %rd4;
-; CHECK-NEXT:    setp.eq.s64 %p4, %rd56, 0;
+; CHECK-NEXT:    setp.eq.b64 %p4, %rd56, 0;
 ; CHECK-NEXT:    or.pred %p5, %p3, %p4;
-; CHECK-NEXT:    setp.ne.s64 %p6, %rd6, 0;
+; CHECK-NEXT:    setp.ne.b64 %p6, %rd6, 0;
 ; CHECK-NEXT:    clz.b64 %r1, %rd6;
 ; CHECK-NEXT:    cvt.u64.u32 %rd57, %r1;
 ; CHECK-NEXT:    clz.b64 %r2, %rd5;
 ; CHECK-NEXT:    cvt.u64.u32 %rd58, %r2;
 ; CHECK-NEXT:    add.s64 %rd59, %rd58, 64;
 ; CHECK-NEXT:    selp.b64 %rd60, %rd57, %rd59, %p6;
-; CHECK-NEXT:    setp.ne.s64 %p7, %rd4, 0;
+; CHECK-NEXT:    setp.ne.b64 %p7, %rd4, 0;
 ; CHECK-NEXT:    clz.b64 %r3, %rd4;
 ; CHECK-NEXT:    cvt.u64.u32 %rd61, %r3;
 ; CHECK-NEXT:    clz.b64 %r4, %rd3;
@@ -45,16 +45,16 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    mov.b64 %rd116, 0;
 ; CHECK-NEXT:    sub.cc.s64 %rd66, %rd60, %rd64;
 ; CHECK-NEXT:    subc.cc.s64 %rd8, %rd116, 0;
-; CHECK-NEXT:    setp.ne.s64 %p8, %rd8, 0;
+; CHECK-NEXT:    setp.ne.b64 %p8, %rd8, 0;
 ; CHECK-NEXT:    and.pred %p10, %p8, %p8;
-; CHECK-NEXT:    setp.eq.s64 %p11, %rd8, 0;
+; CHECK-NEXT:    setp.eq.b64 %p11, %rd8, 0;
 ; CHECK-NEXT:    setp.gt.u64 %p12, %rd66, 127;
 ; CHECK-NEXT:    and.pred %p13, %p11, %p12;
 ; CHECK-NEXT:    or.pred %p14, %p13, %p10;
 ; CHECK-NEXT:    or.pred %p15, %p5, %p14;
 ; CHECK-NEXT:    xor.b64 %rd67, %rd66, 127;
 ; CHECK-NEXT:    or.b64 %rd68, %rd67, %rd8;
-; CHECK-NEXT:    setp.eq.s64 %p16, %rd68, 0;
+; CHECK-NEXT:    setp.eq.b64 %p16, %rd68, 0;
 ; CHECK-NEXT:    selp.b64 %rd125, 0, %rd4, %p15;
 ; CHECK-NEXT:    selp.b64 %rd124, 0, %rd3, %p15;
 ; CHECK-NEXT:    or.pred %p17, %p15, %p16;
@@ -63,7 +63,7 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    add.cc.s64 %rd118, %rd66, 1;
 ; CHECK-NEXT:    addc.cc.s64 %rd119, %rd8, 0;
 ; CHECK-NEXT:    or.b64 %rd71, %rd118, %rd119;
-; CHECK-NEXT:    setp.eq.s64 %p18, %rd71, 0;
+; CHECK-NEXT:    setp.eq.b64 %p18, %rd71, 0;
 ; CHECK-NEXT:    cvt.u32.u64 %r5, %rd66;
 ; CHECK-NEXT:    sub.s32 %r6, 127, %r5;
 ; CHECK-NEXT:    shl.b64 %rd72, %rd4, %r6;
@@ -117,7 +117,7 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    add.cc.s64 %rd118, %rd118, -1;
 ; CHECK-NEXT:    addc.cc.s64 %rd119, %rd119, -1;
 ; CHECK-NEXT:    or.b64 %rd97, %rd118, %rd119;
-; CHECK-NEXT:    setp.eq.s64 %p21, %rd97, 0;
+; CHECK-NEXT:    setp.eq.b64 %p21, %rd97, 0;
 ; CHECK-NEXT:    @%p21 bra $L__BB0_4;
 ; CHECK-NEXT:    bra.uni $L__BB0_2;
 ; CHECK-NEXT:  $L__BB0_4: // %udiv-loop-exit
@@ -155,18 +155,18 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd41, %rd42}, [urem_i128_param_0];
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd3, %rd4}, [urem_i128_param_1];
 ; CHECK-NEXT:    or.b64 %rd45, %rd3, %rd4;
-; CHECK-NEXT:    setp.eq.s64 %p1, %rd45, 0;
+; CHECK-NEXT:    setp.eq.b64 %p1, %rd45, 0;
 ; CHECK-NEXT:    or.b64 %rd46, %rd41, %rd42;
-; CHECK-NEXT:    setp.eq.s64 %p2, %rd46, 0;
+; CHECK-NEXT:    setp.eq.b64 %p2, %rd46, 0;
 ; CHECK-NEXT:    or.pred %p3, %p1, %p2;
-; CHECK-NEXT:    setp.ne.s64 %p4, %rd4, 0;
+; CHECK-NEXT:    setp.ne.b64 %p4, %rd4, 0;
 ; CHECK-NEXT:    clz.b64 %r1, %rd4;
 ; CHECK-NEXT:    cvt.u64.u32 %rd47, %r1;
 ; CHECK-NEXT:    clz.b64 %r2, %rd3;
 ; CHECK-NEXT:    cvt.u64.u32 %rd48, %r2;
 ; CHECK-NEXT:    add.s64 %rd49, %rd48, 64;
 ; CHECK-NEXT:    selp.b64 %rd50, %rd47, %rd49, %p4;
-; CHECK-NEXT:    setp.ne.s64 %p5, %rd42, 0;
+; CHECK-NEXT:    setp.ne.b64 %p5, %rd42, 0;
 ; CHECK-NEXT:    clz.b64 %r3, %rd42;
 ; CHECK-NEXT:    cvt.u64.u32 %rd51, %r3;
 ; CHECK-NEXT:    clz.b64 %r4, %rd41;
@@ -177,14 +177,14 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    sub.cc.s64 %rd5, %rd50, %rd54;
 ; CHECK-NEXT:    subc.cc.s64 %rd6, %rd101, 0;
 ; CHECK-NEXT:    setp.gt.u64 %p6, %rd5, 127;
-; CHECK-NEXT:    setp.eq.s64 %p7, %rd6, 0;
+; CHECK-NEXT:    setp.eq.b64 %p7, %rd6, 0;
 ; CHECK-NEXT:    and.pred %p8, %p7, %p6;
-; CHECK-NEXT:    setp.ne.s64 %p9, %rd6, 0;
+; CHECK-NEXT:    setp.ne.b64 %p9, %rd6, 0;
 ; CHECK-NEXT:    or.pred %p10, %p8, %p9;
 ; CHECK-NEXT:    or.pred %p11, %p3, %p10;
 ; CHECK-NEXT:    xor.b64 %rd56, %rd5, 127;
 ; CHECK-NEXT:    or.b64 %rd57, %rd56, %rd6;
-; CHECK-NEXT:    setp.eq.s64 %p12, %rd57, 0;
+; CHECK-NEXT:    setp.eq.b64 %p12, %rd57, 0;
 ; CHECK-NEXT:    selp.b64 %rd110, 0, %rd42, %p11;
 ; CHECK-NEXT:    selp.b64 %rd109, 0, %rd41, %p11;
 ; CHECK-NEXT:    or.pred %p13, %p11, %p12;
@@ -193,7 +193,7 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    add.cc.s64 %rd103, %rd5, 1;
 ; CHECK-NEXT:    addc.cc.s64 %rd104, %rd6, 0;
 ; CHECK-NEXT:    or.b64 %rd60, %rd103, %rd104;
-; CHECK-NEXT:    setp.eq.s64 %p14, %rd60, 0;
+; CHECK-NEXT:    setp.eq.b64 %p14, %rd60, 0;
 ; CHECK-NEXT:    cvt.u32.u64 %r5, %rd5;
 ; CHECK-NEXT:    sub.s32 %r6, 127, %r5;
 ; CHECK-NEXT:    shl.b64 %rd61, %rd42, %r6;
@@ -247,7 +247,7 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    add.cc.s64 %rd103, %rd103, -1;
 ; CHECK-NEXT:    addc.cc.s64 %rd104, %rd104, -1;
 ; CHECK-NEXT:    or.b64 %rd86, %rd103, %rd104;
-; CHECK-NEXT:    setp.eq.s64 %p17, %rd86, 0;
+; CHECK-NEXT:    setp.eq.b64 %p17, %rd86, 0;
 ; CHECK-NEXT:    @%p17 bra $L__BB1_4;
 ; CHECK-NEXT:    bra.uni $L__BB1_2;
 ; CHECK-NEXT:  $L__BB1_4: // %udiv-loop-exit
@@ -327,18 +327,18 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    xor.b64 %rd55, %rd50, %rd46;
 ; CHECK-NEXT:    shr.s64 %rd5, %rd55, 63;
 ; CHECK-NEXT:    or.b64 %rd56, %rd3, %rd4;
-; CHECK-NEXT:    setp.eq.s64 %p3, %rd56, 0;
+; CHECK-NEXT:    setp.eq.b64 %p3, %rd56, 0;
 ; CHECK-NEXT:    or.b64 %rd57, %rd1, %rd2;
-; CHECK-NEXT:    setp.eq.s64 %p4, %rd57, 0;
+; CHECK-NEXT:    setp.eq.b64 %p4, %rd57, 0;
 ; CHECK-NEXT:    or.pred %p5, %p3, %p4;
-; CHECK-NEXT:    setp.ne.s64 %p6, %rd4, 0;
+; CHECK-NEXT:    setp.ne.b64 %p6, %rd4, 0;
 ; CHECK-NEXT:    clz.b64 %r1, %rd4;
 ; CHECK-NEXT:    cvt.u64.u32 %rd58, %r1;
 ; CHECK-NEXT:    clz.b64 %r2, %rd3;
 ; CHECK-NEXT:    cvt.u64.u32 %rd59, %r2;
 ; CHECK-NEXT:    add.s64 %rd60, %rd59, 64;
 ; CHECK-NEXT:    selp.b64 %rd61, %rd58, %rd60, %p6;
-; CHECK-NEXT:    setp.ne.s64 %p7, %rd2, 0;
+; CHECK-NEXT:    setp.ne.b64 %p7, %rd2, 0;
 ; CHECK-NEXT:    clz.b64 %r3, %rd2;
 ; CHECK-NEXT:    cvt.u64.u32 %rd62, %r3;
 ; CHECK-NEXT:    clz.b64 %r4, %rd1;
@@ -348,16 +348,16 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    mov.b64 %rd111, 0;
 ; CHECK-NEXT:    sub.cc.s64 %rd67, %rd61, %rd65;
 ; CHECK-NEXT:    subc.cc.s64 %rd8, %rd111, 0;
-; CHECK-NEXT:    setp.ne.s64 %p8, %rd8, 0;
+; CHECK-NEXT:    setp.ne.b64 %p8, %rd8, 0;
 ; CHECK-NEXT:    and.pred %p10, %p8, %p8;
-; CHECK-NEXT:    setp.eq.s64 %p11, %rd8, 0;
+; CHECK-NEXT:    setp.eq.b64 %p11, %rd8, 0;
 ; CHECK-NEXT:    setp.gt.u64 %p12, %rd67, 127;
 ; CHECK-NEXT:    and.pred %p13, %p11, %p12;
 ; CHECK-NEXT:    or.pred %p14, %p13, %p10;
 ; CHECK-NEXT:    or.pred %p15, %p5, %p14;
 ; CHECK-NEXT:    xor.b64 %rd68, %rd67, 127;
 ; CHECK-NEXT:    or.b64 %rd69, %rd68, %rd8;
-; CHECK-NEXT:    setp.eq.s64 %p16, %rd69, 0;
+; CHECK-NEXT:    setp.eq.b64 %p16, %rd69, 0;
 ; CHECK-NEXT:    selp.b64 %rd120, 0, %rd2, %p15;
 ; CHECK-NEXT:    selp.b64 %rd119, 0, %rd1, %p15;
 ; CHECK-NEXT:    or.pred %p17, %p15, %p16;
@@ -366,7 +366,7 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    add.cc.s64 %rd113, %rd67, 1;
 ; CHECK-NEXT:    addc.cc.s64 %rd114, %rd8, 0;
 ; CHECK-NEXT:    or.b64 %rd72, %rd113, %rd114;
-; CHECK-NEXT:    setp.eq.s64 %p18, %rd72, 0;
+; CHECK-NEXT:    setp.eq.b64 %p18, %rd72, 0;
 ; CHECK-NEXT:    cvt.u32.u64 %r5, %rd67;
 ; CHECK-NEXT:    sub.s32 %r6, 127, %r5;
 ; CHECK-NEXT:    shl.b64 %rd73, %rd2, %r6;
@@ -420,7 +420,7 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    add.cc.s64 %rd113, %rd113, -1;
 ; CHECK-NEXT:    addc.cc.s64 %rd114, %rd114, -1;
 ; CHECK-NEXT:    or.b64 %rd98, %rd113, %rd114;
-; CHECK-NEXT:    setp.eq.s64 %p21, %rd98, 0;
+; CHECK-NEXT:    setp.eq.b64 %p21, %rd98, 0;
 ; CHECK-NEXT:    @%p21 bra $L__BB4_4;
 ; CHECK-NEXT:    bra.uni $L__BB4_2;
 ; CHECK-NEXT:  $L__BB4_4: // %udiv-loop-exit
@@ -452,18 +452,18 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd41, %rd42}, [udiv_i128_param_0];
 ; CHECK-NEXT:    ld.param.v2.b64 {%rd43, %rd44}, [udiv_i128_param_1];
 ; CHECK-NEXT:    or.b64 %rd45, %rd43, %rd44;
-; CHECK-NEXT:    setp.eq.s64 %p1, %rd45, 0;
+; CHECK-NEXT:    setp.eq.b64 %p1, %rd45, 0;
 ; CHECK-NEXT:    or.b64 %rd46, %rd41, %rd42;
-; CHECK-NEXT:    setp.eq.s64 %p2, %rd46, 0;
+; CHECK-NEXT:    setp.eq.b64 %p2, %rd46, 0;
 ; CHECK-NEXT:    or.pred %p3, %p1, %p2;
-; CHECK-NEXT:    setp.ne.s64 %p4, %rd44, 0;
+; CHECK-NEXT:    setp.ne.b64 %p4, %rd44, 0;
 ; CHECK-NEXT:    clz.b64 %r1, %rd44;
 ; CHECK-NEXT:    cvt.u64.u32 %rd47, %r1;
 ; CHECK-NEXT:    clz.b64 %r2, %rd43;
 ; CHECK-NEXT:    cvt.u64.u32 %rd48, %r2;
 ; CHECK-NEXT:    add.s64 %rd49, %rd48, 64;
 ; CHECK-NEXT:    selp.b64 %rd50, %rd47, %rd49, %p4;
-; CHECK-NEXT:    setp.ne.s64 %p5, %rd42, 0;
+; CHECK-NEXT:    setp.ne.b64 %p5, %rd42, 0;
 ; CHECK-NEXT:    clz.b64 %r3, %rd42;
 ; CHECK-NEXT:    cvt.u64.u32 %rd51, %r3;
 ; CHECK-NEXT:    clz.b64 %r4, %rd41;
@@ -474,14 +474,14 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    sub.cc.s64 %rd5, %rd50, %rd54;
 ; CHECK-NEXT:    subc.cc.s64 %rd6, %rd95, 0;
 ; CHECK-NEXT:    setp.gt.u64 %p6, %rd5, 127;
-; CHECK-NEXT:    setp.eq.s64 %p7, %rd6, 0;
+; CHECK-NEXT:    setp.eq.b64 %p7, %rd6, 0;
 ; CHECK-NEXT:    and.pred %p8, %p7, %p6;
-; CHECK-NEXT:    setp.ne.s64 %p9, %rd6, 0;
+; CHECK-NEXT:    setp.ne.b64 %p9, %rd6, 0;
 ; CHECK-NEXT:    or.pred %p10, %p8, %p9;
 ; CHECK-NEXT:    or.pred %p11, %p3, %p10;
 ; CHECK-NEXT:    xor.b64 %rd56, %rd5, 127;
 ; CHECK-NEXT:    or.b64 %rd57, %rd56, %rd6;
-; CHECK-NEXT:    setp.eq.s64 %p12, %rd57, 0;
+; CHECK-NEXT:    setp.eq.b64 %p12, %rd57, 0;
 ; CHECK-NEXT:    selp.b64 %rd104, 0, %rd42, %p11;
 ; CHECK-NEXT:    selp.b64 %rd103, 0, %rd41, %p11;
 ; CHECK-NEXT:    or.pred %p13, %p11, %p12;
@@ -490,7 +490,7 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    add.cc.s64 %rd97, %rd5, 1;
 ; CHECK-NEXT:    addc.cc.s64 %rd98, %rd6, 0;
 ; CHECK-NEXT:    or.b64 %rd60, %rd97, %rd98;
-; CHECK-NEXT:    setp.eq.s64 %p14, %rd60, 0;
+; CHECK-NEXT:    setp.eq.b64 %p14, %rd60, 0;
 ; CHECK-NEXT:    cvt.u32.u64 %r5, %rd5;
 ; CHECK-NEXT:    sub.s32 %r6, 127, %r5;
 ; CHECK-NEXT:    shl.b64 %rd61, %rd42, %r6;
@@ -544,7 +544,7 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) {
 ; CHECK-NEXT:    add.cc.s64 %rd97, %rd97, -1;
 ; CHECK-NEXT:    addc.cc.s64 %rd98, %rd98, -1;
 ; CHECK-NEXT:    or.b64 %rd86, %rd97, %rd98;
-; CHECK-NEXT:    setp.eq.s64 %p17, %rd86, 0;
+; CHECK-NEXT:    setp.eq.b64 %p17, %rd86, 0;
 ; CHECK-NEXT:    @%p17 bra $L__BB5_4;
 ; CHECK-NEXT:    bra.uni $L__BB5_2;
 ; CHECK-NEXT:  $L__BB5_4: // %udiv-loop-exit

diff  --git a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
index e89ab7a5605c3..2b7a06c33d948 100644
--- a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
@@ -98,7 +98,7 @@ define i16 @test_extract_i(<2 x i16> %a, i64 %idx) #0 {
 ; COMMON-NEXT:  // %bb.0:
 ; COMMON-NEXT:    ld.param.b64 %rd1, [test_extract_i_param_1];
 ; COMMON-NEXT:    ld.param.b32 %r1, [test_extract_i_param_0];
-; COMMON-NEXT:    setp.eq.s64 %p1, %rd1, 0;
+; COMMON-NEXT:    setp.eq.b64 %p1, %rd1, 0;
 ; COMMON-NEXT:    mov.b32 {%rs1, %rs2}, %r1;
 ; COMMON-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
 ; COMMON-NEXT:    cvt.u32.u16 %r2, %rs3;
@@ -735,8 +735,8 @@ define <2 x i16> @test_select_cc(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x
 ; COMMON-NEXT:    ld.param.b32 %r1, [test_select_cc_param_0];
 ; COMMON-NEXT:    mov.b32 {%rs1, %rs2}, %r4;
 ; COMMON-NEXT:    mov.b32 {%rs3, %rs4}, %r3;
-; COMMON-NEXT:    setp.ne.s16 %p1, %rs3, %rs1;
-; COMMON-NEXT:    setp.ne.s16 %p2, %rs4, %rs2;
+; COMMON-NEXT:    setp.ne.b16 %p1, %rs3, %rs1;
+; COMMON-NEXT:    setp.ne.b16 %p2, %rs4, %rs2;
 ; COMMON-NEXT:    mov.b32 {%rs5, %rs6}, %r2;
 ; COMMON-NEXT:    mov.b32 {%rs7, %rs8}, %r1;
 ; COMMON-NEXT:    selp.b16 %rs9, %rs8, %rs6, %p2;
@@ -762,8 +762,8 @@ define <2 x i32> @test_select_cc_i32_i16(<2 x i32> %a, <2 x i32> %b,
 ; COMMON-NEXT:    ld.param.b32 %r5, [test_select_cc_i32_i16_param_2];
 ; COMMON-NEXT:    mov.b32 {%rs1, %rs2}, %r6;
 ; COMMON-NEXT:    mov.b32 {%rs3, %rs4}, %r5;
-; COMMON-NEXT:    setp.ne.s16 %p1, %rs3, %rs1;
-; COMMON-NEXT:    setp.ne.s16 %p2, %rs4, %rs2;
+; COMMON-NEXT:    setp.ne.b16 %p1, %rs3, %rs1;
+; COMMON-NEXT:    setp.ne.b16 %p2, %rs4, %rs2;
 ; COMMON-NEXT:    selp.b32 %r7, %r2, %r4, %p2;
 ; COMMON-NEXT:    selp.b32 %r8, %r1, %r3, %p1;
 ; COMMON-NEXT:    st.param.v2.b32 [func_retval0], {%r8, %r7};
@@ -786,8 +786,8 @@ define <2 x i16> @test_select_cc_i16_i32(<2 x i16> %a, <2 x i16> %b,
 ; COMMON-NEXT:    ld.param.v2.b32 {%r3, %r4}, [test_select_cc_i16_i32_param_2];
 ; COMMON-NEXT:    ld.param.b32 %r2, [test_select_cc_i16_i32_param_1];
 ; COMMON-NEXT:    ld.param.b32 %r1, [test_select_cc_i16_i32_param_0];
-; COMMON-NEXT:    setp.ne.s32 %p1, %r3, %r5;
-; COMMON-NEXT:    setp.ne.s32 %p2, %r4, %r6;
+; COMMON-NEXT:    setp.ne.b32 %p1, %r3, %r5;
+; COMMON-NEXT:    setp.ne.b32 %p2, %r4, %r6;
 ; COMMON-NEXT:    mov.b32 {%rs1, %rs2}, %r2;
 ; COMMON-NEXT:    mov.b32 {%rs3, %rs4}, %r1;
 ; COMMON-NEXT:    selp.b16 %rs5, %rs4, %rs2, %p2;

diff  --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
index fd2e56bb126bb..328da60a1f783 100644
--- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
@@ -300,16 +300,16 @@ define <4 x i8> @test_umax(<4 x i8> %a, <4 x i8> %b) #0 {
 ; CHECK-NEXT:    ld.param.b32 %r1, [test_umax_param_0];
 ; CHECK-NEXT:    bfe.u32 %r3, %r2, 0, 8;
 ; CHECK-NEXT:    bfe.u32 %r4, %r1, 0, 8;
-; CHECK-NEXT:    setp.hi.u32 %p1, %r4, %r3;
+; CHECK-NEXT:    setp.gt.u32 %p1, %r4, %r3;
 ; CHECK-NEXT:    bfe.u32 %r5, %r2, 8, 8;
 ; CHECK-NEXT:    bfe.u32 %r6, %r1, 8, 8;
-; CHECK-NEXT:    setp.hi.u32 %p2, %r6, %r5;
+; CHECK-NEXT:    setp.gt.u32 %p2, %r6, %r5;
 ; CHECK-NEXT:    bfe.u32 %r7, %r2, 16, 8;
 ; CHECK-NEXT:    bfe.u32 %r8, %r1, 16, 8;
-; CHECK-NEXT:    setp.hi.u32 %p3, %r8, %r7;
+; CHECK-NEXT:    setp.gt.u32 %p3, %r8, %r7;
 ; CHECK-NEXT:    bfe.u32 %r9, %r2, 24, 8;
 ; CHECK-NEXT:    bfe.u32 %r10, %r1, 24, 8;
-; CHECK-NEXT:    setp.hi.u32 %p4, %r10, %r9;
+; CHECK-NEXT:    setp.gt.u32 %p4, %r10, %r9;
 ; CHECK-NEXT:    selp.b32 %r11, %r10, %r9, %p4;
 ; CHECK-NEXT:    selp.b32 %r12, %r8, %r7, %p3;
 ; CHECK-NEXT:    prmt.b32 %r13, %r12, %r11, 0x3340U;
@@ -378,16 +378,16 @@ define <4 x i8> @test_umin(<4 x i8> %a, <4 x i8> %b) #0 {
 ; CHECK-NEXT:    ld.param.b32 %r1, [test_umin_param_0];
 ; CHECK-NEXT:    bfe.u32 %r3, %r2, 0, 8;
 ; CHECK-NEXT:    bfe.u32 %r4, %r1, 0, 8;
-; CHECK-NEXT:    setp.ls.u32 %p1, %r4, %r3;
+; CHECK-NEXT:    setp.le.u32 %p1, %r4, %r3;
 ; CHECK-NEXT:    bfe.u32 %r5, %r2, 8, 8;
 ; CHECK-NEXT:    bfe.u32 %r6, %r1, 8, 8;
-; CHECK-NEXT:    setp.ls.u32 %p2, %r6, %r5;
+; CHECK-NEXT:    setp.le.u32 %p2, %r6, %r5;
 ; CHECK-NEXT:    bfe.u32 %r7, %r2, 16, 8;
 ; CHECK-NEXT:    bfe.u32 %r8, %r1, 16, 8;
-; CHECK-NEXT:    setp.ls.u32 %p3, %r8, %r7;
+; CHECK-NEXT:    setp.le.u32 %p3, %r8, %r7;
 ; CHECK-NEXT:    bfe.u32 %r9, %r2, 24, 8;
 ; CHECK-NEXT:    bfe.u32 %r10, %r1, 24, 8;
-; CHECK-NEXT:    setp.ls.u32 %p4, %r10, %r9;
+; CHECK-NEXT:    setp.le.u32 %p4, %r10, %r9;
 ; CHECK-NEXT:    selp.b32 %r11, %r10, %r9, %p4;
 ; CHECK-NEXT:    selp.b32 %r12, %r8, %r7, %p3;
 ; CHECK-NEXT:    prmt.b32 %r13, %r12, %r11, 0x3340U;
@@ -414,16 +414,16 @@ define <4 x i8> @test_eq(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c) #0 {
 ; CHECK-NEXT:    ld.param.b32 %r1, [test_eq_param_0];
 ; CHECK-NEXT:    bfe.u32 %r4, %r2, 0, 8;
 ; CHECK-NEXT:    bfe.u32 %r5, %r1, 0, 8;
-; CHECK-NEXT:    setp.eq.u32 %p1, %r5, %r4;
+; CHECK-NEXT:    setp.eq.b32 %p1, %r5, %r4;
 ; CHECK-NEXT:    bfe.u32 %r6, %r2, 8, 8;
 ; CHECK-NEXT:    bfe.u32 %r7, %r1, 8, 8;
-; CHECK-NEXT:    setp.eq.u32 %p2, %r7, %r6;
+; CHECK-NEXT:    setp.eq.b32 %p2, %r7, %r6;
 ; CHECK-NEXT:    bfe.u32 %r8, %r2, 16, 8;
 ; CHECK-NEXT:    bfe.u32 %r9, %r1, 16, 8;
-; CHECK-NEXT:    setp.eq.u32 %p3, %r9, %r8;
+; CHECK-NEXT:    setp.eq.b32 %p3, %r9, %r8;
 ; CHECK-NEXT:    bfe.u32 %r10, %r2, 24, 8;
 ; CHECK-NEXT:    bfe.u32 %r11, %r1, 24, 8;
-; CHECK-NEXT:    setp.eq.u32 %p4, %r11, %r10;
+; CHECK-NEXT:    setp.eq.b32 %p4, %r11, %r10;
 ; CHECK-NEXT:    bfe.u32 %r12, %r3, 24, 8;
 ; CHECK-NEXT:    selp.b32 %r13, %r11, %r12, %p4;
 ; CHECK-NEXT:    bfe.u32 %r14, %r3, 16, 8;
@@ -454,16 +454,16 @@ define <4 x i8> @test_ne(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c) #0 {
 ; CHECK-NEXT:    ld.param.b32 %r1, [test_ne_param_0];
 ; CHECK-NEXT:    bfe.u32 %r4, %r2, 0, 8;
 ; CHECK-NEXT:    bfe.u32 %r5, %r1, 0, 8;
-; CHECK-NEXT:    setp.ne.u32 %p1, %r5, %r4;
+; CHECK-NEXT:    setp.ne.b32 %p1, %r5, %r4;
 ; CHECK-NEXT:    bfe.u32 %r6, %r2, 8, 8;
 ; CHECK-NEXT:    bfe.u32 %r7, %r1, 8, 8;
-; CHECK-NEXT:    setp.ne.u32 %p2, %r7, %r6;
+; CHECK-NEXT:    setp.ne.b32 %p2, %r7, %r6;
 ; CHECK-NEXT:    bfe.u32 %r8, %r2, 16, 8;
 ; CHECK-NEXT:    bfe.u32 %r9, %r1, 16, 8;
-; CHECK-NEXT:    setp.ne.u32 %p3, %r9, %r8;
+; CHECK-NEXT:    setp.ne.b32 %p3, %r9, %r8;
 ; CHECK-NEXT:    bfe.u32 %r10, %r2, 24, 8;
 ; CHECK-NEXT:    bfe.u32 %r11, %r1, 24, 8;
-; CHECK-NEXT:    setp.ne.u32 %p4, %r11, %r10;
+; CHECK-NEXT:    setp.ne.b32 %p4, %r11, %r10;
 ; CHECK-NEXT:    bfe.u32 %r12, %r3, 24, 8;
 ; CHECK-NEXT:    selp.b32 %r13, %r11, %r12, %p4;
 ; CHECK-NEXT:    bfe.u32 %r14, %r3, 16, 8;
@@ -920,16 +920,16 @@ define <4 x i8> @test_select_cc(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8>
 ; CHECK-NEXT:    ld.param.b32 %r1, [test_select_cc_param_0];
 ; CHECK-NEXT:    bfe.u32 %r5, %r4, 0, 8;
 ; CHECK-NEXT:    bfe.u32 %r6, %r3, 0, 8;
-; CHECK-NEXT:    setp.ne.u32 %p1, %r6, %r5;
+; CHECK-NEXT:    setp.ne.b32 %p1, %r6, %r5;
 ; CHECK-NEXT:    bfe.u32 %r7, %r4, 8, 8;
 ; CHECK-NEXT:    bfe.u32 %r8, %r3, 8, 8;
-; CHECK-NEXT:    setp.ne.u32 %p2, %r8, %r7;
+; CHECK-NEXT:    setp.ne.b32 %p2, %r8, %r7;
 ; CHECK-NEXT:    bfe.u32 %r9, %r4, 16, 8;
 ; CHECK-NEXT:    bfe.u32 %r10, %r3, 16, 8;
-; CHECK-NEXT:    setp.ne.u32 %p3, %r10, %r9;
+; CHECK-NEXT:    setp.ne.b32 %p3, %r10, %r9;
 ; CHECK-NEXT:    bfe.u32 %r11, %r4, 24, 8;
 ; CHECK-NEXT:    bfe.u32 %r12, %r3, 24, 8;
-; CHECK-NEXT:    setp.ne.u32 %p4, %r12, %r11;
+; CHECK-NEXT:    setp.ne.b32 %p4, %r12, %r11;
 ; CHECK-NEXT:    bfe.u32 %r13, %r2, 24, 8;
 ; CHECK-NEXT:    bfe.u32 %r14, %r1, 24, 8;
 ; CHECK-NEXT:    selp.b32 %r15, %r14, %r13, %p4;
@@ -965,16 +965,16 @@ define <4 x i32> @test_select_cc_i32_i8(<4 x i32> %a, <4 x i32> %b,
 ; CHECK-NEXT:    ld.param.b32 %r9, [test_select_cc_i32_i8_param_2];
 ; CHECK-NEXT:    bfe.u32 %r11, %r10, 0, 8;
 ; CHECK-NEXT:    bfe.u32 %r12, %r9, 0, 8;
-; CHECK-NEXT:    setp.ne.u32 %p1, %r12, %r11;
+; CHECK-NEXT:    setp.ne.b32 %p1, %r12, %r11;
 ; CHECK-NEXT:    bfe.u32 %r13, %r10, 8, 8;
 ; CHECK-NEXT:    bfe.u32 %r14, %r9, 8, 8;
-; CHECK-NEXT:    setp.ne.u32 %p2, %r14, %r13;
+; CHECK-NEXT:    setp.ne.b32 %p2, %r14, %r13;
 ; CHECK-NEXT:    bfe.u32 %r15, %r10, 16, 8;
 ; CHECK-NEXT:    bfe.u32 %r16, %r9, 16, 8;
-; CHECK-NEXT:    setp.ne.u32 %p3, %r16, %r15;
+; CHECK-NEXT:    setp.ne.b32 %p3, %r16, %r15;
 ; CHECK-NEXT:    bfe.u32 %r17, %r10, 24, 8;
 ; CHECK-NEXT:    bfe.u32 %r18, %r9, 24, 8;
-; CHECK-NEXT:    setp.ne.u32 %p4, %r18, %r17;
+; CHECK-NEXT:    setp.ne.b32 %p4, %r18, %r17;
 ; CHECK-NEXT:    selp.b32 %r19, %r4, %r8, %p4;
 ; CHECK-NEXT:    selp.b32 %r20, %r3, %r7, %p3;
 ; CHECK-NEXT:    selp.b32 %r21, %r2, %r6, %p2;
@@ -998,10 +998,10 @@ define <4 x i8> @test_select_cc_i8_i32(<4 x i8> %a, <4 x i8> %b,
 ; CHECK-NEXT:    ld.param.v4.b32 {%r3, %r4, %r5, %r6}, [test_select_cc_i8_i32_param_2];
 ; CHECK-NEXT:    ld.param.b32 %r2, [test_select_cc_i8_i32_param_1];
 ; CHECK-NEXT:    ld.param.b32 %r1, [test_select_cc_i8_i32_param_0];
-; CHECK-NEXT:    setp.ne.s32 %p1, %r3, %r7;
-; CHECK-NEXT:    setp.ne.s32 %p2, %r4, %r8;
-; CHECK-NEXT:    setp.ne.s32 %p3, %r5, %r9;
-; CHECK-NEXT:    setp.ne.s32 %p4, %r6, %r10;
+; CHECK-NEXT:    setp.ne.b32 %p1, %r3, %r7;
+; CHECK-NEXT:    setp.ne.b32 %p2, %r4, %r8;
+; CHECK-NEXT:    setp.ne.b32 %p3, %r5, %r9;
+; CHECK-NEXT:    setp.ne.b32 %p4, %r6, %r10;
 ; CHECK-NEXT:    bfe.u32 %r11, %r2, 24, 8;
 ; CHECK-NEXT:    bfe.u32 %r12, %r1, 24, 8;
 ; CHECK-NEXT:    selp.b32 %r13, %r12, %r11, %p4;
@@ -1421,16 +1421,16 @@ define void @test_sext_v4i1_to_v4i8(ptr %a, ptr %b, ptr %c) {
 ; CHECK-NEXT:    ld.b32 %r2, [%rd2];
 ; CHECK-NEXT:    bfe.u32 %r3, %r2, 0, 8;
 ; CHECK-NEXT:    bfe.u32 %r4, %r1, 0, 8;
-; CHECK-NEXT:    setp.hi.u32 %p1, %r4, %r3;
+; CHECK-NEXT:    setp.gt.u32 %p1, %r4, %r3;
 ; CHECK-NEXT:    bfe.u32 %r5, %r2, 8, 8;
 ; CHECK-NEXT:    bfe.u32 %r6, %r1, 8, 8;
-; CHECK-NEXT:    setp.hi.u32 %p2, %r6, %r5;
+; CHECK-NEXT:    setp.gt.u32 %p2, %r6, %r5;
 ; CHECK-NEXT:    bfe.u32 %r7, %r2, 16, 8;
 ; CHECK-NEXT:    bfe.u32 %r8, %r1, 16, 8;
-; CHECK-NEXT:    setp.hi.u32 %p3, %r8, %r7;
+; CHECK-NEXT:    setp.gt.u32 %p3, %r8, %r7;
 ; CHECK-NEXT:    bfe.u32 %r9, %r2, 24, 8;
 ; CHECK-NEXT:    bfe.u32 %r10, %r1, 24, 8;
-; CHECK-NEXT:    setp.hi.u32 %p4, %r10, %r9;
+; CHECK-NEXT:    setp.gt.u32 %p4, %r10, %r9;
 ; CHECK-NEXT:    selp.b32 %r11, -1, 0, %p4;
 ; CHECK-NEXT:    selp.b32 %r12, -1, 0, %p3;
 ; CHECK-NEXT:    prmt.b32 %r13, %r12, %r11, 0x3340U;

diff  --git a/llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll b/llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll
index 5cfdbb7447ad8..307e2c8550914 100644
--- a/llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll
+++ b/llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll
@@ -62,7 +62,7 @@ define void @test_b128_input_from_select(ptr nocapture readonly %flag) {
 ; CHECK-NEXT:    ld.param.b64 %rd2, [test_b128_input_from_select_param_0];
 ; CHECK-NEXT:    cvta.to.global.u64 %rd3, %rd2;
 ; CHECK-NEXT:    ld.global.b8 %rs1, [%rd3];
-; CHECK-NEXT:    setp.eq.s16 %p1, %rs1, 0;
+; CHECK-NEXT:    setp.eq.b16 %p1, %rs1, 0;
 ; CHECK-NEXT:    selp.b64 %rd4, 24, 42, %p1;
 ; CHECK-NEXT:    mov.b64 %rd5, 0;
 ; CHECK-NEXT:    mov.b128 %rq1, {%rd4, %rd5};

diff  --git a/llvm/test/CodeGen/NVPTX/inline-asm-b128-test3.ll b/llvm/test/CodeGen/NVPTX/inline-asm-b128-test3.ll
index 6dbf44f38aa2f..037d7df1aee59 100644
--- a/llvm/test/CodeGen/NVPTX/inline-asm-b128-test3.ll
+++ b/llvm/test/CodeGen/NVPTX/inline-asm-b128-test3.ll
@@ -16,7 +16,7 @@ define void @test_b128_in_loop() {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0:
 ; CHECK-NEXT:    ld.global.s32 %rd1, [size];
-; CHECK-NEXT:    setp.eq.s64 %p1, %rd1, 0;
+; CHECK-NEXT:    setp.eq.b64 %p1, %rd1, 0;
 ; CHECK-NEXT:    @%p1 bra $L__BB0_3;
 ; CHECK-NEXT:  // %bb.1: // %BB1
 ; CHECK-NEXT:    ld.global.v2.b64 {%rd12, %rd13}, [x];
@@ -36,7 +36,7 @@ define void @test_b128_in_loop() {
 ; CHECK-NEXT:    mov.b128 {%rd12, %rd13}, %rq1;
 ; CHECK-NEXT:    st.global.v2.b64 [x], {%rd12, %rd13};
 ; CHECK-NEXT:    add.s64 %rd14, %rd14, 1;
-; CHECK-NEXT:    setp.ne.s64 %p2, %rd1, %rd14;
+; CHECK-NEXT:    setp.ne.b64 %p2, %rd1, %rd14;
 ; CHECK-NEXT:    @%p2 bra $L__BB0_2;
 ; CHECK-NEXT:  $L__BB0_3: // %BB3
 ; CHECK-NEXT:    ret;

diff  --git a/llvm/test/CodeGen/NVPTX/jump-table.ll b/llvm/test/CodeGen/NVPTX/jump-table.ll
index 955befc624c71..a6238352179ca 100644
--- a/llvm/test/CodeGen/NVPTX/jump-table.ll
+++ b/llvm/test/CodeGen/NVPTX/jump-table.ll
@@ -99,7 +99,7 @@ define i32 @test2(i32 %tmp158) {
 ; CHECK-NEXT:    st.param.b32 [func_retval0], 12;
 ; CHECK-NEXT:    ret;
 ; CHECK-NEXT:  $L__BB1_5: // %entry
-; CHECK-NEXT:    setp.eq.s32 %p3, %r1, 1024;
+; CHECK-NEXT:    setp.eq.b32 %p3, %r1, 1024;
 ; CHECK-NEXT:    @%p3 bra $L__BB1_3;
 ; CHECK-NEXT:    bra.uni $L__BB1_6;
 ; CHECK-NEXT:  $L__BB1_3: // %bb338

diff  --git a/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll b/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll
index d494ee30c2821..b6a00e03a80ab 100644
--- a/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll
+++ b/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll
@@ -613,7 +613,7 @@ define ptx_kernel void @foo19(ptr noalias readonly %from, ptr %to, i32 %n) {
 ; SM20-NEXT:    add.rn.f32 %r9, %r7, %r9;
 ; SM20-NEXT:    add.s64 %rd7, %rd7, 4;
 ; SM20-NEXT:    add.s32 %r8, %r8, -1;
-; SM20-NEXT:    setp.ne.s32 %p1, %r8, 0;
+; SM20-NEXT:    setp.ne.b32 %p1, %r8, 0;
 ; SM20-NEXT:    @%p1 bra $L__BB18_1;
 ; SM20-NEXT:  // %bb.2: // %exit
 ; SM20-NEXT:    st.global.b32 [%rd2], %r9;
@@ -638,7 +638,7 @@ define ptx_kernel void @foo19(ptr noalias readonly %from, ptr %to, i32 %n) {
 ; SM35-NEXT:    add.rn.f32 %r9, %r7, %r9;
 ; SM35-NEXT:    add.s64 %rd7, %rd7, 4;
 ; SM35-NEXT:    add.s32 %r8, %r8, -1;
-; SM35-NEXT:    setp.ne.s32 %p1, %r8, 0;
+; SM35-NEXT:    setp.ne.b32 %p1, %r8, 0;
 ; SM35-NEXT:    @%p1 bra $L__BB18_1;
 ; SM35-NEXT:  // %bb.2: // %exit
 ; SM35-NEXT:    st.global.b32 [%rd2], %r9;

diff  --git a/llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll b/llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll
index 99212fc0dff79..297b2b984cdae 100644
--- a/llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll
@@ -160,7 +160,7 @@ entry:
 
 ; PTX-LABEL:  .visible .func (.param .b64 func_retval0) memmove_caller(
 ; PTX:        ld.param.b64 %rd[[N:[0-9]+]]
-; PTX-DAG:    setp.eq.s64 %p[[NEQ0:[0-9]+]], %rd[[N]], 0
+; PTX-DAG:    setp.eq.b64 %p[[NEQ0:[0-9]+]], %rd[[N]], 0
 ; PTX-DAG:    setp.ge.u64 %p[[SRC_GT_THAN_DST:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
 ; PTX-NEXT:   @%p[[SRC_GT_THAN_DST]] bra $L__BB[[FORWARD_BB:[0-9_]+]]
 ; -- this is the backwards copying BB

diff  --git a/llvm/test/CodeGen/NVPTX/math-intrins.ll b/llvm/test/CodeGen/NVPTX/math-intrins.ll
index dde71b009d564..e9635e9393984 100644
--- a/llvm/test/CodeGen/NVPTX/math-intrins.ll
+++ b/llvm/test/CodeGen/NVPTX/math-intrins.ll
@@ -621,9 +621,9 @@ define half @minimum_half(half %a, half %b) {
 ; CHECK-NOF16-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
 ; CHECK-NOF16-NEXT:    setp.nan.f32 %p2, %r2, %r1;
 ; CHECK-NOF16-NEXT:    selp.b16 %rs4, 0x7E00, %rs3, %p2;
-; CHECK-NOF16-NEXT:    setp.eq.s16 %p3, %rs1, -32768;
+; CHECK-NOF16-NEXT:    setp.eq.b16 %p3, %rs1, -32768;
 ; CHECK-NOF16-NEXT:    selp.b16 %rs5, %rs1, %rs4, %p3;
-; CHECK-NOF16-NEXT:    setp.eq.s16 %p4, %rs2, -32768;
+; CHECK-NOF16-NEXT:    setp.eq.b16 %p4, %rs2, -32768;
 ; CHECK-NOF16-NEXT:    selp.b16 %rs6, %rs2, %rs5, %p4;
 ; CHECK-NOF16-NEXT:    cvt.f32.f16 %r3, %rs4;
 ; CHECK-NOF16-NEXT:    setp.eq.f32 %p5, %r3, 0f00000000;
@@ -657,9 +657,9 @@ define half @minimum_half(half %a, half %b) {
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
 ; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p2, %r2, %r1;
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs4, 0x7E00, %rs3, %p2;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p3, %rs1, -32768;
+; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p3, %rs1, -32768;
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs5, %rs1, %rs4, %p3;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p4, %rs2, -32768;
+; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p4, %rs2, -32768;
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs6, %rs2, %rs5, %p4;
 ; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r3, %rs4;
 ; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p5, %r3, 0f00000000;
@@ -682,9 +682,9 @@ define float @minimum_float(float %a, float %b) {
 ; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r2;
 ; CHECK-NOF16-NEXT:    min.f32 %r3, %r1, %r2;
 ; CHECK-NOF16-NEXT:    selp.f32 %r4, 0f7FC00000, %r3, %p1;
-; CHECK-NOF16-NEXT:    setp.eq.s32 %p2, %r1, -2147483648;
+; CHECK-NOF16-NEXT:    setp.eq.b32 %p2, %r1, -2147483648;
 ; CHECK-NOF16-NEXT:    selp.f32 %r5, %r1, %r4, %p2;
-; CHECK-NOF16-NEXT:    setp.eq.s32 %p3, %r2, -2147483648;
+; CHECK-NOF16-NEXT:    setp.eq.b32 %p3, %r2, -2147483648;
 ; CHECK-NOF16-NEXT:    selp.f32 %r6, %r2, %r5, %p3;
 ; CHECK-NOF16-NEXT:    setp.eq.f32 %p4, %r4, 0f00000000;
 ; CHECK-NOF16-NEXT:    selp.f32 %r7, %r6, %r4, %p4;
@@ -727,7 +727,7 @@ define float @minimum_imm1(float %a) {
 ; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
 ; CHECK-NOF16-NEXT:    min.f32 %r2, %r1, 0f00000000;
 ; CHECK-NOF16-NEXT:    selp.f32 %r3, 0f7FC00000, %r2, %p1;
-; CHECK-NOF16-NEXT:    setp.eq.s32 %p2, %r1, -2147483648;
+; CHECK-NOF16-NEXT:    setp.eq.b32 %p2, %r1, -2147483648;
 ; CHECK-NOF16-NEXT:    selp.f32 %r4, %r1, %r3, %p2;
 ; CHECK-NOF16-NEXT:    setp.eq.f32 %p3, %r3, 0f00000000;
 ; CHECK-NOF16-NEXT:    selp.f32 %r5, %r4, %r3, %p3;
@@ -768,7 +768,7 @@ define float @minimum_imm2(float %a) {
 ; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r1;
 ; CHECK-NOF16-NEXT:    min.f32 %r2, %r1, 0f00000000;
 ; CHECK-NOF16-NEXT:    selp.f32 %r3, 0f7FC00000, %r2, %p1;
-; CHECK-NOF16-NEXT:    setp.eq.s32 %p2, %r1, -2147483648;
+; CHECK-NOF16-NEXT:    setp.eq.b32 %p2, %r1, -2147483648;
 ; CHECK-NOF16-NEXT:    selp.f32 %r4, %r1, %r3, %p2;
 ; CHECK-NOF16-NEXT:    setp.eq.f32 %p3, %r3, 0f00000000;
 ; CHECK-NOF16-NEXT:    selp.f32 %r5, %r4, %r3, %p3;
@@ -810,9 +810,9 @@ define float @minimum_float_ftz(float %a, float %b) #1 {
 ; CHECK-NOF16-NEXT:    setp.nan.ftz.f32 %p1, %r1, %r2;
 ; CHECK-NOF16-NEXT:    min.ftz.f32 %r3, %r1, %r2;
 ; CHECK-NOF16-NEXT:    selp.f32 %r4, 0f7FC00000, %r3, %p1;
-; CHECK-NOF16-NEXT:    setp.eq.s32 %p2, %r1, -2147483648;
+; CHECK-NOF16-NEXT:    setp.eq.b32 %p2, %r1, -2147483648;
 ; CHECK-NOF16-NEXT:    selp.f32 %r5, %r1, %r4, %p2;
-; CHECK-NOF16-NEXT:    setp.eq.s32 %p3, %r2, -2147483648;
+; CHECK-NOF16-NEXT:    setp.eq.b32 %p3, %r2, -2147483648;
 ; CHECK-NOF16-NEXT:    selp.f32 %r6, %r2, %r5, %p3;
 ; CHECK-NOF16-NEXT:    setp.eq.ftz.f32 %p4, %r4, 0f00000000;
 ; CHECK-NOF16-NEXT:    selp.f32 %r7, %r6, %r4, %p4;
@@ -826,7 +826,7 @@ define float @minimum_float_ftz(float %a, float %b) #1 {
 ; CHECK-F16-NEXT:  // %bb.0:
 ; CHECK-F16-NEXT:    ld.param.b32 %r1, [minimum_float_ftz_param_0];
 ; CHECK-F16-NEXT:    ld.param.b32 %r2, [minimum_float_ftz_param_1];
-; CHECK-F16-NEXT:    min.NaN.ftz.f32 %r3, %r1, %r2;
+; CHECK-F16-NEXT:    min.ftz.NaN.f32 %r3, %r1, %r2;
 ; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r3;
 ; CHECK-F16-NEXT:    ret;
 ;
@@ -837,7 +837,7 @@ define float @minimum_float_ftz(float %a, float %b) #1 {
 ; CHECK-SM80-NOF16-NEXT:  // %bb.0:
 ; CHECK-SM80-NOF16-NEXT:    ld.param.b32 %r1, [minimum_float_ftz_param_0];
 ; CHECK-SM80-NOF16-NEXT:    ld.param.b32 %r2, [minimum_float_ftz_param_1];
-; CHECK-SM80-NOF16-NEXT:    min.NaN.ftz.f32 %r3, %r1, %r2;
+; CHECK-SM80-NOF16-NEXT:    min.ftz.NaN.f32 %r3, %r1, %r2;
 ; CHECK-SM80-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
 ; CHECK-SM80-NOF16-NEXT:    ret;
   %x = call float @llvm.minimum.f32(float %a, float %b)
@@ -856,9 +856,9 @@ define double @minimum_double(double %a, double %b) {
 ; CHECK-NEXT:    setp.nan.f64 %p1, %rd1, %rd2;
 ; CHECK-NEXT:    min.f64 %rd3, %rd1, %rd2;
 ; CHECK-NEXT:    selp.f64 %rd4, 0d7FF8000000000000, %rd3, %p1;
-; CHECK-NEXT:    setp.eq.s64 %p2, %rd1, -9223372036854775808;
+; CHECK-NEXT:    setp.eq.b64 %p2, %rd1, -9223372036854775808;
 ; CHECK-NEXT:    selp.f64 %rd5, %rd1, %rd4, %p2;
-; CHECK-NEXT:    setp.eq.s64 %p3, %rd2, -9223372036854775808;
+; CHECK-NEXT:    setp.eq.b64 %p3, %rd2, -9223372036854775808;
 ; CHECK-NEXT:    selp.f64 %rd6, %rd2, %rd5, %p3;
 ; CHECK-NEXT:    setp.eq.f64 %p4, %rd4, 0d0000000000000000;
 ; CHECK-NEXT:    selp.f64 %rd7, %rd6, %rd4, %p4;
@@ -884,9 +884,9 @@ define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) {
 ; CHECK-NOF16-NEXT:    selp.b16 %rs5, %rs2, %rs4, %p1;
 ; CHECK-NOF16-NEXT:    setp.nan.f32 %p2, %r2, %r1;
 ; CHECK-NOF16-NEXT:    selp.b16 %rs6, 0x7E00, %rs5, %p2;
-; CHECK-NOF16-NEXT:    setp.eq.s16 %p3, %rs2, -32768;
+; CHECK-NOF16-NEXT:    setp.eq.b16 %p3, %rs2, -32768;
 ; CHECK-NOF16-NEXT:    selp.b16 %rs7, %rs2, %rs6, %p3;
-; CHECK-NOF16-NEXT:    setp.eq.s16 %p4, %rs4, -32768;
+; CHECK-NOF16-NEXT:    setp.eq.b16 %p4, %rs4, -32768;
 ; CHECK-NOF16-NEXT:    selp.b16 %rs8, %rs4, %rs7, %p4;
 ; CHECK-NOF16-NEXT:    cvt.f32.f16 %r3, %rs6;
 ; CHECK-NOF16-NEXT:    setp.eq.f32 %p5, %r3, 0f00000000;
@@ -897,9 +897,9 @@ define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) {
 ; CHECK-NOF16-NEXT:    selp.b16 %rs10, %rs1, %rs3, %p6;
 ; CHECK-NOF16-NEXT:    setp.nan.f32 %p7, %r5, %r4;
 ; CHECK-NOF16-NEXT:    selp.b16 %rs11, 0x7E00, %rs10, %p7;
-; CHECK-NOF16-NEXT:    setp.eq.s16 %p8, %rs1, -32768;
+; CHECK-NOF16-NEXT:    setp.eq.b16 %p8, %rs1, -32768;
 ; CHECK-NOF16-NEXT:    selp.b16 %rs12, %rs1, %rs11, %p8;
-; CHECK-NOF16-NEXT:    setp.eq.s16 %p9, %rs3, -32768;
+; CHECK-NOF16-NEXT:    setp.eq.b16 %p9, %rs3, -32768;
 ; CHECK-NOF16-NEXT:    selp.b16 %rs13, %rs3, %rs12, %p9;
 ; CHECK-NOF16-NEXT:    cvt.f32.f16 %r6, %rs11;
 ; CHECK-NOF16-NEXT:    setp.eq.f32 %p10, %r6, 0f00000000;
@@ -933,9 +933,9 @@ define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) {
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs5, %rs2, %rs4, %p1;
 ; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p2, %r2, %r1;
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs6, 0x7E00, %rs5, %p2;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p3, %rs2, -32768;
+; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p3, %rs2, -32768;
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs7, %rs2, %rs6, %p3;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p4, %rs4, -32768;
+; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p4, %rs4, -32768;
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs8, %rs4, %rs7, %p4;
 ; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r3, %rs6;
 ; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p5, %r3, 0f00000000;
@@ -946,9 +946,9 @@ define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) {
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs10, %rs1, %rs3, %p6;
 ; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p7, %r5, %r4;
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs11, 0x7E00, %rs10, %p7;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p8, %rs1, -32768;
+; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p8, %rs1, -32768;
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs12, %rs1, %rs11, %p8;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p9, %rs3, -32768;
+; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p9, %rs3, -32768;
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs13, %rs3, %rs12, %p9;
 ; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r6, %rs11;
 ; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p10, %r6, 0f00000000;
@@ -1152,9 +1152,9 @@ define half @maximum_half(half %a, half %b) {
 ; CHECK-NOF16-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
 ; CHECK-NOF16-NEXT:    setp.nan.f32 %p2, %r2, %r1;
 ; CHECK-NOF16-NEXT:    selp.b16 %rs4, 0x7E00, %rs3, %p2;
-; CHECK-NOF16-NEXT:    setp.eq.s16 %p3, %rs1, 0;
+; CHECK-NOF16-NEXT:    setp.eq.b16 %p3, %rs1, 0;
 ; CHECK-NOF16-NEXT:    selp.b16 %rs5, %rs1, %rs4, %p3;
-; CHECK-NOF16-NEXT:    setp.eq.s16 %p4, %rs2, 0;
+; CHECK-NOF16-NEXT:    setp.eq.b16 %p4, %rs2, 0;
 ; CHECK-NOF16-NEXT:    selp.b16 %rs6, %rs2, %rs5, %p4;
 ; CHECK-NOF16-NEXT:    cvt.f32.f16 %r3, %rs4;
 ; CHECK-NOF16-NEXT:    setp.eq.f32 %p5, %r3, 0f00000000;
@@ -1188,9 +1188,9 @@ define half @maximum_half(half %a, half %b) {
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs3, %rs1, %rs2, %p1;
 ; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p2, %r2, %r1;
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs4, 0x7E00, %rs3, %p2;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p3, %rs1, 0;
+; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p3, %rs1, 0;
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs5, %rs1, %rs4, %p3;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p4, %rs2, 0;
+; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p4, %rs2, 0;
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs6, %rs2, %rs5, %p4;
 ; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r3, %rs4;
 ; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p5, %r3, 0f00000000;
@@ -1291,9 +1291,9 @@ define float @maximum_float(float %a, float %b) {
 ; CHECK-NOF16-NEXT:    setp.nan.f32 %p1, %r1, %r2;
 ; CHECK-NOF16-NEXT:    max.f32 %r3, %r1, %r2;
 ; CHECK-NOF16-NEXT:    selp.f32 %r4, 0f7FC00000, %r3, %p1;
-; CHECK-NOF16-NEXT:    setp.eq.s32 %p2, %r1, 0;
+; CHECK-NOF16-NEXT:    setp.eq.b32 %p2, %r1, 0;
 ; CHECK-NOF16-NEXT:    selp.f32 %r5, %r1, %r4, %p2;
-; CHECK-NOF16-NEXT:    setp.eq.s32 %p3, %r2, 0;
+; CHECK-NOF16-NEXT:    setp.eq.b32 %p3, %r2, 0;
 ; CHECK-NOF16-NEXT:    selp.f32 %r6, %r2, %r5, %p3;
 ; CHECK-NOF16-NEXT:    setp.eq.f32 %p4, %r4, 0f00000000;
 ; CHECK-NOF16-NEXT:    selp.f32 %r7, %r6, %r4, %p4;
@@ -1337,9 +1337,9 @@ define float @maximum_float_ftz(float %a, float %b) #1 {
 ; CHECK-NOF16-NEXT:    setp.nan.ftz.f32 %p1, %r1, %r2;
 ; CHECK-NOF16-NEXT:    max.ftz.f32 %r3, %r1, %r2;
 ; CHECK-NOF16-NEXT:    selp.f32 %r4, 0f7FC00000, %r3, %p1;
-; CHECK-NOF16-NEXT:    setp.eq.s32 %p2, %r1, 0;
+; CHECK-NOF16-NEXT:    setp.eq.b32 %p2, %r1, 0;
 ; CHECK-NOF16-NEXT:    selp.f32 %r5, %r1, %r4, %p2;
-; CHECK-NOF16-NEXT:    setp.eq.s32 %p3, %r2, 0;
+; CHECK-NOF16-NEXT:    setp.eq.b32 %p3, %r2, 0;
 ; CHECK-NOF16-NEXT:    selp.f32 %r6, %r2, %r5, %p3;
 ; CHECK-NOF16-NEXT:    setp.eq.ftz.f32 %p4, %r4, 0f00000000;
 ; CHECK-NOF16-NEXT:    selp.f32 %r7, %r6, %r4, %p4;
@@ -1353,7 +1353,7 @@ define float @maximum_float_ftz(float %a, float %b) #1 {
 ; CHECK-F16-NEXT:  // %bb.0:
 ; CHECK-F16-NEXT:    ld.param.b32 %r1, [maximum_float_ftz_param_0];
 ; CHECK-F16-NEXT:    ld.param.b32 %r2, [maximum_float_ftz_param_1];
-; CHECK-F16-NEXT:    max.NaN.ftz.f32 %r3, %r1, %r2;
+; CHECK-F16-NEXT:    max.ftz.NaN.f32 %r3, %r1, %r2;
 ; CHECK-F16-NEXT:    st.param.b32 [func_retval0], %r3;
 ; CHECK-F16-NEXT:    ret;
 ;
@@ -1364,7 +1364,7 @@ define float @maximum_float_ftz(float %a, float %b) #1 {
 ; CHECK-SM80-NOF16-NEXT:  // %bb.0:
 ; CHECK-SM80-NOF16-NEXT:    ld.param.b32 %r1, [maximum_float_ftz_param_0];
 ; CHECK-SM80-NOF16-NEXT:    ld.param.b32 %r2, [maximum_float_ftz_param_1];
-; CHECK-SM80-NOF16-NEXT:    max.NaN.ftz.f32 %r3, %r1, %r2;
+; CHECK-SM80-NOF16-NEXT:    max.ftz.NaN.f32 %r3, %r1, %r2;
 ; CHECK-SM80-NOF16-NEXT:    st.param.b32 [func_retval0], %r3;
 ; CHECK-SM80-NOF16-NEXT:    ret;
   %x = call float @llvm.maximum.f32(float %a, float %b)
@@ -1383,9 +1383,9 @@ define double @maximum_double(double %a, double %b) {
 ; CHECK-NEXT:    setp.nan.f64 %p1, %rd1, %rd2;
 ; CHECK-NEXT:    max.f64 %rd3, %rd1, %rd2;
 ; CHECK-NEXT:    selp.f64 %rd4, 0d7FF8000000000000, %rd3, %p1;
-; CHECK-NEXT:    setp.eq.s64 %p2, %rd1, 0;
+; CHECK-NEXT:    setp.eq.b64 %p2, %rd1, 0;
 ; CHECK-NEXT:    selp.f64 %rd5, %rd1, %rd4, %p2;
-; CHECK-NEXT:    setp.eq.s64 %p3, %rd2, 0;
+; CHECK-NEXT:    setp.eq.b64 %p3, %rd2, 0;
 ; CHECK-NEXT:    selp.f64 %rd6, %rd2, %rd5, %p3;
 ; CHECK-NEXT:    setp.eq.f64 %p4, %rd4, 0d0000000000000000;
 ; CHECK-NEXT:    selp.f64 %rd7, %rd6, %rd4, %p4;
@@ -1411,9 +1411,9 @@ define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) {
 ; CHECK-NOF16-NEXT:    selp.b16 %rs5, %rs2, %rs4, %p1;
 ; CHECK-NOF16-NEXT:    setp.nan.f32 %p2, %r2, %r1;
 ; CHECK-NOF16-NEXT:    selp.b16 %rs6, 0x7E00, %rs5, %p2;
-; CHECK-NOF16-NEXT:    setp.eq.s16 %p3, %rs2, 0;
+; CHECK-NOF16-NEXT:    setp.eq.b16 %p3, %rs2, 0;
 ; CHECK-NOF16-NEXT:    selp.b16 %rs7, %rs2, %rs6, %p3;
-; CHECK-NOF16-NEXT:    setp.eq.s16 %p4, %rs4, 0;
+; CHECK-NOF16-NEXT:    setp.eq.b16 %p4, %rs4, 0;
 ; CHECK-NOF16-NEXT:    selp.b16 %rs8, %rs4, %rs7, %p4;
 ; CHECK-NOF16-NEXT:    cvt.f32.f16 %r3, %rs6;
 ; CHECK-NOF16-NEXT:    setp.eq.f32 %p5, %r3, 0f00000000;
@@ -1424,9 +1424,9 @@ define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) {
 ; CHECK-NOF16-NEXT:    selp.b16 %rs10, %rs1, %rs3, %p6;
 ; CHECK-NOF16-NEXT:    setp.nan.f32 %p7, %r5, %r4;
 ; CHECK-NOF16-NEXT:    selp.b16 %rs11, 0x7E00, %rs10, %p7;
-; CHECK-NOF16-NEXT:    setp.eq.s16 %p8, %rs1, 0;
+; CHECK-NOF16-NEXT:    setp.eq.b16 %p8, %rs1, 0;
 ; CHECK-NOF16-NEXT:    selp.b16 %rs12, %rs1, %rs11, %p8;
-; CHECK-NOF16-NEXT:    setp.eq.s16 %p9, %rs3, 0;
+; CHECK-NOF16-NEXT:    setp.eq.b16 %p9, %rs3, 0;
 ; CHECK-NOF16-NEXT:    selp.b16 %rs13, %rs3, %rs12, %p9;
 ; CHECK-NOF16-NEXT:    cvt.f32.f16 %r6, %rs11;
 ; CHECK-NOF16-NEXT:    setp.eq.f32 %p10, %r6, 0f00000000;
@@ -1460,9 +1460,9 @@ define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) {
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs5, %rs2, %rs4, %p1;
 ; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p2, %r2, %r1;
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs6, 0x7E00, %rs5, %p2;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p3, %rs2, 0;
+; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p3, %rs2, 0;
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs7, %rs2, %rs6, %p3;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p4, %rs4, 0;
+; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p4, %rs4, 0;
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs8, %rs4, %rs7, %p4;
 ; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r3, %rs6;
 ; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p5, %r3, 0f00000000;
@@ -1473,9 +1473,9 @@ define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) {
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs10, %rs1, %rs3, %p6;
 ; CHECK-SM80-NOF16-NEXT:    setp.nan.f32 %p7, %r5, %r4;
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs11, 0x7E00, %rs10, %p7;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p8, %rs1, 0;
+; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p8, %rs1, 0;
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs12, %rs1, %rs11, %p8;
-; CHECK-SM80-NOF16-NEXT:    setp.eq.s16 %p9, %rs3, 0;
+; CHECK-SM80-NOF16-NEXT:    setp.eq.b16 %p9, %rs3, 0;
 ; CHECK-SM80-NOF16-NEXT:    selp.b16 %rs13, %rs3, %rs12, %p9;
 ; CHECK-SM80-NOF16-NEXT:    cvt.f32.f16 %r6, %rs11;
 ; CHECK-SM80-NOF16-NEXT:    setp.eq.f32 %p10, %r6, 0f00000000;

diff  --git a/llvm/test/CodeGen/NVPTX/sext-setcc.ll b/llvm/test/CodeGen/NVPTX/sext-setcc.ll
index 0af8190f20d18..f6e6196345fcb 100644
--- a/llvm/test/CodeGen/NVPTX/sext-setcc.ll
+++ b/llvm/test/CodeGen/NVPTX/sext-setcc.ll
@@ -12,8 +12,8 @@ define <2 x i16> @sext_setcc_v2i1_to_v2i16(ptr %p) {
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:    ld.param.b64 %rd1, [sext_setcc_v2i1_to_v2i16_param_0];
 ; CHECK-NEXT:    ld.v2.b16 {%rs1, %rs2}, [%rd1];
-; CHECK-NEXT:    setp.eq.s16 %p1, %rs1, 0;
-; CHECK-NEXT:    setp.eq.s16 %p2, %rs2, 0;
+; CHECK-NEXT:    setp.eq.b16 %p1, %rs1, 0;
+; CHECK-NEXT:    setp.eq.b16 %p2, %rs2, 0;
 ; CHECK-NEXT:    selp.b16 %rs3, -1, 0, %p2;
 ; CHECK-NEXT:    selp.b16 %rs4, -1, 0, %p1;
 ; CHECK-NEXT:    st.param.v2.b16 [func_retval0], {%rs4, %rs3};
@@ -39,19 +39,19 @@ define <4 x i8> @sext_setcc_v4i1_to_v4i8(ptr %p) {
 ; CHECK-NEXT:    bfe.u32 %r2, %r1, 0, 8;
 ; CHECK-NEXT:    cvt.u16.u32 %rs1, %r2;
 ; CHECK-NEXT:    and.b16 %rs2, %rs1, 255;
-; CHECK-NEXT:    setp.eq.s16 %p1, %rs2, 0;
+; CHECK-NEXT:    setp.eq.b16 %p1, %rs2, 0;
 ; CHECK-NEXT:    bfe.u32 %r3, %r1, 8, 8;
 ; CHECK-NEXT:    cvt.u16.u32 %rs3, %r3;
 ; CHECK-NEXT:    and.b16 %rs4, %rs3, 255;
-; CHECK-NEXT:    setp.eq.s16 %p2, %rs4, 0;
+; CHECK-NEXT:    setp.eq.b16 %p2, %rs4, 0;
 ; CHECK-NEXT:    bfe.u32 %r4, %r1, 16, 8;
 ; CHECK-NEXT:    cvt.u16.u32 %rs5, %r4;
 ; CHECK-NEXT:    and.b16 %rs6, %rs5, 255;
-; CHECK-NEXT:    setp.eq.s16 %p3, %rs6, 0;
+; CHECK-NEXT:    setp.eq.b16 %p3, %rs6, 0;
 ; CHECK-NEXT:    bfe.u32 %r5, %r1, 24, 8;
 ; CHECK-NEXT:    cvt.u16.u32 %rs7, %r5;
 ; CHECK-NEXT:    and.b16 %rs8, %rs7, 255;
-; CHECK-NEXT:    setp.eq.s16 %p4, %rs8, 0;
+; CHECK-NEXT:    setp.eq.b16 %p4, %rs8, 0;
 ; CHECK-NEXT:    selp.b32 %r6, -1, 0, %p4;
 ; CHECK-NEXT:    selp.b32 %r7, -1, 0, %p3;
 ; CHECK-NEXT:    prmt.b32 %r8, %r7, %r6, 0x3340U;

diff  --git a/llvm/test/CodeGen/NVPTX/tid-range.ll b/llvm/test/CodeGen/NVPTX/tid-range.ll
index 019814e47c2b1..3ec33eace6441 100644
--- a/llvm/test/CodeGen/NVPTX/tid-range.ll
+++ b/llvm/test/CodeGen/NVPTX/tid-range.ll
@@ -12,7 +12,7 @@ define i1 @test1() {
 ; CHECK-EMPTY:
 ; CHECK-NEXT:  // %bb.0: // %entry
 ; CHECK-NEXT:    mov.u32 %r1, %tid.x;
-; CHECK-NEXT:    setp.eq.s32 %p1, %r1, 1;
+; CHECK-NEXT:    setp.eq.b32 %p1, %r1, 1;
 ; CHECK-NEXT:    selp.b32 %r2, -1, 0, %p1;
 ; CHECK-NEXT:    st.param.b32 [func_retval0], %r2;
 ; CHECK-NEXT:    ret;


        


More information about the llvm-commits mailing list