[llvm] b44c50d - [NVPTX] Rework and cleanup FTZ ISel (#146410)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 9 11:16:53 PDT 2025
Author: Alex MacLean
Date: 2025-07-09T11:16:48-07:00
New Revision: b44c50d41626b7b81da7cdfb2292a0b58fcc838f
URL: https://github.com/llvm/llvm-project/commit/b44c50d41626b7b81da7cdfb2292a0b58fcc838f
DIFF: https://github.com/llvm/llvm-project/commit/b44c50d41626b7b81da7cdfb2292a0b58fcc838f.diff
LOG: [NVPTX] Rework and cleanup FTZ ISel (#146410)
This change cleans up DAG-to-DAG instruction selection around FTZ and
SETP comparison mode. Largely these changes do not impact functionality,
though support for `{sin,cos}.approx.ftz.f32` is added.
Added:
Modified:
llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
llvm/lib/Target/NVPTX/NVPTX.h
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
llvm/test/CodeGen/NVPTX/atomics-sm70.ll
llvm/test/CodeGen/NVPTX/atomics-sm90.ll
llvm/test/CodeGen/NVPTX/atomics.ll
llvm/test/CodeGen/NVPTX/bf16-instructions.ll
llvm/test/CodeGen/NVPTX/branch-fold.mir
llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll
llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll
llvm/test/CodeGen/NVPTX/cmpxchg.ll
llvm/test/CodeGen/NVPTX/compare-int.ll
llvm/test/CodeGen/NVPTX/distributed-shared-cluster.ll
llvm/test/CodeGen/NVPTX/extractelement.ll
llvm/test/CodeGen/NVPTX/f16-instructions.ll
llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
llvm/test/CodeGen/NVPTX/fast-math.ll
llvm/test/CodeGen/NVPTX/i1-select.ll
llvm/test/CodeGen/NVPTX/i128.ll
llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll
llvm/test/CodeGen/NVPTX/inline-asm-b128-test3.ll
llvm/test/CodeGen/NVPTX/jump-table.ll
llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll
llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll
llvm/test/CodeGen/NVPTX/math-intrins.ll
llvm/test/CodeGen/NVPTX/sext-setcc.ll
llvm/test/CodeGen/NVPTX/tid-range.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
index 28f6968ee6caf..443db4391a523 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.cpp
@@ -154,73 +154,114 @@ void NVPTXInstPrinter::printCvtMode(const MCInst *MI, int OpNum, raw_ostream &O,
llvm_unreachable("Invalid conversion modifier");
}
+void NVPTXInstPrinter::printFTZFlag(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(OpNum);
+ const int Imm = MO.getImm();
+ if (Imm)
+ O << ".ftz";
+}
+
void NVPTXInstPrinter::printCmpMode(const MCInst *MI, int OpNum, raw_ostream &O,
StringRef Modifier) {
const MCOperand &MO = MI->getOperand(OpNum);
int64_t Imm = MO.getImm();
- if (Modifier == "ftz") {
- // FTZ flag
- if (Imm & NVPTX::PTXCmpMode::FTZ_FLAG)
- O << ".ftz";
- return;
- } else if (Modifier == "base") {
- switch (Imm & NVPTX::PTXCmpMode::BASE_MASK) {
+ if (Modifier == "FCmp") {
+ switch (Imm) {
default:
return;
case NVPTX::PTXCmpMode::EQ:
- O << ".eq";
+ O << "eq";
return;
case NVPTX::PTXCmpMode::NE:
- O << ".ne";
+ O << "ne";
return;
case NVPTX::PTXCmpMode::LT:
- O << ".lt";
+ O << "lt";
return;
case NVPTX::PTXCmpMode::LE:
- O << ".le";
+ O << "le";
return;
case NVPTX::PTXCmpMode::GT:
- O << ".gt";
+ O << "gt";
return;
case NVPTX::PTXCmpMode::GE:
- O << ".ge";
- return;
- case NVPTX::PTXCmpMode::LO:
- O << ".lo";
- return;
- case NVPTX::PTXCmpMode::LS:
- O << ".ls";
- return;
- case NVPTX::PTXCmpMode::HI:
- O << ".hi";
- return;
- case NVPTX::PTXCmpMode::HS:
- O << ".hs";
+ O << "ge";
return;
case NVPTX::PTXCmpMode::EQU:
- O << ".equ";
+ O << "equ";
return;
case NVPTX::PTXCmpMode::NEU:
- O << ".neu";
+ O << "neu";
return;
case NVPTX::PTXCmpMode::LTU:
- O << ".ltu";
+ O << "ltu";
return;
case NVPTX::PTXCmpMode::LEU:
- O << ".leu";
+ O << "leu";
return;
case NVPTX::PTXCmpMode::GTU:
- O << ".gtu";
+ O << "gtu";
return;
case NVPTX::PTXCmpMode::GEU:
- O << ".geu";
+ O << "geu";
return;
case NVPTX::PTXCmpMode::NUM:
- O << ".num";
+ O << "num";
return;
case NVPTX::PTXCmpMode::NotANumber:
- O << ".nan";
+ O << "nan";
+ return;
+ }
+ }
+ if (Modifier == "ICmp") {
+ switch (Imm) {
+ default:
+ llvm_unreachable("Invalid ICmp mode");
+ case NVPTX::PTXCmpMode::EQ:
+ O << "eq";
+ return;
+ case NVPTX::PTXCmpMode::NE:
+ O << "ne";
+ return;
+ case NVPTX::PTXCmpMode::LT:
+ case NVPTX::PTXCmpMode::LTU:
+ O << "lt";
+ return;
+ case NVPTX::PTXCmpMode::LE:
+ case NVPTX::PTXCmpMode::LEU:
+ O << "le";
+ return;
+ case NVPTX::PTXCmpMode::GT:
+ case NVPTX::PTXCmpMode::GTU:
+ O << "gt";
+ return;
+ case NVPTX::PTXCmpMode::GE:
+ case NVPTX::PTXCmpMode::GEU:
+ O << "ge";
+ return;
+ }
+ }
+ if (Modifier == "IType") {
+ switch (Imm) {
+ default:
+ llvm_unreachable("Invalid IType");
+ case NVPTX::PTXCmpMode::EQ:
+ case NVPTX::PTXCmpMode::NE:
+ O << "b";
+ return;
+ case NVPTX::PTXCmpMode::LT:
+ case NVPTX::PTXCmpMode::LE:
+ case NVPTX::PTXCmpMode::GT:
+ case NVPTX::PTXCmpMode::GE:
+ O << "s";
+ return;
+ case NVPTX::PTXCmpMode::LTU:
+ case NVPTX::PTXCmpMode::LEU:
+ case NVPTX::PTXCmpMode::GTU:
+ case NVPTX::PTXCmpMode::GEU:
+ O << "u";
return;
}
}
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
index 6189284e8a58c..193c436939f66 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXInstPrinter.h
@@ -54,6 +54,7 @@ class NVPTXInstPrinter : public MCInstPrinter {
void printCTAGroup(const MCInst *MI, int OpNum, raw_ostream &O);
void printCallOperand(const MCInst *MI, int OpNum, raw_ostream &O,
StringRef Modifier = {});
+ void printFTZFlag(const MCInst *MI, int OpNum, raw_ostream &O);
};
}
diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index b7fd7090299a9..15997bc3878d8 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -14,12 +14,12 @@
#ifndef LLVM_LIB_TARGET_NVPTX_NVPTX_H
#define LLVM_LIB_TARGET_NVPTX_NVPTX_H
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Target/TargetMachine.h"
-
namespace llvm {
class FunctionPass;
class MachineFunctionPass;
@@ -224,10 +224,6 @@ enum CmpMode {
LE,
GT,
GE,
- LO,
- LS,
- HI,
- HS,
EQU,
NEU,
LTU,
@@ -237,9 +233,6 @@ enum CmpMode {
NUM,
// NAN is a MACRO
NotANumber,
-
- BASE_MASK = 0xFF,
- FTZ_FLAG = 0x100
};
}
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 5631342ecc13e..429d52fb6f230 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -363,23 +363,29 @@ bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
// Map ISD:CONDCODE value to appropriate CmpMode expected by
// NVPTXInstPrinter::printCmpMode()
-static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
+SDValue NVPTXDAGToDAGISel::getPTXCmpMode(const CondCodeSDNode &CondCode) {
using NVPTX::PTXCmpMode::CmpMode;
- unsigned PTXCmpMode = [](ISD::CondCode CC) {
+ const unsigned PTXCmpMode = [](ISD::CondCode CC) {
switch (CC) {
default:
llvm_unreachable("Unexpected condition code.");
case ISD::SETOEQ:
+ case ISD::SETEQ:
return CmpMode::EQ;
case ISD::SETOGT:
+ case ISD::SETGT:
return CmpMode::GT;
case ISD::SETOGE:
+ case ISD::SETGE:
return CmpMode::GE;
case ISD::SETOLT:
+ case ISD::SETLT:
return CmpMode::LT;
case ISD::SETOLE:
+ case ISD::SETLE:
return CmpMode::LE;
case ISD::SETONE:
+ case ISD::SETNE:
return CmpMode::NE;
case ISD::SETO:
return CmpMode::NUM;
@@ -397,45 +403,29 @@ static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
return CmpMode::LEU;
case ISD::SETUNE:
return CmpMode::NEU;
- case ISD::SETEQ:
- return CmpMode::EQ;
- case ISD::SETGT:
- return CmpMode::GT;
- case ISD::SETGE:
- return CmpMode::GE;
- case ISD::SETLT:
- return CmpMode::LT;
- case ISD::SETLE:
- return CmpMode::LE;
- case ISD::SETNE:
- return CmpMode::NE;
}
}(CondCode.get());
-
- if (FTZ)
- PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;
-
- return PTXCmpMode;
+ return CurDAG->getTargetConstant(PTXCmpMode, SDLoc(), MVT::i32);
}
bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
- unsigned PTXCmpMode =
- getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
+ SDValue PTXCmpMode = getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)));
SDLoc DL(N);
SDNode *SetP = CurDAG->getMachineNode(
- NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
- N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
+ NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1,
+ {N->getOperand(0), N->getOperand(1), PTXCmpMode,
+ CurDAG->getTargetConstant(useF32FTZ() ? 1 : 0, DL, MVT::i1)});
ReplaceNode(N, SetP);
return true;
}
bool NVPTXDAGToDAGISel::SelectSETP_BF16X2(SDNode *N) {
- unsigned PTXCmpMode =
- getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
+ SDValue PTXCmpMode = getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)));
SDLoc DL(N);
SDNode *SetP = CurDAG->getMachineNode(
- NVPTX::SETP_bf16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
- N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
+ NVPTX::SETP_bf16x2rr, DL, MVT::i1, MVT::i1,
+ {N->getOperand(0), N->getOperand(1), PTXCmpMode,
+ CurDAG->getTargetConstant(useF32FTZ() ? 1 : 0, DL, MVT::i1)});
ReplaceNode(N, SetP);
return true;
}
@@ -1953,7 +1943,7 @@ bool NVPTXDAGToDAGISel::tryBF16ArithToFMA(SDNode *N) {
llvm_unreachable("Unexpected opcode");
};
- int Opcode = IsVec ? NVPTX::BFMA16x2rrr : NVPTX::BFMA16rrr;
+ int Opcode = IsVec ? NVPTX::FMA_BF16x2rrr : NVPTX::FMA_BF16rrr;
MachineSDNode *FMA = CurDAG->getMachineNode(Opcode, DL, VT, Operands);
ReplaceNode(N, FMA);
return true;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 0e4dec1adca67..b314c4ccefe8b 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -104,12 +104,11 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
}
bool SelectADDR(SDValue Addr, SDValue &Base, SDValue &Offset);
+ SDValue getPTXCmpMode(const CondCodeSDNode &CondCode);
SDValue selectPossiblyImm(SDValue V);
bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
- static unsigned GetConvertOpcode(MVT DestTy, MVT SrcTy, LoadSDNode *N);
-
// Returns the Memory Order and Scope that the PTX memory instruction should
// use, and inserts appropriate fence instruction before the memory
// instruction, if needed to implement the instructions memory order. Required
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 441ddeeb7d667..dcdebb81e3c86 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -68,48 +68,28 @@ def CvtMode : Operand<i32> {
let PrintMethod = "printCvtMode";
}
+// FTZ flag
+
+def FTZ : PatLeaf<(i1 1)>;
+def NoFTZ : PatLeaf<(i1 0)>;
+
+def getFTZFlag : SDNodeXForm<imm, [{
+ (void)N;
+ return CurDAG->getTargetConstant(useF32FTZ() ? 1 : 0, SDLoc(), MVT::i1);
+}]>;
+
+def FTZFlag : OperandWithDefaultOps<i1, (ops (getFTZFlag (i1 0)))> {
+ let PrintMethod = "printFTZFlag";
+}
+
// Compare modes
// These must match the enum in NVPTX.h
-def CmpEQ : PatLeaf<(i32 0)>;
-def CmpNE : PatLeaf<(i32 1)>;
-def CmpLT : PatLeaf<(i32 2)>;
-def CmpLE : PatLeaf<(i32 3)>;
-def CmpGT : PatLeaf<(i32 4)>;
-def CmpGE : PatLeaf<(i32 5)>;
-def CmpLO : PatLeaf<(i32 6)>;
-def CmpLS : PatLeaf<(i32 7)>;
-def CmpHI : PatLeaf<(i32 8)>;
-def CmpHS : PatLeaf<(i32 9)>;
-def CmpEQU : PatLeaf<(i32 10)>;
-def CmpNEU : PatLeaf<(i32 11)>;
-def CmpLTU : PatLeaf<(i32 12)>;
-def CmpLEU : PatLeaf<(i32 13)>;
-def CmpGTU : PatLeaf<(i32 14)>;
-def CmpGEU : PatLeaf<(i32 15)>;
-def CmpNUM : PatLeaf<(i32 16)>;
-def CmpNAN : PatLeaf<(i32 17)>;
-
-def CmpEQ_FTZ : PatLeaf<(i32 0x100)>;
-def CmpNE_FTZ : PatLeaf<(i32 0x101)>;
-def CmpLT_FTZ : PatLeaf<(i32 0x102)>;
-def CmpLE_FTZ : PatLeaf<(i32 0x103)>;
-def CmpGT_FTZ : PatLeaf<(i32 0x104)>;
-def CmpGE_FTZ : PatLeaf<(i32 0x105)>;
-def CmpEQU_FTZ : PatLeaf<(i32 0x10A)>;
-def CmpNEU_FTZ : PatLeaf<(i32 0x10B)>;
-def CmpLTU_FTZ : PatLeaf<(i32 0x10C)>;
-def CmpLEU_FTZ : PatLeaf<(i32 0x10D)>;
-def CmpGTU_FTZ : PatLeaf<(i32 0x10E)>;
-def CmpGEU_FTZ : PatLeaf<(i32 0x10F)>;
-def CmpNUM_FTZ : PatLeaf<(i32 0x110)>;
-def CmpNAN_FTZ : PatLeaf<(i32 0x111)>;
+def CmpEQ : PatLeaf<(i32 0)>;
+def CmpNE : PatLeaf<(i32 1)>;
def CmpMode : Operand<i32> {
let PrintMethod = "printCmpMode";
}
-def VecElement : Operand<i32> {
- let PrintMethod = "printVecElement";
-}
// PRMT modes
// These must match the enum in NVPTX.h
@@ -152,8 +132,6 @@ def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">;
def hasDotInstructions : Predicate<"Subtarget->hasDotInstructions()">;
def hasTcgen05Instructions : Predicate<"Subtarget->hasTcgen05Instructions()">;
-def True : Predicate<"true">;
-
class hasPTX<int version>: Predicate<"Subtarget->getPTXVersion() >= " # version>;
class hasSM<int version>: Predicate<"Subtarget->getSmVersion() >= " # version>;
@@ -198,7 +176,7 @@ def RI64 : Operand<Any>;
// Utility class to wrap up information about a register and DAG type for more
// convenient iteration and parameterization
-class RegTyInfo<ValueType ty, NVPTXRegClass rc, Operand imm, SDNode imm_node,
+class RegTyInfo<ValueType ty, NVPTXRegClass rc, string ptx_type, Operand imm, SDNode imm_node,
bit supports_imm = 1> {
ValueType Ty = ty;
NVPTXRegClass RC = rc;
@@ -206,20 +184,21 @@ class RegTyInfo<ValueType ty, NVPTXRegClass rc, Operand imm, SDNode imm_node,
SDNode ImmNode = imm_node;
bit SupportsImm = supports_imm;
int Size = ty.Size;
+ string PtxType = ptx_type;
}
-def I1RT : RegTyInfo<i1, B1, i1imm, imm>;
-def I16RT : RegTyInfo<i16, B16, i16imm, imm>;
-def I32RT : RegTyInfo<i32, B32, i32imm, imm>;
-def I64RT : RegTyInfo<i64, B64, i64imm, imm>;
+def I1RT : RegTyInfo<i1, B1, "pred", i1imm, imm>;
+def I16RT : RegTyInfo<i16, B16, "b16", i16imm, imm>;
+def I32RT : RegTyInfo<i32, B32, "b32", i32imm, imm>;
+def I64RT : RegTyInfo<i64, B64, "b64", i64imm, imm>;
-def F32RT : RegTyInfo<f32, B32, f32imm, fpimm>;
-def F64RT : RegTyInfo<f64, B64, f64imm, fpimm>;
-def F16RT : RegTyInfo<f16, B16, f16imm, fpimm, supports_imm = 0>;
-def BF16RT : RegTyInfo<bf16, B16, bf16imm, fpimm, supports_imm = 0>;
+def F32RT : RegTyInfo<f32, B32, "f32", f32imm, fpimm>;
+def F64RT : RegTyInfo<f64, B64, "f64", f64imm, fpimm>;
+def F16RT : RegTyInfo<f16, B16, "f16", f16imm, fpimm, supports_imm = 0>;
+def BF16RT : RegTyInfo<bf16, B16, "bf16", bf16imm, fpimm, supports_imm = 0>;
-def F16X2RT : RegTyInfo<v2f16, B32, ?, ?, supports_imm = 0>;
-def BF16X2RT : RegTyInfo<v2bf16, B32, ?, ?, supports_imm = 0>;
+def F16X2RT : RegTyInfo<v2f16, B32, "f16x2", ?, ?, supports_imm = 0>;
+def BF16X2RT : RegTyInfo<v2bf16, B32, "bf16x2", ?, ?, supports_imm = 0>;
// This class provides a basic wrapper around an NVPTXInst that abstracts the
@@ -321,76 +300,57 @@ multiclass ADD_SUB_INT_CARRY<string op_str, SDNode op_node, bit commutative> {
// Also defines ftz (flush subnormal inputs and results to sign-preserving
// zero) variants for fp32 functions.
multiclass FMINIMUMMAXIMUM<string OpcStr, bit NaN, SDNode OpNode> {
+ defvar nan_str = !if(NaN, ".NaN", "");
if !not(NaN) then {
- def f64rr :
+ def _f64_rr :
BasicNVPTXInst<(outs B64:$dst),
(ins B64:$a, B64:$b),
OpcStr # ".f64",
[(set f64:$dst, (OpNode f64:$a, f64:$b))]>;
- def f64ri :
+ def _f64_ri :
BasicNVPTXInst<(outs B64:$dst),
(ins B64:$a, f64imm:$b),
OpcStr # ".f64",
[(set f64:$dst, (OpNode f64:$a, fpimm:$b))]>;
}
- def f32rr_ftz :
- BasicNVPTXInst<(outs B32:$dst),
+ def _f32_rr :
+ BasicFlagsNVPTXInst<(outs B32:$dst),
(ins B32:$a, B32:$b),
- OpcStr # ".ftz.f32",
- [(set f32:$dst, (OpNode f32:$a, f32:$b))]>,
- Requires<[doF32FTZ]>;
- def f32ri_ftz :
- BasicNVPTXInst<(outs B32:$dst),
- (ins B32:$a, f32imm:$b),
- OpcStr # ".ftz.f32",
- [(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>,
- Requires<[doF32FTZ]>;
- def f32rr :
- BasicNVPTXInst<(outs B32:$dst),
- (ins B32:$a, B32:$b),
- OpcStr # ".f32",
+ (ins FTZFlag:$ftz),
+ OpcStr # "$ftz" # nan_str # ".f32",
[(set f32:$dst, (OpNode f32:$a, f32:$b))]>;
- def f32ri :
- BasicNVPTXInst<(outs B32:$dst),
+ def _f32_ri :
+ BasicFlagsNVPTXInst<(outs B32:$dst),
(ins B32:$a, f32imm:$b),
- OpcStr # ".f32",
+ (ins FTZFlag:$ftz),
+ OpcStr # "$ftz" # nan_str # ".f32",
[(set f32:$dst, (OpNode f32:$a, fpimm:$b))]>;
- def f16rr_ftz :
- BasicNVPTXInst<(outs B16:$dst),
- (ins B16:$a, B16:$b),
- OpcStr # ".ftz.f16",
- [(set f16:$dst, (OpNode f16:$a, f16:$b))]>,
- Requires<[useFP16Math, doF32FTZ]>;
- def f16rr :
- BasicNVPTXInst<(outs B16:$dst),
+ def _f16_rr :
+ BasicFlagsNVPTXInst<(outs B16:$dst),
(ins B16:$a, B16:$b),
- OpcStr # ".f16",
+ (ins FTZFlag:$ftz),
+ OpcStr # "$ftz" # nan_str # ".f16",
[(set f16:$dst, (OpNode f16:$a, f16:$b))]>,
- Requires<[useFP16Math, hasSM<80>, hasPTX<70>]>;
+ Requires<[useFP16Math]>;
- def f16x2rr_ftz :
- BasicNVPTXInst<(outs B32:$dst),
- (ins B32:$a, B32:$b),
- OpcStr # ".ftz.f16x2",
- [(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>,
- Requires<[useFP16Math, hasSM<80>, hasPTX<70>, doF32FTZ]>;
- def f16x2rr :
- BasicNVPTXInst<(outs B32:$dst),
+ def _f16x2_rr :
+ BasicFlagsNVPTXInst<(outs B32:$dst),
(ins B32:$a, B32:$b),
- OpcStr # ".f16x2",
+ (ins FTZFlag:$ftz),
+ OpcStr # "$ftz" # nan_str # ".f16x2",
[(set v2f16:$dst, (OpNode v2f16:$a, v2f16:$b))]>,
Requires<[useFP16Math, hasSM<80>, hasPTX<70>]>;
- def bf16rr :
+ def _bf16_rr :
BasicNVPTXInst<(outs B16:$dst),
(ins B16:$a, B16:$b),
- OpcStr # ".bf16",
+ OpcStr # nan_str # ".bf16",
[(set bf16:$dst, (OpNode bf16:$a, bf16:$b))]>,
Requires<[hasBF16Math, hasSM<80>, hasPTX<70>]>;
- def bf16x2rr :
+ def _bf16x2_rr :
BasicNVPTXInst<(outs B32:$dst),
(ins B32:$a, B32:$b),
- OpcStr # ".bf16x2",
+ OpcStr # nan_str # ".bf16x2",
[(set v2bf16:$dst, (OpNode v2bf16:$a, v2bf16:$b))]>,
Requires<[hasBF16Math, hasSM<80>, hasPTX<70>]>;
}
@@ -415,52 +375,31 @@ multiclass F3<string op_str, SDPatternOperator op_pat> {
(ins B64:$a, f64imm:$b),
op_str # ".f64",
[(set f64:$dst, (op_pat f64:$a, fpimm:$b))]>;
- def f32rr_ftz :
- BasicNVPTXInst<(outs B32:$dst),
- (ins B32:$a, B32:$b),
- op_str # ".ftz.f32",
- [(set f32:$dst, (op_pat f32:$a, f32:$b))]>,
- Requires<[doF32FTZ]>;
- def f32ri_ftz :
- BasicNVPTXInst<(outs B32:$dst),
- (ins B32:$a, f32imm:$b),
- op_str # ".ftz.f32",
- [(set f32:$dst, (op_pat f32:$a, fpimm:$b))]>,
- Requires<[doF32FTZ]>;
def f32rr :
- BasicNVPTXInst<(outs B32:$dst),
+ BasicFlagsNVPTXInst<(outs B32:$dst),
(ins B32:$a, B32:$b),
- op_str # ".f32",
+ (ins FTZFlag:$ftz),
+ op_str # "$ftz.f32",
[(set f32:$dst, (op_pat f32:$a, f32:$b))]>;
def f32ri :
- BasicNVPTXInst<(outs B32:$dst),
+ BasicFlagsNVPTXInst<(outs B32:$dst),
(ins B32:$a, f32imm:$b),
- op_str # ".f32",
+ (ins FTZFlag:$ftz),
+ op_str # "$ftz.f32",
[(set f32:$dst, (op_pat f32:$a, fpimm:$b))]>;
- def f16rr_ftz :
- BasicNVPTXInst<(outs B16:$dst),
- (ins B16:$a, B16:$b),
- op_str # ".ftz.f16",
- [(set f16:$dst, (op_pat f16:$a, f16:$b))]>,
- Requires<[useFP16Math, doF32FTZ]>;
def f16rr :
- BasicNVPTXInst<(outs B16:$dst),
+ BasicFlagsNVPTXInst<(outs B16:$dst),
(ins B16:$a, B16:$b),
- op_str # ".f16",
+ (ins FTZFlag:$ftz),
+ op_str # "$ftz.f16",
[(set f16:$dst, (op_pat f16:$a, f16:$b))]>,
Requires<[useFP16Math]>;
-
- def f16x2rr_ftz :
- BasicNVPTXInst<(outs B32:$dst),
- (ins B32:$a, B32:$b),
- op_str # ".ftz.f16x2",
- [(set v2f16:$dst, (op_pat v2f16:$a, v2f16:$b))]>,
- Requires<[useFP16Math, doF32FTZ]>;
def f16x2rr :
- BasicNVPTXInst<(outs B32:$dst),
+ BasicFlagsNVPTXInst<(outs B32:$dst),
(ins B32:$a, B32:$b),
- op_str # ".f16x2",
+ (ins FTZFlag:$ftz),
+ op_str # "$ftz.f16x2",
[(set v2f16:$dst, (op_pat v2f16:$a, v2f16:$b))]>,
Requires<[useFP16Math]>;
def bf16rr :
@@ -493,15 +432,12 @@ multiclass F3_fma_component<string op_str, SDNode op_node> {
// instructions: <OpcStr>.f64, <OpcStr>.f32, and <OpcStr>.ftz.f32 (flush
// subnormal inputs and results to zero).
multiclass F2<string OpcStr, SDNode OpNode> {
- def f64 : BasicNVPTXInst<(outs B64:$dst), (ins B64:$a),
+ def f64 : BasicNVPTXInst<(outs B64:$dst), (ins B64:$a),
OpcStr # ".f64",
[(set f64:$dst, (OpNode f64:$a))]>;
- def f32_ftz : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a),
- OpcStr # ".ftz.f32",
- [(set f32:$dst, (OpNode f32:$a))]>,
- Requires<[doF32FTZ]>;
- def f32 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a),
- OpcStr # ".f32",
+ def f32 : BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$a),
+ (ins FTZFlag:$ftz),
+ OpcStr # "$ftz.f32",
[(set f32:$dst, (OpNode f32:$a))]>;
}
@@ -514,37 +450,19 @@ multiclass F2_Support_Half<string OpcStr, SDNode OpNode> {
OpcStr # ".bf16x2",
[(set v2bf16:$dst, (OpNode v2bf16:$a))]>,
Requires<[hasSM<80>, hasPTX<70>]>;
- def f16_ftz : BasicNVPTXInst<(outs B16:$dst), (ins B16:$a),
- OpcStr # ".ftz.f16",
- [(set f16:$dst, (OpNode f16:$a))]>,
- Requires<[hasSM<53>, hasPTX<65>, doF32FTZ]>;
- def f16x2_ftz : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a),
- OpcStr # ".ftz.f16x2",
- [(set v2f16:$dst, (OpNode v2f16:$a))]>,
- Requires<[hasSM<53>, hasPTX<65>, doF32FTZ]>;
- def f16 : BasicNVPTXInst<(outs B16:$dst), (ins B16:$a),
- OpcStr # ".f16",
+ def f16 : BasicFlagsNVPTXInst<(outs B16:$dst), (ins B16:$a),
+ (ins FTZFlag:$ftz),
+ OpcStr # "$ftz.f16",
[(set f16:$dst, (OpNode f16:$a))]>,
Requires<[hasSM<53>, hasPTX<65>]>;
- def f16x2 : BasicNVPTXInst<(outs B32:$dst), (ins B32:$a),
- OpcStr # ".f16x2",
+ def f16x2 : BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$a),
+ (ins FTZFlag:$ftz),
+ OpcStr # "$ftz.f16x2",
[(set v2f16:$dst, (OpNode v2f16:$a))]>,
Requires<[hasSM<53>, hasPTX<65>]>;
}
-// Variant where only .ftz.bf16 is supported.
-multiclass F2_Support_Half_BF<string OpcStr, SDNode OpNode> {
- def bf16_ftz : BasicNVPTXInst<(outs B16:$dst), (ins B16:$a),
- OpcStr # ".ftz.bf16",
- [(set bf16:$dst, (OpNode bf16:$a))]>,
- Requires<[hasSM<90>, hasPTX<78>]>;
- def bf16x2_ftz: BasicNVPTXInst<(outs B32:$dst), (ins B32:$a),
- OpcStr # ".ftz.bf16x2",
- [(set v2bf16:$dst, (OpNode v2bf16:$a))]>,
- Requires<[hasSM<90>, hasPTX<78>]>;
-}
-
//===----------------------------------------------------------------------===//
// NVPTX Instructions.
//===----------------------------------------------------------------------===//
@@ -992,48 +910,38 @@ def SHL2MUL16 : SDNodeXForm<imm, [{
}]>;
// Convert "sign/zero-extend, then shift left by an immediate" to mul.wide.
-def : Pat<(shl (sext i32:$a), (i32 IntConst_0_30:$b)),
- (MULWIDES64Imm $a, (SHL2MUL32 $b))>,
- Requires<[doMulWide]>;
-def : Pat<(shl (zext i32:$a), (i32 IntConst_0_30:$b)),
- (MULWIDEU64Imm $a, (SHL2MUL32 $b))>,
- Requires<[doMulWide]>;
-
-def : Pat<(shl (sext i16:$a), (i16 IntConst_0_14:$b)),
- (MULWIDES32Imm $a, (SHL2MUL16 $b))>,
- Requires<[doMulWide]>;
-def : Pat<(shl (zext i16:$a), (i16 IntConst_0_14:$b)),
- (MULWIDEU32Imm $a, (SHL2MUL16 $b))>,
- Requires<[doMulWide]>;
-
-// Convert "sign/zero-extend then multiply" to mul.wide.
-def : Pat<(mul (sext i32:$a), (sext i32:$b)),
- (MULWIDES64 $a, $b)>,
- Requires<[doMulWide]>;
-def : Pat<(mul (sext i32:$a), (i64 SInt32Const:$b)),
- (MULWIDES64Imm64 $a, (i64 SInt32Const:$b))>,
- Requires<[doMulWide]>;
-
-def : Pat<(mul (zext i32:$a), (zext i32:$b)),
- (MULWIDEU64 $a, $b)>,
- Requires<[doMulWide]>;
-def : Pat<(mul (zext i32:$a), (i64 UInt32Const:$b)),
- (MULWIDEU64Imm64 $a, (i64 UInt32Const:$b))>,
- Requires<[doMulWide]>;
-
-def : Pat<(mul (sext i16:$a), (sext i16:$b)),
- (MULWIDES32 $a, $b)>,
- Requires<[doMulWide]>;
-def : Pat<(mul (sext i16:$a), (i32 SInt16Const:$b)),
- (MULWIDES32Imm32 $a, (i32 SInt16Const:$b))>,
- Requires<[doMulWide]>;
-
-def : Pat<(mul (zext i16:$a), (zext i16:$b)),
- (MULWIDEU32 $a, $b)>,
- Requires<[doMulWide]>;
-def : Pat<(mul (zext i16:$a), (i32 UInt16Const:$b)),
- (MULWIDEU32Imm32 $a, (i32 UInt16Const:$b))>,
- Requires<[doMulWide]>;
+let Predicates = [doMulWide] in {
+ def : Pat<(shl (sext i32:$a), (i32 IntConst_0_30:$b)),
+ (MULWIDES64Imm $a, (SHL2MUL32 $b))>;
+ def : Pat<(shl (zext i32:$a), (i32 IntConst_0_30:$b)),
+ (MULWIDEU64Imm $a, (SHL2MUL32 $b))>;
+
+ def : Pat<(shl (sext i16:$a), (i16 IntConst_0_14:$b)),
+ (MULWIDES32Imm $a, (SHL2MUL16 $b))>;
+ def : Pat<(shl (zext i16:$a), (i16 IntConst_0_14:$b)),
+ (MULWIDEU32Imm $a, (SHL2MUL16 $b))>;
+
+ // Convert "sign/zero-extend then multiply" to mul.wide.
+ def : Pat<(mul (sext i32:$a), (sext i32:$b)),
+ (MULWIDES64 $a, $b)>;
+ def : Pat<(mul (sext i32:$a), (i64 SInt32Const:$b)),
+ (MULWIDES64Imm64 $a, (i64 SInt32Const:$b))>;
+
+ def : Pat<(mul (zext i32:$a), (zext i32:$b)),
+ (MULWIDEU64 $a, $b)>;
+ def : Pat<(mul (zext i32:$a), (i64 UInt32Const:$b)),
+ (MULWIDEU64Imm64 $a, (i64 UInt32Const:$b))>;
+
+ def : Pat<(mul (sext i16:$a), (sext i16:$b)),
+ (MULWIDES32 $a, $b)>;
+ def : Pat<(mul (sext i16:$a), (i32 SInt16Const:$b)),
+ (MULWIDES32Imm32 $a, (i32 SInt16Const:$b))>;
+
+ def : Pat<(mul (zext i16:$a), (zext i16:$b)),
+ (MULWIDEU32 $a, $b)>;
+ def : Pat<(mul (zext i16:$a), (i32 UInt16Const:$b)),
+ (MULWIDEU32Imm32 $a, (i32 UInt16Const:$b))>;
+}
//
// Integer multiply-add
@@ -1101,10 +1009,10 @@ defm FADD : F3_fma_component<"add", fadd>;
defm FSUB : F3_fma_component<"sub", fsub>;
defm FMUL : F3_fma_component<"mul", fmul>;
-defm FMIN : FMINIMUMMAXIMUM<"min", /* NaN */ false, fminnum>;
-defm FMAX : FMINIMUMMAXIMUM<"max", /* NaN */ false, fmaxnum>;
-defm FMINNAN : FMINIMUMMAXIMUM<"min.NaN", /* NaN */ true, fminimum>;
-defm FMAXNAN : FMINIMUMMAXIMUM<"max.NaN", /* NaN */ true, fmaximum>;
+defm MIN : FMINIMUMMAXIMUM<"min", /* NaN */ false, fminnum>;
+defm MAX : FMINIMUMMAXIMUM<"max", /* NaN */ false, fmaxnum>;
+defm MIN_NAN : FMINIMUMMAXIMUM<"min", /* NaN */ true, fminimum>;
+defm MAX_NAN : FMINIMUMMAXIMUM<"max", /* NaN */ true, fmaximum>;
defm FABS : F2<"abs", fabs>;
defm FNEG : F2<"neg", fneg>;
@@ -1113,36 +1021,43 @@ defm FNEG_H: F2_Support_Half<"neg", fneg>;
defm FSQRT : F2<"sqrt.rn", fsqrt>;
-defm FEXP2_H: F2_Support_Half_BF<"ex2.approx", fexp2>;
-
//
// F16 NEG
//
-class FNEG_F16_F16X2<string OpcStr, ValueType T, RegisterClass RC, Predicate Pred> :
- BasicNVPTXInst<(outs RC:$dst), (ins RC:$src),
- OpcStr,
- [(set T:$dst, (fneg T:$src))]>,
- Requires<[useFP16Math, hasPTX<60>, hasSM<53>, Pred]>;
-def FNEG16_ftz : FNEG_F16_F16X2<"neg.ftz.f16", f16, B16, doF32FTZ>;
-def FNEG16 : FNEG_F16_F16X2<"neg.f16", f16, B16, True>;
-def FNEG16x2_ftz : FNEG_F16_F16X2<"neg.ftz.f16x2", v2f16, B32, doF32FTZ>;
-def FNEG16x2 : FNEG_F16_F16X2<"neg.f16x2", v2f16, B32, True>;
+class FNEG16<RegTyInfo t> :
+ BasicFlagsNVPTXInst<(outs t.RC:$dst), (ins t.RC:$src), (ins FTZFlag:$ftz),
+ "neg$ftz." # t.PtxType,
+ [(set t.Ty:$dst, (fneg t.Ty:$src))]>;
+
+let Predicates = [useFP16Math, hasPTX<60>, hasSM<53>] in {
+ def NEG_F16 : FNEG16<F16RT>;
+ def NEG_F16x2 : FNEG16<F16X2RT>;
+}
+let Predicates = [hasBF16Math, hasPTX<70>, hasSM<80>] in {
+ def NEG_BF16 : FNEG16<BF16RT>;
+ def NEG_BF16x2 : FNEG16<BF16X2RT>;
+}
//
-// BF16 NEG
+// EX2
//
-class FNEG_BF16_F16X2<string OpcStr, ValueType T, RegisterClass RC, Predicate Pred> :
- BasicNVPTXInst<(outs RC:$dst), (ins RC:$src),
- OpcStr,
- [(set T:$dst, (fneg T:$src))]>,
- Requires<[hasBF16Math, hasPTX<70>, hasSM<80>, Pred]>;
-def BFNEG16_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16", bf16, B16, doF32FTZ>;
-def BFNEG16 : FNEG_BF16_F16X2<"neg.bf16", bf16, B16, True>;
-def BFNEG16x2_ftz : FNEG_BF16_F16X2<"neg.ftz.bf16x2", v2bf16, B32, doF32FTZ>;
-def BFNEG16x2 : FNEG_BF16_F16X2<"neg.bf16x2", v2bf16, B32, True>;
+class FEXP2Inst<RegTyInfo t, dag flags, string flag_str> :
+ BasicFlagsNVPTXInst<(outs t.RC:$dst), (ins t.RC:$src),
+ flags, "ex2.approx" # flag_str # "." # t.PtxType,
+ [(set t.Ty:$dst, (fexp2 t.Ty:$src))]>;
+
+def EX2_APPROX_f32 : FEXP2Inst<F32RT, (ins FTZFlag:$ftz), "$ftz">;
+
+let Predicates = [useFP16Math, hasPTX<70>, hasSM<75>] in {
+ def EX2_APPROX_f16 : FEXP2Inst<F16RT, (ins), "">;
+ def EX2_APPROX_f16x2 : FEXP2Inst<F16X2RT, (ins), "">;
+}
+let Predicates = [hasPTX<78>, hasSM<90>] in {
+ def EX2_APPROX_bf16 : FEXP2Inst<BF16RT, (ins), ".ftz">;
+ def EX2_APPROX_bf16x2 : FEXP2Inst<BF16X2RT, (ins), ".ftz">;
+}
-//
// F64 division
//
def FRCP64r :
@@ -1176,42 +1091,27 @@ def fdiv_approx : PatFrag<(ops node:$a, node:$b),
}]>;
-def FRCP32_approx_r_ftz :
- BasicNVPTXInst<(outs B32:$dst),
- (ins B32:$b),
- "rcp.approx.ftz.f32",
- [(set f32:$dst, (fdiv_approx f32imm_1, f32:$b))]>,
- Requires<[doF32FTZ]>;
def FRCP32_approx_r :
- BasicNVPTXInst<(outs B32:$dst),
+ BasicFlagsNVPTXInst<(outs B32:$dst),
(ins B32:$b),
- "rcp.approx.f32",
+ (ins FTZFlag:$ftz),
+ "rcp.approx$ftz.f32",
[(set f32:$dst, (fdiv_approx f32imm_1, f32:$b))]>;
//
// F32 Approximate division
//
-def FDIV32approxrr_ftz :
- BasicNVPTXInst<(outs B32:$dst),
+def FDIV32_approx_rr :
+ BasicFlagsNVPTXInst<(outs B32:$dst),
(ins B32:$a, B32:$b),
- "div.approx.ftz.f32",
- [(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>,
- Requires<[doF32FTZ]>;
-def FDIV32approxri_ftz :
- BasicNVPTXInst<(outs B32:$dst),
- (ins B32:$a, f32imm:$b),
- "div.approx.ftz.f32",
- [(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>,
- Requires<[doF32FTZ]>;
-def FDIV32approxrr :
- BasicNVPTXInst<(outs B32:$dst),
- (ins B32:$a, B32:$b),
- "div.approx.f32",
+ (ins FTZFlag:$ftz),
+ "div.approx$ftz.f32",
[(set f32:$dst, (fdiv_approx f32:$a, f32:$b))]>;
-def FDIV32approxri :
- BasicNVPTXInst<(outs B32:$dst),
+def FDIV32_approx_ri :
+ BasicFlagsNVPTXInst<(outs B32:$dst),
(ins B32:$a, f32imm:$b),
- "div.approx.f32",
+ (ins FTZFlag:$ftz),
+ "div.approx$ftz.f32",
[(set f32:$dst, (fdiv_approx f32:$a, fpimm:$b))]>;
//
// F32 Semi-accurate reciprocal
@@ -1225,37 +1125,23 @@ def fdiv_full : PatFrag<(ops node:$a, node:$b),
}]>;
-def : Pat<(fdiv_full f32imm_1, f32:$b),
- (FRCP32_approx_r_ftz $b)>,
- Requires<[doF32FTZ]>;
-
def : Pat<(fdiv_full f32imm_1, f32:$b),
(FRCP32_approx_r $b)>;
//
// F32 Semi-accurate division
//
-def FDIV32rr_ftz :
- BasicNVPTXInst<(outs B32:$dst),
- (ins B32:$a, B32:$b),
- "div.full.ftz.f32",
- [(set f32:$dst, (fdiv_full f32:$a, f32:$b))]>,
- Requires<[doF32FTZ]>;
-def FDIV32ri_ftz :
- BasicNVPTXInst<(outs B32:$dst),
- (ins B32:$a, f32imm:$b),
- "div.full.ftz.f32",
- [(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>,
- Requires<[doF32FTZ]>;
def FDIV32rr :
- BasicNVPTXInst<(outs B32:$dst),
+ BasicFlagsNVPTXInst<(outs B32:$dst),
(ins B32:$a, B32:$b),
- "div.full.f32",
+ (ins FTZFlag:$ftz),
+ "div.full$ftz.f32",
[(set f32:$dst, (fdiv_full f32:$a, f32:$b))]>;
def FDIV32ri :
- BasicNVPTXInst<(outs B32:$dst),
+ BasicFlagsNVPTXInst<(outs B32:$dst),
(ins B32:$a, f32imm:$b),
- "div.full.f32",
+ (ins FTZFlag:$ftz),
+ "div.full$ftz.f32",
[(set f32:$dst, (fdiv_full f32:$a, fpimm:$b))]>;
//
// F32 Accurate reciprocal
@@ -1266,86 +1152,73 @@ def fdiv_ftz : PatFrag<(ops node:$a, node:$b),
return getDivF32Level(N) == NVPTX::DivPrecisionLevel::IEEE754;
}]>;
-def FRCP32r_prec_ftz :
- BasicNVPTXInst<(outs B32:$dst),
- (ins B32:$b),
- "rcp.rn.ftz.f32",
- [(set f32:$dst, (fdiv_ftz f32imm_1, f32:$b))]>,
- Requires<[doF32FTZ]>;
def FRCP32r_prec :
- BasicNVPTXInst<(outs B32:$dst),
+ BasicFlagsNVPTXInst<(outs B32:$dst),
(ins B32:$b),
- "rcp.rn.f32",
- [(set f32:$dst, (fdiv f32imm_1, f32:$b))]>;
+ (ins FTZFlag:$ftz),
+ "rcp.rn$ftz.f32",
+ [(set f32:$dst, (fdiv_ftz f32imm_1, f32:$b))]>;
//
// F32 Accurate division
//
-def FDIV32rr_prec_ftz :
- BasicNVPTXInst<(outs B32:$dst),
- (ins B32:$a, B32:$b),
- "div.rn.ftz.f32",
- [(set f32:$dst, (fdiv_ftz f32:$a, f32:$b))]>,
- Requires<[doF32FTZ]>;
-def FDIV32ri_prec_ftz :
- BasicNVPTXInst<(outs B32:$dst),
- (ins B32:$a, f32imm:$b),
- "div.rn.ftz.f32",
- [(set f32:$dst, (fdiv_ftz f32:$a, fpimm:$b))]>,
- Requires<[doF32FTZ]>;
def FDIV32rr_prec :
- BasicNVPTXInst<(outs B32:$dst),
+ BasicFlagsNVPTXInst<(outs B32:$dst),
(ins B32:$a, B32:$b),
- "div.rn.f32",
- [(set f32:$dst, (fdiv f32:$a, f32:$b))]>;
+ (ins FTZFlag:$ftz),
+ "div.rn$ftz.f32",
+ [(set f32:$dst, (fdiv_ftz f32:$a, f32:$b))]>;
def FDIV32ri_prec :
- BasicNVPTXInst<(outs B32:$dst),
+ BasicFlagsNVPTXInst<(outs B32:$dst),
(ins B32:$a, f32imm:$b),
- "div.rn.f32",
- [(set f32:$dst, (fdiv f32:$a, fpimm:$b))]>;
+ (ins FTZFlag:$ftz),
+ "div.rn$ftz.f32",
+ [(set f32:$dst, (fdiv_ftz f32:$a, fpimm:$b))]>;
+
+def : Pat<(fdiv f32imm_1, f32:$b), (FRCP32r_prec $b, NoFTZ)>;
+def : Pat<(fdiv f32:$a, f32:$b), (FDIV32rr_prec $a, $b, NoFTZ)>;
+def : Pat<(fdiv f32:$a, fpimm:$b), (FDIV32ri_prec $a, fpimm:$b, NoFTZ)>;
//
// FMA
//
-multiclass FMA<string asmstr, RegTyInfo t, list<Predicate> Preds = []> {
- def rrr : BasicNVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b, t.RC:$c),
- asmstr,
- [(set t.Ty:$dst, (fma t.Ty:$a, t.Ty:$b, t.Ty:$c))]>,
- Requires<Preds>;
-
- if t.SupportsImm then {
- def rri : BasicNVPTXInst<(outs t.RC:$dst),
- (ins t.RC:$a, t.RC:$b, t.Imm:$c),
- asmstr,
- [(set t.Ty:$dst, (fma t.Ty:$a, t.Ty:$b, fpimm:$c))]>,
- Requires<Preds>;
- def rir : BasicNVPTXInst<(outs t.RC:$dst),
- (ins t.RC:$a, t.Imm:$b, t.RC:$c),
- asmstr,
- [(set t.Ty:$dst, (fma t.Ty:$a, fpimm:$b, t.Ty:$c))]>,
- Requires<Preds>;
- def rii : BasicNVPTXInst<(outs t.RC:$dst),
- (ins t.RC:$a, t.Imm:$b, t.Imm:$c),
- asmstr,
- [(set t.Ty:$dst, (fma t.Ty:$a, fpimm:$b, fpimm:$c))]>,
- Requires<Preds>;
- def iir : BasicNVPTXInst<(outs t.RC:$dst),
- (ins t.Imm:$a, t.Imm:$b, t.RC:$c),
- asmstr,
- [(set t.Ty:$dst, (fma fpimm:$a, fpimm:$b, t.Ty:$c))]>,
- Requires<Preds>;
+multiclass FMA<RegTyInfo t, bit allow_ftz = true, list<Predicate> preds = []> {
+ defvar flag_str = !if(allow_ftz, "$ftz", "");
+ defvar flag_ops = !if(allow_ftz, (ins FTZFlag:$ftz), (ins));
+ defvar op_str = "fma.rn" # flag_str # "." # t.PtxType;
+
+ let Predicates = preds in {
+ def rrr : BasicFlagsNVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b, t.RC:$c),
+ flag_ops, op_str,
+ [(set t.Ty:$dst, (fma t.Ty:$a, t.Ty:$b, t.Ty:$c))]>;
+
+ if t.SupportsImm then {
+ def rri : BasicFlagsNVPTXInst<(outs t.RC:$dst),
+ (ins t.RC:$a, t.RC:$b, t.Imm:$c),
+ flag_ops, op_str,
+ [(set t.Ty:$dst, (fma t.Ty:$a, t.Ty:$b, fpimm:$c))]>;
+ def rir : BasicFlagsNVPTXInst<(outs t.RC:$dst),
+ (ins t.RC:$a, t.Imm:$b, t.RC:$c),
+ flag_ops, op_str,
+ [(set t.Ty:$dst, (fma t.Ty:$a, fpimm:$b, t.Ty:$c))]>;
+ def rii : BasicFlagsNVPTXInst<(outs t.RC:$dst),
+ (ins t.RC:$a, t.Imm:$b, t.Imm:$c),
+ flag_ops, op_str,
+ [(set t.Ty:$dst, (fma t.Ty:$a, fpimm:$b, fpimm:$c))]>;
+ def iir : BasicFlagsNVPTXInst<(outs t.RC:$dst),
+ (ins t.Imm:$a, t.Imm:$b, t.RC:$c),
+ flag_ops, op_str,
+ [(set t.Ty:$dst, (fma fpimm:$a, fpimm:$b, t.Ty:$c))]>;
+ }
}
}
-defm FMA16_ftz : FMA<"fma.rn.ftz.f16", F16RT, [useFP16Math, doF32FTZ]>;
-defm FMA16 : FMA<"fma.rn.f16", F16RT, [useFP16Math]>;
-defm FMA16x2_ftz : FMA<"fma.rn.ftz.f16x2", F16X2RT, [useFP16Math, doF32FTZ]>;
-defm FMA16x2 : FMA<"fma.rn.f16x2", F16X2RT, [useFP16Math]>;
-defm BFMA16 : FMA<"fma.rn.bf16", BF16RT, [hasBF16Math]>;
-defm BFMA16x2 : FMA<"fma.rn.bf16x2", BF16X2RT, [hasBF16Math]>;
-defm FMA32_ftz : FMA<"fma.rn.ftz.f32", F32RT, [doF32FTZ]>;
-defm FMA32 : FMA<"fma.rn.f32", F32RT>;
-defm FMA64 : FMA<"fma.rn.f64", F64RT>;
+defm FMA_F16 : FMA<F16RT, allow_ftz = true, preds = [useFP16Math]>;
+defm FMA_F16x2 : FMA<F16X2RT, allow_ftz = true, preds = [useFP16Math]>;
+defm FMA_BF16 : FMA<BF16RT, allow_ftz = false, preds = [hasBF16Math]>;
+defm FMA_BF16x2 : FMA<BF16X2RT, allow_ftz = false, preds = [hasBF16Math]>;
+defm FMA_F32 : FMA<F32RT, allow_ftz = true>;
+defm FMA_F64 : FMA<F64RT, allow_ftz = false>;
// sin/cos
@@ -1355,11 +1228,13 @@ class UnaryOpAllowsApproxFn<SDPatternOperator operator>
return allowUnsafeFPMath() || N->getFlags().hasApproximateFuncs();
}]>;
-def SINF: BasicNVPTXInst<(outs B32:$dst), (ins B32:$src),
- "sin.approx.f32",
+def SIN_APPROX_f32 :
+ BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$src), (ins FTZFlag:$ftz),
+ "sin.approx$ftz.f32",
[(set f32:$dst, (UnaryOpAllowsApproxFn<fsin> f32:$src))]>;
-def COSF: BasicNVPTXInst<(outs B32:$dst), (ins B32:$src),
- "cos.approx.f32",
+def COS_APPROX_f32 :
+ BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$src), (ins FTZFlag:$ftz),
+ "cos.approx$ftz.f32",
[(set f32:$dst, (UnaryOpAllowsApproxFn<fcos> f32:$src))]>;
//-----------------------------------
@@ -1636,53 +1511,80 @@ def : Pat<(i16 (sext_inreg (trunc (srl i64:$s, (i32 imm:$o))), i8)),
// FIXME: This doesn't cover versions of set and setp that combine with a
// boolean predicate, e.g. setp.eq.and.b16.
+def cond2cc : SDNodeXForm<cond, [{
+ return getPTXCmpMode(*N);
+}]>;
-let hasSideEffects = false in {
- multiclass SETP<string TypeStr, RegisterClass RC, Operand ImmCls> {
+multiclass FSETP<RegTyInfo t, bit allow_ftz = true> {
+ defvar ftz_str = !if(allow_ftz, "$ftz", "");
+ defvar op_str = "setp.${cmp:FCmp}" # ftz_str # "." # t.PtxType;
+ defvar flags = !con((ins CmpMode:$cmp), !if(allow_ftz, (ins FTZFlag:$ftz), (ins)));
+ let hasSideEffects = false in {
def rr :
- BasicFlagsNVPTXInst<(outs B1:$dst), (ins RC:$a, RC:$b), (ins CmpMode:$cmp),
- "setp${cmp:base}${cmp:ftz}." # TypeStr>;
+ BasicFlagsNVPTXInst<(outs B1:$dst), (ins t.RC:$a, t.RC:$b),
+ flags, op_str>;
+
+ if t.SupportsImm then {
+ def ri :
+ BasicFlagsNVPTXInst<(outs B1:$dst), (ins t.RC:$a, t.Imm:$b),
+ flags, op_str>;
+ def ir :
+ BasicFlagsNVPTXInst<(outs B1:$dst), (ins t.Imm:$a, t.RC:$b),
+ flags, op_str>;
+ }
+ }
+ def : Pat<(i1 (setcc t.Ty:$a, t.Ty:$b, cond:$cc)),
+ (!cast<NVPTXInst>(NAME # "rr") $a, $b, (cond2cc $cc))>;
+ if t.SupportsImm then {
+ def : Pat<(i1 (setcc t.Ty:$a, fpimm:$b, cond:$cc)),
+ (!cast<NVPTXInst>(NAME # "ri") $a, fpimm:$b, (cond2cc $cc))>;
+ def : Pat<(i1 (setcc fpimm:$a, t.Ty:$b, cond:$cc)),
+ (!cast<NVPTXInst>(NAME # "ir") fpimm:$a, $b, (cond2cc $cc))>;
+ }
+}
+
+multiclass ISETP<RegTyInfo t> {
+ defvar op_str = "setp.${cmp:ICmp}.${cmp:IType}" # t.Size;
+ let hasSideEffects = false in {
+ def rr :
+ BasicFlagsNVPTXInst<(outs B1:$dst), (ins t.RC:$a, t.RC:$b),
+ (ins CmpMode:$cmp), op_str>;
def ri :
- BasicFlagsNVPTXInst<(outs B1:$dst), (ins RC:$a, ImmCls:$b), (ins CmpMode:$cmp),
- "setp${cmp:base}${cmp:ftz}." # TypeStr>;
+ BasicFlagsNVPTXInst<(outs B1:$dst), (ins t.RC:$a, t.Imm:$b),
+ (ins CmpMode:$cmp), op_str>;
def ir :
- BasicFlagsNVPTXInst<(outs B1:$dst), (ins ImmCls:$a, RC:$b), (ins CmpMode:$cmp),
- "setp${cmp:base}${cmp:ftz}." # TypeStr>;
+ BasicFlagsNVPTXInst<(outs B1:$dst), (ins t.Imm:$a, t.RC:$b),
+ (ins CmpMode:$cmp), op_str>;
}
+ def : Pat<(i1 (setcc t.Ty:$a, t.Ty:$b, cond:$cc)),
+ (!cast<NVPTXInst>(NAME # "rr") $a, $b, (cond2cc $cc))>;
+ def : Pat<(i1 (setcc t.Ty:$a, imm:$b, cond:$cc)),
+ (!cast<NVPTXInst>(NAME # "ri") $a, imm:$b, (cond2cc $cc))>;
+ def : Pat<(i1 (setcc imm:$a, t.Ty:$b, cond:$cc)),
+ (!cast<NVPTXInst>(NAME # "ir") imm:$a, $b, (cond2cc $cc))>;
}
-defm SETP_b16 : SETP<"b16", B16, i16imm>;
-defm SETP_s16 : SETP<"s16", B16, i16imm>;
-defm SETP_u16 : SETP<"u16", B16, i16imm>;
-defm SETP_b32 : SETP<"b32", B32, i32imm>;
-defm SETP_s32 : SETP<"s32", B32, i32imm>;
-defm SETP_u32 : SETP<"u32", B32, i32imm>;
-defm SETP_b64 : SETP<"b64", B64, i64imm>;
-defm SETP_s64 : SETP<"s64", B64, i64imm>;
-defm SETP_u64 : SETP<"u64", B64, i64imm>;
-defm SETP_f32 : SETP<"f32", B32, f32imm>;
-defm SETP_f64 : SETP<"f64", B64, f64imm>;
-def SETP_f16rr :
- BasicFlagsNVPTXInst<(outs B1:$dst),
- (ins B16:$a, B16:$b), (ins CmpMode:$cmp),
- "setp${cmp:base}${cmp:ftz}.f16">,
- Requires<[useFP16Math]>;
+defm SETP_i16 : ISETP<I16RT>;
+defm SETP_i32 : ISETP<I32RT>;
+defm SETP_i64 : ISETP<I64RT>;
+
+defm SETP_f32 : FSETP<F32RT>;
+defm SETP_f64 : FSETP<F64RT, allow_ftz = false>;
+let Predicates = [useFP16Math] in
+ defm SETP_f16 : FSETP<F16RT>;
+let Predicates = [hasBF16Math, hasPTX<78>, hasSM<90>] in
+ defm SETP_bf16 : FSETP<BF16RT>;
def SETP_f16x2rr :
BasicFlagsNVPTXInst<(outs B1:$p, B1:$q),
- (ins B32:$a, B32:$b), (ins CmpMode:$cmp),
- "setp${cmp:base}${cmp:ftz}.f16x2">,
+ (ins B32:$a, B32:$b), (ins CmpMode:$cmp, FTZFlag:$ftz),
+ "setp.${cmp:FCmp}$ftz.f16x2">,
Requires<[useFP16Math]>;
-def SETP_bf16rr :
- BasicFlagsNVPTXInst<(outs B1:$dst),
- (ins B16:$a, B16:$b), (ins CmpMode:$cmp),
- "setp${cmp:base}${cmp:ftz}.bf16">,
- Requires<[hasBF16Math, hasPTX<78>, hasSM<90>]>;
def SETP_bf16x2rr :
BasicFlagsNVPTXInst<(outs B1:$p, B1:$q),
- (ins B32:$a, B32:$b), (ins CmpMode:$cmp),
- "setp${cmp:base}${cmp:ftz}.bf16x2">,
+ (ins B32:$a, B32:$b), (ins CmpMode:$cmp, FTZFlag:$ftz),
+ "setp.${cmp:FCmp}$ftz.bf16x2">,
Requires<[hasBF16Math, hasPTX<78>, hasSM<90>]>;
//-----------------------------------
@@ -1786,209 +1688,36 @@ def : Pat<(i64 frameindex:$fi), (LEA_ADDRi64 (to_tframeindex $fi), 0)>;
// Comparison and Selection
//-----------------------------------
-multiclass ISET_FORMAT<PatFrag OpNode, PatLeaf Mode,
- Instruction setp_16rr,
- Instruction setp_16ri,
- Instruction setp_16ir,
- Instruction setp_32rr,
- Instruction setp_32ri,
- Instruction setp_32ir,
- Instruction setp_64rr,
- Instruction setp_64ri,
- Instruction setp_64ir> {
- // i16 -> pred
- def : Pat<(i1 (OpNode i16:$a, i16:$b)),
- (setp_16rr $a, $b, Mode)>;
- def : Pat<(i1 (OpNode i16:$a, imm:$b)),
- (setp_16ri $a, imm:$b, Mode)>;
- def : Pat<(i1 (OpNode imm:$a, i16:$b)),
- (setp_16ir imm:$a, $b, Mode)>;
- // i32 -> pred
- def : Pat<(i1 (OpNode i32:$a, i32:$b)),
- (setp_32rr $a, $b, Mode)>;
- def : Pat<(i1 (OpNode i32:$a, imm:$b)),
- (setp_32ri $a, imm:$b, Mode)>;
- def : Pat<(i1 (OpNode imm:$a, i32:$b)),
- (setp_32ir imm:$a, $b, Mode)>;
- // i64 -> pred
- def : Pat<(i1 (OpNode i64:$a, i64:$b)),
- (setp_64rr $a, $b, Mode)>;
- def : Pat<(i1 (OpNode i64:$a, imm:$b)),
- (setp_64ri $a, imm:$b, Mode)>;
- def : Pat<(i1 (OpNode imm:$a, i64:$b)),
- (setp_64ir imm:$a, $b, Mode)>;
-}
-
-multiclass ISET_FORMAT_SIGNED<PatFrag OpNode, PatLeaf Mode>
- : ISET_FORMAT<OpNode, Mode,
- SETP_s16rr, SETP_s16ri, SETP_s16ir,
- SETP_s32rr, SETP_s32ri, SETP_s32ir,
- SETP_s64rr, SETP_s64ri, SETP_s64ir> {
- // TableGen doesn't like empty multiclasses.
- def : PatLeaf<(i32 0)>;
-}
-
-multiclass ISET_FORMAT_UNSIGNED<PatFrag OpNode, PatLeaf Mode>
- : ISET_FORMAT<OpNode, Mode,
- SETP_u16rr, SETP_u16ri, SETP_u16ir,
- SETP_u32rr, SETP_u32ri, SETP_u32ir,
- SETP_u64rr, SETP_u64ri, SETP_u64ir> {
- // TableGen doesn't like empty multiclasses.
- def : PatLeaf<(i32 0)>;
-}
-
-defm : ISET_FORMAT_SIGNED<setgt, CmpGT>;
-defm : ISET_FORMAT_SIGNED<setlt, CmpLT>;
-defm : ISET_FORMAT_SIGNED<setge, CmpGE>;
-defm : ISET_FORMAT_SIGNED<setle, CmpLE>;
-defm : ISET_FORMAT_SIGNED<seteq, CmpEQ>;
-defm : ISET_FORMAT_SIGNED<setne, CmpNE>;
-defm : ISET_FORMAT_UNSIGNED<setugt, CmpGT>;
-defm : ISET_FORMAT_UNSIGNED<setult, CmpLT>;
-defm : ISET_FORMAT_UNSIGNED<setuge, CmpGE>;
-defm : ISET_FORMAT_UNSIGNED<setule, CmpLE>;
-defm : ISET_FORMAT_UNSIGNED<setueq, CmpEQ>;
-defm : ISET_FORMAT_UNSIGNED<setune, CmpNE>;
+def cond_signed : PatLeaf<(cond), [{
+ return isSignedIntSetCC(N->get());
+}]>;
+
+def cond_not_signed : PatLeaf<(cond), [{
+ return !isSignedIntSetCC(N->get());
+}]>;
// comparisons of i8 extracted with BFE as i32
// It's faster to do comparison directly on i32 extracted by BFE,
// instead of the long conversion and sign extending.
-def: Pat<(setgt (i16 (sext_inreg (i16 (trunc (bfe B32:$a, B32:$oa, 8))), i8)),
- (i16 (sext_inreg (i16 (trunc (bfe B32:$b, B32:$ob, 8))), i8))),
- (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpGT)>;
-def: Pat<(setgt (i16 (sext_inreg (trunc (bfe B32:$a, imm:$oa, 8)), i8)),
- (i16 (sext_inreg (trunc (bfe B32:$b, imm:$ob, 8)), i8))),
- (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpGT)>;
-def: Pat<(setge (i16 (sext_inreg (i16 (trunc (bfe B32:$a, B32:$oa, 8))), i8)),
- (i16 (sext_inreg (i16 (trunc (bfe B32:$b, B32:$ob, 8))), i8))),
- (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpGE)>;
-def: Pat<(setge (i16 (sext_inreg (trunc (bfe B32:$a, imm:$oa, 8)), i8)),
- (i16 (sext_inreg (trunc (bfe B32:$b, imm:$ob, 8)), i8))),
- (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpGE)>;
-def: Pat<(setlt (i16 (sext_inreg (i16 (trunc (bfe B32:$a, B32:$oa, 8))), i8)),
- (i16 (sext_inreg (i16 (trunc (bfe B32:$b, B32:$ob, 8))), i8))),
- (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpLT)>;
-def: Pat<(setlt (i16 (sext_inreg (trunc (bfe B32:$a, imm:$oa, 8)), i8)),
- (i16 (sext_inreg (trunc (bfe B32:$b, imm:$ob, 8)), i8))),
- (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpLT)>;
-def: Pat<(setle (i16 (sext_inreg (i16 (trunc (bfe B32:$a, B32:$oa, 8))), i8)),
- (i16 (sext_inreg (i16 (trunc (bfe B32:$b, B32:$ob, 8))), i8))),
- (SETP_s32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), CmpLE)>;
-def: Pat<(setle (i16 (sext_inreg (trunc (bfe B32:$a, imm:$oa, 8)), i8)),
- (i16 (sext_inreg (trunc (bfe B32:$b, imm:$ob, 8)), i8))),
- (SETP_s32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), CmpLE)>;
-
-def: Pat<(setugt (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)),
- (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))),
- (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpHI)>;
-def: Pat<(setugt (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)),
- (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))),
- (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpHI)>;
-def: Pat<(setuge (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)),
- (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))),
- (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpHS)>;
-def: Pat<(setuge (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)),
- (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))),
- (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpHS)>;
-def: Pat<(setult (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)),
- (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))),
- (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpLO)>;
-def: Pat<(setult (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)),
- (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))),
- (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpLO)>;
-def: Pat<(setule (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)),
- (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))),
- (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpLS)>;
-def: Pat<(setule (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)),
- (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))),
- (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpLS)>;
-def: Pat<(seteq (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)),
- (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))),
- (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpEQ)>;
-def: Pat<(seteq (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)),
- (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))),
- (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpEQ)>;
-def: Pat<(setne (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)),
- (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255))),
- (SETP_u32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), CmpNE)>;
-def: Pat<(setne (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)),
- (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255))),
- (SETP_u32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), CmpNE)>;
-
-// i1 compare -> i32
-def : Pat<(i32 (setne i1:$a, i1:$b)),
- (SELP_b32ii -1, 0, (XORb1rr $a, $b))>;
-def : Pat<(i32 (setne i1:$a, i1:$b)),
- (SELP_b32ii 0, -1, (XORb1rr $a, $b))>;
-
-
-
-multiclass FSET_FORMAT<PatFrag OpNode, PatLeaf Mode, PatLeaf ModeFTZ> {
- // f16 -> pred
- def : Pat<(i1 (OpNode f16:$a, f16:$b)),
- (SETP_f16rr $a, $b, ModeFTZ)>,
- Requires<[useFP16Math, doF32FTZ]>;
- def : Pat<(i1 (OpNode f16:$a, f16:$b)),
- (SETP_f16rr $a, $b, Mode)>,
- Requires<[useFP16Math]>;
-
- // bf16 -> pred
- def : Pat<(i1 (OpNode bf16:$a, bf16:$b)),
- (SETP_bf16rr $a, $b, ModeFTZ)>,
- Requires<[hasBF16Math, doF32FTZ]>;
- def : Pat<(i1 (OpNode bf16:$a, bf16:$b)),
- (SETP_bf16rr $a, $b, Mode)>,
- Requires<[hasBF16Math]>;
-
- // f32 -> pred
- def : Pat<(i1 (OpNode f32:$a, f32:$b)),
- (SETP_f32rr $a, $b, ModeFTZ)>,
- Requires<[doF32FTZ]>;
- def : Pat<(i1 (OpNode f32:$a, f32:$b)),
- (SETP_f32rr $a, $b, Mode)>;
- def : Pat<(i1 (OpNode f32:$a, fpimm:$b)),
- (SETP_f32ri $a, fpimm:$b, ModeFTZ)>,
- Requires<[doF32FTZ]>;
- def : Pat<(i1 (OpNode f32:$a, fpimm:$b)),
- (SETP_f32ri $a, fpimm:$b, Mode)>;
- def : Pat<(i1 (OpNode fpimm:$a, f32:$b)),
- (SETP_f32ir fpimm:$a, $b, ModeFTZ)>,
- Requires<[doF32FTZ]>;
- def : Pat<(i1 (OpNode fpimm:$a, f32:$b)),
- (SETP_f32ir fpimm:$a, $b, Mode)>;
-
- // f64 -> pred
- def : Pat<(i1 (OpNode f64:$a, f64:$b)),
- (SETP_f64rr $a, $b, Mode)>;
- def : Pat<(i1 (OpNode f64:$a, fpimm:$b)),
- (SETP_f64ri $a, fpimm:$b, Mode)>;
- def : Pat<(i1 (OpNode fpimm:$a, f64:$b)),
- (SETP_f64ir fpimm:$a, $b, Mode)>;
-}
-
-defm FSetOGT : FSET_FORMAT<setogt, CmpGT, CmpGT_FTZ>;
-defm FSetOLT : FSET_FORMAT<setolt, CmpLT, CmpLT_FTZ>;
-defm FSetOGE : FSET_FORMAT<setoge, CmpGE, CmpGE_FTZ>;
-defm FSetOLE : FSET_FORMAT<setole, CmpLE, CmpLE_FTZ>;
-defm FSetOEQ : FSET_FORMAT<setoeq, CmpEQ, CmpEQ_FTZ>;
-defm FSetONE : FSET_FORMAT<setone, CmpNE, CmpNE_FTZ>;
-
-defm FSetUGT : FSET_FORMAT<setugt, CmpGTU, CmpGTU_FTZ>;
-defm FSetULT : FSET_FORMAT<setult, CmpLTU, CmpLTU_FTZ>;
-defm FSetUGE : FSET_FORMAT<setuge, CmpGEU, CmpGEU_FTZ>;
-defm FSetULE : FSET_FORMAT<setule, CmpLEU, CmpLEU_FTZ>;
-defm FSetUEQ : FSET_FORMAT<setueq, CmpEQU, CmpEQU_FTZ>;
-defm FSetUNE : FSET_FORMAT<setune, CmpNEU, CmpNEU_FTZ>;
-
-defm FSetGT : FSET_FORMAT<setgt, CmpGT, CmpGT_FTZ>;
-defm FSetLT : FSET_FORMAT<setlt, CmpLT, CmpLT_FTZ>;
-defm FSetGE : FSET_FORMAT<setge, CmpGE, CmpGE_FTZ>;
-defm FSetLE : FSET_FORMAT<setle, CmpLE, CmpLE_FTZ>;
-defm FSetEQ : FSET_FORMAT<seteq, CmpEQ, CmpEQ_FTZ>;
-defm FSetNE : FSET_FORMAT<setne, CmpNE, CmpNE_FTZ>;
-
-defm FSetNUM : FSET_FORMAT<seto, CmpNUM, CmpNUM_FTZ>;
-defm FSetNAN : FSET_FORMAT<setuo, CmpNAN, CmpNAN_FTZ>;
+def: Pat<(setcc (i16 (sext_inreg (i16 (trunc (bfe B32:$a, B32:$oa, 8))), i8)),
+ (i16 (sext_inreg (i16 (trunc (bfe B32:$b, B32:$ob, 8))), i8)),
+ cond_signed:$cc),
+ (SETP_i32rr (BFE_S32rri $a, $oa, 8), (BFE_S32rri $b, $ob, 8), (cond2cc $cc))>;
+
+def: Pat<(setcc (i16 (sext_inreg (trunc (bfe B32:$a, imm:$oa, 8)), i8)),
+ (i16 (sext_inreg (trunc (bfe B32:$b, imm:$ob, 8)), i8)),
+ cond_signed:$cc),
+ (SETP_i32rr (BFE_S32rii $a, imm:$oa, 8), (BFE_S32rii $b, imm:$ob, 8), (cond2cc $cc))>;
+
+def: Pat<(setcc (i16 (and (trunc (bfe B32:$a, B32:$oa, 8)), 255)),
+ (i16 (and (trunc (bfe B32:$b, B32:$ob, 8)), 255)),
+ cond_not_signed:$cc),
+ (SETP_i32rr (BFE_U32rri $a, $oa, 8), (BFE_U32rri $b, $ob, 8), (cond2cc $cc))>;
+
+def: Pat<(setcc (i16 (and (trunc (bfe B32:$a, imm:$oa, 8)), 255)),
+ (i16 (and (trunc (bfe B32:$b, imm:$ob, 8)), 255)),
+ cond_not_signed:$cc),
+ (SETP_i32rr (BFE_U32rii $a, imm:$oa, 8), (BFE_U32rii $b, imm:$ob, 8), (cond2cc $cc))>;
def SDTDeclareArrayParam :
SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
@@ -2382,25 +2111,25 @@ def : Pat<(f64 (uint_to_fp i64:$a)), (CVT_f64_u64 $a, CvtRN)>;
// f16 -> sint
-def : Pat<(i1 (fp_to_sint f16:$a)), (SETP_b16ri $a, 0, CmpEQ)>;
+def : Pat<(i1 (fp_to_sint f16:$a)), (SETP_i16ri $a, 0, CmpEQ)>;
def : Pat<(i16 (fp_to_sint f16:$a)), (CVT_s16_f16 $a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint f16:$a)), (CVT_s32_f16 $a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint f16:$a)), (CVT_s64_f16 $a, CvtRZI)>;
// f16 -> uint
-def : Pat<(i1 (fp_to_uint f16:$a)), (SETP_b16ri $a, 0, CmpEQ)>;
+def : Pat<(i1 (fp_to_uint f16:$a)), (SETP_i16ri $a, 0, CmpEQ)>;
def : Pat<(i16 (fp_to_uint f16:$a)), (CVT_u16_f16 $a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint f16:$a)), (CVT_u32_f16 $a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint f16:$a)), (CVT_u64_f16 $a, CvtRZI)>;
// bf16 -> sint
-def : Pat<(i1 (fp_to_sint bf16:$a)), (SETP_b16ri $a, 0, CmpEQ)>;
+def : Pat<(i1 (fp_to_sint bf16:$a)), (SETP_i16ri $a, 0, CmpEQ)>;
def : Pat<(i16 (fp_to_sint bf16:$a)), (CVT_s16_bf16 $a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint bf16:$a)), (CVT_s32_bf16 $a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint bf16:$a)), (CVT_s64_bf16 $a, CvtRZI)>;
// bf16 -> uint
-def : Pat<(i1 (fp_to_uint bf16:$a)), (SETP_b16ri $a, 0, CmpEQ)>;
+def : Pat<(i1 (fp_to_uint bf16:$a)), (SETP_i16ri $a, 0, CmpEQ)>;
def : Pat<(i16 (fp_to_uint bf16:$a)), (CVT_u16_bf16 $a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint bf16:$a)), (CVT_u32_bf16 $a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint bf16:$a)), (CVT_u64_bf16 $a, CvtRZI)>;
@@ -2410,7 +2139,7 @@ let Predicates = [doF32FTZ] in {
def : Pat<(i32 (fp_to_sint f32:$a)), (CVT_s32_f32 $a, CvtRZI_FTZ)>;
def : Pat<(i64 (fp_to_sint f32:$a)), (CVT_s64_f32 $a, CvtRZI_FTZ)>;
}
-def : Pat<(i1 (fp_to_sint f32:$a)), (SETP_b32ri $a, 0, CmpEQ)>;
+def : Pat<(i1 (fp_to_sint f32:$a)), (SETP_i32ri $a, 0, CmpEQ)>;
def : Pat<(i16 (fp_to_sint f32:$a)), (CVT_s16_f32 $a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint f32:$a)), (CVT_s32_f32 $a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint f32:$a)), (CVT_s64_f32 $a, CvtRZI)>;
@@ -2421,19 +2150,19 @@ let Predicates = [doF32FTZ] in {
def : Pat<(i32 (fp_to_uint f32:$a)), (CVT_u32_f32 $a, CvtRZI_FTZ)>;
def : Pat<(i64 (fp_to_uint f32:$a)), (CVT_u64_f32 $a, CvtRZI_FTZ)>;
}
-def : Pat<(i1 (fp_to_uint f32:$a)), (SETP_b32ri $a, 0, CmpEQ)>;
+def : Pat<(i1 (fp_to_uint f32:$a)), (SETP_i32ri $a, 0, CmpEQ)>;
def : Pat<(i16 (fp_to_uint f32:$a)), (CVT_u16_f32 $a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint f32:$a)), (CVT_u32_f32 $a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint f32:$a)), (CVT_u64_f32 $a, CvtRZI)>;
// f64 -> sint
-def : Pat<(i1 (fp_to_sint f64:$a)), (SETP_b64ri $a, 0, CmpEQ)>;
+def : Pat<(i1 (fp_to_sint f64:$a)), (SETP_i64ri $a, 0, CmpEQ)>;
def : Pat<(i16 (fp_to_sint f64:$a)), (CVT_s16_f64 $a, CvtRZI)>;
def : Pat<(i32 (fp_to_sint f64:$a)), (CVT_s32_f64 $a, CvtRZI)>;
def : Pat<(i64 (fp_to_sint f64:$a)), (CVT_s64_f64 $a, CvtRZI)>;
// f64 -> uint
-def : Pat<(i1 (fp_to_uint f64:$a)), (SETP_b64ri $a, 0, CmpEQ)>;
+def : Pat<(i1 (fp_to_uint f64:$a)), (SETP_i64ri $a, 0, CmpEQ)>;
def : Pat<(i16 (fp_to_uint f64:$a)), (CVT_u16_f64 $a, CvtRZI)>;
def : Pat<(i32 (fp_to_uint f64:$a)), (CVT_u32_f64 $a, CvtRZI)>;
def : Pat<(i64 (fp_to_uint f64:$a)), (CVT_u64_f64 $a, CvtRZI)>;
@@ -2478,14 +2207,14 @@ def : Pat<(i64 (anyext i32:$a)), (CVT_u64_u32 $a, CvtNONE)>;
// truncate i64
def : Pat<(i32 (trunc i64:$a)), (CVT_u32_u64 $a, CvtNONE)>;
def : Pat<(i16 (trunc i64:$a)), (CVT_u16_u64 $a, CvtNONE)>;
-def : Pat<(i1 (trunc i64:$a)), (SETP_b64ri (ANDb64ri $a, 1), 0, CmpNE)>;
+def : Pat<(i1 (trunc i64:$a)), (SETP_i64ri (ANDb64ri $a, 1), 0, CmpNE)>;
// truncate i32
def : Pat<(i16 (trunc i32:$a)), (CVT_u16_u32 $a, CvtNONE)>;
-def : Pat<(i1 (trunc i32:$a)), (SETP_b32ri (ANDb32ri $a, 1), 0, CmpNE)>;
+def : Pat<(i1 (trunc i32:$a)), (SETP_i32ri (ANDb32ri $a, 1), 0, CmpNE)>;
// truncate i16
-def : Pat<(i1 (trunc i16:$a)), (SETP_b16ri (ANDb16ri $a, 1), 0, CmpNE)>;
+def : Pat<(i1 (trunc i16:$a)), (SETP_i16ri (ANDb16ri $a, 1), 0, CmpNE)>;
// sext_inreg
def : Pat<(sext_inreg i16:$a, i8), (CVT_INREG_s16_s8 $a)>;
@@ -2734,7 +2463,7 @@ let isTerminator=1 in {
}
def : Pat<(brcond i32:$a, bb:$target),
- (CBranch (SETP_u32ri $a, 0, CmpNE), bb:$target)>;
+ (CBranch (SETP_i32ri $a, 0, CmpNE), bb:$target)>;
// SelectionDAGBuilder::visitSWitchCase() will invert the condition of a
// conditional branch if the target block is the next block so that the code
@@ -2964,31 +2693,18 @@ def NVPTX_fmaxnum_nsz : PatFrag<(ops node:$a, node:$b),
return N->getFlags().hasNoSignedZeros() || TM.Options.NoSignedZerosFPMath;
}]>;
-class NVPTXInst_rrr<RegisterClass RC, string Instruction, list<Predicate> Preds>
- : BasicNVPTXInst<(outs RC:$dst), (ins RC:$a, RC:$b, RC:$c), Instruction>,
- Requires<Preds>;
-
-def FMARELU_F16 : NVPTXInst_rrr<B16, "fma.rn.relu.f16", [useFP16Math, hasPTX<70>, hasSM<80>]>;
-def FMARELU_F16_FTZ : NVPTXInst_rrr<B16, "fma.rn.ftz.relu.f16", [useFP16Math, hasPTX<70>, hasSM<80>]>;
-def FMARELU_BF16 : NVPTXInst_rrr<B16, "fma.rn.relu.bf16", [hasBF16Math, hasPTX<70>, hasSM<80>]>;
-def FMARELU_F16X2 : NVPTXInst_rrr<B32, "fma.rn.relu.f16x2", [useFP16Math, hasPTX<70>, hasSM<80>]>;
-def FMARELU_F16X2_FTZ : NVPTXInst_rrr<B32, "fma.rn.ftz.relu.f16x2", [useFP16Math, hasPTX<70>, hasSM<80>]>;
-def FMARELU_BF16X2 : NVPTXInst_rrr<B32, "fma.rn.relu.bf16x2", [hasBF16Math, hasPTX<70>, hasSM<80>]>;
-
-// FTZ
-def : Pat<(f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan f16:$a, f16:$b, f16:$c), fpimm_any_zero)),
- (FMARELU_F16_FTZ $a, $b, $c)>,
- Requires<[doF32FTZ]>;
-def : Pat<(v2f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan v2f16:$a, v2f16:$b, v2f16:$c), fpimm_positive_zero_v2f16)),
- (FMARELU_F16X2_FTZ $a, $b, $c)>,
- Requires<[doF32FTZ]>;
-
-// NO FTZ
-def : Pat<(f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan f16:$a, f16:$b, f16:$c), fpimm_any_zero)),
- (FMARELU_F16 $a, $b, $c)>;
-def : Pat<(bf16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan bf16:$a, bf16:$b, bf16:$c), fpimm_any_zero)),
- (FMARELU_BF16 $a, $b, $c)>;
-def : Pat<(v2f16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan v2f16:$a, v2f16:$b, v2f16:$c), fpimm_positive_zero_v2f16)),
- (FMARELU_F16X2 $a, $b, $c)>;
-def : Pat<(v2bf16 (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan v2bf16:$a, v2bf16:$b, v2bf16:$c), fpimm_positive_zero_v2bf16)),
- (FMARELU_BF16X2 $a, $b, $c)>;
+class FMARELUInst<RegTyInfo t, bit allow_ftz, PatFrag zero_pat>
+ : BasicFlagsNVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.RC:$b, t.RC:$c),
+ !if(allow_ftz, (ins FTZFlag:$ftz), (ins)),
+ "fma.rn" # !if(allow_ftz, "$ftz", "") # ".relu." # t.PtxType,
+ [(set t.Ty:$dst, (NVPTX_fmaxnum_nsz (NVPTX_fma_oneuse_and_nnan t.Ty:$a, t.Ty:$b, t.Ty:$c), zero_pat))]>;
+
+let Predicates = [useFP16Math, hasPTX<70>, hasSM<80>] in {
+ def FMARELU_F16 : FMARELUInst<F16RT, true, fpimm_any_zero>;
+ def FMARELU_F16X2 : FMARELUInst<F16X2RT, true, fpimm_positive_zero_v2f16>;
+}
+
+let Predicates = [hasBF16Math, hasPTX<70>, hasSM<80>] in {
+ def FMARELU_BF16 : FMARELUInst<BF16RT, false, fpimm_any_zero>;
+ def FMARELU_BF16X2 : FMARELUInst<BF16X2RT, false, fpimm_positive_zero_v2bf16>;
+}
\ No newline at end of file
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index cc1fd027d8515..d840324ce8238 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1006,12 +1006,14 @@ def INT_PM_EVENT_MASK : BasicNVPTXInst<(outs),
// Min Max
//
-def INT_NVVM_FMIN_F : F_MATH_2<"min.f32", B32, B32, B32, int_nvvm_fmin_f>;
-def INT_NVVM_FMIN_FTZ_F : F_MATH_2<"min.ftz.f32", B32, B32, B32, int_nvvm_fmin_ftz_f>;
-def INT_NVVM_FMIN_NAN_F : F_MATH_2<"min.NaN.f32", B32, B32, B32, int_nvvm_fmin_nan_f,
- [hasPTX<70>, hasSM<80>]>;
-def INT_NVVM_FMIN_FTZ_NAN_F : F_MATH_2<"min.ftz.NaN.f32", B32, B32, B32, int_nvvm_fmin_ftz_nan_f,
- [hasPTX<70>, hasSM<80>]>;
+def : Pat<(int_nvvm_fmin_f f32:$a, f32:$b), (MIN_f32_rr $a, $b, NoFTZ)>;
+def : Pat<(int_nvvm_fmin_ftz_f f32:$a, f32:$b), (MIN_f32_rr $a, $b, FTZ)>;
+
+let Predicates = [hasPTX<70>, hasSM<80>] in {
+ def : Pat<(int_nvvm_fmin_nan_f f32:$a, f32:$b), (MIN_NAN_f32_rr $a, $b, NoFTZ)>;
+ def : Pat<(int_nvvm_fmin_ftz_nan_f f32:$a, f32:$b), (MIN_NAN_f32_rr $a, $b, FTZ)>;
+}
+
def INT_NVVM_FMIN_XORSIGN_ABS_F :
F_MATH_2<"min.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmin_xorsign_abs_f,
[hasPTX<72>, hasSM<86>]>;
@@ -1025,12 +1027,15 @@ def INT_NVVM_FMIN_FTZ_NAN_XORSIGN_ABS_F :
F_MATH_2<"min.ftz.NaN.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmin_ftz_nan_xorsign_abs_f,
[hasPTX<72>, hasSM<86>]>;
-def INT_NVVM_FMAX_F : F_MATH_2<"max.f32", B32, B32, B32, int_nvvm_fmax_f>;
-def INT_NVVM_FMAX_FTZ_F : F_MATH_2<"max.ftz.f32", B32, B32, B32, int_nvvm_fmax_ftz_f>;
-def INT_NVVM_FMAX_NAN_F : F_MATH_2<"max.NaN.f32", B32, B32, B32, int_nvvm_fmax_nan_f,
- [hasPTX<70>, hasSM<80>]>;
-def INT_NVVM_FMAX_FTZ_NAN_F : F_MATH_2<"max.ftz.NaN.f32", B32, B32, B32, int_nvvm_fmax_ftz_nan_f,
- [hasPTX<70>, hasSM<80>]>;
+
+def : Pat<(int_nvvm_fmax_f f32:$a, f32:$b), (MAX_f32_rr $a, $b, NoFTZ)>;
+def : Pat<(int_nvvm_fmax_ftz_f f32:$a, f32:$b), (MAX_f32_rr $a, $b, FTZ)>;
+
+let Predicates = [hasPTX<70>, hasSM<80>] in {
+ def : Pat<(int_nvvm_fmax_nan_f f32:$a, f32:$b), (MAX_NAN_f32_rr $a, $b, NoFTZ)>;
+ def : Pat<(int_nvvm_fmax_ftz_nan_f f32:$a, f32:$b), (MAX_NAN_f32_rr $a, $b, FTZ)>;
+}
+
def INT_NVVM_FMAX_XORSIGN_ABS_F :
F_MATH_2<"max.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmax_xorsign_abs_f,
[hasPTX<72>, hasSM<86>]>;
@@ -1044,8 +1049,8 @@ def INT_NVVM_FMAX_FTZ_NAN_XORSIGN_ABS_F :
F_MATH_2<"max.ftz.NaN.xorsign.abs.f32", B32, B32, B32, int_nvvm_fmax_ftz_nan_xorsign_abs_f,
[hasPTX<72>, hasSM<86>]>;
-def INT_NVVM_FMIN_D : F_MATH_2<"min.f64", B64, B64, B64, int_nvvm_fmin_d>;
-def INT_NVVM_FMAX_D : F_MATH_2<"max.f64", B64, B64, B64, int_nvvm_fmax_d>;
+def : Pat<(int_nvvm_fmin_d f64:$a, f64:$b), (MIN_f64_rr $a, $b)>;
+def : Pat<(int_nvvm_fmax_d f64:$a, f64:$b), (MAX_f64_rr $a, $b)>;
//
// Min Max f16, f16x2, bf16, bf16x2
@@ -1181,17 +1186,10 @@ def INT_NVVM_DIV_RZ_D : F_MATH_2<"div.rz.f64", B64, B64, B64, int_nvvm_div_rz_d>
def INT_NVVM_DIV_RM_D : F_MATH_2<"div.rm.f64", B64, B64, B64, int_nvvm_div_rm_d>;
def INT_NVVM_DIV_RP_D : F_MATH_2<"div.rp.f64", B64, B64, B64, int_nvvm_div_rp_d>;
-def : Pat<(int_nvvm_div_full f32:$a, f32:$b),
- (FDIV32rr $a, $b)>;
-
-def : Pat<(int_nvvm_div_full f32:$a, fpimm:$b),
- (FDIV32ri $a, f32imm:$b)>;
-
-def : Pat<(int_nvvm_div_full_ftz f32:$a, f32:$b),
- (FDIV32rr_ftz $a, $b)>;
-
-def : Pat<(int_nvvm_div_full_ftz f32:$a, fpimm:$b),
- (FDIV32ri_ftz $a, f32imm:$b)>;
+def : Pat<(int_nvvm_div_full f32:$a, f32:$b), (FDIV32rr $a, $b, NoFTZ)>;
+def : Pat<(int_nvvm_div_full f32:$a, fpimm:$b), (FDIV32ri $a, f32imm:$b, NoFTZ)>;
+def : Pat<(int_nvvm_div_full_ftz f32:$a, f32:$b), (FDIV32rr $a, $b, FTZ)>;
+def : Pat<(int_nvvm_div_full_ftz f32:$a, fpimm:$b), (FDIV32ri $a, f32imm:$b, FTZ)>;
//
// Sad
@@ -1288,48 +1286,36 @@ def : Pat<(int_nvvm_saturate_d f64:$a), (CVT_f64_f64 $a, CvtSAT)>;
// Exp2 Log2
//
-def INT_NVVM_EX2_APPROX_FTZ_F : F_MATH_1<"ex2.approx.ftz.f32",
- F32RT, F32RT, int_nvvm_ex2_approx_ftz_f>;
-def INT_NVVM_EX2_APPROX_F : F_MATH_1<"ex2.approx.f32",
- F32RT, F32RT, int_nvvm_ex2_approx_f>;
-def INT_NVVM_EX2_APPROX_D : F_MATH_1<"ex2.approx.f64",
- F64RT, F64RT, int_nvvm_ex2_approx_d>;
-
-def INT_NVVM_EX2_APPROX_F16 : F_MATH_1<"ex2.approx.f16",
- F16RT, F16RT, int_nvvm_ex2_approx_f16, [hasPTX<70>, hasSM<75>]>;
-def INT_NVVM_EX2_APPROX_F16X2 : F_MATH_1<"ex2.approx.f16x2",
- F16X2RT, F16X2RT, int_nvvm_ex2_approx_f16x2, [hasPTX<70>, hasSM<75>]>;
-
-def : Pat<(fexp2 f32:$a), (INT_NVVM_EX2_APPROX_FTZ_F $a)>, Requires<[doF32FTZ]>;
-def : Pat<(fexp2 f32:$a), (INT_NVVM_EX2_APPROX_F $a)>;
-def : Pat<(fexp2 f16:$a), (INT_NVVM_EX2_APPROX_F16 $a)>, Requires<[useFP16Math]>;
-def : Pat<(fexp2 v2f16:$a), (INT_NVVM_EX2_APPROX_F16X2 $a)>, Requires<[useFP16Math]>;
-
-def INT_NVVM_LG2_APPROX_FTZ_F : F_MATH_1<"lg2.approx.ftz.f32",
- F32RT, F32RT, int_nvvm_lg2_approx_ftz_f>;
-def INT_NVVM_LG2_APPROX_F : F_MATH_1<"lg2.approx.f32",
- F32RT, F32RT, int_nvvm_lg2_approx_f>;
-def INT_NVVM_LG2_APPROX_D : F_MATH_1<"lg2.approx.f64",
- F64RT, F64RT, int_nvvm_lg2_approx_d>;
-
-def : Pat<(flog2 f32:$a), (INT_NVVM_LG2_APPROX_FTZ_F $a)>,
- Requires<[doF32FTZ]>;
-def : Pat<(flog2 f32:$a), (INT_NVVM_LG2_APPROX_F $a)>,
- Requires<[doNoF32FTZ]>;
+def : Pat<(int_nvvm_ex2_approx_ftz_f f32:$a), (EX2_APPROX_f32 $a, FTZ)>;
+def : Pat<(int_nvvm_ex2_approx_f f32:$a), (EX2_APPROX_f32 $a, NoFTZ)>;
+
+let Predicates = [hasPTX<70>, hasSM<75>] in {
+ def : Pat<(int_nvvm_ex2_approx_f16 f16:$a), (EX2_APPROX_f16 $a)>;
+ def : Pat<(int_nvvm_ex2_approx_f16x2 v2f16:$a), (EX2_APPROX_f16x2 $a)>;
+}
+
+def LG2_APPROX_f32 :
+ BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$src), (ins FTZFlag:$ftz),
+ "lg2.approx$ftz.f32",
+ [(set f32:$dst, (flog2 f32:$src))]>;
+
+def LG2_APPROX_f64 :
+ BasicNVPTXInst<(outs B64:$dst), (ins B64:$src),
+ "lg2.approx.f64",
+ [(set f64:$dst, (flog2 f64:$src))]>;
+
+def : Pat<(int_nvvm_lg2_approx_ftz_f f32:$a), (LG2_APPROX_f32 $a, FTZ)>;
+def : Pat<(int_nvvm_lg2_approx_f f32:$a), (LG2_APPROX_f32 $a, NoFTZ)>;
+def : Pat<(int_nvvm_lg2_approx_d f64:$a), (LG2_APPROX_f64 $a)>;
//
// Sin Cos
//
-def INT_NVVM_SIN_APPROX_FTZ_F : F_MATH_1<"sin.approx.ftz.f32",
- F32RT, F32RT, int_nvvm_sin_approx_ftz_f>;
-def INT_NVVM_SIN_APPROX_F : F_MATH_1<"sin.approx.f32",
- F32RT, F32RT, int_nvvm_sin_approx_f>;
-
-def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32",
- F32RT, F32RT, int_nvvm_cos_approx_ftz_f>;
-def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32",
- F32RT, F32RT, int_nvvm_cos_approx_f>;
+def : Pat<(int_nvvm_sin_approx_ftz_f f32:$a), (SIN_APPROX_f32 $a, FTZ)>;
+def : Pat<(int_nvvm_sin_approx_f f32:$a), (SIN_APPROX_f32 $a, NoFTZ)>;
+def : Pat<(int_nvvm_cos_approx_ftz_f f32:$a), (COS_APPROX_f32 $a, FTZ)>;
+def : Pat<(int_nvvm_cos_approx_f f32:$a), (COS_APPROX_f32 $a, NoFTZ)>;
//
// Fma
@@ -1478,31 +1464,30 @@ def : Pat<(fsqrt_approx f32:$a), (INT_NVVM_SQRT_APPROX_F $a)>;
// Rsqrt
//
-def INT_NVVM_RSQRT_APPROX_FTZ_F
- : F_MATH_1<"rsqrt.approx.ftz.f32", F32RT, F32RT, int_nvvm_rsqrt_approx_ftz_f>;
-def INT_NVVM_RSQRT_APPROX_FTZ_D
- : F_MATH_1<"rsqrt.approx.ftz.f64", F64RT, F64RT, int_nvvm_rsqrt_approx_ftz_d>;
+foreach t = [F32RT, F64RT] in {
+ def RSQRT_APPROX_ # t.Ty :
+ BasicFlagsNVPTXInst<(outs t.RC:$dst),
+ (ins t.RC:$a), (ins FTZFlag:$ftz),
+ "rsqrt.approx$ftz.f" # t.Size>;
+}
+
+def : Pat<(int_nvvm_rsqrt_approx_ftz_f f32:$a), (RSQRT_APPROX_f32 $a, FTZ)>;
+def : Pat<(int_nvvm_rsqrt_approx_ftz_d f64:$a), (RSQRT_APPROX_f64 $a, FTZ)>;
+def : Pat<(int_nvvm_rsqrt_approx_f f32:$a), (RSQRT_APPROX_f32 $a, NoFTZ)>;
+def : Pat<(int_nvvm_rsqrt_approx_d f64:$a), (RSQRT_APPROX_f64 $a, NoFTZ)>;
-def INT_NVVM_RSQRT_APPROX_F : F_MATH_1<"rsqrt.approx.f32",
- F32RT, F32RT, int_nvvm_rsqrt_approx_f>;
-def INT_NVVM_RSQRT_APPROX_D : F_MATH_1<"rsqrt.approx.f64",
- F64RT, F64RT, int_nvvm_rsqrt_approx_d>;
// 1.0f / sqrt_approx -> rsqrt_approx
-def: Pat<(fdiv f32imm_1, (int_nvvm_sqrt_approx_f f32:$a)),
- (INT_NVVM_RSQRT_APPROX_F $a)>,
- Requires<[doRsqrtOpt]>;
-def: Pat<(fdiv f32imm_1, (int_nvvm_sqrt_approx_ftz_f f32:$a)),
- (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>,
- Requires<[doRsqrtOpt]>;
-
-// same for int_nvvm_sqrt_f when non-precision sqrt is requested
-def: Pat<(fdiv f32imm_1, (fsqrt_approx f32:$a)),
- (INT_NVVM_RSQRT_APPROX_F $a)>,
- Requires<[doRsqrtOpt, doNoF32FTZ]>;
-def: Pat<(fdiv f32imm_1, (fsqrt_approx f32:$a)),
- (INT_NVVM_RSQRT_APPROX_FTZ_F $a)>,
- Requires<[doRsqrtOpt, doF32FTZ]>;
+let Predicates = [doRsqrtOpt] in {
+ def : Pat<(fdiv f32imm_1, (int_nvvm_sqrt_approx_f f32:$a)),
+ (RSQRT_APPROX_f32 $a, NoFTZ)>;
+ def : Pat<(fdiv f32imm_1, (int_nvvm_sqrt_approx_ftz_f f32:$a)),
+ (RSQRT_APPROX_f32 $a, FTZ)>;
+
+ // same for int_nvvm_sqrt_f when non-precision sqrt is requested
+ def : Pat<(fdiv f32imm_1, (fsqrt_approx f32:$a)),
+ (RSQRT_APPROX_f32 $a)>;
+}
//
// Add
//
diff --git a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
index 61c3ba2ee54ab..e3b072549bc04 100644
--- a/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
+++ b/llvm/test/CodeGen/MIR/NVPTX/floating-point-immediate-operands.mir
@@ -48,8 +48,8 @@ body: |
%4 = CVT_f32_f64 %3, 5
%5 = CVT_f32_s32 %2, 5
; CHECK: %6:b32 = FADD_rnf32ri %5, float 6.250000e+00
- %6 = FADD_rnf32ri %5, float 6.250000e+00
- %7 = FMUL_rnf32rr %6, %4
+ %6 = FADD_rnf32ri %5, float 6.250000e+00, 0
+ %7 = FMUL_rnf32rr %6, %4, 0
ST_i32 %7, 0, 0, 101, 32, &func_retval0, 0 :: (store (s32), addrspace 101)
Return
...
@@ -74,8 +74,8 @@ body: |
%4 = CVT_f32_f64 %3, 5
%5 = CVT_f32_s32 %2, 5
; CHECK: %6:b32 = FADD_rnf32ri %5, float 0x7FF8000000000000
- %6 = FADD_rnf32ri %5, float 0x7FF8000000000000
- %7 = FMUL_rnf32rr %6, %4
+ %6 = FADD_rnf32ri %5, float 0x7FF8000000000000, 0
+ %7 = FMUL_rnf32rr %6, %4, 0
ST_i32 %7, 0, 0, 101, 32, &func_retval0, 0 :: (store (s32), addrspace 101)
Return
...
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
index 22a7177650ee2..94f49b01e6ea6 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
@@ -71,7 +71,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: and.b32 %r31, %r54, %r3;
; CHECKPTX62-NEXT: or.b32 %r32, %r31, %r30;
; CHECKPTX62-NEXT: atom.cas.b32 %r6, [%r1], %r54, %r32;
-; CHECKPTX62-NEXT: setp.ne.s32 %p1, %r6, %r54;
+; CHECKPTX62-NEXT: setp.ne.b32 %p1, %r6, %r54;
; CHECKPTX62-NEXT: mov.b32 %r54, %r6;
; CHECKPTX62-NEXT: @%p1 bra $L__BB0_1;
; CHECKPTX62-NEXT: // %bb.2: // %atomicrmw.end44
@@ -87,7 +87,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: and.b32 %r36, %r55, %r3;
; CHECKPTX62-NEXT: or.b32 %r37, %r36, %r35;
; CHECKPTX62-NEXT: atom.cas.b32 %r9, [%r1], %r55, %r37;
-; CHECKPTX62-NEXT: setp.ne.s32 %p2, %r9, %r55;
+; CHECKPTX62-NEXT: setp.ne.b32 %p2, %r9, %r55;
; CHECKPTX62-NEXT: mov.b32 %r55, %r9;
; CHECKPTX62-NEXT: @%p2 bra $L__BB0_3;
; CHECKPTX62-NEXT: // %bb.4: // %atomicrmw.end26
@@ -108,7 +108,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: and.b32 %r44, %r56, %r12;
; CHECKPTX62-NEXT: or.b32 %r45, %r44, %r43;
; CHECKPTX62-NEXT: atom.global.cas.b32 %r15, [%r10], %r56, %r45;
-; CHECKPTX62-NEXT: setp.ne.s32 %p3, %r15, %r56;
+; CHECKPTX62-NEXT: setp.ne.b32 %p3, %r15, %r56;
; CHECKPTX62-NEXT: mov.b32 %r56, %r15;
; CHECKPTX62-NEXT: @%p3 bra $L__BB0_5;
; CHECKPTX62-NEXT: // %bb.6: // %atomicrmw.end8
@@ -129,7 +129,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: and.b32 %r52, %r57, %r18;
; CHECKPTX62-NEXT: or.b32 %r53, %r52, %r51;
; CHECKPTX62-NEXT: atom.shared.cas.b32 %r21, [%r16], %r57, %r53;
-; CHECKPTX62-NEXT: setp.ne.s32 %p4, %r21, %r57;
+; CHECKPTX62-NEXT: setp.ne.b32 %p4, %r21, %r57;
; CHECKPTX62-NEXT: mov.b32 %r57, %r21;
; CHECKPTX62-NEXT: @%p4 bra $L__BB0_7;
; CHECKPTX62-NEXT: // %bb.8: // %atomicrmw.end
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
index b5a4f94611453..b21bd16d55c2c 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
@@ -72,7 +72,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: and.b32 %r31, %r54, %r3;
; CHECKPTX71-NEXT: or.b32 %r32, %r31, %r30;
; CHECKPTX71-NEXT: atom.relaxed.cas.b32 %r6, [%r1], %r54, %r32;
-; CHECKPTX71-NEXT: setp.ne.s32 %p1, %r6, %r54;
+; CHECKPTX71-NEXT: setp.ne.b32 %p1, %r6, %r54;
; CHECKPTX71-NEXT: mov.b32 %r54, %r6;
; CHECKPTX71-NEXT: @%p1 bra $L__BB0_1;
; CHECKPTX71-NEXT: // %bb.2: // %atomicrmw.end44
@@ -88,7 +88,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: and.b32 %r36, %r55, %r3;
; CHECKPTX71-NEXT: or.b32 %r37, %r36, %r35;
; CHECKPTX71-NEXT: atom.relaxed.cas.b32 %r9, [%r1], %r55, %r37;
-; CHECKPTX71-NEXT: setp.ne.s32 %p2, %r9, %r55;
+; CHECKPTX71-NEXT: setp.ne.b32 %p2, %r9, %r55;
; CHECKPTX71-NEXT: mov.b32 %r55, %r9;
; CHECKPTX71-NEXT: @%p2 bra $L__BB0_3;
; CHECKPTX71-NEXT: // %bb.4: // %atomicrmw.end26
@@ -110,7 +110,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: and.b32 %r44, %r56, %r12;
; CHECKPTX71-NEXT: or.b32 %r45, %r44, %r43;
; CHECKPTX71-NEXT: atom.relaxed.global.cas.b32 %r15, [%r10], %r56, %r45;
-; CHECKPTX71-NEXT: setp.ne.s32 %p3, %r15, %r56;
+; CHECKPTX71-NEXT: setp.ne.b32 %p3, %r15, %r56;
; CHECKPTX71-NEXT: mov.b32 %r56, %r15;
; CHECKPTX71-NEXT: @%p3 bra $L__BB0_5;
; CHECKPTX71-NEXT: // %bb.6: // %atomicrmw.end8
@@ -132,7 +132,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: and.b32 %r52, %r57, %r18;
; CHECKPTX71-NEXT: or.b32 %r53, %r52, %r51;
; CHECKPTX71-NEXT: atom.relaxed.shared.cas.b32 %r21, [%r16], %r57, %r53;
-; CHECKPTX71-NEXT: setp.ne.s32 %p4, %r21, %r57;
+; CHECKPTX71-NEXT: setp.ne.b32 %p4, %r21, %r57;
; CHECKPTX71-NEXT: mov.b32 %r57, %r21;
; CHECKPTX71-NEXT: @%p4 bra $L__BB0_7;
; CHECKPTX71-NEXT: // %bb.8: // %atomicrmw.end
diff --git a/llvm/test/CodeGen/NVPTX/atomics.ll b/llvm/test/CodeGen/NVPTX/atomics.ll
index 55ce3dfc46539..04a58cf22cfc5 100644
--- a/llvm/test/CodeGen/NVPTX/atomics.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics.ll
@@ -453,7 +453,7 @@ define half @atomicrmw_add_f16_generic(ptr %addr, half %val) {
; CHECK-NEXT: or.b32 %r17, %r16, %r15;
; CHECK-NEXT: membar.sys;
; CHECK-NEXT: atom.cas.b32 %r5, [%rd1], %r19, %r17;
-; CHECK-NEXT: setp.ne.s32 %p1, %r5, %r19;
+; CHECK-NEXT: setp.ne.b32 %p1, %r5, %r19;
; CHECK-NEXT: mov.b32 %r19, %r5;
; CHECK-NEXT: @%p1 bra $L__BB24_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
diff --git a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
index f59f51c3c57d3..a386e4292777b 100644
--- a/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16-instructions.ll
@@ -1339,9 +1339,9 @@ define bfloat @test_maximum(bfloat %a, bfloat %b) {
; SM70-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
; SM70-NEXT: setp.nan.f32 %p2, %r4, %r2;
; SM70-NEXT: selp.b16 %rs4, 0x7FC0, %rs3, %p2;
-; SM70-NEXT: setp.eq.s16 %p3, %rs1, 0;
+; SM70-NEXT: setp.eq.b16 %p3, %rs1, 0;
; SM70-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3;
-; SM70-NEXT: setp.eq.s16 %p4, %rs2, 0;
+; SM70-NEXT: setp.eq.b16 %p4, %rs2, 0;
; SM70-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4;
; SM70-NEXT: cvt.u32.u16 %r5, %rs4;
; SM70-NEXT: shl.b32 %r6, %r5, 16;
@@ -1462,9 +1462,9 @@ define <2 x bfloat> @test_maximum_v2(<2 x bfloat> %a, <2 x bfloat> %b) {
; SM70-NEXT: selp.b16 %rs5, %rs2, %rs4, %p1;
; SM70-NEXT: setp.nan.f32 %p2, %r4, %r2;
; SM70-NEXT: selp.b16 %rs6, 0x7FC0, %rs5, %p2;
-; SM70-NEXT: setp.eq.s16 %p3, %rs2, 0;
+; SM70-NEXT: setp.eq.b16 %p3, %rs2, 0;
; SM70-NEXT: selp.b16 %rs7, %rs2, %rs6, %p3;
-; SM70-NEXT: setp.eq.s16 %p4, %rs4, 0;
+; SM70-NEXT: setp.eq.b16 %p4, %rs4, 0;
; SM70-NEXT: selp.b16 %rs8, %rs4, %rs7, %p4;
; SM70-NEXT: cvt.u32.u16 %r5, %rs6;
; SM70-NEXT: shl.b32 %r6, %r5, 16;
@@ -1478,9 +1478,9 @@ define <2 x bfloat> @test_maximum_v2(<2 x bfloat> %a, <2 x bfloat> %b) {
; SM70-NEXT: selp.b16 %rs10, %rs1, %rs3, %p6;
; SM70-NEXT: setp.nan.f32 %p7, %r10, %r8;
; SM70-NEXT: selp.b16 %rs11, 0x7FC0, %rs10, %p7;
-; SM70-NEXT: setp.eq.s16 %p8, %rs1, 0;
+; SM70-NEXT: setp.eq.b16 %p8, %rs1, 0;
; SM70-NEXT: selp.b16 %rs12, %rs1, %rs11, %p8;
-; SM70-NEXT: setp.eq.s16 %p9, %rs3, 0;
+; SM70-NEXT: setp.eq.b16 %p9, %rs3, 0;
; SM70-NEXT: selp.b16 %rs13, %rs3, %rs12, %p9;
; SM70-NEXT: cvt.u32.u16 %r11, %rs11;
; SM70-NEXT: shl.b32 %r12, %r11, 16;
diff --git a/llvm/test/CodeGen/NVPTX/branch-fold.mir b/llvm/test/CodeGen/NVPTX/branch-fold.mir
index 4d80d52de8da8..ca6f49feff052 100644
--- a/llvm/test/CodeGen/NVPTX/branch-fold.mir
+++ b/llvm/test/CodeGen/NVPTX/branch-fold.mir
@@ -58,7 +58,7 @@ body: |
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[ADDi64ri:%[0-9]+]]:b64 = ADDi64ri [[ADDi64ri]], 1
- ; CHECK-NEXT: [[SETP_s64ri:%[0-9]+]]:b1 = SETP_s64ri [[ADDi64ri]], 1, 2
+ ; CHECK-NEXT: [[SETP_s64ri:%[0-9]+]]:b1 = SETP_i64ri [[ADDi64ri]], 1, 2
; CHECK-NEXT: CBranch [[SETP_s64ri]], %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3.bb4:
@@ -77,7 +77,7 @@ body: |
successors: %bb.2(0x7c000000), %bb.3(0x04000000)
%5:b64 = ADDi64ri %5, 1
- %4:b1 = SETP_s64ri %5, 1, 2
+ %4:b1 = SETP_i64ri %5, 1, 2
CBranch %4, %bb.2
bb.3.bb4:
diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
index c99860cc5cc1b..9f900c961d2ed 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
@@ -32,12 +32,12 @@ define i8 @monotonic_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB0_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB0_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB0_1;
; SM60-NEXT: $L__BB0_3: // %partword.cmpxchg.end
@@ -77,12 +77,12 @@ define i8 @monotonic_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB1_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB1_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB1_1;
; SM60-NEXT: $L__BB1_3: // %partword.cmpxchg.end
@@ -122,12 +122,12 @@ define i8 @monotonic_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %ne
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB2_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB2_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB2_1;
; SM60-NEXT: $L__BB2_3: // %partword.cmpxchg.end
@@ -167,12 +167,12 @@ define i8 @monotonic_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB3_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB3_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB3_1;
; SM60-NEXT: $L__BB3_3: // %partword.cmpxchg.end
@@ -213,12 +213,12 @@ define i8 @monotonic_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB4_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB4_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB4_1;
; SM60-NEXT: $L__BB4_3: // %partword.cmpxchg.end
@@ -259,12 +259,12 @@ define i8 @monotonic_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB5_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB5_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB5_1;
; SM60-NEXT: $L__BB5_3: // %partword.cmpxchg.end
@@ -306,12 +306,12 @@ define i8 @monotonic_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB6_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB6_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB6_1;
; SM60-NEXT: $L__BB6_3: // %partword.cmpxchg.end
@@ -353,12 +353,12 @@ define i8 @monotonic_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB7_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB7_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB7_1;
; SM60-NEXT: $L__BB7_3: // %partword.cmpxchg.end
@@ -400,12 +400,12 @@ define i8 @monotonic_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB8_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB8_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB8_1;
; SM60-NEXT: $L__BB8_3: // %partword.cmpxchg.end
@@ -446,12 +446,12 @@ define i8 @acquire_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB9_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB9_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB9_1;
; SM60-NEXT: $L__BB9_3: // %partword.cmpxchg.end
@@ -492,12 +492,12 @@ define i8 @acquire_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB10_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB10_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB10_1;
; SM60-NEXT: $L__BB10_3: // %partword.cmpxchg.end
@@ -538,12 +538,12 @@ define i8 @acquire_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB11_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB11_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB11_1;
; SM60-NEXT: $L__BB11_3: // %partword.cmpxchg.end
@@ -584,12 +584,12 @@ define i8 @acquire_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB12_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB12_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB12_1;
; SM60-NEXT: $L__BB12_3: // %partword.cmpxchg.end
@@ -630,12 +630,12 @@ define i8 @acquire_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB13_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB13_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB13_1;
; SM60-NEXT: $L__BB13_3: // %partword.cmpxchg.end
@@ -676,12 +676,12 @@ define i8 @acquire_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB14_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB14_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB14_1;
; SM60-NEXT: $L__BB14_3: // %partword.cmpxchg.end
@@ -723,12 +723,12 @@ define i8 @acquire_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB15_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB15_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB15_1;
; SM60-NEXT: $L__BB15_3: // %partword.cmpxchg.end
@@ -770,12 +770,12 @@ define i8 @acquire_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB16_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB16_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB16_1;
; SM60-NEXT: $L__BB16_3: // %partword.cmpxchg.end
@@ -817,12 +817,12 @@ define i8 @acquire_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB17_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB17_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB17_1;
; SM60-NEXT: $L__BB17_3: // %partword.cmpxchg.end
@@ -864,12 +864,12 @@ define i8 @release_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB18_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB18_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB18_1;
; SM60-NEXT: $L__BB18_3: // %partword.cmpxchg.end
@@ -910,12 +910,12 @@ define i8 @release_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB19_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB19_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB19_1;
; SM60-NEXT: $L__BB19_3: // %partword.cmpxchg.end
@@ -956,12 +956,12 @@ define i8 @release_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB20_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB20_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB20_1;
; SM60-NEXT: $L__BB20_3: // %partword.cmpxchg.end
@@ -1002,12 +1002,12 @@ define i8 @release_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB21_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB21_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB21_1;
; SM60-NEXT: $L__BB21_3: // %partword.cmpxchg.end
@@ -1049,12 +1049,12 @@ define i8 @release_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB22_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB22_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB22_1;
; SM60-NEXT: $L__BB22_3: // %partword.cmpxchg.end
@@ -1096,12 +1096,12 @@ define i8 @release_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB23_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB23_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB23_1;
; SM60-NEXT: $L__BB23_3: // %partword.cmpxchg.end
@@ -1143,12 +1143,12 @@ define i8 @release_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB24_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB24_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB24_1;
; SM60-NEXT: $L__BB24_3: // %partword.cmpxchg.end
@@ -1190,12 +1190,12 @@ define i8 @release_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB25_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB25_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB25_1;
; SM60-NEXT: $L__BB25_3: // %partword.cmpxchg.end
@@ -1237,12 +1237,12 @@ define i8 @release_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB26_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB26_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB26_1;
; SM60-NEXT: $L__BB26_3: // %partword.cmpxchg.end
@@ -1284,12 +1284,12 @@ define i8 @acq_rel_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB27_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB27_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB27_1;
; SM60-NEXT: $L__BB27_3: // %partword.cmpxchg.end
@@ -1331,12 +1331,12 @@ define i8 @acq_rel_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB28_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB28_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB28_1;
; SM60-NEXT: $L__BB28_3: // %partword.cmpxchg.end
@@ -1378,12 +1378,12 @@ define i8 @acq_rel_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB29_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB29_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB29_1;
; SM60-NEXT: $L__BB29_3: // %partword.cmpxchg.end
@@ -1425,12 +1425,12 @@ define i8 @acq_rel_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB30_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB30_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB30_1;
; SM60-NEXT: $L__BB30_3: // %partword.cmpxchg.end
@@ -1472,12 +1472,12 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB31_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB31_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB31_1;
; SM60-NEXT: $L__BB31_3: // %partword.cmpxchg.end
@@ -1519,12 +1519,12 @@ define i8 @acq_rel_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB32_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB32_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB32_1;
; SM60-NEXT: $L__BB32_3: // %partword.cmpxchg.end
@@ -1566,12 +1566,12 @@ define i8 @acq_rel_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB33_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB33_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB33_1;
; SM60-NEXT: $L__BB33_3: // %partword.cmpxchg.end
@@ -1613,12 +1613,12 @@ define i8 @acq_rel_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB34_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB34_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB34_1;
; SM60-NEXT: $L__BB34_3: // %partword.cmpxchg.end
@@ -1660,12 +1660,12 @@ define i8 @acq_rel_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB35_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB35_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB35_1;
; SM60-NEXT: $L__BB35_3: // %partword.cmpxchg.end
@@ -1707,12 +1707,12 @@ define i8 @seq_cst_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB36_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB36_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB36_1;
; SM60-NEXT: $L__BB36_3: // %partword.cmpxchg.end
@@ -1754,12 +1754,12 @@ define i8 @seq_cst_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB37_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB37_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB37_1;
; SM60-NEXT: $L__BB37_3: // %partword.cmpxchg.end
@@ -1801,12 +1801,12 @@ define i8 @seq_cst_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB38_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB38_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB38_1;
; SM60-NEXT: $L__BB38_3: // %partword.cmpxchg.end
@@ -1848,12 +1848,12 @@ define i8 @seq_cst_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB39_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB39_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB39_1;
; SM60-NEXT: $L__BB39_3: // %partword.cmpxchg.end
@@ -1895,12 +1895,12 @@ define i8 @seq_cst_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB40_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB40_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB40_1;
; SM60-NEXT: $L__BB40_3: // %partword.cmpxchg.end
@@ -1942,12 +1942,12 @@ define i8 @seq_cst_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB41_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB41_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB41_1;
; SM60-NEXT: $L__BB41_3: // %partword.cmpxchg.end
@@ -1989,12 +1989,12 @@ define i8 @seq_cst_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB42_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB42_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB42_1;
; SM60-NEXT: $L__BB42_3: // %partword.cmpxchg.end
@@ -2036,12 +2036,12 @@ define i8 @seq_cst_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB43_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB43_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB43_1;
; SM60-NEXT: $L__BB43_3: // %partword.cmpxchg.end
@@ -2083,12 +2083,12 @@ define i8 @seq_cst_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: or.b32 %r17, %r20, %r3;
; SM60-NEXT: or.b32 %r18, %r20, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM60-NEXT: @%p1 bra $L__BB44_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB44_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB44_1;
; SM60-NEXT: $L__BB44_3: // %partword.cmpxchg.end
@@ -2128,12 +2128,12 @@ define i16 @monotonic_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB45_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB45_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB45_1;
; SM60-NEXT: $L__BB45_3: // %partword.cmpxchg.end
@@ -2172,12 +2172,12 @@ define i16 @monotonic_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB46_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB46_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB46_1;
; SM60-NEXT: $L__BB46_3: // %partword.cmpxchg.end
@@ -2216,12 +2216,12 @@ define i16 @monotonic_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB47_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB47_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB47_1;
; SM60-NEXT: $L__BB47_3: // %partword.cmpxchg.end
@@ -2260,12 +2260,12 @@ define i16 @monotonic_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB48_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB48_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB48_1;
; SM60-NEXT: $L__BB48_3: // %partword.cmpxchg.end
@@ -2305,12 +2305,12 @@ define i16 @monotonic_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB49_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB49_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB49_1;
; SM60-NEXT: $L__BB49_3: // %partword.cmpxchg.end
@@ -2350,12 +2350,12 @@ define i16 @monotonic_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB50_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB50_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB50_1;
; SM60-NEXT: $L__BB50_3: // %partword.cmpxchg.end
@@ -2396,12 +2396,12 @@ define i16 @monotonic_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB51_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB51_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB51_1;
; SM60-NEXT: $L__BB51_3: // %partword.cmpxchg.end
@@ -2442,12 +2442,12 @@ define i16 @monotonic_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB52_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB52_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB52_1;
; SM60-NEXT: $L__BB52_3: // %partword.cmpxchg.end
@@ -2488,12 +2488,12 @@ define i16 @monotonic_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB53_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB53_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB53_1;
; SM60-NEXT: $L__BB53_3: // %partword.cmpxchg.end
@@ -2533,12 +2533,12 @@ define i16 @acquire_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB54_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB54_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB54_1;
; SM60-NEXT: $L__BB54_3: // %partword.cmpxchg.end
@@ -2578,12 +2578,12 @@ define i16 @acquire_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB55_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB55_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB55_1;
; SM60-NEXT: $L__BB55_3: // %partword.cmpxchg.end
@@ -2623,12 +2623,12 @@ define i16 @acquire_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB56_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB56_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB56_1;
; SM60-NEXT: $L__BB56_3: // %partword.cmpxchg.end
@@ -2668,12 +2668,12 @@ define i16 @acquire_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB57_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB57_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB57_1;
; SM60-NEXT: $L__BB57_3: // %partword.cmpxchg.end
@@ -2713,12 +2713,12 @@ define i16 @acquire_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB58_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB58_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB58_1;
; SM60-NEXT: $L__BB58_3: // %partword.cmpxchg.end
@@ -2758,12 +2758,12 @@ define i16 @acquire_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB59_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB59_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB59_1;
; SM60-NEXT: $L__BB59_3: // %partword.cmpxchg.end
@@ -2804,12 +2804,12 @@ define i16 @acquire_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB60_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB60_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB60_1;
; SM60-NEXT: $L__BB60_3: // %partword.cmpxchg.end
@@ -2850,12 +2850,12 @@ define i16 @acquire_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB61_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB61_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB61_1;
; SM60-NEXT: $L__BB61_3: // %partword.cmpxchg.end
@@ -2896,12 +2896,12 @@ define i16 @acquire_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB62_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB62_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB62_1;
; SM60-NEXT: $L__BB62_3: // %partword.cmpxchg.end
@@ -2942,12 +2942,12 @@ define i16 @release_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB63_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB63_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB63_1;
; SM60-NEXT: $L__BB63_3: // %partword.cmpxchg.end
@@ -2987,12 +2987,12 @@ define i16 @release_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB64_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB64_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB64_1;
; SM60-NEXT: $L__BB64_3: // %partword.cmpxchg.end
@@ -3032,12 +3032,12 @@ define i16 @release_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB65_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB65_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB65_1;
; SM60-NEXT: $L__BB65_3: // %partword.cmpxchg.end
@@ -3077,12 +3077,12 @@ define i16 @release_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB66_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB66_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB66_1;
; SM60-NEXT: $L__BB66_3: // %partword.cmpxchg.end
@@ -3123,12 +3123,12 @@ define i16 @release_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB67_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB67_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB67_1;
; SM60-NEXT: $L__BB67_3: // %partword.cmpxchg.end
@@ -3169,12 +3169,12 @@ define i16 @release_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB68_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB68_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB68_1;
; SM60-NEXT: $L__BB68_3: // %partword.cmpxchg.end
@@ -3215,12 +3215,12 @@ define i16 @release_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB69_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB69_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB69_1;
; SM60-NEXT: $L__BB69_3: // %partword.cmpxchg.end
@@ -3261,12 +3261,12 @@ define i16 @release_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB70_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB70_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB70_1;
; SM60-NEXT: $L__BB70_3: // %partword.cmpxchg.end
@@ -3307,12 +3307,12 @@ define i16 @release_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB71_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB71_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB71_1;
; SM60-NEXT: $L__BB71_3: // %partword.cmpxchg.end
@@ -3353,12 +3353,12 @@ define i16 @acq_rel_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB72_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB72_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB72_1;
; SM60-NEXT: $L__BB72_3: // %partword.cmpxchg.end
@@ -3399,12 +3399,12 @@ define i16 @acq_rel_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB73_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB73_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB73_1;
; SM60-NEXT: $L__BB73_3: // %partword.cmpxchg.end
@@ -3445,12 +3445,12 @@ define i16 @acq_rel_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB74_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB74_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB74_1;
; SM60-NEXT: $L__BB74_3: // %partword.cmpxchg.end
@@ -3491,12 +3491,12 @@ define i16 @acq_rel_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB75_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB75_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB75_1;
; SM60-NEXT: $L__BB75_3: // %partword.cmpxchg.end
@@ -3537,12 +3537,12 @@ define i16 @acq_rel_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB76_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB76_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB76_1;
; SM60-NEXT: $L__BB76_3: // %partword.cmpxchg.end
@@ -3583,12 +3583,12 @@ define i16 @acq_rel_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB77_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB77_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB77_1;
; SM60-NEXT: $L__BB77_3: // %partword.cmpxchg.end
@@ -3629,12 +3629,12 @@ define i16 @acq_rel_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB78_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB78_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB78_1;
; SM60-NEXT: $L__BB78_3: // %partword.cmpxchg.end
@@ -3675,12 +3675,12 @@ define i16 @acq_rel_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB79_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB79_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB79_1;
; SM60-NEXT: $L__BB79_3: // %partword.cmpxchg.end
@@ -3721,12 +3721,12 @@ define i16 @acq_rel_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB80_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB80_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB80_1;
; SM60-NEXT: $L__BB80_3: // %partword.cmpxchg.end
@@ -3767,12 +3767,12 @@ define i16 @seq_cst_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB81_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB81_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB81_1;
; SM60-NEXT: $L__BB81_3: // %partword.cmpxchg.end
@@ -3813,12 +3813,12 @@ define i16 @seq_cst_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB82_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB82_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB82_1;
; SM60-NEXT: $L__BB82_3: // %partword.cmpxchg.end
@@ -3859,12 +3859,12 @@ define i16 @seq_cst_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB83_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB83_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB83_1;
; SM60-NEXT: $L__BB83_3: // %partword.cmpxchg.end
@@ -3905,12 +3905,12 @@ define i16 @seq_cst_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB84_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB84_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB84_1;
; SM60-NEXT: $L__BB84_3: // %partword.cmpxchg.end
@@ -3951,12 +3951,12 @@ define i16 @seq_cst_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB85_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB85_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB85_1;
; SM60-NEXT: $L__BB85_3: // %partword.cmpxchg.end
@@ -3997,12 +3997,12 @@ define i16 @seq_cst_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB86_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB86_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB86_1;
; SM60-NEXT: $L__BB86_3: // %partword.cmpxchg.end
@@ -4043,12 +4043,12 @@ define i16 @seq_cst_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB87_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB87_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB87_1;
; SM60-NEXT: $L__BB87_3: // %partword.cmpxchg.end
@@ -4089,12 +4089,12 @@ define i16 @seq_cst_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB88_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB88_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB88_1;
; SM60-NEXT: $L__BB88_3: // %partword.cmpxchg.end
@@ -4135,12 +4135,12 @@ define i16 @seq_cst_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: or.b32 %r16, %r19, %r3;
; SM60-NEXT: or.b32 %r17, %r19, %r4;
; SM60-NEXT: atom.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM60-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM60-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM60-NEXT: @%p1 bra $L__BB89_3;
; SM60-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM60-NEXT: // in Loop: Header=BB89_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
-; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM60-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB89_1;
; SM60-NEXT: $L__BB89_3: // %partword.cmpxchg.end
diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll
index 68de517f65bb9..28b258dc2a868 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll
@@ -32,12 +32,12 @@ define i8 @monotonic_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB0_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB0_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB0_1;
; SM70-NEXT: $L__BB0_3: // %partword.cmpxchg.end
@@ -77,12 +77,12 @@ define i8 @monotonic_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB1_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB1_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB1_1;
; SM70-NEXT: $L__BB1_3: // %partword.cmpxchg.end
@@ -122,12 +122,12 @@ define i8 @monotonic_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %ne
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB2_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB2_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB2_1;
; SM70-NEXT: $L__BB2_3: // %partword.cmpxchg.end
@@ -167,12 +167,12 @@ define i8 @monotonic_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB3_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB3_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB3_1;
; SM70-NEXT: $L__BB3_3: // %partword.cmpxchg.end
@@ -213,12 +213,12 @@ define i8 @monotonic_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB4_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB4_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB4_1;
; SM70-NEXT: $L__BB4_3: // %partword.cmpxchg.end
@@ -259,12 +259,12 @@ define i8 @monotonic_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB5_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB5_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB5_1;
; SM70-NEXT: $L__BB5_3: // %partword.cmpxchg.end
@@ -306,12 +306,12 @@ define i8 @monotonic_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB6_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB6_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB6_1;
; SM70-NEXT: $L__BB6_3: // %partword.cmpxchg.end
@@ -353,12 +353,12 @@ define i8 @monotonic_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB7_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB7_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB7_1;
; SM70-NEXT: $L__BB7_3: // %partword.cmpxchg.end
@@ -400,12 +400,12 @@ define i8 @monotonic_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB8_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB8_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB8_1;
; SM70-NEXT: $L__BB8_3: // %partword.cmpxchg.end
@@ -446,12 +446,12 @@ define i8 @acquire_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB9_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB9_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB9_1;
; SM70-NEXT: $L__BB9_3: // %partword.cmpxchg.end
@@ -492,12 +492,12 @@ define i8 @acquire_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB10_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB10_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB10_1;
; SM70-NEXT: $L__BB10_3: // %partword.cmpxchg.end
@@ -538,12 +538,12 @@ define i8 @acquire_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB11_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB11_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB11_1;
; SM70-NEXT: $L__BB11_3: // %partword.cmpxchg.end
@@ -584,12 +584,12 @@ define i8 @acquire_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB12_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB12_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB12_1;
; SM70-NEXT: $L__BB12_3: // %partword.cmpxchg.end
@@ -630,12 +630,12 @@ define i8 @acquire_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB13_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB13_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB13_1;
; SM70-NEXT: $L__BB13_3: // %partword.cmpxchg.end
@@ -676,12 +676,12 @@ define i8 @acquire_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB14_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB14_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB14_1;
; SM70-NEXT: $L__BB14_3: // %partword.cmpxchg.end
@@ -723,12 +723,12 @@ define i8 @acquire_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB15_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB15_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB15_1;
; SM70-NEXT: $L__BB15_3: // %partword.cmpxchg.end
@@ -770,12 +770,12 @@ define i8 @acquire_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB16_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB16_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB16_1;
; SM70-NEXT: $L__BB16_3: // %partword.cmpxchg.end
@@ -817,12 +817,12 @@ define i8 @acquire_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB17_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB17_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB17_1;
; SM70-NEXT: $L__BB17_3: // %partword.cmpxchg.end
@@ -864,12 +864,12 @@ define i8 @release_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB18_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB18_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB18_1;
; SM70-NEXT: $L__BB18_3: // %partword.cmpxchg.end
@@ -910,12 +910,12 @@ define i8 @release_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB19_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB19_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB19_1;
; SM70-NEXT: $L__BB19_3: // %partword.cmpxchg.end
@@ -956,12 +956,12 @@ define i8 @release_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB20_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB20_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB20_1;
; SM70-NEXT: $L__BB20_3: // %partword.cmpxchg.end
@@ -1002,12 +1002,12 @@ define i8 @release_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB21_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB21_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB21_1;
; SM70-NEXT: $L__BB21_3: // %partword.cmpxchg.end
@@ -1049,12 +1049,12 @@ define i8 @release_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB22_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB22_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB22_1;
; SM70-NEXT: $L__BB22_3: // %partword.cmpxchg.end
@@ -1096,12 +1096,12 @@ define i8 @release_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB23_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB23_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB23_1;
; SM70-NEXT: $L__BB23_3: // %partword.cmpxchg.end
@@ -1143,12 +1143,12 @@ define i8 @release_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB24_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB24_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB24_1;
; SM70-NEXT: $L__BB24_3: // %partword.cmpxchg.end
@@ -1190,12 +1190,12 @@ define i8 @release_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB25_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB25_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB25_1;
; SM70-NEXT: $L__BB25_3: // %partword.cmpxchg.end
@@ -1237,12 +1237,12 @@ define i8 @release_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB26_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB26_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB26_1;
; SM70-NEXT: $L__BB26_3: // %partword.cmpxchg.end
@@ -1284,12 +1284,12 @@ define i8 @acq_rel_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB27_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB27_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB27_1;
; SM70-NEXT: $L__BB27_3: // %partword.cmpxchg.end
@@ -1331,12 +1331,12 @@ define i8 @acq_rel_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB28_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB28_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB28_1;
; SM70-NEXT: $L__BB28_3: // %partword.cmpxchg.end
@@ -1378,12 +1378,12 @@ define i8 @acq_rel_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB29_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB29_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB29_1;
; SM70-NEXT: $L__BB29_3: // %partword.cmpxchg.end
@@ -1425,12 +1425,12 @@ define i8 @acq_rel_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB30_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB30_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB30_1;
; SM70-NEXT: $L__BB30_3: // %partword.cmpxchg.end
@@ -1472,12 +1472,12 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB31_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB31_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB31_1;
; SM70-NEXT: $L__BB31_3: // %partword.cmpxchg.end
@@ -1519,12 +1519,12 @@ define i8 @acq_rel_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB32_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB32_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB32_1;
; SM70-NEXT: $L__BB32_3: // %partword.cmpxchg.end
@@ -1566,12 +1566,12 @@ define i8 @acq_rel_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB33_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB33_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB33_1;
; SM70-NEXT: $L__BB33_3: // %partword.cmpxchg.end
@@ -1613,12 +1613,12 @@ define i8 @acq_rel_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB34_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB34_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB34_1;
; SM70-NEXT: $L__BB34_3: // %partword.cmpxchg.end
@@ -1660,12 +1660,12 @@ define i8 @acq_rel_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB35_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB35_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB35_1;
; SM70-NEXT: $L__BB35_3: // %partword.cmpxchg.end
@@ -1707,12 +1707,12 @@ define i8 @seq_cst_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB36_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB36_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB36_1;
; SM70-NEXT: $L__BB36_3: // %partword.cmpxchg.end
@@ -1754,12 +1754,12 @@ define i8 @seq_cst_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB37_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB37_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB37_1;
; SM70-NEXT: $L__BB37_3: // %partword.cmpxchg.end
@@ -1801,12 +1801,12 @@ define i8 @seq_cst_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB38_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB38_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB38_1;
; SM70-NEXT: $L__BB38_3: // %partword.cmpxchg.end
@@ -1848,12 +1848,12 @@ define i8 @seq_cst_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB39_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB39_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB39_1;
; SM70-NEXT: $L__BB39_3: // %partword.cmpxchg.end
@@ -1895,12 +1895,12 @@ define i8 @seq_cst_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB40_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB40_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB40_1;
; SM70-NEXT: $L__BB40_3: // %partword.cmpxchg.end
@@ -1942,12 +1942,12 @@ define i8 @seq_cst_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB41_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB41_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB41_1;
; SM70-NEXT: $L__BB41_3: // %partword.cmpxchg.end
@@ -1989,12 +1989,12 @@ define i8 @seq_cst_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB42_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB42_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB42_1;
; SM70-NEXT: $L__BB42_3: // %partword.cmpxchg.end
@@ -2036,12 +2036,12 @@ define i8 @seq_cst_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB43_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB43_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB43_1;
; SM70-NEXT: $L__BB43_3: // %partword.cmpxchg.end
@@ -2083,12 +2083,12 @@ define i8 @seq_cst_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB44_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB44_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB44_1;
; SM70-NEXT: $L__BB44_3: // %partword.cmpxchg.end
@@ -2128,12 +2128,12 @@ define i16 @monotonic_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB45_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB45_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB45_1;
; SM70-NEXT: $L__BB45_3: // %partword.cmpxchg.end
@@ -2172,12 +2172,12 @@ define i16 @monotonic_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB46_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB46_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB46_1;
; SM70-NEXT: $L__BB46_3: // %partword.cmpxchg.end
@@ -2216,12 +2216,12 @@ define i16 @monotonic_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB47_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB47_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB47_1;
; SM70-NEXT: $L__BB47_3: // %partword.cmpxchg.end
@@ -2260,12 +2260,12 @@ define i16 @monotonic_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB48_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB48_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB48_1;
; SM70-NEXT: $L__BB48_3: // %partword.cmpxchg.end
@@ -2305,12 +2305,12 @@ define i16 @monotonic_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB49_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB49_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB49_1;
; SM70-NEXT: $L__BB49_3: // %partword.cmpxchg.end
@@ -2350,12 +2350,12 @@ define i16 @monotonic_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB50_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB50_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB50_1;
; SM70-NEXT: $L__BB50_3: // %partword.cmpxchg.end
@@ -2396,12 +2396,12 @@ define i16 @monotonic_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB51_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB51_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB51_1;
; SM70-NEXT: $L__BB51_3: // %partword.cmpxchg.end
@@ -2442,12 +2442,12 @@ define i16 @monotonic_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB52_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB52_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB52_1;
; SM70-NEXT: $L__BB52_3: // %partword.cmpxchg.end
@@ -2488,12 +2488,12 @@ define i16 @monotonic_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB53_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB53_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB53_1;
; SM70-NEXT: $L__BB53_3: // %partword.cmpxchg.end
@@ -2533,12 +2533,12 @@ define i16 @acquire_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB54_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB54_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB54_1;
; SM70-NEXT: $L__BB54_3: // %partword.cmpxchg.end
@@ -2578,12 +2578,12 @@ define i16 @acquire_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB55_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB55_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB55_1;
; SM70-NEXT: $L__BB55_3: // %partword.cmpxchg.end
@@ -2623,12 +2623,12 @@ define i16 @acquire_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB56_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB56_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB56_1;
; SM70-NEXT: $L__BB56_3: // %partword.cmpxchg.end
@@ -2668,12 +2668,12 @@ define i16 @acquire_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB57_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB57_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB57_1;
; SM70-NEXT: $L__BB57_3: // %partword.cmpxchg.end
@@ -2713,12 +2713,12 @@ define i16 @acquire_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB58_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB58_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB58_1;
; SM70-NEXT: $L__BB58_3: // %partword.cmpxchg.end
@@ -2758,12 +2758,12 @@ define i16 @acquire_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB59_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB59_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB59_1;
; SM70-NEXT: $L__BB59_3: // %partword.cmpxchg.end
@@ -2804,12 +2804,12 @@ define i16 @acquire_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB60_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB60_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB60_1;
; SM70-NEXT: $L__BB60_3: // %partword.cmpxchg.end
@@ -2850,12 +2850,12 @@ define i16 @acquire_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB61_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB61_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB61_1;
; SM70-NEXT: $L__BB61_3: // %partword.cmpxchg.end
@@ -2896,12 +2896,12 @@ define i16 @acquire_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB62_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB62_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB62_1;
; SM70-NEXT: $L__BB62_3: // %partword.cmpxchg.end
@@ -2942,12 +2942,12 @@ define i16 @release_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB63_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB63_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB63_1;
; SM70-NEXT: $L__BB63_3: // %partword.cmpxchg.end
@@ -2987,12 +2987,12 @@ define i16 @release_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB64_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB64_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB64_1;
; SM70-NEXT: $L__BB64_3: // %partword.cmpxchg.end
@@ -3032,12 +3032,12 @@ define i16 @release_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB65_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB65_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB65_1;
; SM70-NEXT: $L__BB65_3: // %partword.cmpxchg.end
@@ -3077,12 +3077,12 @@ define i16 @release_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB66_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB66_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB66_1;
; SM70-NEXT: $L__BB66_3: // %partword.cmpxchg.end
@@ -3123,12 +3123,12 @@ define i16 @release_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB67_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB67_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB67_1;
; SM70-NEXT: $L__BB67_3: // %partword.cmpxchg.end
@@ -3169,12 +3169,12 @@ define i16 @release_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB68_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB68_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB68_1;
; SM70-NEXT: $L__BB68_3: // %partword.cmpxchg.end
@@ -3215,12 +3215,12 @@ define i16 @release_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB69_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB69_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB69_1;
; SM70-NEXT: $L__BB69_3: // %partword.cmpxchg.end
@@ -3261,12 +3261,12 @@ define i16 @release_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB70_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB70_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB70_1;
; SM70-NEXT: $L__BB70_3: // %partword.cmpxchg.end
@@ -3307,12 +3307,12 @@ define i16 @release_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB71_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB71_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB71_1;
; SM70-NEXT: $L__BB71_3: // %partword.cmpxchg.end
@@ -3353,12 +3353,12 @@ define i16 @acq_rel_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB72_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB72_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB72_1;
; SM70-NEXT: $L__BB72_3: // %partword.cmpxchg.end
@@ -3399,12 +3399,12 @@ define i16 @acq_rel_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB73_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB73_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB73_1;
; SM70-NEXT: $L__BB73_3: // %partword.cmpxchg.end
@@ -3445,12 +3445,12 @@ define i16 @acq_rel_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB74_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB74_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB74_1;
; SM70-NEXT: $L__BB74_3: // %partword.cmpxchg.end
@@ -3491,12 +3491,12 @@ define i16 @acq_rel_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB75_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB75_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB75_1;
; SM70-NEXT: $L__BB75_3: // %partword.cmpxchg.end
@@ -3537,12 +3537,12 @@ define i16 @acq_rel_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB76_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB76_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB76_1;
; SM70-NEXT: $L__BB76_3: // %partword.cmpxchg.end
@@ -3583,12 +3583,12 @@ define i16 @acq_rel_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB77_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB77_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB77_1;
; SM70-NEXT: $L__BB77_3: // %partword.cmpxchg.end
@@ -3629,12 +3629,12 @@ define i16 @acq_rel_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB78_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB78_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB78_1;
; SM70-NEXT: $L__BB78_3: // %partword.cmpxchg.end
@@ -3675,12 +3675,12 @@ define i16 @acq_rel_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB79_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB79_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB79_1;
; SM70-NEXT: $L__BB79_3: // %partword.cmpxchg.end
@@ -3721,12 +3721,12 @@ define i16 @acq_rel_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB80_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB80_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB80_1;
; SM70-NEXT: $L__BB80_3: // %partword.cmpxchg.end
@@ -3767,12 +3767,12 @@ define i16 @seq_cst_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB81_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB81_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB81_1;
; SM70-NEXT: $L__BB81_3: // %partword.cmpxchg.end
@@ -3813,12 +3813,12 @@ define i16 @seq_cst_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB82_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB82_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB82_1;
; SM70-NEXT: $L__BB82_3: // %partword.cmpxchg.end
@@ -3859,12 +3859,12 @@ define i16 @seq_cst_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB83_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB83_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB83_1;
; SM70-NEXT: $L__BB83_3: // %partword.cmpxchg.end
@@ -3905,12 +3905,12 @@ define i16 @seq_cst_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB84_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB84_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB84_1;
; SM70-NEXT: $L__BB84_3: // %partword.cmpxchg.end
@@ -3951,12 +3951,12 @@ define i16 @seq_cst_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB85_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB85_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB85_1;
; SM70-NEXT: $L__BB85_3: // %partword.cmpxchg.end
@@ -3997,12 +3997,12 @@ define i16 @seq_cst_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB86_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB86_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB86_1;
; SM70-NEXT: $L__BB86_3: // %partword.cmpxchg.end
@@ -4043,12 +4043,12 @@ define i16 @seq_cst_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB87_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB87_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB87_1;
; SM70-NEXT: $L__BB87_3: // %partword.cmpxchg.end
@@ -4089,12 +4089,12 @@ define i16 @seq_cst_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB88_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB88_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB88_1;
; SM70-NEXT: $L__BB88_3: // %partword.cmpxchg.end
@@ -4135,12 +4135,12 @@ define i16 @seq_cst_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB89_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB89_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB89_1;
; SM70-NEXT: $L__BB89_3: // %partword.cmpxchg.end
diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll
index e20f988577282..368fe3f036c9e 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll
@@ -32,12 +32,12 @@ define i8 @monotonic_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB0_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB0_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB0_1;
; SM90-NEXT: $L__BB0_3: // %partword.cmpxchg.end
@@ -77,12 +77,12 @@ define i8 @monotonic_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB1_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB1_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB1_1;
; SM90-NEXT: $L__BB1_3: // %partword.cmpxchg.end
@@ -122,12 +122,12 @@ define i8 @monotonic_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %ne
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB2_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB2_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB2_1;
; SM90-NEXT: $L__BB2_3: // %partword.cmpxchg.end
@@ -167,12 +167,12 @@ define i8 @monotonic_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB3_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB3_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB3_1;
; SM90-NEXT: $L__BB3_3: // %partword.cmpxchg.end
@@ -213,12 +213,12 @@ define i8 @monotonic_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB4_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB4_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB4_1;
; SM90-NEXT: $L__BB4_3: // %partword.cmpxchg.end
@@ -259,12 +259,12 @@ define i8 @monotonic_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB5_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB5_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB5_1;
; SM90-NEXT: $L__BB5_3: // %partword.cmpxchg.end
@@ -306,12 +306,12 @@ define i8 @monotonic_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB6_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB6_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB6_1;
; SM90-NEXT: $L__BB6_3: // %partword.cmpxchg.end
@@ -353,12 +353,12 @@ define i8 @monotonic_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB7_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB7_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB7_1;
; SM90-NEXT: $L__BB7_3: // %partword.cmpxchg.end
@@ -400,12 +400,12 @@ define i8 @monotonic_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB8_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB8_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB8_1;
; SM90-NEXT: $L__BB8_3: // %partword.cmpxchg.end
@@ -446,12 +446,12 @@ define i8 @acquire_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB9_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB9_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB9_1;
; SM90-NEXT: $L__BB9_3: // %partword.cmpxchg.end
@@ -492,12 +492,12 @@ define i8 @acquire_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB10_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB10_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB10_1;
; SM90-NEXT: $L__BB10_3: // %partword.cmpxchg.end
@@ -538,12 +538,12 @@ define i8 @acquire_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB11_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB11_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB11_1;
; SM90-NEXT: $L__BB11_3: // %partword.cmpxchg.end
@@ -584,12 +584,12 @@ define i8 @acquire_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB12_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB12_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB12_1;
; SM90-NEXT: $L__BB12_3: // %partword.cmpxchg.end
@@ -630,12 +630,12 @@ define i8 @acquire_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB13_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB13_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB13_1;
; SM90-NEXT: $L__BB13_3: // %partword.cmpxchg.end
@@ -676,12 +676,12 @@ define i8 @acquire_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB14_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB14_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB14_1;
; SM90-NEXT: $L__BB14_3: // %partword.cmpxchg.end
@@ -723,12 +723,12 @@ define i8 @acquire_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB15_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB15_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB15_1;
; SM90-NEXT: $L__BB15_3: // %partword.cmpxchg.end
@@ -770,12 +770,12 @@ define i8 @acquire_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB16_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB16_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB16_1;
; SM90-NEXT: $L__BB16_3: // %partword.cmpxchg.end
@@ -817,12 +817,12 @@ define i8 @acquire_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB17_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB17_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB17_1;
; SM90-NEXT: $L__BB17_3: // %partword.cmpxchg.end
@@ -864,12 +864,12 @@ define i8 @release_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB18_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB18_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB18_1;
; SM90-NEXT: $L__BB18_3: // %partword.cmpxchg.end
@@ -910,12 +910,12 @@ define i8 @release_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB19_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB19_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB19_1;
; SM90-NEXT: $L__BB19_3: // %partword.cmpxchg.end
@@ -956,12 +956,12 @@ define i8 @release_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB20_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB20_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB20_1;
; SM90-NEXT: $L__BB20_3: // %partword.cmpxchg.end
@@ -1002,12 +1002,12 @@ define i8 @release_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB21_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB21_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB21_1;
; SM90-NEXT: $L__BB21_3: // %partword.cmpxchg.end
@@ -1049,12 +1049,12 @@ define i8 @release_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB22_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB22_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB22_1;
; SM90-NEXT: $L__BB22_3: // %partword.cmpxchg.end
@@ -1096,12 +1096,12 @@ define i8 @release_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB23_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB23_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB23_1;
; SM90-NEXT: $L__BB23_3: // %partword.cmpxchg.end
@@ -1143,12 +1143,12 @@ define i8 @release_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB24_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB24_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB24_1;
; SM90-NEXT: $L__BB24_3: // %partword.cmpxchg.end
@@ -1190,12 +1190,12 @@ define i8 @release_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB25_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB25_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB25_1;
; SM90-NEXT: $L__BB25_3: // %partword.cmpxchg.end
@@ -1237,12 +1237,12 @@ define i8 @release_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB26_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB26_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB26_1;
; SM90-NEXT: $L__BB26_3: // %partword.cmpxchg.end
@@ -1284,12 +1284,12 @@ define i8 @acq_rel_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB27_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB27_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB27_1;
; SM90-NEXT: $L__BB27_3: // %partword.cmpxchg.end
@@ -1331,12 +1331,12 @@ define i8 @acq_rel_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB28_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB28_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB28_1;
; SM90-NEXT: $L__BB28_3: // %partword.cmpxchg.end
@@ -1378,12 +1378,12 @@ define i8 @acq_rel_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB29_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB29_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB29_1;
; SM90-NEXT: $L__BB29_3: // %partword.cmpxchg.end
@@ -1425,12 +1425,12 @@ define i8 @acq_rel_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB30_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB30_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB30_1;
; SM90-NEXT: $L__BB30_3: // %partword.cmpxchg.end
@@ -1472,12 +1472,12 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB31_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB31_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB31_1;
; SM90-NEXT: $L__BB31_3: // %partword.cmpxchg.end
@@ -1519,12 +1519,12 @@ define i8 @acq_rel_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB32_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB32_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB32_1;
; SM90-NEXT: $L__BB32_3: // %partword.cmpxchg.end
@@ -1566,12 +1566,12 @@ define i8 @acq_rel_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB33_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB33_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB33_1;
; SM90-NEXT: $L__BB33_3: // %partword.cmpxchg.end
@@ -1613,12 +1613,12 @@ define i8 @acq_rel_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB34_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB34_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB34_1;
; SM90-NEXT: $L__BB34_3: // %partword.cmpxchg.end
@@ -1660,12 +1660,12 @@ define i8 @acq_rel_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB35_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB35_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB35_1;
; SM90-NEXT: $L__BB35_3: // %partword.cmpxchg.end
@@ -1707,12 +1707,12 @@ define i8 @seq_cst_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB36_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB36_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB36_1;
; SM90-NEXT: $L__BB36_3: // %partword.cmpxchg.end
@@ -1754,12 +1754,12 @@ define i8 @seq_cst_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB37_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB37_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB37_1;
; SM90-NEXT: $L__BB37_3: // %partword.cmpxchg.end
@@ -1801,12 +1801,12 @@ define i8 @seq_cst_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB38_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB38_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB38_1;
; SM90-NEXT: $L__BB38_3: // %partword.cmpxchg.end
@@ -1848,12 +1848,12 @@ define i8 @seq_cst_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB39_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB39_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB39_1;
; SM90-NEXT: $L__BB39_3: // %partword.cmpxchg.end
@@ -1895,12 +1895,12 @@ define i8 @seq_cst_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB40_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB40_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB40_1;
; SM90-NEXT: $L__BB40_3: // %partword.cmpxchg.end
@@ -1942,12 +1942,12 @@ define i8 @seq_cst_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB41_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB41_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB41_1;
; SM90-NEXT: $L__BB41_3: // %partword.cmpxchg.end
@@ -1989,12 +1989,12 @@ define i8 @seq_cst_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB42_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB42_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB42_1;
; SM90-NEXT: $L__BB42_3: // %partword.cmpxchg.end
@@ -2036,12 +2036,12 @@ define i8 @seq_cst_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB43_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB43_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB43_1;
; SM90-NEXT: $L__BB43_3: // %partword.cmpxchg.end
@@ -2083,12 +2083,12 @@ define i8 @seq_cst_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: or.b32 %r17, %r20, %r3;
; SM90-NEXT: or.b32 %r18, %r20, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM90-NEXT: @%p1 bra $L__BB44_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB44_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB44_1;
; SM90-NEXT: $L__BB44_3: // %partword.cmpxchg.end
@@ -2128,12 +2128,12 @@ define i16 @monotonic_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB45_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB45_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB45_1;
; SM90-NEXT: $L__BB45_3: // %partword.cmpxchg.end
@@ -2172,12 +2172,12 @@ define i16 @monotonic_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB46_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB46_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB46_1;
; SM90-NEXT: $L__BB46_3: // %partword.cmpxchg.end
@@ -2216,12 +2216,12 @@ define i16 @monotonic_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB47_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB47_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB47_1;
; SM90-NEXT: $L__BB47_3: // %partword.cmpxchg.end
@@ -2260,12 +2260,12 @@ define i16 @monotonic_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB48_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB48_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB48_1;
; SM90-NEXT: $L__BB48_3: // %partword.cmpxchg.end
@@ -2305,12 +2305,12 @@ define i16 @monotonic_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB49_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB49_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB49_1;
; SM90-NEXT: $L__BB49_3: // %partword.cmpxchg.end
@@ -2350,12 +2350,12 @@ define i16 @monotonic_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB50_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB50_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB50_1;
; SM90-NEXT: $L__BB50_3: // %partword.cmpxchg.end
@@ -2396,12 +2396,12 @@ define i16 @monotonic_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB51_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB51_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB51_1;
; SM90-NEXT: $L__BB51_3: // %partword.cmpxchg.end
@@ -2442,12 +2442,12 @@ define i16 @monotonic_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB52_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB52_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB52_1;
; SM90-NEXT: $L__BB52_3: // %partword.cmpxchg.end
@@ -2488,12 +2488,12 @@ define i16 @monotonic_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB53_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB53_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB53_1;
; SM90-NEXT: $L__BB53_3: // %partword.cmpxchg.end
@@ -2533,12 +2533,12 @@ define i16 @acquire_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB54_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB54_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB54_1;
; SM90-NEXT: $L__BB54_3: // %partword.cmpxchg.end
@@ -2578,12 +2578,12 @@ define i16 @acquire_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB55_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB55_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB55_1;
; SM90-NEXT: $L__BB55_3: // %partword.cmpxchg.end
@@ -2623,12 +2623,12 @@ define i16 @acquire_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB56_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB56_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB56_1;
; SM90-NEXT: $L__BB56_3: // %partword.cmpxchg.end
@@ -2668,12 +2668,12 @@ define i16 @acquire_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB57_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB57_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB57_1;
; SM90-NEXT: $L__BB57_3: // %partword.cmpxchg.end
@@ -2713,12 +2713,12 @@ define i16 @acquire_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB58_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB58_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB58_1;
; SM90-NEXT: $L__BB58_3: // %partword.cmpxchg.end
@@ -2758,12 +2758,12 @@ define i16 @acquire_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB59_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB59_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB59_1;
; SM90-NEXT: $L__BB59_3: // %partword.cmpxchg.end
@@ -2804,12 +2804,12 @@ define i16 @acquire_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB60_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB60_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB60_1;
; SM90-NEXT: $L__BB60_3: // %partword.cmpxchg.end
@@ -2850,12 +2850,12 @@ define i16 @acquire_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB61_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB61_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB61_1;
; SM90-NEXT: $L__BB61_3: // %partword.cmpxchg.end
@@ -2896,12 +2896,12 @@ define i16 @acquire_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB62_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB62_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB62_1;
; SM90-NEXT: $L__BB62_3: // %partword.cmpxchg.end
@@ -2942,12 +2942,12 @@ define i16 @release_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB63_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB63_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB63_1;
; SM90-NEXT: $L__BB63_3: // %partword.cmpxchg.end
@@ -2987,12 +2987,12 @@ define i16 @release_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB64_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB64_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB64_1;
; SM90-NEXT: $L__BB64_3: // %partword.cmpxchg.end
@@ -3032,12 +3032,12 @@ define i16 @release_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB65_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB65_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB65_1;
; SM90-NEXT: $L__BB65_3: // %partword.cmpxchg.end
@@ -3077,12 +3077,12 @@ define i16 @release_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB66_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB66_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB66_1;
; SM90-NEXT: $L__BB66_3: // %partword.cmpxchg.end
@@ -3123,12 +3123,12 @@ define i16 @release_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB67_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB67_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB67_1;
; SM90-NEXT: $L__BB67_3: // %partword.cmpxchg.end
@@ -3169,12 +3169,12 @@ define i16 @release_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB68_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB68_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB68_1;
; SM90-NEXT: $L__BB68_3: // %partword.cmpxchg.end
@@ -3215,12 +3215,12 @@ define i16 @release_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB69_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB69_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB69_1;
; SM90-NEXT: $L__BB69_3: // %partword.cmpxchg.end
@@ -3261,12 +3261,12 @@ define i16 @release_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB70_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB70_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB70_1;
; SM90-NEXT: $L__BB70_3: // %partword.cmpxchg.end
@@ -3307,12 +3307,12 @@ define i16 @release_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB71_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB71_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB71_1;
; SM90-NEXT: $L__BB71_3: // %partword.cmpxchg.end
@@ -3353,12 +3353,12 @@ define i16 @acq_rel_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB72_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB72_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB72_1;
; SM90-NEXT: $L__BB72_3: // %partword.cmpxchg.end
@@ -3399,12 +3399,12 @@ define i16 @acq_rel_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB73_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB73_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB73_1;
; SM90-NEXT: $L__BB73_3: // %partword.cmpxchg.end
@@ -3445,12 +3445,12 @@ define i16 @acq_rel_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB74_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB74_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB74_1;
; SM90-NEXT: $L__BB74_3: // %partword.cmpxchg.end
@@ -3491,12 +3491,12 @@ define i16 @acq_rel_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB75_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB75_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB75_1;
; SM90-NEXT: $L__BB75_3: // %partword.cmpxchg.end
@@ -3537,12 +3537,12 @@ define i16 @acq_rel_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB76_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB76_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB76_1;
; SM90-NEXT: $L__BB76_3: // %partword.cmpxchg.end
@@ -3583,12 +3583,12 @@ define i16 @acq_rel_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB77_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB77_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB77_1;
; SM90-NEXT: $L__BB77_3: // %partword.cmpxchg.end
@@ -3629,12 +3629,12 @@ define i16 @acq_rel_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB78_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB78_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB78_1;
; SM90-NEXT: $L__BB78_3: // %partword.cmpxchg.end
@@ -3675,12 +3675,12 @@ define i16 @acq_rel_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB79_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB79_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB79_1;
; SM90-NEXT: $L__BB79_3: // %partword.cmpxchg.end
@@ -3721,12 +3721,12 @@ define i16 @acq_rel_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB80_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB80_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB80_1;
; SM90-NEXT: $L__BB80_3: // %partword.cmpxchg.end
@@ -3767,12 +3767,12 @@ define i16 @seq_cst_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB81_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB81_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB81_1;
; SM90-NEXT: $L__BB81_3: // %partword.cmpxchg.end
@@ -3813,12 +3813,12 @@ define i16 @seq_cst_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB82_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB82_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB82_1;
; SM90-NEXT: $L__BB82_3: // %partword.cmpxchg.end
@@ -3859,12 +3859,12 @@ define i16 @seq_cst_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB83_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB83_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB83_1;
; SM90-NEXT: $L__BB83_3: // %partword.cmpxchg.end
@@ -3905,12 +3905,12 @@ define i16 @seq_cst_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB84_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB84_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB84_1;
; SM90-NEXT: $L__BB84_3: // %partword.cmpxchg.end
@@ -3951,12 +3951,12 @@ define i16 @seq_cst_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB85_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB85_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB85_1;
; SM90-NEXT: $L__BB85_3: // %partword.cmpxchg.end
@@ -3997,12 +3997,12 @@ define i16 @seq_cst_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB86_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB86_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB86_1;
; SM90-NEXT: $L__BB86_3: // %partword.cmpxchg.end
@@ -4043,12 +4043,12 @@ define i16 @seq_cst_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB87_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB87_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB87_1;
; SM90-NEXT: $L__BB87_3: // %partword.cmpxchg.end
@@ -4089,12 +4089,12 @@ define i16 @seq_cst_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.global.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB88_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB88_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB88_1;
; SM90-NEXT: $L__BB88_3: // %partword.cmpxchg.end
@@ -4135,12 +4135,12 @@ define i16 @seq_cst_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: or.b32 %r16, %r19, %r3;
; SM90-NEXT: or.b32 %r17, %r19, %r4;
; SM90-NEXT: atom.relaxed.shared.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM90-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM90-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM90-NEXT: @%p1 bra $L__BB89_3;
; SM90-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM90-NEXT: // in Loop: Header=BB89_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
-; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM90-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB89_1;
; SM90-NEXT: $L__BB89_3: // %partword.cmpxchg.end
diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg.ll b/llvm/test/CodeGen/NVPTX/cmpxchg.ll
index 85414a2ab04e8..25b4c74086dc1 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg.ll
@@ -39,12 +39,12 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30-NEXT: or.b32 %r17, %r20, %r3;
; SM30-NEXT: or.b32 %r18, %r20, %r4;
; SM30-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM30-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM30-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM30-NEXT: @%p1 bra $L__BB0_3;
; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT: // in Loop: Header=BB0_1 Depth=1
; SM30-NEXT: and.b32 %r8, %r7, %r2;
-; SM30-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM30-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM30-NEXT: mov.b32 %r20, %r8;
; SM30-NEXT: @%p2 bra $L__BB0_1;
; SM30-NEXT: $L__BB0_3: // %partword.cmpxchg.end
@@ -80,12 +80,12 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB0_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB0_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB0_1;
; SM70-NEXT: $L__BB0_3: // %partword.cmpxchg.end
@@ -165,12 +165,12 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30-NEXT: or.b32 %r17, %r20, %r3;
; SM30-NEXT: or.b32 %r18, %r20, %r4;
; SM30-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM30-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM30-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM30-NEXT: @%p1 bra $L__BB1_3;
; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT: // in Loop: Header=BB1_1 Depth=1
; SM30-NEXT: and.b32 %r8, %r7, %r2;
-; SM30-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM30-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM30-NEXT: mov.b32 %r20, %r8;
; SM30-NEXT: @%p2 bra $L__BB1_1;
; SM30-NEXT: $L__BB1_3: // %partword.cmpxchg.end
@@ -207,12 +207,12 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB1_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB1_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB1_1;
; SM70-NEXT: $L__BB1_3: // %partword.cmpxchg.end
@@ -295,12 +295,12 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30-NEXT: or.b32 %r17, %r20, %r3;
; SM30-NEXT: or.b32 %r18, %r20, %r4;
; SM30-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM30-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM30-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM30-NEXT: @%p1 bra $L__BB2_3;
; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT: // in Loop: Header=BB2_1 Depth=1
; SM30-NEXT: and.b32 %r8, %r7, %r2;
-; SM30-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM30-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM30-NEXT: mov.b32 %r20, %r8;
; SM30-NEXT: @%p2 bra $L__BB2_1;
; SM30-NEXT: $L__BB2_3: // %partword.cmpxchg.end
@@ -337,12 +337,12 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB2_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB2_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB2_1;
; SM70-NEXT: $L__BB2_3: // %partword.cmpxchg.end
@@ -424,12 +424,12 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30-NEXT: or.b32 %r17, %r20, %r3;
; SM30-NEXT: or.b32 %r18, %r20, %r4;
; SM30-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM30-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM30-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM30-NEXT: @%p1 bra $L__BB3_3;
; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT: // in Loop: Header=BB3_1 Depth=1
; SM30-NEXT: and.b32 %r8, %r7, %r2;
-; SM30-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM30-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM30-NEXT: mov.b32 %r20, %r8;
; SM30-NEXT: @%p2 bra $L__BB3_1;
; SM30-NEXT: $L__BB3_3: // %partword.cmpxchg.end
@@ -467,12 +467,12 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB3_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB3_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB3_1;
; SM70-NEXT: $L__BB3_3: // %partword.cmpxchg.end
@@ -556,12 +556,12 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30-NEXT: or.b32 %r17, %r20, %r3;
; SM30-NEXT: or.b32 %r18, %r20, %r4;
; SM30-NEXT: atom.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM30-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM30-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM30-NEXT: @%p1 bra $L__BB4_3;
; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT: // in Loop: Header=BB4_1 Depth=1
; SM30-NEXT: and.b32 %r8, %r7, %r2;
-; SM30-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM30-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM30-NEXT: mov.b32 %r20, %r8;
; SM30-NEXT: @%p2 bra $L__BB4_1;
; SM30-NEXT: $L__BB4_3: // %partword.cmpxchg.end
@@ -599,12 +599,12 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: or.b32 %r17, %r20, %r3;
; SM70-NEXT: or.b32 %r18, %r20, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r18, %r17;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r18;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r18;
; SM70-NEXT: @%p1 bra $L__BB4_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB4_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r20, %r8;
; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB4_1;
; SM70-NEXT: $L__BB4_3: // %partword.cmpxchg.end
@@ -687,12 +687,12 @@ define i16 @relaxed_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM30-NEXT: or.b32 %r16, %r19, %r3;
; SM30-NEXT: or.b32 %r17, %r19, %r4;
; SM30-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM30-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM30-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM30-NEXT: @%p1 bra $L__BB5_3;
; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT: // in Loop: Header=BB5_1 Depth=1
; SM30-NEXT: and.b32 %r8, %r7, %r2;
-; SM30-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM30-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM30-NEXT: mov.b32 %r19, %r8;
; SM30-NEXT: @%p2 bra $L__BB5_1;
; SM30-NEXT: $L__BB5_3: // %partword.cmpxchg.end
@@ -727,12 +727,12 @@ define i16 @relaxed_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB5_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB5_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB5_1;
; SM70-NEXT: $L__BB5_3: // %partword.cmpxchg.end
@@ -810,12 +810,12 @@ define i16 @acquire_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM30-NEXT: or.b32 %r16, %r19, %r3;
; SM30-NEXT: or.b32 %r17, %r19, %r4;
; SM30-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM30-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM30-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM30-NEXT: @%p1 bra $L__BB6_3;
; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT: // in Loop: Header=BB6_1 Depth=1
; SM30-NEXT: and.b32 %r8, %r7, %r2;
-; SM30-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM30-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM30-NEXT: mov.b32 %r19, %r8;
; SM30-NEXT: @%p2 bra $L__BB6_1;
; SM30-NEXT: $L__BB6_3: // %partword.cmpxchg.end
@@ -851,12 +851,12 @@ define i16 @acquire_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB6_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB6_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB6_1;
; SM70-NEXT: $L__BB6_3: // %partword.cmpxchg.end
@@ -937,12 +937,12 @@ define i16 @release_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM30-NEXT: or.b32 %r16, %r19, %r3;
; SM30-NEXT: or.b32 %r17, %r19, %r4;
; SM30-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM30-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM30-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM30-NEXT: @%p1 bra $L__BB7_3;
; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT: // in Loop: Header=BB7_1 Depth=1
; SM30-NEXT: and.b32 %r8, %r7, %r2;
-; SM30-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM30-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM30-NEXT: mov.b32 %r19, %r8;
; SM30-NEXT: @%p2 bra $L__BB7_1;
; SM30-NEXT: $L__BB7_3: // %partword.cmpxchg.end
@@ -978,12 +978,12 @@ define i16 @release_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB7_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB7_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB7_1;
; SM70-NEXT: $L__BB7_3: // %partword.cmpxchg.end
@@ -1063,12 +1063,12 @@ define i16 @acq_rel_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM30-NEXT: or.b32 %r16, %r19, %r3;
; SM30-NEXT: or.b32 %r17, %r19, %r4;
; SM30-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM30-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM30-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM30-NEXT: @%p1 bra $L__BB8_3;
; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT: // in Loop: Header=BB8_1 Depth=1
; SM30-NEXT: and.b32 %r8, %r7, %r2;
-; SM30-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM30-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM30-NEXT: mov.b32 %r19, %r8;
; SM30-NEXT: @%p2 bra $L__BB8_1;
; SM30-NEXT: $L__BB8_3: // %partword.cmpxchg.end
@@ -1105,12 +1105,12 @@ define i16 @acq_rel_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB8_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB8_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB8_1;
; SM70-NEXT: $L__BB8_3: // %partword.cmpxchg.end
@@ -1193,12 +1193,12 @@ define i16 @seq_cst_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM30-NEXT: or.b32 %r16, %r19, %r3;
; SM30-NEXT: or.b32 %r17, %r19, %r4;
; SM30-NEXT: atom.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM30-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM30-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM30-NEXT: @%p1 bra $L__BB9_3;
; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT: // in Loop: Header=BB9_1 Depth=1
; SM30-NEXT: and.b32 %r8, %r7, %r2;
-; SM30-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM30-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM30-NEXT: mov.b32 %r19, %r8;
; SM30-NEXT: @%p2 bra $L__BB9_1;
; SM30-NEXT: $L__BB9_3: // %partword.cmpxchg.end
@@ -1235,12 +1235,12 @@ define i16 @seq_cst_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: or.b32 %r16, %r19, %r3;
; SM70-NEXT: or.b32 %r17, %r19, %r4;
; SM70-NEXT: atom.relaxed.cas.b32 %r7, [%rd1], %r17, %r16;
-; SM70-NEXT: setp.eq.s32 %p1, %r7, %r17;
+; SM70-NEXT: setp.eq.b32 %p1, %r7, %r17;
; SM70-NEXT: @%p1 bra $L__BB9_3;
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB9_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
+; SM70-NEXT: setp.ne.b32 %p2, %r19, %r8;
; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB9_1;
; SM70-NEXT: $L__BB9_3: // %partword.cmpxchg.end
diff --git a/llvm/test/CodeGen/NVPTX/compare-int.ll b/llvm/test/CodeGen/NVPTX/compare-int.ll
index ee86fe97ef781..b44ae47d623bd 100644
--- a/llvm/test/CodeGen/NVPTX/compare-int.ll
+++ b/llvm/test/CodeGen/NVPTX/compare-int.ll
@@ -11,7 +11,7 @@
;;; i64
define i64 @icmp_eq_i64(i64 %a, i64 %b) {
-; CHECK: setp.eq.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
+; CHECK: setp.eq.b64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp eq i64 %a, %b
@@ -20,7 +20,7 @@ define i64 @icmp_eq_i64(i64 %a, i64 %b) {
}
define i64 @icmp_ne_i64(i64 %a, i64 %b) {
-; CHECK: setp.ne.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
+; CHECK: setp.ne.b64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ne i64 %a, %b
@@ -103,7 +103,7 @@ define i64 @icmp_sle_i64(i64 %a, i64 %b) {
;;; i32
define i32 @icmp_eq_i32(i32 %a, i32 %b) {
-; CHECK: setp.eq.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: setp.eq.b32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp eq i32 %a, %b
@@ -112,7 +112,7 @@ define i32 @icmp_eq_i32(i32 %a, i32 %b) {
}
define i32 @icmp_ne_i32(i32 %a, i32 %b) {
-; CHECK: setp.ne.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
+; CHECK: setp.ne.b32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ne i32 %a, %b
@@ -196,7 +196,7 @@ define i32 @icmp_sle_i32(i32 %a, i32 %b) {
;;; i16
define i16 @icmp_eq_i16(i16 %a, i16 %b) {
-; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: setp.eq.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp eq i16 %a, %b
@@ -205,7 +205,7 @@ define i16 @icmp_eq_i16(i16 %a, i16 %b) {
}
define i16 @icmp_ne_i16(i16 %a, i16 %b) {
-; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: setp.ne.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ne i16 %a, %b
@@ -290,7 +290,7 @@ define i16 @icmp_sle_i16(i16 %a, i16 %b) {
define i8 @icmp_eq_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.eq.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: setp.eq.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp eq i8 %a, %b
@@ -300,7 +300,7 @@ define i8 @icmp_eq_i8(i8 %a, i8 %b) {
define i8 @icmp_ne_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.ne.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
+; CHECK: setp.ne.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
; CHECK: ret
%cmp = icmp ne i8 %a, %b
diff --git a/llvm/test/CodeGen/NVPTX/distributed-shared-cluster.ll b/llvm/test/CodeGen/NVPTX/distributed-shared-cluster.ll
index 193cf674ecdfc..a1020e68e1bae 100644
--- a/llvm/test/CodeGen/NVPTX/distributed-shared-cluster.ll
+++ b/llvm/test/CodeGen/NVPTX/distributed-shared-cluster.ll
@@ -210,12 +210,12 @@ define void @test_distributed_shared_cluster_cmpxchg(ptr addrspace(7) %dsmem_ptr
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: or.b32 %r39, %r48, %r3;
; CHECK-NEXT: atom.relaxed.shared::cluster.cas.b32 %r6, [%rd1], %r39, %r48;
-; CHECK-NEXT: setp.eq.s32 %p1, %r6, %r39;
+; CHECK-NEXT: setp.eq.b32 %p1, %r6, %r39;
; CHECK-NEXT: @%p1 bra $L__BB4_3;
; CHECK-NEXT: // %bb.2: // %partword.cmpxchg.failure32
; CHECK-NEXT: // in Loop: Header=BB4_1 Depth=1
; CHECK-NEXT: and.b32 %r7, %r6, %r2;
-; CHECK-NEXT: setp.ne.s32 %p2, %r48, %r7;
+; CHECK-NEXT: setp.ne.b32 %p2, %r48, %r7;
; CHECK-NEXT: mov.b32 %r48, %r7;
; CHECK-NEXT: @%p2 bra $L__BB4_1;
; CHECK-NEXT: $L__BB4_3: // %partword.cmpxchg.end31
@@ -225,12 +225,12 @@ define void @test_distributed_shared_cluster_cmpxchg(ptr addrspace(7) %dsmem_ptr
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: or.b32 %r41, %r49, %r3;
; CHECK-NEXT: atom.relaxed.shared::cluster.cas.b32 %r10, [%rd1], %r41, %r49;
-; CHECK-NEXT: setp.eq.s32 %p3, %r10, %r41;
+; CHECK-NEXT: setp.eq.b32 %p3, %r10, %r41;
; CHECK-NEXT: @%p3 bra $L__BB4_6;
; CHECK-NEXT: // %bb.5: // %partword.cmpxchg.failure22
; CHECK-NEXT: // in Loop: Header=BB4_4 Depth=1
; CHECK-NEXT: and.b32 %r11, %r10, %r2;
-; CHECK-NEXT: setp.ne.s32 %p4, %r49, %r11;
+; CHECK-NEXT: setp.ne.b32 %p4, %r49, %r11;
; CHECK-NEXT: mov.b32 %r49, %r11;
; CHECK-NEXT: @%p4 bra $L__BB4_4;
; CHECK-NEXT: $L__BB4_6: // %partword.cmpxchg.end21
@@ -242,12 +242,12 @@ define void @test_distributed_shared_cluster_cmpxchg(ptr addrspace(7) %dsmem_ptr
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: or.b32 %r43, %r50, %r3;
; CHECK-NEXT: atom.relaxed.shared::cluster.cas.b32 %r14, [%rd1], %r43, %r50;
-; CHECK-NEXT: setp.eq.s32 %p5, %r14, %r43;
+; CHECK-NEXT: setp.eq.b32 %p5, %r14, %r43;
; CHECK-NEXT: @%p5 bra $L__BB4_9;
; CHECK-NEXT: // %bb.8: // %partword.cmpxchg.failure12
; CHECK-NEXT: // in Loop: Header=BB4_7 Depth=1
; CHECK-NEXT: and.b32 %r15, %r14, %r2;
-; CHECK-NEXT: setp.ne.s32 %p6, %r50, %r15;
+; CHECK-NEXT: setp.ne.b32 %p6, %r50, %r15;
; CHECK-NEXT: mov.b32 %r50, %r15;
; CHECK-NEXT: @%p6 bra $L__BB4_7;
; CHECK-NEXT: $L__BB4_9: // %partword.cmpxchg.end11
@@ -258,12 +258,12 @@ define void @test_distributed_shared_cluster_cmpxchg(ptr addrspace(7) %dsmem_ptr
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: or.b32 %r45, %r51, %r3;
; CHECK-NEXT: atom.relaxed.shared::cluster.cas.b32 %r18, [%rd1], %r45, %r51;
-; CHECK-NEXT: setp.eq.s32 %p7, %r18, %r45;
+; CHECK-NEXT: setp.eq.b32 %p7, %r18, %r45;
; CHECK-NEXT: @%p7 bra $L__BB4_12;
; CHECK-NEXT: // %bb.11: // %partword.cmpxchg.failure2
; CHECK-NEXT: // in Loop: Header=BB4_10 Depth=1
; CHECK-NEXT: and.b32 %r19, %r18, %r2;
-; CHECK-NEXT: setp.ne.s32 %p8, %r51, %r19;
+; CHECK-NEXT: setp.ne.b32 %p8, %r51, %r19;
; CHECK-NEXT: mov.b32 %r51, %r19;
; CHECK-NEXT: @%p8 bra $L__BB4_10;
; CHECK-NEXT: $L__BB4_12: // %partword.cmpxchg.end1
@@ -275,12 +275,12 @@ define void @test_distributed_shared_cluster_cmpxchg(ptr addrspace(7) %dsmem_ptr
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: or.b32 %r47, %r52, %r3;
; CHECK-NEXT: atom.relaxed.shared::cluster.cas.b32 %r22, [%rd1], %r47, %r52;
-; CHECK-NEXT: setp.eq.s32 %p9, %r22, %r47;
+; CHECK-NEXT: setp.eq.b32 %p9, %r22, %r47;
; CHECK-NEXT: @%p9 bra $L__BB4_15;
; CHECK-NEXT: // %bb.14: // %partword.cmpxchg.failure
; CHECK-NEXT: // in Loop: Header=BB4_13 Depth=1
; CHECK-NEXT: and.b32 %r23, %r22, %r2;
-; CHECK-NEXT: setp.ne.s32 %p10, %r52, %r23;
+; CHECK-NEXT: setp.ne.b32 %p10, %r52, %r23;
; CHECK-NEXT: mov.b32 %r52, %r23;
; CHECK-NEXT: @%p10 bra $L__BB4_13;
; CHECK-NEXT: $L__BB4_15: // %partword.cmpxchg.end
diff --git a/llvm/test/CodeGen/NVPTX/extractelement.ll b/llvm/test/CodeGen/NVPTX/extractelement.ll
index f37777ab954e2..e04732ebad66b 100644
--- a/llvm/test/CodeGen/NVPTX/extractelement.ll
+++ b/llvm/test/CodeGen/NVPTX/extractelement.ll
@@ -40,7 +40,7 @@ define i1 @test_v2i8_load(ptr %a) {
; CHECK-NEXT: ld.v2.b8 {%rs1, %rs2}, [%rd1];
; CHECK-NEXT: or.b16 %rs5, %rs1, %rs2;
; CHECK-NEXT: and.b16 %rs6, %rs5, 255;
-; CHECK-NEXT: setp.eq.s16 %p1, %rs6, 0;
+; CHECK-NEXT: setp.eq.b16 %p1, %rs6, 0;
; CHECK-NEXT: selp.b32 %r1, -1, 0, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
; CHECK-NEXT: ret;
diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
index 13f1c2f30b830..2b7e4184670c7 100644
--- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
@@ -880,8 +880,9 @@ define half @test_sqrt(half %a) #0 {
; CHECK-LABEL: test_sin(
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_sin_param_0];
; CHECK-NOFTZ: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]];
-; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%r[0-9]+]], [[A]];
-; CHECK: sin.approx.f32 [[RF:%r[0-9]+]], [[AF]];
+; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%r[0-9]+]], [[A]];
+; CHECK-NOF16: sin.approx.f32 [[RF:%r[0-9]+]], [[AF]];
+; CHECK-F16-FTZ: sin.approx.ftz.f32 [[RF:%r[0-9]+]], [[AF]];
; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
@@ -893,8 +894,9 @@ define half @test_sin(half %a) #0 #1 {
; CHECK-LABEL: test_cos(
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_cos_param_0];
; CHECK-NOFTZ: cvt.f32.f16 [[AF:%r[0-9]+]], [[A]];
-; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%r[0-9]+]], [[A]];
-; CHECK: cos.approx.f32 [[RF:%r[0-9]+]], [[AF]];
+; CHECK-F16-FTZ: cvt.ftz.f32.f16 [[AF:%r[0-9]+]], [[A]];
+; CHECK-NOF16: cos.approx.f32 [[RF:%r[0-9]+]], [[AF]];
+; CHECK-F16-FTZ: cos.approx.ftz.f32 [[RF:%r[0-9]+]], [[AF]];
; CHECK: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[RF]];
; CHECK: st.param.b16 [func_retval0], [[R]];
; CHECK: ret;
diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
index 43a605f2b34d7..093bc20547b85 100644
--- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -83,7 +83,7 @@ define half @test_extract_i(<2 x half> %a, i64 %idx) #0 {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [test_extract_i_param_1];
; CHECK-NEXT: ld.param.b32 %r1, [test_extract_i_param_0];
-; CHECK-NEXT: setp.eq.s64 %p1, %rd1, 0;
+; CHECK-NEXT: setp.eq.b64 %p1, %rd1, 0;
; CHECK-NEXT: mov.b32 {%rs1, %rs2}, %r1;
; CHECK-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
; CHECK-NEXT: st.param.b16 [func_retval0], %rs3;
diff --git a/llvm/test/CodeGen/NVPTX/fast-math.ll b/llvm/test/CodeGen/NVPTX/fast-math.ll
index bc48d242f88fd..5eda3a1e2dda1 100644
--- a/llvm/test/CodeGen/NVPTX/fast-math.ll
+++ b/llvm/test/CodeGen/NVPTX/fast-math.ll
@@ -291,6 +291,34 @@ define float @fcos_approx(float %a) #0 {
ret float %r
}
+define float @fsin_approx_ftz(float %a) #0 #1 {
+; CHECK-LABEL: fsin_approx_ftz(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [fsin_approx_ftz_param_0];
+; CHECK-NEXT: sin.approx.ftz.f32 %r2, %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT: ret;
+ %r = tail call float @llvm.sin.f32(float %a)
+ ret float %r
+}
+
+define float @fcos_approx_ftz(float %a) #0 #1 {
+; CHECK-LABEL: fcos_approx_ftz(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [fcos_approx_ftz_param_0];
+; CHECK-NEXT: cos.approx.ftz.f32 %r2, %r1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
+; CHECK-NEXT: ret;
+ %r = tail call float @llvm.cos.f32(float %a)
+ ret float %r
+}
+
define float @repeated_div_recip_allowed(i1 %pred, float %a, float %b, float %divisor) {
; CHECK-LABEL: repeated_div_recip_allowed(
; CHECK: {
diff --git a/llvm/test/CodeGen/NVPTX/i1-select.ll b/llvm/test/CodeGen/NVPTX/i1-select.ll
index 562c746200d87..f1adc3489c0d9 100644
--- a/llvm/test/CodeGen/NVPTX/i1-select.ll
+++ b/llvm/test/CodeGen/NVPTX/i1-select.ll
@@ -72,11 +72,11 @@ define i32 @test_select_i1_basic(i32 %v1, i32 %v2, i32 %v3, i32 %true, i32 %fals
; CHECK-NEXT: ld.param.b32 %r1, [test_select_i1_basic_param_0];
; CHECK-NEXT: ld.param.b32 %r2, [test_select_i1_basic_param_1];
; CHECK-NEXT: or.b32 %r4, %r1, %r2;
-; CHECK-NEXT: setp.ne.s32 %p1, %r1, 0;
+; CHECK-NEXT: setp.ne.b32 %p1, %r1, 0;
; CHECK-NEXT: ld.param.b32 %r5, [test_select_i1_basic_param_2];
-; CHECK-NEXT: setp.eq.s32 %p2, %r5, 0;
+; CHECK-NEXT: setp.eq.b32 %p2, %r5, 0;
; CHECK-NEXT: ld.param.b32 %r7, [test_select_i1_basic_param_3];
-; CHECK-NEXT: setp.eq.s32 %p3, %r4, 0;
+; CHECK-NEXT: setp.eq.b32 %p3, %r4, 0;
; CHECK-NEXT: ld.param.b32 %r8, [test_select_i1_basic_param_4];
; CHECK-NEXT: selp.b32 %r9, %r7, %r8, %p2;
; CHECK-NEXT: selp.b32 %r10, %r9, %r8, %p1;
@@ -99,12 +99,12 @@ define i32 @test_select_i1_basic_folding(i32 %v1, i32 %v2, i32 %v3, i32 %true, i
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_select_i1_basic_folding_param_0];
-; CHECK-NEXT: setp.eq.s32 %p1, %r1, 0;
+; CHECK-NEXT: setp.eq.b32 %p1, %r1, 0;
; CHECK-NEXT: ld.param.b32 %r3, [test_select_i1_basic_folding_param_1];
-; CHECK-NEXT: setp.ne.s32 %p2, %r3, 0;
-; CHECK-NEXT: setp.eq.s32 %p3, %r3, 0;
+; CHECK-NEXT: setp.ne.b32 %p2, %r3, 0;
+; CHECK-NEXT: setp.eq.b32 %p3, %r3, 0;
; CHECK-NEXT: ld.param.b32 %r5, [test_select_i1_basic_folding_param_2];
-; CHECK-NEXT: setp.eq.s32 %p4, %r5, 0;
+; CHECK-NEXT: setp.eq.b32 %p4, %r5, 0;
; CHECK-NEXT: ld.param.b32 %r6, [test_select_i1_basic_folding_param_3];
; CHECK-NEXT: xor.pred %p6, %p1, %p3;
; CHECK-NEXT: ld.param.b32 %r7, [test_select_i1_basic_folding_param_4];
diff --git a/llvm/test/CodeGen/NVPTX/i128.ll b/llvm/test/CodeGen/NVPTX/i128.ll
index 49dbc5c385dc0..f2211eb1c0b8e 100644
--- a/llvm/test/CodeGen/NVPTX/i128.ll
+++ b/llvm/test/CodeGen/NVPTX/i128.ll
@@ -24,18 +24,18 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: selp.b64 %rd6, %rd54, %rd50, %p2;
; CHECK-NEXT: selp.b64 %rd5, %rd53, %rd49, %p2;
; CHECK-NEXT: or.b64 %rd55, %rd5, %rd6;
-; CHECK-NEXT: setp.eq.s64 %p3, %rd55, 0;
+; CHECK-NEXT: setp.eq.b64 %p3, %rd55, 0;
; CHECK-NEXT: or.b64 %rd56, %rd3, %rd4;
-; CHECK-NEXT: setp.eq.s64 %p4, %rd56, 0;
+; CHECK-NEXT: setp.eq.b64 %p4, %rd56, 0;
; CHECK-NEXT: or.pred %p5, %p3, %p4;
-; CHECK-NEXT: setp.ne.s64 %p6, %rd6, 0;
+; CHECK-NEXT: setp.ne.b64 %p6, %rd6, 0;
; CHECK-NEXT: clz.b64 %r1, %rd6;
; CHECK-NEXT: cvt.u64.u32 %rd57, %r1;
; CHECK-NEXT: clz.b64 %r2, %rd5;
; CHECK-NEXT: cvt.u64.u32 %rd58, %r2;
; CHECK-NEXT: add.s64 %rd59, %rd58, 64;
; CHECK-NEXT: selp.b64 %rd60, %rd57, %rd59, %p6;
-; CHECK-NEXT: setp.ne.s64 %p7, %rd4, 0;
+; CHECK-NEXT: setp.ne.b64 %p7, %rd4, 0;
; CHECK-NEXT: clz.b64 %r3, %rd4;
; CHECK-NEXT: cvt.u64.u32 %rd61, %r3;
; CHECK-NEXT: clz.b64 %r4, %rd3;
@@ -45,16 +45,16 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: mov.b64 %rd116, 0;
; CHECK-NEXT: sub.cc.s64 %rd66, %rd60, %rd64;
; CHECK-NEXT: subc.cc.s64 %rd8, %rd116, 0;
-; CHECK-NEXT: setp.ne.s64 %p8, %rd8, 0;
+; CHECK-NEXT: setp.ne.b64 %p8, %rd8, 0;
; CHECK-NEXT: and.pred %p10, %p8, %p8;
-; CHECK-NEXT: setp.eq.s64 %p11, %rd8, 0;
+; CHECK-NEXT: setp.eq.b64 %p11, %rd8, 0;
; CHECK-NEXT: setp.gt.u64 %p12, %rd66, 127;
; CHECK-NEXT: and.pred %p13, %p11, %p12;
; CHECK-NEXT: or.pred %p14, %p13, %p10;
; CHECK-NEXT: or.pred %p15, %p5, %p14;
; CHECK-NEXT: xor.b64 %rd67, %rd66, 127;
; CHECK-NEXT: or.b64 %rd68, %rd67, %rd8;
-; CHECK-NEXT: setp.eq.s64 %p16, %rd68, 0;
+; CHECK-NEXT: setp.eq.b64 %p16, %rd68, 0;
; CHECK-NEXT: selp.b64 %rd125, 0, %rd4, %p15;
; CHECK-NEXT: selp.b64 %rd124, 0, %rd3, %p15;
; CHECK-NEXT: or.pred %p17, %p15, %p16;
@@ -63,7 +63,7 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: add.cc.s64 %rd118, %rd66, 1;
; CHECK-NEXT: addc.cc.s64 %rd119, %rd8, 0;
; CHECK-NEXT: or.b64 %rd71, %rd118, %rd119;
-; CHECK-NEXT: setp.eq.s64 %p18, %rd71, 0;
+; CHECK-NEXT: setp.eq.b64 %p18, %rd71, 0;
; CHECK-NEXT: cvt.u32.u64 %r5, %rd66;
; CHECK-NEXT: sub.s32 %r6, 127, %r5;
; CHECK-NEXT: shl.b64 %rd72, %rd4, %r6;
@@ -117,7 +117,7 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: add.cc.s64 %rd118, %rd118, -1;
; CHECK-NEXT: addc.cc.s64 %rd119, %rd119, -1;
; CHECK-NEXT: or.b64 %rd97, %rd118, %rd119;
-; CHECK-NEXT: setp.eq.s64 %p21, %rd97, 0;
+; CHECK-NEXT: setp.eq.b64 %p21, %rd97, 0;
; CHECK-NEXT: @%p21 bra $L__BB0_4;
; CHECK-NEXT: bra.uni $L__BB0_2;
; CHECK-NEXT: $L__BB0_4: // %udiv-loop-exit
@@ -155,18 +155,18 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: ld.param.v2.b64 {%rd41, %rd42}, [urem_i128_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd3, %rd4}, [urem_i128_param_1];
; CHECK-NEXT: or.b64 %rd45, %rd3, %rd4;
-; CHECK-NEXT: setp.eq.s64 %p1, %rd45, 0;
+; CHECK-NEXT: setp.eq.b64 %p1, %rd45, 0;
; CHECK-NEXT: or.b64 %rd46, %rd41, %rd42;
-; CHECK-NEXT: setp.eq.s64 %p2, %rd46, 0;
+; CHECK-NEXT: setp.eq.b64 %p2, %rd46, 0;
; CHECK-NEXT: or.pred %p3, %p1, %p2;
-; CHECK-NEXT: setp.ne.s64 %p4, %rd4, 0;
+; CHECK-NEXT: setp.ne.b64 %p4, %rd4, 0;
; CHECK-NEXT: clz.b64 %r1, %rd4;
; CHECK-NEXT: cvt.u64.u32 %rd47, %r1;
; CHECK-NEXT: clz.b64 %r2, %rd3;
; CHECK-NEXT: cvt.u64.u32 %rd48, %r2;
; CHECK-NEXT: add.s64 %rd49, %rd48, 64;
; CHECK-NEXT: selp.b64 %rd50, %rd47, %rd49, %p4;
-; CHECK-NEXT: setp.ne.s64 %p5, %rd42, 0;
+; CHECK-NEXT: setp.ne.b64 %p5, %rd42, 0;
; CHECK-NEXT: clz.b64 %r3, %rd42;
; CHECK-NEXT: cvt.u64.u32 %rd51, %r3;
; CHECK-NEXT: clz.b64 %r4, %rd41;
@@ -177,14 +177,14 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: sub.cc.s64 %rd5, %rd50, %rd54;
; CHECK-NEXT: subc.cc.s64 %rd6, %rd101, 0;
; CHECK-NEXT: setp.gt.u64 %p6, %rd5, 127;
-; CHECK-NEXT: setp.eq.s64 %p7, %rd6, 0;
+; CHECK-NEXT: setp.eq.b64 %p7, %rd6, 0;
; CHECK-NEXT: and.pred %p8, %p7, %p6;
-; CHECK-NEXT: setp.ne.s64 %p9, %rd6, 0;
+; CHECK-NEXT: setp.ne.b64 %p9, %rd6, 0;
; CHECK-NEXT: or.pred %p10, %p8, %p9;
; CHECK-NEXT: or.pred %p11, %p3, %p10;
; CHECK-NEXT: xor.b64 %rd56, %rd5, 127;
; CHECK-NEXT: or.b64 %rd57, %rd56, %rd6;
-; CHECK-NEXT: setp.eq.s64 %p12, %rd57, 0;
+; CHECK-NEXT: setp.eq.b64 %p12, %rd57, 0;
; CHECK-NEXT: selp.b64 %rd110, 0, %rd42, %p11;
; CHECK-NEXT: selp.b64 %rd109, 0, %rd41, %p11;
; CHECK-NEXT: or.pred %p13, %p11, %p12;
@@ -193,7 +193,7 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: add.cc.s64 %rd103, %rd5, 1;
; CHECK-NEXT: addc.cc.s64 %rd104, %rd6, 0;
; CHECK-NEXT: or.b64 %rd60, %rd103, %rd104;
-; CHECK-NEXT: setp.eq.s64 %p14, %rd60, 0;
+; CHECK-NEXT: setp.eq.b64 %p14, %rd60, 0;
; CHECK-NEXT: cvt.u32.u64 %r5, %rd5;
; CHECK-NEXT: sub.s32 %r6, 127, %r5;
; CHECK-NEXT: shl.b64 %rd61, %rd42, %r6;
@@ -247,7 +247,7 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: add.cc.s64 %rd103, %rd103, -1;
; CHECK-NEXT: addc.cc.s64 %rd104, %rd104, -1;
; CHECK-NEXT: or.b64 %rd86, %rd103, %rd104;
-; CHECK-NEXT: setp.eq.s64 %p17, %rd86, 0;
+; CHECK-NEXT: setp.eq.b64 %p17, %rd86, 0;
; CHECK-NEXT: @%p17 bra $L__BB1_4;
; CHECK-NEXT: bra.uni $L__BB1_2;
; CHECK-NEXT: $L__BB1_4: // %udiv-loop-exit
@@ -327,18 +327,18 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: xor.b64 %rd55, %rd50, %rd46;
; CHECK-NEXT: shr.s64 %rd5, %rd55, 63;
; CHECK-NEXT: or.b64 %rd56, %rd3, %rd4;
-; CHECK-NEXT: setp.eq.s64 %p3, %rd56, 0;
+; CHECK-NEXT: setp.eq.b64 %p3, %rd56, 0;
; CHECK-NEXT: or.b64 %rd57, %rd1, %rd2;
-; CHECK-NEXT: setp.eq.s64 %p4, %rd57, 0;
+; CHECK-NEXT: setp.eq.b64 %p4, %rd57, 0;
; CHECK-NEXT: or.pred %p5, %p3, %p4;
-; CHECK-NEXT: setp.ne.s64 %p6, %rd4, 0;
+; CHECK-NEXT: setp.ne.b64 %p6, %rd4, 0;
; CHECK-NEXT: clz.b64 %r1, %rd4;
; CHECK-NEXT: cvt.u64.u32 %rd58, %r1;
; CHECK-NEXT: clz.b64 %r2, %rd3;
; CHECK-NEXT: cvt.u64.u32 %rd59, %r2;
; CHECK-NEXT: add.s64 %rd60, %rd59, 64;
; CHECK-NEXT: selp.b64 %rd61, %rd58, %rd60, %p6;
-; CHECK-NEXT: setp.ne.s64 %p7, %rd2, 0;
+; CHECK-NEXT: setp.ne.b64 %p7, %rd2, 0;
; CHECK-NEXT: clz.b64 %r3, %rd2;
; CHECK-NEXT: cvt.u64.u32 %rd62, %r3;
; CHECK-NEXT: clz.b64 %r4, %rd1;
@@ -348,16 +348,16 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: mov.b64 %rd111, 0;
; CHECK-NEXT: sub.cc.s64 %rd67, %rd61, %rd65;
; CHECK-NEXT: subc.cc.s64 %rd8, %rd111, 0;
-; CHECK-NEXT: setp.ne.s64 %p8, %rd8, 0;
+; CHECK-NEXT: setp.ne.b64 %p8, %rd8, 0;
; CHECK-NEXT: and.pred %p10, %p8, %p8;
-; CHECK-NEXT: setp.eq.s64 %p11, %rd8, 0;
+; CHECK-NEXT: setp.eq.b64 %p11, %rd8, 0;
; CHECK-NEXT: setp.gt.u64 %p12, %rd67, 127;
; CHECK-NEXT: and.pred %p13, %p11, %p12;
; CHECK-NEXT: or.pred %p14, %p13, %p10;
; CHECK-NEXT: or.pred %p15, %p5, %p14;
; CHECK-NEXT: xor.b64 %rd68, %rd67, 127;
; CHECK-NEXT: or.b64 %rd69, %rd68, %rd8;
-; CHECK-NEXT: setp.eq.s64 %p16, %rd69, 0;
+; CHECK-NEXT: setp.eq.b64 %p16, %rd69, 0;
; CHECK-NEXT: selp.b64 %rd120, 0, %rd2, %p15;
; CHECK-NEXT: selp.b64 %rd119, 0, %rd1, %p15;
; CHECK-NEXT: or.pred %p17, %p15, %p16;
@@ -366,7 +366,7 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: add.cc.s64 %rd113, %rd67, 1;
; CHECK-NEXT: addc.cc.s64 %rd114, %rd8, 0;
; CHECK-NEXT: or.b64 %rd72, %rd113, %rd114;
-; CHECK-NEXT: setp.eq.s64 %p18, %rd72, 0;
+; CHECK-NEXT: setp.eq.b64 %p18, %rd72, 0;
; CHECK-NEXT: cvt.u32.u64 %r5, %rd67;
; CHECK-NEXT: sub.s32 %r6, 127, %r5;
; CHECK-NEXT: shl.b64 %rd73, %rd2, %r6;
@@ -420,7 +420,7 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: add.cc.s64 %rd113, %rd113, -1;
; CHECK-NEXT: addc.cc.s64 %rd114, %rd114, -1;
; CHECK-NEXT: or.b64 %rd98, %rd113, %rd114;
-; CHECK-NEXT: setp.eq.s64 %p21, %rd98, 0;
+; CHECK-NEXT: setp.eq.b64 %p21, %rd98, 0;
; CHECK-NEXT: @%p21 bra $L__BB4_4;
; CHECK-NEXT: bra.uni $L__BB4_2;
; CHECK-NEXT: $L__BB4_4: // %udiv-loop-exit
@@ -452,18 +452,18 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: ld.param.v2.b64 {%rd41, %rd42}, [udiv_i128_param_0];
; CHECK-NEXT: ld.param.v2.b64 {%rd43, %rd44}, [udiv_i128_param_1];
; CHECK-NEXT: or.b64 %rd45, %rd43, %rd44;
-; CHECK-NEXT: setp.eq.s64 %p1, %rd45, 0;
+; CHECK-NEXT: setp.eq.b64 %p1, %rd45, 0;
; CHECK-NEXT: or.b64 %rd46, %rd41, %rd42;
-; CHECK-NEXT: setp.eq.s64 %p2, %rd46, 0;
+; CHECK-NEXT: setp.eq.b64 %p2, %rd46, 0;
; CHECK-NEXT: or.pred %p3, %p1, %p2;
-; CHECK-NEXT: setp.ne.s64 %p4, %rd44, 0;
+; CHECK-NEXT: setp.ne.b64 %p4, %rd44, 0;
; CHECK-NEXT: clz.b64 %r1, %rd44;
; CHECK-NEXT: cvt.u64.u32 %rd47, %r1;
; CHECK-NEXT: clz.b64 %r2, %rd43;
; CHECK-NEXT: cvt.u64.u32 %rd48, %r2;
; CHECK-NEXT: add.s64 %rd49, %rd48, 64;
; CHECK-NEXT: selp.b64 %rd50, %rd47, %rd49, %p4;
-; CHECK-NEXT: setp.ne.s64 %p5, %rd42, 0;
+; CHECK-NEXT: setp.ne.b64 %p5, %rd42, 0;
; CHECK-NEXT: clz.b64 %r3, %rd42;
; CHECK-NEXT: cvt.u64.u32 %rd51, %r3;
; CHECK-NEXT: clz.b64 %r4, %rd41;
@@ -474,14 +474,14 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: sub.cc.s64 %rd5, %rd50, %rd54;
; CHECK-NEXT: subc.cc.s64 %rd6, %rd95, 0;
; CHECK-NEXT: setp.gt.u64 %p6, %rd5, 127;
-; CHECK-NEXT: setp.eq.s64 %p7, %rd6, 0;
+; CHECK-NEXT: setp.eq.b64 %p7, %rd6, 0;
; CHECK-NEXT: and.pred %p8, %p7, %p6;
-; CHECK-NEXT: setp.ne.s64 %p9, %rd6, 0;
+; CHECK-NEXT: setp.ne.b64 %p9, %rd6, 0;
; CHECK-NEXT: or.pred %p10, %p8, %p9;
; CHECK-NEXT: or.pred %p11, %p3, %p10;
; CHECK-NEXT: xor.b64 %rd56, %rd5, 127;
; CHECK-NEXT: or.b64 %rd57, %rd56, %rd6;
-; CHECK-NEXT: setp.eq.s64 %p12, %rd57, 0;
+; CHECK-NEXT: setp.eq.b64 %p12, %rd57, 0;
; CHECK-NEXT: selp.b64 %rd104, 0, %rd42, %p11;
; CHECK-NEXT: selp.b64 %rd103, 0, %rd41, %p11;
; CHECK-NEXT: or.pred %p13, %p11, %p12;
@@ -490,7 +490,7 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: add.cc.s64 %rd97, %rd5, 1;
; CHECK-NEXT: addc.cc.s64 %rd98, %rd6, 0;
; CHECK-NEXT: or.b64 %rd60, %rd97, %rd98;
-; CHECK-NEXT: setp.eq.s64 %p14, %rd60, 0;
+; CHECK-NEXT: setp.eq.b64 %p14, %rd60, 0;
; CHECK-NEXT: cvt.u32.u64 %r5, %rd5;
; CHECK-NEXT: sub.s32 %r6, 127, %r5;
; CHECK-NEXT: shl.b64 %rd61, %rd42, %r6;
@@ -544,7 +544,7 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: add.cc.s64 %rd97, %rd97, -1;
; CHECK-NEXT: addc.cc.s64 %rd98, %rd98, -1;
; CHECK-NEXT: or.b64 %rd86, %rd97, %rd98;
-; CHECK-NEXT: setp.eq.s64 %p17, %rd86, 0;
+; CHECK-NEXT: setp.eq.b64 %p17, %rd86, 0;
; CHECK-NEXT: @%p17 bra $L__BB5_4;
; CHECK-NEXT: bra.uni $L__BB5_2;
; CHECK-NEXT: $L__BB5_4: // %udiv-loop-exit
diff --git a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
index e89ab7a5605c3..2b7a06c33d948 100644
--- a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
@@ -98,7 +98,7 @@ define i16 @test_extract_i(<2 x i16> %a, i64 %idx) #0 {
; COMMON-NEXT: // %bb.0:
; COMMON-NEXT: ld.param.b64 %rd1, [test_extract_i_param_1];
; COMMON-NEXT: ld.param.b32 %r1, [test_extract_i_param_0];
-; COMMON-NEXT: setp.eq.s64 %p1, %rd1, 0;
+; COMMON-NEXT: setp.eq.b64 %p1, %rd1, 0;
; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r1;
; COMMON-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
; COMMON-NEXT: cvt.u32.u16 %r2, %rs3;
@@ -735,8 +735,8 @@ define <2 x i16> @test_select_cc(<2 x i16> %a, <2 x i16> %b, <2 x i16> %c, <2 x
; COMMON-NEXT: ld.param.b32 %r1, [test_select_cc_param_0];
; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r4;
; COMMON-NEXT: mov.b32 {%rs3, %rs4}, %r3;
-; COMMON-NEXT: setp.ne.s16 %p1, %rs3, %rs1;
-; COMMON-NEXT: setp.ne.s16 %p2, %rs4, %rs2;
+; COMMON-NEXT: setp.ne.b16 %p1, %rs3, %rs1;
+; COMMON-NEXT: setp.ne.b16 %p2, %rs4, %rs2;
; COMMON-NEXT: mov.b32 {%rs5, %rs6}, %r2;
; COMMON-NEXT: mov.b32 {%rs7, %rs8}, %r1;
; COMMON-NEXT: selp.b16 %rs9, %rs8, %rs6, %p2;
@@ -762,8 +762,8 @@ define <2 x i32> @test_select_cc_i32_i16(<2 x i32> %a, <2 x i32> %b,
; COMMON-NEXT: ld.param.b32 %r5, [test_select_cc_i32_i16_param_2];
; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r6;
; COMMON-NEXT: mov.b32 {%rs3, %rs4}, %r5;
-; COMMON-NEXT: setp.ne.s16 %p1, %rs3, %rs1;
-; COMMON-NEXT: setp.ne.s16 %p2, %rs4, %rs2;
+; COMMON-NEXT: setp.ne.b16 %p1, %rs3, %rs1;
+; COMMON-NEXT: setp.ne.b16 %p2, %rs4, %rs2;
; COMMON-NEXT: selp.b32 %r7, %r2, %r4, %p2;
; COMMON-NEXT: selp.b32 %r8, %r1, %r3, %p1;
; COMMON-NEXT: st.param.v2.b32 [func_retval0], {%r8, %r7};
@@ -786,8 +786,8 @@ define <2 x i16> @test_select_cc_i16_i32(<2 x i16> %a, <2 x i16> %b,
; COMMON-NEXT: ld.param.v2.b32 {%r3, %r4}, [test_select_cc_i16_i32_param_2];
; COMMON-NEXT: ld.param.b32 %r2, [test_select_cc_i16_i32_param_1];
; COMMON-NEXT: ld.param.b32 %r1, [test_select_cc_i16_i32_param_0];
-; COMMON-NEXT: setp.ne.s32 %p1, %r3, %r5;
-; COMMON-NEXT: setp.ne.s32 %p2, %r4, %r6;
+; COMMON-NEXT: setp.ne.b32 %p1, %r3, %r5;
+; COMMON-NEXT: setp.ne.b32 %p2, %r4, %r6;
; COMMON-NEXT: mov.b32 {%rs1, %rs2}, %r2;
; COMMON-NEXT: mov.b32 {%rs3, %rs4}, %r1;
; COMMON-NEXT: selp.b16 %rs5, %rs4, %rs2, %p2;
diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
index fd2e56bb126bb..328da60a1f783 100644
--- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
@@ -300,16 +300,16 @@ define <4 x i8> @test_umax(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: ld.param.b32 %r1, [test_umax_param_0];
; CHECK-NEXT: bfe.u32 %r3, %r2, 0, 8;
; CHECK-NEXT: bfe.u32 %r4, %r1, 0, 8;
-; CHECK-NEXT: setp.hi.u32 %p1, %r4, %r3;
+; CHECK-NEXT: setp.gt.u32 %p1, %r4, %r3;
; CHECK-NEXT: bfe.u32 %r5, %r2, 8, 8;
; CHECK-NEXT: bfe.u32 %r6, %r1, 8, 8;
-; CHECK-NEXT: setp.hi.u32 %p2, %r6, %r5;
+; CHECK-NEXT: setp.gt.u32 %p2, %r6, %r5;
; CHECK-NEXT: bfe.u32 %r7, %r2, 16, 8;
; CHECK-NEXT: bfe.u32 %r8, %r1, 16, 8;
-; CHECK-NEXT: setp.hi.u32 %p3, %r8, %r7;
+; CHECK-NEXT: setp.gt.u32 %p3, %r8, %r7;
; CHECK-NEXT: bfe.u32 %r9, %r2, 24, 8;
; CHECK-NEXT: bfe.u32 %r10, %r1, 24, 8;
-; CHECK-NEXT: setp.hi.u32 %p4, %r10, %r9;
+; CHECK-NEXT: setp.gt.u32 %p4, %r10, %r9;
; CHECK-NEXT: selp.b32 %r11, %r10, %r9, %p4;
; CHECK-NEXT: selp.b32 %r12, %r8, %r7, %p3;
; CHECK-NEXT: prmt.b32 %r13, %r12, %r11, 0x3340U;
@@ -378,16 +378,16 @@ define <4 x i8> @test_umin(<4 x i8> %a, <4 x i8> %b) #0 {
; CHECK-NEXT: ld.param.b32 %r1, [test_umin_param_0];
; CHECK-NEXT: bfe.u32 %r3, %r2, 0, 8;
; CHECK-NEXT: bfe.u32 %r4, %r1, 0, 8;
-; CHECK-NEXT: setp.ls.u32 %p1, %r4, %r3;
+; CHECK-NEXT: setp.le.u32 %p1, %r4, %r3;
; CHECK-NEXT: bfe.u32 %r5, %r2, 8, 8;
; CHECK-NEXT: bfe.u32 %r6, %r1, 8, 8;
-; CHECK-NEXT: setp.ls.u32 %p2, %r6, %r5;
+; CHECK-NEXT: setp.le.u32 %p2, %r6, %r5;
; CHECK-NEXT: bfe.u32 %r7, %r2, 16, 8;
; CHECK-NEXT: bfe.u32 %r8, %r1, 16, 8;
-; CHECK-NEXT: setp.ls.u32 %p3, %r8, %r7;
+; CHECK-NEXT: setp.le.u32 %p3, %r8, %r7;
; CHECK-NEXT: bfe.u32 %r9, %r2, 24, 8;
; CHECK-NEXT: bfe.u32 %r10, %r1, 24, 8;
-; CHECK-NEXT: setp.ls.u32 %p4, %r10, %r9;
+; CHECK-NEXT: setp.le.u32 %p4, %r10, %r9;
; CHECK-NEXT: selp.b32 %r11, %r10, %r9, %p4;
; CHECK-NEXT: selp.b32 %r12, %r8, %r7, %p3;
; CHECK-NEXT: prmt.b32 %r13, %r12, %r11, 0x3340U;
@@ -414,16 +414,16 @@ define <4 x i8> @test_eq(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c) #0 {
; CHECK-NEXT: ld.param.b32 %r1, [test_eq_param_0];
; CHECK-NEXT: bfe.u32 %r4, %r2, 0, 8;
; CHECK-NEXT: bfe.u32 %r5, %r1, 0, 8;
-; CHECK-NEXT: setp.eq.u32 %p1, %r5, %r4;
+; CHECK-NEXT: setp.eq.b32 %p1, %r5, %r4;
; CHECK-NEXT: bfe.u32 %r6, %r2, 8, 8;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
-; CHECK-NEXT: setp.eq.u32 %p2, %r7, %r6;
+; CHECK-NEXT: setp.eq.b32 %p2, %r7, %r6;
; CHECK-NEXT: bfe.u32 %r8, %r2, 16, 8;
; CHECK-NEXT: bfe.u32 %r9, %r1, 16, 8;
-; CHECK-NEXT: setp.eq.u32 %p3, %r9, %r8;
+; CHECK-NEXT: setp.eq.b32 %p3, %r9, %r8;
; CHECK-NEXT: bfe.u32 %r10, %r2, 24, 8;
; CHECK-NEXT: bfe.u32 %r11, %r1, 24, 8;
-; CHECK-NEXT: setp.eq.u32 %p4, %r11, %r10;
+; CHECK-NEXT: setp.eq.b32 %p4, %r11, %r10;
; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8;
; CHECK-NEXT: selp.b32 %r13, %r11, %r12, %p4;
; CHECK-NEXT: bfe.u32 %r14, %r3, 16, 8;
@@ -454,16 +454,16 @@ define <4 x i8> @test_ne(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c) #0 {
; CHECK-NEXT: ld.param.b32 %r1, [test_ne_param_0];
; CHECK-NEXT: bfe.u32 %r4, %r2, 0, 8;
; CHECK-NEXT: bfe.u32 %r5, %r1, 0, 8;
-; CHECK-NEXT: setp.ne.u32 %p1, %r5, %r4;
+; CHECK-NEXT: setp.ne.b32 %p1, %r5, %r4;
; CHECK-NEXT: bfe.u32 %r6, %r2, 8, 8;
; CHECK-NEXT: bfe.u32 %r7, %r1, 8, 8;
-; CHECK-NEXT: setp.ne.u32 %p2, %r7, %r6;
+; CHECK-NEXT: setp.ne.b32 %p2, %r7, %r6;
; CHECK-NEXT: bfe.u32 %r8, %r2, 16, 8;
; CHECK-NEXT: bfe.u32 %r9, %r1, 16, 8;
-; CHECK-NEXT: setp.ne.u32 %p3, %r9, %r8;
+; CHECK-NEXT: setp.ne.b32 %p3, %r9, %r8;
; CHECK-NEXT: bfe.u32 %r10, %r2, 24, 8;
; CHECK-NEXT: bfe.u32 %r11, %r1, 24, 8;
-; CHECK-NEXT: setp.ne.u32 %p4, %r11, %r10;
+; CHECK-NEXT: setp.ne.b32 %p4, %r11, %r10;
; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8;
; CHECK-NEXT: selp.b32 %r13, %r11, %r12, %p4;
; CHECK-NEXT: bfe.u32 %r14, %r3, 16, 8;
@@ -920,16 +920,16 @@ define <4 x i8> @test_select_cc(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8>
; CHECK-NEXT: ld.param.b32 %r1, [test_select_cc_param_0];
; CHECK-NEXT: bfe.u32 %r5, %r4, 0, 8;
; CHECK-NEXT: bfe.u32 %r6, %r3, 0, 8;
-; CHECK-NEXT: setp.ne.u32 %p1, %r6, %r5;
+; CHECK-NEXT: setp.ne.b32 %p1, %r6, %r5;
; CHECK-NEXT: bfe.u32 %r7, %r4, 8, 8;
; CHECK-NEXT: bfe.u32 %r8, %r3, 8, 8;
-; CHECK-NEXT: setp.ne.u32 %p2, %r8, %r7;
+; CHECK-NEXT: setp.ne.b32 %p2, %r8, %r7;
; CHECK-NEXT: bfe.u32 %r9, %r4, 16, 8;
; CHECK-NEXT: bfe.u32 %r10, %r3, 16, 8;
-; CHECK-NEXT: setp.ne.u32 %p3, %r10, %r9;
+; CHECK-NEXT: setp.ne.b32 %p3, %r10, %r9;
; CHECK-NEXT: bfe.u32 %r11, %r4, 24, 8;
; CHECK-NEXT: bfe.u32 %r12, %r3, 24, 8;
-; CHECK-NEXT: setp.ne.u32 %p4, %r12, %r11;
+; CHECK-NEXT: setp.ne.b32 %p4, %r12, %r11;
; CHECK-NEXT: bfe.u32 %r13, %r2, 24, 8;
; CHECK-NEXT: bfe.u32 %r14, %r1, 24, 8;
; CHECK-NEXT: selp.b32 %r15, %r14, %r13, %p4;
@@ -965,16 +965,16 @@ define <4 x i32> @test_select_cc_i32_i8(<4 x i32> %a, <4 x i32> %b,
; CHECK-NEXT: ld.param.b32 %r9, [test_select_cc_i32_i8_param_2];
; CHECK-NEXT: bfe.u32 %r11, %r10, 0, 8;
; CHECK-NEXT: bfe.u32 %r12, %r9, 0, 8;
-; CHECK-NEXT: setp.ne.u32 %p1, %r12, %r11;
+; CHECK-NEXT: setp.ne.b32 %p1, %r12, %r11;
; CHECK-NEXT: bfe.u32 %r13, %r10, 8, 8;
; CHECK-NEXT: bfe.u32 %r14, %r9, 8, 8;
-; CHECK-NEXT: setp.ne.u32 %p2, %r14, %r13;
+; CHECK-NEXT: setp.ne.b32 %p2, %r14, %r13;
; CHECK-NEXT: bfe.u32 %r15, %r10, 16, 8;
; CHECK-NEXT: bfe.u32 %r16, %r9, 16, 8;
-; CHECK-NEXT: setp.ne.u32 %p3, %r16, %r15;
+; CHECK-NEXT: setp.ne.b32 %p3, %r16, %r15;
; CHECK-NEXT: bfe.u32 %r17, %r10, 24, 8;
; CHECK-NEXT: bfe.u32 %r18, %r9, 24, 8;
-; CHECK-NEXT: setp.ne.u32 %p4, %r18, %r17;
+; CHECK-NEXT: setp.ne.b32 %p4, %r18, %r17;
; CHECK-NEXT: selp.b32 %r19, %r4, %r8, %p4;
; CHECK-NEXT: selp.b32 %r20, %r3, %r7, %p3;
; CHECK-NEXT: selp.b32 %r21, %r2, %r6, %p2;
@@ -998,10 +998,10 @@ define <4 x i8> @test_select_cc_i8_i32(<4 x i8> %a, <4 x i8> %b,
; CHECK-NEXT: ld.param.v4.b32 {%r3, %r4, %r5, %r6}, [test_select_cc_i8_i32_param_2];
; CHECK-NEXT: ld.param.b32 %r2, [test_select_cc_i8_i32_param_1];
; CHECK-NEXT: ld.param.b32 %r1, [test_select_cc_i8_i32_param_0];
-; CHECK-NEXT: setp.ne.s32 %p1, %r3, %r7;
-; CHECK-NEXT: setp.ne.s32 %p2, %r4, %r8;
-; CHECK-NEXT: setp.ne.s32 %p3, %r5, %r9;
-; CHECK-NEXT: setp.ne.s32 %p4, %r6, %r10;
+; CHECK-NEXT: setp.ne.b32 %p1, %r3, %r7;
+; CHECK-NEXT: setp.ne.b32 %p2, %r4, %r8;
+; CHECK-NEXT: setp.ne.b32 %p3, %r5, %r9;
+; CHECK-NEXT: setp.ne.b32 %p4, %r6, %r10;
; CHECK-NEXT: bfe.u32 %r11, %r2, 24, 8;
; CHECK-NEXT: bfe.u32 %r12, %r1, 24, 8;
; CHECK-NEXT: selp.b32 %r13, %r12, %r11, %p4;
@@ -1421,16 +1421,16 @@ define void @test_sext_v4i1_to_v4i8(ptr %a, ptr %b, ptr %c) {
; CHECK-NEXT: ld.b32 %r2, [%rd2];
; CHECK-NEXT: bfe.u32 %r3, %r2, 0, 8;
; CHECK-NEXT: bfe.u32 %r4, %r1, 0, 8;
-; CHECK-NEXT: setp.hi.u32 %p1, %r4, %r3;
+; CHECK-NEXT: setp.gt.u32 %p1, %r4, %r3;
; CHECK-NEXT: bfe.u32 %r5, %r2, 8, 8;
; CHECK-NEXT: bfe.u32 %r6, %r1, 8, 8;
-; CHECK-NEXT: setp.hi.u32 %p2, %r6, %r5;
+; CHECK-NEXT: setp.gt.u32 %p2, %r6, %r5;
; CHECK-NEXT: bfe.u32 %r7, %r2, 16, 8;
; CHECK-NEXT: bfe.u32 %r8, %r1, 16, 8;
-; CHECK-NEXT: setp.hi.u32 %p3, %r8, %r7;
+; CHECK-NEXT: setp.gt.u32 %p3, %r8, %r7;
; CHECK-NEXT: bfe.u32 %r9, %r2, 24, 8;
; CHECK-NEXT: bfe.u32 %r10, %r1, 24, 8;
-; CHECK-NEXT: setp.hi.u32 %p4, %r10, %r9;
+; CHECK-NEXT: setp.gt.u32 %p4, %r10, %r9;
; CHECK-NEXT: selp.b32 %r11, -1, 0, %p4;
; CHECK-NEXT: selp.b32 %r12, -1, 0, %p3;
; CHECK-NEXT: prmt.b32 %r13, %r12, %r11, 0x3340U;
diff --git a/llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll b/llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll
index 5cfdbb7447ad8..307e2c8550914 100644
--- a/llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll
+++ b/llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll
@@ -62,7 +62,7 @@ define void @test_b128_input_from_select(ptr nocapture readonly %flag) {
; CHECK-NEXT: ld.param.b64 %rd2, [test_b128_input_from_select_param_0];
; CHECK-NEXT: cvta.to.global.u64 %rd3, %rd2;
; CHECK-NEXT: ld.global.b8 %rs1, [%rd3];
-; CHECK-NEXT: setp.eq.s16 %p1, %rs1, 0;
+; CHECK-NEXT: setp.eq.b16 %p1, %rs1, 0;
; CHECK-NEXT: selp.b64 %rd4, 24, 42, %p1;
; CHECK-NEXT: mov.b64 %rd5, 0;
; CHECK-NEXT: mov.b128 %rq1, {%rd4, %rd5};
diff --git a/llvm/test/CodeGen/NVPTX/inline-asm-b128-test3.ll b/llvm/test/CodeGen/NVPTX/inline-asm-b128-test3.ll
index 6dbf44f38aa2f..037d7df1aee59 100644
--- a/llvm/test/CodeGen/NVPTX/inline-asm-b128-test3.ll
+++ b/llvm/test/CodeGen/NVPTX/inline-asm-b128-test3.ll
@@ -16,7 +16,7 @@ define void @test_b128_in_loop() {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.global.s32 %rd1, [size];
-; CHECK-NEXT: setp.eq.s64 %p1, %rd1, 0;
+; CHECK-NEXT: setp.eq.b64 %p1, %rd1, 0;
; CHECK-NEXT: @%p1 bra $L__BB0_3;
; CHECK-NEXT: // %bb.1: // %BB1
; CHECK-NEXT: ld.global.v2.b64 {%rd12, %rd13}, [x];
@@ -36,7 +36,7 @@ define void @test_b128_in_loop() {
; CHECK-NEXT: mov.b128 {%rd12, %rd13}, %rq1;
; CHECK-NEXT: st.global.v2.b64 [x], {%rd12, %rd13};
; CHECK-NEXT: add.s64 %rd14, %rd14, 1;
-; CHECK-NEXT: setp.ne.s64 %p2, %rd1, %rd14;
+; CHECK-NEXT: setp.ne.b64 %p2, %rd1, %rd14;
; CHECK-NEXT: @%p2 bra $L__BB0_2;
; CHECK-NEXT: $L__BB0_3: // %BB3
; CHECK-NEXT: ret;
diff --git a/llvm/test/CodeGen/NVPTX/jump-table.ll b/llvm/test/CodeGen/NVPTX/jump-table.ll
index 955befc624c71..a6238352179ca 100644
--- a/llvm/test/CodeGen/NVPTX/jump-table.ll
+++ b/llvm/test/CodeGen/NVPTX/jump-table.ll
@@ -99,7 +99,7 @@ define i32 @test2(i32 %tmp158) {
; CHECK-NEXT: st.param.b32 [func_retval0], 12;
; CHECK-NEXT: ret;
; CHECK-NEXT: $L__BB1_5: // %entry
-; CHECK-NEXT: setp.eq.s32 %p3, %r1, 1024;
+; CHECK-NEXT: setp.eq.b32 %p3, %r1, 1024;
; CHECK-NEXT: @%p3 bra $L__BB1_3;
; CHECK-NEXT: bra.uni $L__BB1_6;
; CHECK-NEXT: $L__BB1_3: // %bb338
diff --git a/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll b/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll
index d494ee30c2821..b6a00e03a80ab 100644
--- a/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll
+++ b/llvm/test/CodeGen/NVPTX/load-with-non-coherent-cache.ll
@@ -613,7 +613,7 @@ define ptx_kernel void @foo19(ptr noalias readonly %from, ptr %to, i32 %n) {
; SM20-NEXT: add.rn.f32 %r9, %r7, %r9;
; SM20-NEXT: add.s64 %rd7, %rd7, 4;
; SM20-NEXT: add.s32 %r8, %r8, -1;
-; SM20-NEXT: setp.ne.s32 %p1, %r8, 0;
+; SM20-NEXT: setp.ne.b32 %p1, %r8, 0;
; SM20-NEXT: @%p1 bra $L__BB18_1;
; SM20-NEXT: // %bb.2: // %exit
; SM20-NEXT: st.global.b32 [%rd2], %r9;
@@ -638,7 +638,7 @@ define ptx_kernel void @foo19(ptr noalias readonly %from, ptr %to, i32 %n) {
; SM35-NEXT: add.rn.f32 %r9, %r7, %r9;
; SM35-NEXT: add.s64 %rd7, %rd7, 4;
; SM35-NEXT: add.s32 %r8, %r8, -1;
-; SM35-NEXT: setp.ne.s32 %p1, %r8, 0;
+; SM35-NEXT: setp.ne.b32 %p1, %r8, 0;
; SM35-NEXT: @%p1 bra $L__BB18_1;
; SM35-NEXT: // %bb.2: // %exit
; SM35-NEXT: st.global.b32 [%rd2], %r9;
diff --git a/llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll b/llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll
index 99212fc0dff79..297b2b984cdae 100644
--- a/llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-aggr-copies.ll
@@ -160,7 +160,7 @@ entry:
; PTX-LABEL: .visible .func (.param .b64 func_retval0) memmove_caller(
; PTX: ld.param.b64 %rd[[N:[0-9]+]]
-; PTX-DAG: setp.eq.s64 %p[[NEQ0:[0-9]+]], %rd[[N]], 0
+; PTX-DAG: setp.eq.b64 %p[[NEQ0:[0-9]+]], %rd[[N]], 0
; PTX-DAG: setp.ge.u64 %p[[SRC_GT_THAN_DST:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
; PTX-NEXT: @%p[[SRC_GT_THAN_DST]] bra $L__BB[[FORWARD_BB:[0-9_]+]]
; -- this is the backwards copying BB
diff --git a/llvm/test/CodeGen/NVPTX/math-intrins.ll b/llvm/test/CodeGen/NVPTX/math-intrins.ll
index dde71b009d564..e9635e9393984 100644
--- a/llvm/test/CodeGen/NVPTX/math-intrins.ll
+++ b/llvm/test/CodeGen/NVPTX/math-intrins.ll
@@ -621,9 +621,9 @@ define half @minimum_half(half %a, half %b) {
; CHECK-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1;
; CHECK-NOF16-NEXT: selp.b16 %rs4, 0x7E00, %rs3, %p2;
-; CHECK-NOF16-NEXT: setp.eq.s16 %p3, %rs1, -32768;
+; CHECK-NOF16-NEXT: setp.eq.b16 %p3, %rs1, -32768;
; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3;
-; CHECK-NOF16-NEXT: setp.eq.s16 %p4, %rs2, -32768;
+; CHECK-NOF16-NEXT: setp.eq.b16 %p4, %rs2, -32768;
; CHECK-NOF16-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000;
@@ -657,9 +657,9 @@ define half @minimum_half(half %a, half %b) {
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1;
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs4, 0x7E00, %rs3, %p2;
-; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p3, %rs1, -32768;
+; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p3, %rs1, -32768;
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3;
-; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p4, %rs2, -32768;
+; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p4, %rs2, -32768;
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4;
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000;
@@ -682,9 +682,9 @@ define float @minimum_float(float %a, float %b) {
; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r2;
; CHECK-NOF16-NEXT: min.f32 %r3, %r1, %r2;
; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1;
-; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, -2147483648;
+; CHECK-NOF16-NEXT: setp.eq.b32 %p2, %r1, -2147483648;
; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, %p2;
-; CHECK-NOF16-NEXT: setp.eq.s32 %p3, %r2, -2147483648;
+; CHECK-NOF16-NEXT: setp.eq.b32 %p3, %r2, -2147483648;
; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3;
; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %r4, 0f00000000;
; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4;
@@ -727,7 +727,7 @@ define float @minimum_imm1(float %a) {
; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1;
; CHECK-NOF16-NEXT: min.f32 %r2, %r1, 0f00000000;
; CHECK-NOF16-NEXT: selp.f32 %r3, 0f7FC00000, %r2, %p1;
-; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, -2147483648;
+; CHECK-NOF16-NEXT: setp.eq.b32 %p2, %r1, -2147483648;
; CHECK-NOF16-NEXT: selp.f32 %r4, %r1, %r3, %p2;
; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r3, 0f00000000;
; CHECK-NOF16-NEXT: selp.f32 %r5, %r4, %r3, %p3;
@@ -768,7 +768,7 @@ define float @minimum_imm2(float %a) {
; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r1;
; CHECK-NOF16-NEXT: min.f32 %r2, %r1, 0f00000000;
; CHECK-NOF16-NEXT: selp.f32 %r3, 0f7FC00000, %r2, %p1;
-; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, -2147483648;
+; CHECK-NOF16-NEXT: setp.eq.b32 %p2, %r1, -2147483648;
; CHECK-NOF16-NEXT: selp.f32 %r4, %r1, %r3, %p2;
; CHECK-NOF16-NEXT: setp.eq.f32 %p3, %r3, 0f00000000;
; CHECK-NOF16-NEXT: selp.f32 %r5, %r4, %r3, %p3;
@@ -810,9 +810,9 @@ define float @minimum_float_ftz(float %a, float %b) #1 {
; CHECK-NOF16-NEXT: setp.nan.ftz.f32 %p1, %r1, %r2;
; CHECK-NOF16-NEXT: min.ftz.f32 %r3, %r1, %r2;
; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1;
-; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, -2147483648;
+; CHECK-NOF16-NEXT: setp.eq.b32 %p2, %r1, -2147483648;
; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, %p2;
-; CHECK-NOF16-NEXT: setp.eq.s32 %p3, %r2, -2147483648;
+; CHECK-NOF16-NEXT: setp.eq.b32 %p3, %r2, -2147483648;
; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3;
; CHECK-NOF16-NEXT: setp.eq.ftz.f32 %p4, %r4, 0f00000000;
; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4;
@@ -826,7 +826,7 @@ define float @minimum_float_ftz(float %a, float %b) #1 {
; CHECK-F16-NEXT: // %bb.0:
; CHECK-F16-NEXT: ld.param.b32 %r1, [minimum_float_ftz_param_0];
; CHECK-F16-NEXT: ld.param.b32 %r2, [minimum_float_ftz_param_1];
-; CHECK-F16-NEXT: min.NaN.ftz.f32 %r3, %r1, %r2;
+; CHECK-F16-NEXT: min.ftz.NaN.f32 %r3, %r1, %r2;
; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-F16-NEXT: ret;
;
@@ -837,7 +837,7 @@ define float @minimum_float_ftz(float %a, float %b) #1 {
; CHECK-SM80-NOF16-NEXT: // %bb.0:
; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r1, [minimum_float_ftz_param_0];
; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r2, [minimum_float_ftz_param_1];
-; CHECK-SM80-NOF16-NEXT: min.NaN.ftz.f32 %r3, %r1, %r2;
+; CHECK-SM80-NOF16-NEXT: min.ftz.NaN.f32 %r3, %r1, %r2;
; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-SM80-NOF16-NEXT: ret;
%x = call float @llvm.minimum.f32(float %a, float %b)
@@ -856,9 +856,9 @@ define double @minimum_double(double %a, double %b) {
; CHECK-NEXT: setp.nan.f64 %p1, %rd1, %rd2;
; CHECK-NEXT: min.f64 %rd3, %rd1, %rd2;
; CHECK-NEXT: selp.f64 %rd4, 0d7FF8000000000000, %rd3, %p1;
-; CHECK-NEXT: setp.eq.s64 %p2, %rd1, -9223372036854775808;
+; CHECK-NEXT: setp.eq.b64 %p2, %rd1, -9223372036854775808;
; CHECK-NEXT: selp.f64 %rd5, %rd1, %rd4, %p2;
-; CHECK-NEXT: setp.eq.s64 %p3, %rd2, -9223372036854775808;
+; CHECK-NEXT: setp.eq.b64 %p3, %rd2, -9223372036854775808;
; CHECK-NEXT: selp.f64 %rd6, %rd2, %rd5, %p3;
; CHECK-NEXT: setp.eq.f64 %p4, %rd4, 0d0000000000000000;
; CHECK-NEXT: selp.f64 %rd7, %rd6, %rd4, %p4;
@@ -884,9 +884,9 @@ define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) {
; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs2, %rs4, %p1;
; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1;
; CHECK-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2;
-; CHECK-NOF16-NEXT: setp.eq.s16 %p3, %rs2, -32768;
+; CHECK-NOF16-NEXT: setp.eq.b16 %p3, %rs2, -32768;
; CHECK-NOF16-NEXT: selp.b16 %rs7, %rs2, %rs6, %p3;
-; CHECK-NOF16-NEXT: setp.eq.s16 %p4, %rs4, -32768;
+; CHECK-NOF16-NEXT: setp.eq.b16 %p4, %rs4, -32768;
; CHECK-NOF16-NEXT: selp.b16 %rs8, %rs4, %rs7, %p4;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs6;
; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000;
@@ -897,9 +897,9 @@ define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) {
; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs1, %rs3, %p6;
; CHECK-NOF16-NEXT: setp.nan.f32 %p7, %r5, %r4;
; CHECK-NOF16-NEXT: selp.b16 %rs11, 0x7E00, %rs10, %p7;
-; CHECK-NOF16-NEXT: setp.eq.s16 %p8, %rs1, -32768;
+; CHECK-NOF16-NEXT: setp.eq.b16 %p8, %rs1, -32768;
; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs1, %rs11, %p8;
-; CHECK-NOF16-NEXT: setp.eq.s16 %p9, %rs3, -32768;
+; CHECK-NOF16-NEXT: setp.eq.b16 %p9, %rs3, -32768;
; CHECK-NOF16-NEXT: selp.b16 %rs13, %rs3, %rs12, %p9;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs11;
; CHECK-NOF16-NEXT: setp.eq.f32 %p10, %r6, 0f00000000;
@@ -933,9 +933,9 @@ define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) {
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs2, %rs4, %p1;
; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1;
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2;
-; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p3, %rs2, -32768;
+; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p3, %rs2, -32768;
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs7, %rs2, %rs6, %p3;
-; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p4, %rs4, -32768;
+; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p4, %rs4, -32768;
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs8, %rs4, %rs7, %p4;
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs6;
; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000;
@@ -946,9 +946,9 @@ define <2 x half> @minimum_v2half(<2 x half> %a, <2 x half> %b) {
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs10, %rs1, %rs3, %p6;
; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p7, %r5, %r4;
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs11, 0x7E00, %rs10, %p7;
-; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p8, %rs1, -32768;
+; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p8, %rs1, -32768;
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs1, %rs11, %p8;
-; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p9, %rs3, -32768;
+; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p9, %rs3, -32768;
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, %rs3, %rs12, %p9;
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r6, %rs11;
; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p10, %r6, 0f00000000;
@@ -1152,9 +1152,9 @@ define half @maximum_half(half %a, half %b) {
; CHECK-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1;
; CHECK-NOF16-NEXT: selp.b16 %rs4, 0x7E00, %rs3, %p2;
-; CHECK-NOF16-NEXT: setp.eq.s16 %p3, %rs1, 0;
+; CHECK-NOF16-NEXT: setp.eq.b16 %p3, %rs1, 0;
; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3;
-; CHECK-NOF16-NEXT: setp.eq.s16 %p4, %rs2, 0;
+; CHECK-NOF16-NEXT: setp.eq.b16 %p4, %rs2, 0;
; CHECK-NOF16-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000;
@@ -1188,9 +1188,9 @@ define half @maximum_half(half %a, half %b) {
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs3, %rs1, %rs2, %p1;
; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1;
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs4, 0x7E00, %rs3, %p2;
-; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p3, %rs1, 0;
+; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p3, %rs1, 0;
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs1, %rs4, %p3;
-; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p4, %rs2, 0;
+; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p4, %rs2, 0;
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, %rs2, %rs5, %p4;
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs4;
; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000;
@@ -1291,9 +1291,9 @@ define float @maximum_float(float %a, float %b) {
; CHECK-NOF16-NEXT: setp.nan.f32 %p1, %r1, %r2;
; CHECK-NOF16-NEXT: max.f32 %r3, %r1, %r2;
; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1;
-; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, 0;
+; CHECK-NOF16-NEXT: setp.eq.b32 %p2, %r1, 0;
; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, %p2;
-; CHECK-NOF16-NEXT: setp.eq.s32 %p3, %r2, 0;
+; CHECK-NOF16-NEXT: setp.eq.b32 %p3, %r2, 0;
; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3;
; CHECK-NOF16-NEXT: setp.eq.f32 %p4, %r4, 0f00000000;
; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4;
@@ -1337,9 +1337,9 @@ define float @maximum_float_ftz(float %a, float %b) #1 {
; CHECK-NOF16-NEXT: setp.nan.ftz.f32 %p1, %r1, %r2;
; CHECK-NOF16-NEXT: max.ftz.f32 %r3, %r1, %r2;
; CHECK-NOF16-NEXT: selp.f32 %r4, 0f7FC00000, %r3, %p1;
-; CHECK-NOF16-NEXT: setp.eq.s32 %p2, %r1, 0;
+; CHECK-NOF16-NEXT: setp.eq.b32 %p2, %r1, 0;
; CHECK-NOF16-NEXT: selp.f32 %r5, %r1, %r4, %p2;
-; CHECK-NOF16-NEXT: setp.eq.s32 %p3, %r2, 0;
+; CHECK-NOF16-NEXT: setp.eq.b32 %p3, %r2, 0;
; CHECK-NOF16-NEXT: selp.f32 %r6, %r2, %r5, %p3;
; CHECK-NOF16-NEXT: setp.eq.ftz.f32 %p4, %r4, 0f00000000;
; CHECK-NOF16-NEXT: selp.f32 %r7, %r6, %r4, %p4;
@@ -1353,7 +1353,7 @@ define float @maximum_float_ftz(float %a, float %b) #1 {
; CHECK-F16-NEXT: // %bb.0:
; CHECK-F16-NEXT: ld.param.b32 %r1, [maximum_float_ftz_param_0];
; CHECK-F16-NEXT: ld.param.b32 %r2, [maximum_float_ftz_param_1];
-; CHECK-F16-NEXT: max.NaN.ftz.f32 %r3, %r1, %r2;
+; CHECK-F16-NEXT: max.ftz.NaN.f32 %r3, %r1, %r2;
; CHECK-F16-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-F16-NEXT: ret;
;
@@ -1364,7 +1364,7 @@ define float @maximum_float_ftz(float %a, float %b) #1 {
; CHECK-SM80-NOF16-NEXT: // %bb.0:
; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r1, [maximum_float_ftz_param_0];
; CHECK-SM80-NOF16-NEXT: ld.param.b32 %r2, [maximum_float_ftz_param_1];
-; CHECK-SM80-NOF16-NEXT: max.NaN.ftz.f32 %r3, %r1, %r2;
+; CHECK-SM80-NOF16-NEXT: max.ftz.NaN.f32 %r3, %r1, %r2;
; CHECK-SM80-NOF16-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-SM80-NOF16-NEXT: ret;
%x = call float @llvm.maximum.f32(float %a, float %b)
@@ -1383,9 +1383,9 @@ define double @maximum_double(double %a, double %b) {
; CHECK-NEXT: setp.nan.f64 %p1, %rd1, %rd2;
; CHECK-NEXT: max.f64 %rd3, %rd1, %rd2;
; CHECK-NEXT: selp.f64 %rd4, 0d7FF8000000000000, %rd3, %p1;
-; CHECK-NEXT: setp.eq.s64 %p2, %rd1, 0;
+; CHECK-NEXT: setp.eq.b64 %p2, %rd1, 0;
; CHECK-NEXT: selp.f64 %rd5, %rd1, %rd4, %p2;
-; CHECK-NEXT: setp.eq.s64 %p3, %rd2, 0;
+; CHECK-NEXT: setp.eq.b64 %p3, %rd2, 0;
; CHECK-NEXT: selp.f64 %rd6, %rd2, %rd5, %p3;
; CHECK-NEXT: setp.eq.f64 %p4, %rd4, 0d0000000000000000;
; CHECK-NEXT: selp.f64 %rd7, %rd6, %rd4, %p4;
@@ -1411,9 +1411,9 @@ define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) {
; CHECK-NOF16-NEXT: selp.b16 %rs5, %rs2, %rs4, %p1;
; CHECK-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1;
; CHECK-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2;
-; CHECK-NOF16-NEXT: setp.eq.s16 %p3, %rs2, 0;
+; CHECK-NOF16-NEXT: setp.eq.b16 %p3, %rs2, 0;
; CHECK-NOF16-NEXT: selp.b16 %rs7, %rs2, %rs6, %p3;
-; CHECK-NOF16-NEXT: setp.eq.s16 %p4, %rs4, 0;
+; CHECK-NOF16-NEXT: setp.eq.b16 %p4, %rs4, 0;
; CHECK-NOF16-NEXT: selp.b16 %rs8, %rs4, %rs7, %p4;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r3, %rs6;
; CHECK-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000;
@@ -1424,9 +1424,9 @@ define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) {
; CHECK-NOF16-NEXT: selp.b16 %rs10, %rs1, %rs3, %p6;
; CHECK-NOF16-NEXT: setp.nan.f32 %p7, %r5, %r4;
; CHECK-NOF16-NEXT: selp.b16 %rs11, 0x7E00, %rs10, %p7;
-; CHECK-NOF16-NEXT: setp.eq.s16 %p8, %rs1, 0;
+; CHECK-NOF16-NEXT: setp.eq.b16 %p8, %rs1, 0;
; CHECK-NOF16-NEXT: selp.b16 %rs12, %rs1, %rs11, %p8;
-; CHECK-NOF16-NEXT: setp.eq.s16 %p9, %rs3, 0;
+; CHECK-NOF16-NEXT: setp.eq.b16 %p9, %rs3, 0;
; CHECK-NOF16-NEXT: selp.b16 %rs13, %rs3, %rs12, %p9;
; CHECK-NOF16-NEXT: cvt.f32.f16 %r6, %rs11;
; CHECK-NOF16-NEXT: setp.eq.f32 %p10, %r6, 0f00000000;
@@ -1460,9 +1460,9 @@ define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) {
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs5, %rs2, %rs4, %p1;
; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p2, %r2, %r1;
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs6, 0x7E00, %rs5, %p2;
-; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p3, %rs2, 0;
+; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p3, %rs2, 0;
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs7, %rs2, %rs6, %p3;
-; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p4, %rs4, 0;
+; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p4, %rs4, 0;
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs8, %rs4, %rs7, %p4;
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r3, %rs6;
; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p5, %r3, 0f00000000;
@@ -1473,9 +1473,9 @@ define <2 x half> @maximum_v2half(<2 x half> %a, <2 x half> %b) {
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs10, %rs1, %rs3, %p6;
; CHECK-SM80-NOF16-NEXT: setp.nan.f32 %p7, %r5, %r4;
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs11, 0x7E00, %rs10, %p7;
-; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p8, %rs1, 0;
+; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p8, %rs1, 0;
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs12, %rs1, %rs11, %p8;
-; CHECK-SM80-NOF16-NEXT: setp.eq.s16 %p9, %rs3, 0;
+; CHECK-SM80-NOF16-NEXT: setp.eq.b16 %p9, %rs3, 0;
; CHECK-SM80-NOF16-NEXT: selp.b16 %rs13, %rs3, %rs12, %p9;
; CHECK-SM80-NOF16-NEXT: cvt.f32.f16 %r6, %rs11;
; CHECK-SM80-NOF16-NEXT: setp.eq.f32 %p10, %r6, 0f00000000;
diff --git a/llvm/test/CodeGen/NVPTX/sext-setcc.ll b/llvm/test/CodeGen/NVPTX/sext-setcc.ll
index 0af8190f20d18..f6e6196345fcb 100644
--- a/llvm/test/CodeGen/NVPTX/sext-setcc.ll
+++ b/llvm/test/CodeGen/NVPTX/sext-setcc.ll
@@ -12,8 +12,8 @@ define <2 x i16> @sext_setcc_v2i1_to_v2i16(ptr %p) {
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: ld.param.b64 %rd1, [sext_setcc_v2i1_to_v2i16_param_0];
; CHECK-NEXT: ld.v2.b16 {%rs1, %rs2}, [%rd1];
-; CHECK-NEXT: setp.eq.s16 %p1, %rs1, 0;
-; CHECK-NEXT: setp.eq.s16 %p2, %rs2, 0;
+; CHECK-NEXT: setp.eq.b16 %p1, %rs1, 0;
+; CHECK-NEXT: setp.eq.b16 %p2, %rs2, 0;
; CHECK-NEXT: selp.b16 %rs3, -1, 0, %p2;
; CHECK-NEXT: selp.b16 %rs4, -1, 0, %p1;
; CHECK-NEXT: st.param.v2.b16 [func_retval0], {%rs4, %rs3};
@@ -39,19 +39,19 @@ define <4 x i8> @sext_setcc_v4i1_to_v4i8(ptr %p) {
; CHECK-NEXT: bfe.u32 %r2, %r1, 0, 8;
; CHECK-NEXT: cvt.u16.u32 %rs1, %r2;
; CHECK-NEXT: and.b16 %rs2, %rs1, 255;
-; CHECK-NEXT: setp.eq.s16 %p1, %rs2, 0;
+; CHECK-NEXT: setp.eq.b16 %p1, %rs2, 0;
; CHECK-NEXT: bfe.u32 %r3, %r1, 8, 8;
; CHECK-NEXT: cvt.u16.u32 %rs3, %r3;
; CHECK-NEXT: and.b16 %rs4, %rs3, 255;
-; CHECK-NEXT: setp.eq.s16 %p2, %rs4, 0;
+; CHECK-NEXT: setp.eq.b16 %p2, %rs4, 0;
; CHECK-NEXT: bfe.u32 %r4, %r1, 16, 8;
; CHECK-NEXT: cvt.u16.u32 %rs5, %r4;
; CHECK-NEXT: and.b16 %rs6, %rs5, 255;
-; CHECK-NEXT: setp.eq.s16 %p3, %rs6, 0;
+; CHECK-NEXT: setp.eq.b16 %p3, %rs6, 0;
; CHECK-NEXT: bfe.u32 %r5, %r1, 24, 8;
; CHECK-NEXT: cvt.u16.u32 %rs7, %r5;
; CHECK-NEXT: and.b16 %rs8, %rs7, 255;
-; CHECK-NEXT: setp.eq.s16 %p4, %rs8, 0;
+; CHECK-NEXT: setp.eq.b16 %p4, %rs8, 0;
; CHECK-NEXT: selp.b32 %r6, -1, 0, %p4;
; CHECK-NEXT: selp.b32 %r7, -1, 0, %p3;
; CHECK-NEXT: prmt.b32 %r8, %r7, %r6, 0x3340U;
diff --git a/llvm/test/CodeGen/NVPTX/tid-range.ll b/llvm/test/CodeGen/NVPTX/tid-range.ll
index 019814e47c2b1..3ec33eace6441 100644
--- a/llvm/test/CodeGen/NVPTX/tid-range.ll
+++ b/llvm/test/CodeGen/NVPTX/tid-range.ll
@@ -12,7 +12,7 @@ define i1 @test1() {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: mov.u32 %r1, %tid.x;
-; CHECK-NEXT: setp.eq.s32 %p1, %r1, 1;
+; CHECK-NEXT: setp.eq.b32 %p1, %r1, 1;
; CHECK-NEXT: selp.b32 %r2, -1, 0, %p1;
; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
; CHECK-NEXT: ret;
More information about the llvm-commits
mailing list