[llvm] ff25115 - [NVPTX] cleanup & canonicalize `mov` (#129344)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 23 14:53:04 PDT 2025
Author: Justin Fargnoli
Date: 2025-03-23T14:53:00-07:00
New Revision: ff25115ca0e150d723c75ae981b9449e1028ed2d
URL: https://github.com/llvm/llvm-project/commit/ff25115ca0e150d723c75ae981b9449e1028ed2d
DIFF: https://github.com/llvm/llvm-project/commit/ff25115ca0e150d723c75ae981b9449e1028ed2d.diff
LOG: [NVPTX] cleanup & canonicalize `mov` (#129344)
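Use shared TableGen classes (`MOVr` for register-to-register moves, `MOVi` for
immediate-to-register moves) to define `mov`, and canonicalize the `mov`
instruction to always use the `b<bit-size>` suffix (predicate moves keep `pred`).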
Added:
Modified:
llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
llvm/test/CodeGen/NVPTX/atomics-sm70.ll
llvm/test/CodeGen/NVPTX/atomics-sm90.ll
llvm/test/CodeGen/NVPTX/atomics.ll
llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll
llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll
llvm/test/CodeGen/NVPTX/cmpxchg.ll
llvm/test/CodeGen/NVPTX/disjoint-or-addr.ll
llvm/test/CodeGen/NVPTX/div.ll
llvm/test/CodeGen/NVPTX/f16-instructions.ll
llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
llvm/test/CodeGen/NVPTX/fma.ll
llvm/test/CodeGen/NVPTX/i128.ll
llvm/test/CodeGen/NVPTX/indirect_byval.ll
llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll
llvm/test/CodeGen/NVPTX/inline-asm-b128-test2.ll
llvm/test/CodeGen/NVPTX/local-stack-frame.ll
llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
llvm/test/CodeGen/NVPTX/no-extra-parens.ll
llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll
llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll
llvm/test/CodeGen/NVPTX/variadics-backend.ll
llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index f2757c5e49b33..ec1f969494cd1 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -1008,7 +1008,7 @@ void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
Opc = TM.is64Bit() ? NVPTX::cvta_to_local_64 : NVPTX::cvta_to_local;
break;
case ADDRESS_SPACE_PARAM:
- Opc = TM.is64Bit() ? NVPTX::IMOV64rr : NVPTX::IMOV32rr;
+ Opc = TM.is64Bit() ? NVPTX::IMOV64r : NVPTX::IMOV32r;
break;
}
@@ -2172,10 +2172,10 @@ bool NVPTXDAGToDAGISel::tryBF16ArithToFMA(SDNode *N) {
auto API = APF.bitcastToAPInt();
API = API.concat(API);
auto Const = CurDAG->getTargetConstant(API, DL, MVT::i32);
- return SDValue(CurDAG->getMachineNode(NVPTX::IMOV32ri, DL, VT, Const), 0);
+ return SDValue(CurDAG->getMachineNode(NVPTX::IMOV32i, DL, VT, Const), 0);
}
auto Const = CurDAG->getTargetConstantFP(APF, DL, VT);
- return SDValue(CurDAG->getMachineNode(NVPTX::BFMOV16ri, DL, VT, Const), 0);
+ return SDValue(CurDAG->getMachineNode(NVPTX::BFMOV16i, DL, VT, Const), 0);
};
switch (N->getOpcode()) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 18b513039ecea..0551954444e57 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -40,22 +40,22 @@ void NVPTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
unsigned Op;
if (DestRC == &NVPTX::Int1RegsRegClass) {
- Op = NVPTX::IMOV1rr;
+ Op = NVPTX::IMOV1r;
} else if (DestRC == &NVPTX::Int16RegsRegClass) {
- Op = NVPTX::IMOV16rr;
+ Op = NVPTX::MOV16r;
} else if (DestRC == &NVPTX::Int32RegsRegClass) {
- Op = (SrcRC == &NVPTX::Int32RegsRegClass ? NVPTX::IMOV32rr
+ Op = (SrcRC == &NVPTX::Int32RegsRegClass ? NVPTX::IMOV32r
: NVPTX::BITCONVERT_32_F2I);
} else if (DestRC == &NVPTX::Int64RegsRegClass) {
- Op = (SrcRC == &NVPTX::Int64RegsRegClass ? NVPTX::IMOV64rr
+ Op = (SrcRC == &NVPTX::Int64RegsRegClass ? NVPTX::IMOV64r
: NVPTX::BITCONVERT_64_F2I);
} else if (DestRC == &NVPTX::Int128RegsRegClass) {
- Op = NVPTX::IMOV128rr;
+ Op = NVPTX::IMOV128r;
} else if (DestRC == &NVPTX::Float32RegsRegClass) {
- Op = (SrcRC == &NVPTX::Float32RegsRegClass ? NVPTX::FMOV32rr
+ Op = (SrcRC == &NVPTX::Float32RegsRegClass ? NVPTX::FMOV32r
: NVPTX::BITCONVERT_32_I2F);
} else if (DestRC == &NVPTX::Float64RegsRegClass) {
- Op = (SrcRC == &NVPTX::Float64RegsRegClass ? NVPTX::FMOV64rr
+ Op = (SrcRC == &NVPTX::Float64RegsRegClass ? NVPTX::FMOV64r
: NVPTX::BITCONVERT_64_I2F);
} else {
llvm_unreachable("Bad register copy");
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 83509b1078c57..a65bd14ebfe5f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1945,68 +1945,53 @@ def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
// Load a memory address into a u32 or u64 register.
def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins ADDR_base:$a),
- "mov.u32 \t$dst, $a;",
+ "mov.b32 \t$dst, $a;",
[(set i32:$dst, (Wrapper tglobaladdr:$a))]>;
def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins ADDR_base:$a),
- "mov.u64 \t$dst, $a;",
+ "mov.b64 \t$dst, $a;",
[(set i64:$dst, (Wrapper tglobaladdr:$a))]>;
// Get pointer to local stack.
let hasSideEffects = false in {
def MOV_DEPOT_ADDR : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num),
- "mov.u32 \t$d, __local_depot$num;", []>;
+ "mov.b32 \t$d, __local_depot$num;", []>;
def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num),
- "mov.u64 \t$d, __local_depot$num;", []>;
+ "mov.b64 \t$d, __local_depot$num;", []>;
}
// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
-let hasSideEffects=0, isAsCheapAsAMove=1 in {
- def IMOV1rr : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
- "mov.pred \t$dst, $sss;", []>;
- def IMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
- "mov.u16 \t$dst, $sss;", []>;
- def IMOV32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
- "mov.u32 \t$dst, $sss;", []>;
- def IMOV64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
- "mov.u64 \t$dst, $sss;", []>;
- def IMOV128rr : NVPTXInst<(outs Int128Regs:$dst), (ins Int128Regs:$sss),
- "mov.b128 \t$dst, $sss;", []>;
-
- def FMOV32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
- "mov.f32 \t$dst, $src;", []>;
- def FMOV64rr : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
- "mov.f64 \t$dst, $src;", []>;
-
- def IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
- "mov.pred \t$dst, $src;",
- [(set i1:$dst, imm:$src)]>;
- def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
- "mov.b16 \t$dst, $src;",
- [(set i16:$dst, imm:$src)]>;
- def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
- "mov.b32 \t$dst, $src;",
- [(set i32:$dst, imm:$src)]>;
- def IMOV64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
- "mov.b64 \t$dst, $src;",
- [(set i64:$dst, imm:$src)]>;
-
- def FMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins f16imm:$src),
- "mov.b16 \t$dst, $src;",
- [(set f16:$dst, fpimm:$src)]>;
- def BFMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins bf16imm:$src),
- "mov.b16 \t$dst, $src;",
- [(set bf16:$dst, fpimm:$src)]>;
- def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
- "mov.f32 \t$dst, $src;",
- [(set f32:$dst, fpimm:$src)]>;
- def FMOV64ri : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
- "mov.f64 \t$dst, $src;",
- [(set f64:$dst, fpimm:$src)]>;
-}
-
-def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
-def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64ri texternalsym:$dst)>;
+let hasSideEffects = false, isAsCheapAsAMove = true in {
+ // Class for register-to-register moves
+ class MOVr<RegisterClass RC, string OpStr> :
+ NVPTXInst<(outs RC:$dst), (ins RC:$src),
+ "mov." # OpStr # " \t$dst, $src;", []>;
+
+ // Class for immediate-to-register moves
+ class MOVi<RegisterClass RC, string OpStr, ValueType VT, Operand IMMType, SDNode ImmNode> :
+ NVPTXInst<(outs RC:$dst), (ins IMMType:$src),
+ "mov." # OpStr # " \t$dst, $src;",
+ [(set VT:$dst, ImmNode:$src)]>;
+}
+
+def IMOV1r : MOVr<Int1Regs, "pred">;
+def IMOV1i : MOVi<Int1Regs, "pred", i1, i1imm, imm>;
+def MOV16r : MOVr<Int16Regs, "b16">;
+def IMOV16i : MOVi<Int16Regs, "b16", i16, i16imm, imm>;
+def IMOV32r : MOVr<Int32Regs, "b32">;
+def IMOV32i : MOVi<Int32Regs, "b32", i32, i32imm, imm>;
+def IMOV64r : MOVr<Int64Regs, "b64">;
+def IMOV64i : MOVi<Int64Regs, "b64", i64, i64imm, imm>;
+def IMOV128r : MOVr<Int128Regs, "b128">;
+def FMOV16i : MOVi<Int16Regs, "b16", f16, f16imm, fpimm>;
+def BFMOV16i : MOVi<Int16Regs, "b16", bf16, bf16imm, fpimm>;
+def FMOV32r : MOVr<Float32Regs, "b32">;
+def FMOV32i : MOVi<Float32Regs, "b32", f32, f32imm, fpimm>;
+def FMOV64r : MOVr<Float64Regs, "b64">;
+def FMOV64i : MOVi<Float64Regs, "b64", f64, f64imm, fpimm>;
+
+def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32i texternalsym:$dst)>;
+def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64i texternalsym:$dst)>;
//---- Copy Frame Index ----
def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins ADDR:$addr),
@@ -2717,8 +2702,8 @@ def ProxyRegI1 : ProxyRegInst<"pred", i1, Int1Regs>;
def ProxyRegI16 : ProxyRegInst<"b16", i16, Int16Regs>;
def ProxyRegI32 : ProxyRegInst<"b32", i32, Int32Regs>;
def ProxyRegI64 : ProxyRegInst<"b64", i64, Int64Regs>;
-def ProxyRegF32 : ProxyRegInst<"f32", f32, Float32Regs>;
-def ProxyRegF64 : ProxyRegInst<"f64", f64, Float64Regs>;
+def ProxyRegF32 : ProxyRegInst<"b32", f32, Float32Regs>;
+def ProxyRegF64 : ProxyRegInst<"b64", f64, Float64Regs>;
foreach vt = [f16, bf16] in {
def: Pat<(vt (ProxyReg vt:$src)), (ProxyRegI16 $src)>;
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
index b180928af82a4..b14295020bc0e 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
@@ -72,7 +72,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: or.b32 %r32, %r31, %r30;
; CHECKPTX62-NEXT: atom.cas.b32 %r6, [%r1], %r54, %r32;
; CHECKPTX62-NEXT: setp.ne.s32 %p1, %r6, %r54;
-; CHECKPTX62-NEXT: mov.u32 %r54, %r6;
+; CHECKPTX62-NEXT: mov.b32 %r54, %r6;
; CHECKPTX62-NEXT: @%p1 bra $L__BB0_1;
; CHECKPTX62-NEXT: // %bb.2: // %atomicrmw.end44
; CHECKPTX62-NEXT: ld.u32 %r55, [%r1];
@@ -88,7 +88,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: or.b32 %r37, %r36, %r35;
; CHECKPTX62-NEXT: atom.cas.b32 %r9, [%r1], %r55, %r37;
; CHECKPTX62-NEXT: setp.ne.s32 %p2, %r9, %r55;
-; CHECKPTX62-NEXT: mov.u32 %r55, %r9;
+; CHECKPTX62-NEXT: mov.b32 %r55, %r9;
; CHECKPTX62-NEXT: @%p2 bra $L__BB0_3;
; CHECKPTX62-NEXT: // %bb.4: // %atomicrmw.end26
; CHECKPTX62-NEXT: and.b32 %r10, %r22, -4;
@@ -109,7 +109,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: or.b32 %r45, %r44, %r43;
; CHECKPTX62-NEXT: atom.global.cas.b32 %r15, [%r10], %r56, %r45;
; CHECKPTX62-NEXT: setp.ne.s32 %p3, %r15, %r56;
-; CHECKPTX62-NEXT: mov.u32 %r56, %r15;
+; CHECKPTX62-NEXT: mov.b32 %r56, %r15;
; CHECKPTX62-NEXT: @%p3 bra $L__BB0_5;
; CHECKPTX62-NEXT: // %bb.6: // %atomicrmw.end8
; CHECKPTX62-NEXT: and.b32 %r16, %r23, -4;
@@ -130,7 +130,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: or.b32 %r53, %r52, %r51;
; CHECKPTX62-NEXT: atom.shared.cas.b32 %r21, [%r16], %r57, %r53;
; CHECKPTX62-NEXT: setp.ne.s32 %p4, %r21, %r57;
-; CHECKPTX62-NEXT: mov.u32 %r57, %r21;
+; CHECKPTX62-NEXT: mov.b32 %r57, %r21;
; CHECKPTX62-NEXT: @%p4 bra $L__BB0_7;
; CHECKPTX62-NEXT: // %bb.8: // %atomicrmw.end
; CHECKPTX62-NEXT: ret;
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
index 9027bd6a14780..f27e574724ce4 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
@@ -73,7 +73,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: or.b32 %r32, %r31, %r30;
; CHECKPTX71-NEXT: atom.relaxed.cas.b32 %r6, [%r1], %r54, %r32;
; CHECKPTX71-NEXT: setp.ne.s32 %p1, %r6, %r54;
-; CHECKPTX71-NEXT: mov.u32 %r54, %r6;
+; CHECKPTX71-NEXT: mov.b32 %r54, %r6;
; CHECKPTX71-NEXT: @%p1 bra $L__BB0_1;
; CHECKPTX71-NEXT: // %bb.2: // %atomicrmw.end44
; CHECKPTX71-NEXT: ld.u32 %r55, [%r1];
@@ -89,7 +89,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: or.b32 %r37, %r36, %r35;
; CHECKPTX71-NEXT: atom.relaxed.cas.b32 %r9, [%r1], %r55, %r37;
; CHECKPTX71-NEXT: setp.ne.s32 %p2, %r9, %r55;
-; CHECKPTX71-NEXT: mov.u32 %r55, %r9;
+; CHECKPTX71-NEXT: mov.b32 %r55, %r9;
; CHECKPTX71-NEXT: @%p2 bra $L__BB0_3;
; CHECKPTX71-NEXT: // %bb.4: // %atomicrmw.end26
; CHECKPTX71-NEXT: and.b32 %r10, %r22, -4;
@@ -111,7 +111,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: or.b32 %r45, %r44, %r43;
; CHECKPTX71-NEXT: atom.relaxed.global.cas.b32 %r15, [%r10], %r56, %r45;
; CHECKPTX71-NEXT: setp.ne.s32 %p3, %r15, %r56;
-; CHECKPTX71-NEXT: mov.u32 %r56, %r15;
+; CHECKPTX71-NEXT: mov.b32 %r56, %r15;
; CHECKPTX71-NEXT: @%p3 bra $L__BB0_5;
; CHECKPTX71-NEXT: // %bb.6: // %atomicrmw.end8
; CHECKPTX71-NEXT: and.b32 %r16, %r23, -4;
@@ -133,7 +133,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: or.b32 %r53, %r52, %r51;
; CHECKPTX71-NEXT: atom.relaxed.shared.cas.b32 %r21, [%r16], %r57, %r53;
; CHECKPTX71-NEXT: setp.ne.s32 %p4, %r21, %r57;
-; CHECKPTX71-NEXT: mov.u32 %r57, %r21;
+; CHECKPTX71-NEXT: mov.b32 %r57, %r21;
; CHECKPTX71-NEXT: @%p4 bra $L__BB0_7;
; CHECKPTX71-NEXT: // %bb.8: // %atomicrmw.end
; CHECKPTX71-NEXT: ret;
diff --git a/llvm/test/CodeGen/NVPTX/atomics.ll b/llvm/test/CodeGen/NVPTX/atomics.ll
index 2f58d279f82c3..e1fbb53891902 100644
--- a/llvm/test/CodeGen/NVPTX/atomics.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics.ll
@@ -429,7 +429,7 @@ define half @atomicrmw_add_f16_generic(ptr %addr, half %val) {
; CHECK-NEXT: membar.sys;
; CHECK-NEXT: atom.cas.b32 %r5, [%rd1], %r16, %r14;
; CHECK-NEXT: setp.ne.s32 %p1, %r5, %r16;
-; CHECK-NEXT: mov.u32 %r16, %r5;
+; CHECK-NEXT: mov.b32 %r16, %r5;
; CHECK-NEXT: @%p1 bra $L__BB22_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
; CHECK-NEXT: shr.u32 %r15, %r5, %r1;
diff --git a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
index 1c9d271902fd3..9474b01f95ee8 100644
--- a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
+++ b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
@@ -21,7 +21,7 @@ entry:
%buf = alloca [16 x i8], align 4
; CHECK: .local .align 4 .b8 __local_depot0[16]
-; CHECK: mov.u64 %SPL
+; CHECK: mov.b64 %SPL
; CHECK: ld.param.u64 %rd[[A_REG:[0-9]+]], [kernel_func_param_0]
; CHECK: cvta.to.global.u64 %rd[[A1_REG:[0-9]+]], %rd[[A_REG]]
diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
index ea308c2a7673b..442da4debea8f 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm60.ll
@@ -38,7 +38,7 @@ define i8 @monotonic_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB0_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB0_1;
; SM60-NEXT: $L__BB0_3: // %partword.cmpxchg.end
; SM60-NEXT: st.param.b32 [func_retval0], %r13;
@@ -83,7 +83,7 @@ define i8 @monotonic_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
; SM60-NEXT: // in Loop: Header=BB1_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB1_1;
; SM60-NEXT: $L__BB1_3: // %partword.cmpxchg.end
; SM60-NEXT: st.param.b32 [func_retval0], %r13;
@@ -128,7 +128,7 @@ define i8 @monotonic_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %ne
; SM60-NEXT: // in Loop: Header=BB2_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB2_1;
; SM60-NEXT: $L__BB2_3: // %partword.cmpxchg.end
; SM60-NEXT: st.param.b32 [func_retval0], %r13;
@@ -173,7 +173,7 @@ define i8 @monotonic_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB3_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB3_1;
; SM60-NEXT: $L__BB3_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -219,7 +219,7 @@ define i8 @monotonic_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: // in Loop: Header=BB4_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB4_1;
; SM60-NEXT: $L__BB4_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -265,7 +265,7 @@ define i8 @monotonic_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: // in Loop: Header=BB5_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB5_1;
; SM60-NEXT: $L__BB5_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -312,7 +312,7 @@ define i8 @monotonic_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB6_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB6_1;
; SM60-NEXT: $L__BB6_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -359,7 +359,7 @@ define i8 @monotonic_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: // in Loop: Header=BB7_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB7_1;
; SM60-NEXT: $L__BB7_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -406,7 +406,7 @@ define i8 @monotonic_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: // in Loop: Header=BB8_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB8_1;
; SM60-NEXT: $L__BB8_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -452,7 +452,7 @@ define i8 @acquire_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB9_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB9_1;
; SM60-NEXT: $L__BB9_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -498,7 +498,7 @@ define i8 @acquire_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: // in Loop: Header=BB10_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB10_1;
; SM60-NEXT: $L__BB10_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -544,7 +544,7 @@ define i8 @acquire_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: // in Loop: Header=BB11_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB11_1;
; SM60-NEXT: $L__BB11_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -590,7 +590,7 @@ define i8 @acquire_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB12_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB12_1;
; SM60-NEXT: $L__BB12_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -636,7 +636,7 @@ define i8 @acquire_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB13_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB13_1;
; SM60-NEXT: $L__BB13_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -682,7 +682,7 @@ define i8 @acquire_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB14_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB14_1;
; SM60-NEXT: $L__BB14_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -729,7 +729,7 @@ define i8 @acquire_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB15_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB15_1;
; SM60-NEXT: $L__BB15_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -776,7 +776,7 @@ define i8 @acquire_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB16_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB16_1;
; SM60-NEXT: $L__BB16_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -823,7 +823,7 @@ define i8 @acquire_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB17_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB17_1;
; SM60-NEXT: $L__BB17_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -870,7 +870,7 @@ define i8 @release_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB18_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB18_1;
; SM60-NEXT: $L__BB18_3: // %partword.cmpxchg.end
; SM60-NEXT: st.param.b32 [func_retval0], %r13;
@@ -916,7 +916,7 @@ define i8 @release_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: // in Loop: Header=BB19_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB19_1;
; SM60-NEXT: $L__BB19_3: // %partword.cmpxchg.end
; SM60-NEXT: st.param.b32 [func_retval0], %r13;
@@ -962,7 +962,7 @@ define i8 @release_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: // in Loop: Header=BB20_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB20_1;
; SM60-NEXT: $L__BB20_3: // %partword.cmpxchg.end
; SM60-NEXT: st.param.b32 [func_retval0], %r13;
@@ -1008,7 +1008,7 @@ define i8 @release_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB21_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB21_1;
; SM60-NEXT: $L__BB21_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -1055,7 +1055,7 @@ define i8 @release_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB22_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB22_1;
; SM60-NEXT: $L__BB22_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -1102,7 +1102,7 @@ define i8 @release_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB23_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB23_1;
; SM60-NEXT: $L__BB23_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -1149,7 +1149,7 @@ define i8 @release_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB24_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB24_1;
; SM60-NEXT: $L__BB24_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -1196,7 +1196,7 @@ define i8 @release_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB25_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB25_1;
; SM60-NEXT: $L__BB25_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -1243,7 +1243,7 @@ define i8 @release_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB26_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB26_1;
; SM60-NEXT: $L__BB26_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -1290,7 +1290,7 @@ define i8 @acq_rel_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB27_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB27_1;
; SM60-NEXT: $L__BB27_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -1337,7 +1337,7 @@ define i8 @acq_rel_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: // in Loop: Header=BB28_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB28_1;
; SM60-NEXT: $L__BB28_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -1384,7 +1384,7 @@ define i8 @acq_rel_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: // in Loop: Header=BB29_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB29_1;
; SM60-NEXT: $L__BB29_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -1431,7 +1431,7 @@ define i8 @acq_rel_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB30_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB30_1;
; SM60-NEXT: $L__BB30_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -1478,7 +1478,7 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB31_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB31_1;
; SM60-NEXT: $L__BB31_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -1525,7 +1525,7 @@ define i8 @acq_rel_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB32_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB32_1;
; SM60-NEXT: $L__BB32_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -1572,7 +1572,7 @@ define i8 @acq_rel_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB33_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB33_1;
; SM60-NEXT: $L__BB33_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -1619,7 +1619,7 @@ define i8 @acq_rel_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB34_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB34_1;
; SM60-NEXT: $L__BB34_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -1666,7 +1666,7 @@ define i8 @acq_rel_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB35_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB35_1;
; SM60-NEXT: $L__BB35_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -1713,7 +1713,7 @@ define i8 @seq_cst_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB36_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB36_1;
; SM60-NEXT: $L__BB36_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -1760,7 +1760,7 @@ define i8 @seq_cst_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: // in Loop: Header=BB37_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB37_1;
; SM60-NEXT: $L__BB37_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -1807,7 +1807,7 @@ define i8 @seq_cst_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM60-NEXT: // in Loop: Header=BB38_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB38_1;
; SM60-NEXT: $L__BB38_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -1854,7 +1854,7 @@ define i8 @seq_cst_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB39_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB39_1;
; SM60-NEXT: $L__BB39_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -1901,7 +1901,7 @@ define i8 @seq_cst_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB40_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB40_1;
; SM60-NEXT: $L__BB40_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -1948,7 +1948,7 @@ define i8 @seq_cst_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB41_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB41_1;
; SM60-NEXT: $L__BB41_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -1995,7 +1995,7 @@ define i8 @seq_cst_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB42_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB42_1;
; SM60-NEXT: $L__BB42_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -2042,7 +2042,7 @@ define i8 @seq_cst_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB43_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB43_1;
; SM60-NEXT: $L__BB43_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -2089,7 +2089,7 @@ define i8 @seq_cst_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM60-NEXT: // in Loop: Header=BB44_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM60-NEXT: mov.u32 %r20, %r8;
+; SM60-NEXT: mov.b32 %r20, %r8;
; SM60-NEXT: @%p2 bra $L__BB44_1;
; SM60-NEXT: $L__BB44_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -2134,7 +2134,7 @@ define i16 @monotonic_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: // in Loop: Header=BB45_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB45_1;
; SM60-NEXT: $L__BB45_3: // %partword.cmpxchg.end
; SM60-NEXT: st.param.b32 [func_retval0], %r14;
@@ -2178,7 +2178,7 @@ define i16 @monotonic_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16
; SM60-NEXT: // in Loop: Header=BB46_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB46_1;
; SM60-NEXT: $L__BB46_3: // %partword.cmpxchg.end
; SM60-NEXT: st.param.b32 [func_retval0], %r14;
@@ -2222,7 +2222,7 @@ define i16 @monotonic_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16
; SM60-NEXT: // in Loop: Header=BB47_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB47_1;
; SM60-NEXT: $L__BB47_3: // %partword.cmpxchg.end
; SM60-NEXT: st.param.b32 [func_retval0], %r14;
@@ -2266,7 +2266,7 @@ define i16 @monotonic_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: // in Loop: Header=BB48_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB48_1;
; SM60-NEXT: $L__BB48_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -2311,7 +2311,7 @@ define i16 @monotonic_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM60-NEXT: // in Loop: Header=BB49_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB49_1;
; SM60-NEXT: $L__BB49_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -2356,7 +2356,7 @@ define i16 @monotonic_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM60-NEXT: // in Loop: Header=BB50_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB50_1;
; SM60-NEXT: $L__BB50_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -2402,7 +2402,7 @@ define i16 @monotonic_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: // in Loop: Header=BB51_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB51_1;
; SM60-NEXT: $L__BB51_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -2448,7 +2448,7 @@ define i16 @monotonic_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM60-NEXT: // in Loop: Header=BB52_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB52_1;
; SM60-NEXT: $L__BB52_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -2494,7 +2494,7 @@ define i16 @monotonic_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM60-NEXT: // in Loop: Header=BB53_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB53_1;
; SM60-NEXT: $L__BB53_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -2539,7 +2539,7 @@ define i16 @acquire_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: // in Loop: Header=BB54_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB54_1;
; SM60-NEXT: $L__BB54_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -2584,7 +2584,7 @@ define i16 @acquire_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM60-NEXT: // in Loop: Header=BB55_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB55_1;
; SM60-NEXT: $L__BB55_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -2629,7 +2629,7 @@ define i16 @acquire_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM60-NEXT: // in Loop: Header=BB56_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB56_1;
; SM60-NEXT: $L__BB56_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -2674,7 +2674,7 @@ define i16 @acquire_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: // in Loop: Header=BB57_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB57_1;
; SM60-NEXT: $L__BB57_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -2719,7 +2719,7 @@ define i16 @acquire_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: // in Loop: Header=BB58_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB58_1;
; SM60-NEXT: $L__BB58_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -2764,7 +2764,7 @@ define i16 @acquire_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: // in Loop: Header=BB59_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB59_1;
; SM60-NEXT: $L__BB59_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -2810,7 +2810,7 @@ define i16 @acquire_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: // in Loop: Header=BB60_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB60_1;
; SM60-NEXT: $L__BB60_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -2856,7 +2856,7 @@ define i16 @acquire_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: // in Loop: Header=BB61_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB61_1;
; SM60-NEXT: $L__BB61_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -2902,7 +2902,7 @@ define i16 @acquire_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: // in Loop: Header=BB62_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB62_1;
; SM60-NEXT: $L__BB62_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -2948,7 +2948,7 @@ define i16 @release_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: // in Loop: Header=BB63_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB63_1;
; SM60-NEXT: $L__BB63_3: // %partword.cmpxchg.end
; SM60-NEXT: st.param.b32 [func_retval0], %r14;
@@ -2993,7 +2993,7 @@ define i16 @release_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM60-NEXT: // in Loop: Header=BB64_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB64_1;
; SM60-NEXT: $L__BB64_3: // %partword.cmpxchg.end
; SM60-NEXT: st.param.b32 [func_retval0], %r14;
@@ -3038,7 +3038,7 @@ define i16 @release_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM60-NEXT: // in Loop: Header=BB65_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB65_1;
; SM60-NEXT: $L__BB65_3: // %partword.cmpxchg.end
; SM60-NEXT: st.param.b32 [func_retval0], %r14;
@@ -3083,7 +3083,7 @@ define i16 @release_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: // in Loop: Header=BB66_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB66_1;
; SM60-NEXT: $L__BB66_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -3129,7 +3129,7 @@ define i16 @release_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: // in Loop: Header=BB67_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB67_1;
; SM60-NEXT: $L__BB67_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -3175,7 +3175,7 @@ define i16 @release_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: // in Loop: Header=BB68_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB68_1;
; SM60-NEXT: $L__BB68_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -3221,7 +3221,7 @@ define i16 @release_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: // in Loop: Header=BB69_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB69_1;
; SM60-NEXT: $L__BB69_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -3267,7 +3267,7 @@ define i16 @release_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: // in Loop: Header=BB70_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB70_1;
; SM60-NEXT: $L__BB70_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -3313,7 +3313,7 @@ define i16 @release_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: // in Loop: Header=BB71_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB71_1;
; SM60-NEXT: $L__BB71_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -3359,7 +3359,7 @@ define i16 @acq_rel_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: // in Loop: Header=BB72_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB72_1;
; SM60-NEXT: $L__BB72_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -3405,7 +3405,7 @@ define i16 @acq_rel_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM60-NEXT: // in Loop: Header=BB73_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB73_1;
; SM60-NEXT: $L__BB73_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -3451,7 +3451,7 @@ define i16 @acq_rel_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM60-NEXT: // in Loop: Header=BB74_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB74_1;
; SM60-NEXT: $L__BB74_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -3497,7 +3497,7 @@ define i16 @acq_rel_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: // in Loop: Header=BB75_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB75_1;
; SM60-NEXT: $L__BB75_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -3543,7 +3543,7 @@ define i16 @acq_rel_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: // in Loop: Header=BB76_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB76_1;
; SM60-NEXT: $L__BB76_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -3589,7 +3589,7 @@ define i16 @acq_rel_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: // in Loop: Header=BB77_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB77_1;
; SM60-NEXT: $L__BB77_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -3635,7 +3635,7 @@ define i16 @acq_rel_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: // in Loop: Header=BB78_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB78_1;
; SM60-NEXT: $L__BB78_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -3681,7 +3681,7 @@ define i16 @acq_rel_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: // in Loop: Header=BB79_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB79_1;
; SM60-NEXT: $L__BB79_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -3727,7 +3727,7 @@ define i16 @acq_rel_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: // in Loop: Header=BB80_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB80_1;
; SM60-NEXT: $L__BB80_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -3773,7 +3773,7 @@ define i16 @seq_cst_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: // in Loop: Header=BB81_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB81_1;
; SM60-NEXT: $L__BB81_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -3819,7 +3819,7 @@ define i16 @seq_cst_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM60-NEXT: // in Loop: Header=BB82_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB82_1;
; SM60-NEXT: $L__BB82_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -3865,7 +3865,7 @@ define i16 @seq_cst_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM60-NEXT: // in Loop: Header=BB83_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB83_1;
; SM60-NEXT: $L__BB83_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -3911,7 +3911,7 @@ define i16 @seq_cst_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: // in Loop: Header=BB84_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB84_1;
; SM60-NEXT: $L__BB84_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -3957,7 +3957,7 @@ define i16 @seq_cst_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: // in Loop: Header=BB85_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB85_1;
; SM60-NEXT: $L__BB85_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -4003,7 +4003,7 @@ define i16 @seq_cst_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: // in Loop: Header=BB86_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB86_1;
; SM60-NEXT: $L__BB86_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -4049,7 +4049,7 @@ define i16 @seq_cst_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM60-NEXT: // in Loop: Header=BB87_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB87_1;
; SM60-NEXT: $L__BB87_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -4095,7 +4095,7 @@ define i16 @seq_cst_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: // in Loop: Header=BB88_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB88_1;
; SM60-NEXT: $L__BB88_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
@@ -4141,7 +4141,7 @@ define i16 @seq_cst_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM60-NEXT: // in Loop: Header=BB89_1 Depth=1
; SM60-NEXT: and.b32 %r8, %r7, %r2;
; SM60-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM60-NEXT: mov.u32 %r19, %r8;
+; SM60-NEXT: mov.b32 %r19, %r8;
; SM60-NEXT: @%p2 bra $L__BB89_1;
; SM60-NEXT: $L__BB89_3: // %partword.cmpxchg.end
; SM60-NEXT: membar.sys;
diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll
index 4360ea36e863a..df8c49aaaa42c 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm70.ll
@@ -38,7 +38,7 @@ define i8 @monotonic_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB0_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB0_1;
; SM70-NEXT: $L__BB0_3: // %partword.cmpxchg.end
; SM70-NEXT: st.param.b32 [func_retval0], %r13;
@@ -83,7 +83,7 @@ define i8 @monotonic_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
; SM70-NEXT: // in Loop: Header=BB1_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB1_1;
; SM70-NEXT: $L__BB1_3: // %partword.cmpxchg.end
; SM70-NEXT: st.param.b32 [func_retval0], %r13;
@@ -128,7 +128,7 @@ define i8 @monotonic_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %ne
; SM70-NEXT: // in Loop: Header=BB2_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB2_1;
; SM70-NEXT: $L__BB2_3: // %partword.cmpxchg.end
; SM70-NEXT: st.param.b32 [func_retval0], %r13;
@@ -173,7 +173,7 @@ define i8 @monotonic_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB3_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB3_1;
; SM70-NEXT: $L__BB3_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -219,7 +219,7 @@ define i8 @monotonic_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: // in Loop: Header=BB4_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB4_1;
; SM70-NEXT: $L__BB4_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -265,7 +265,7 @@ define i8 @monotonic_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: // in Loop: Header=BB5_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB5_1;
; SM70-NEXT: $L__BB5_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -312,7 +312,7 @@ define i8 @monotonic_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB6_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB6_1;
; SM70-NEXT: $L__BB6_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -359,7 +359,7 @@ define i8 @monotonic_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: // in Loop: Header=BB7_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB7_1;
; SM70-NEXT: $L__BB7_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -406,7 +406,7 @@ define i8 @monotonic_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: // in Loop: Header=BB8_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB8_1;
; SM70-NEXT: $L__BB8_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -452,7 +452,7 @@ define i8 @acquire_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB9_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB9_1;
; SM70-NEXT: $L__BB9_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -498,7 +498,7 @@ define i8 @acquire_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: // in Loop: Header=BB10_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB10_1;
; SM70-NEXT: $L__BB10_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -544,7 +544,7 @@ define i8 @acquire_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: // in Loop: Header=BB11_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB11_1;
; SM70-NEXT: $L__BB11_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -590,7 +590,7 @@ define i8 @acquire_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB12_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB12_1;
; SM70-NEXT: $L__BB12_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -636,7 +636,7 @@ define i8 @acquire_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB13_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB13_1;
; SM70-NEXT: $L__BB13_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -682,7 +682,7 @@ define i8 @acquire_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB14_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB14_1;
; SM70-NEXT: $L__BB14_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -729,7 +729,7 @@ define i8 @acquire_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB15_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB15_1;
; SM70-NEXT: $L__BB15_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -776,7 +776,7 @@ define i8 @acquire_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB16_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB16_1;
; SM70-NEXT: $L__BB16_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -823,7 +823,7 @@ define i8 @acquire_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB17_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB17_1;
; SM70-NEXT: $L__BB17_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -870,7 +870,7 @@ define i8 @release_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB18_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB18_1;
; SM70-NEXT: $L__BB18_3: // %partword.cmpxchg.end
; SM70-NEXT: st.param.b32 [func_retval0], %r13;
@@ -916,7 +916,7 @@ define i8 @release_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: // in Loop: Header=BB19_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB19_1;
; SM70-NEXT: $L__BB19_3: // %partword.cmpxchg.end
; SM70-NEXT: st.param.b32 [func_retval0], %r13;
@@ -962,7 +962,7 @@ define i8 @release_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: // in Loop: Header=BB20_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB20_1;
; SM70-NEXT: $L__BB20_3: // %partword.cmpxchg.end
; SM70-NEXT: st.param.b32 [func_retval0], %r13;
@@ -1008,7 +1008,7 @@ define i8 @release_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB21_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB21_1;
; SM70-NEXT: $L__BB21_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -1055,7 +1055,7 @@ define i8 @release_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB22_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB22_1;
; SM70-NEXT: $L__BB22_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -1102,7 +1102,7 @@ define i8 @release_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB23_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB23_1;
; SM70-NEXT: $L__BB23_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -1149,7 +1149,7 @@ define i8 @release_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB24_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB24_1;
; SM70-NEXT: $L__BB24_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -1196,7 +1196,7 @@ define i8 @release_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB25_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB25_1;
; SM70-NEXT: $L__BB25_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -1243,7 +1243,7 @@ define i8 @release_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB26_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB26_1;
; SM70-NEXT: $L__BB26_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -1290,7 +1290,7 @@ define i8 @acq_rel_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB27_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB27_1;
; SM70-NEXT: $L__BB27_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -1337,7 +1337,7 @@ define i8 @acq_rel_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: // in Loop: Header=BB28_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB28_1;
; SM70-NEXT: $L__BB28_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -1384,7 +1384,7 @@ define i8 @acq_rel_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: // in Loop: Header=BB29_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB29_1;
; SM70-NEXT: $L__BB29_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -1431,7 +1431,7 @@ define i8 @acq_rel_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB30_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB30_1;
; SM70-NEXT: $L__BB30_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -1478,7 +1478,7 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB31_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB31_1;
; SM70-NEXT: $L__BB31_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -1525,7 +1525,7 @@ define i8 @acq_rel_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB32_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB32_1;
; SM70-NEXT: $L__BB32_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -1572,7 +1572,7 @@ define i8 @acq_rel_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB33_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB33_1;
; SM70-NEXT: $L__BB33_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -1619,7 +1619,7 @@ define i8 @acq_rel_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB34_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB34_1;
; SM70-NEXT: $L__BB34_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -1666,7 +1666,7 @@ define i8 @acq_rel_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB35_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB35_1;
; SM70-NEXT: $L__BB35_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -1713,7 +1713,7 @@ define i8 @seq_cst_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB36_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB36_1;
; SM70-NEXT: $L__BB36_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -1760,7 +1760,7 @@ define i8 @seq_cst_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: // in Loop: Header=BB37_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB37_1;
; SM70-NEXT: $L__BB37_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -1807,7 +1807,7 @@ define i8 @seq_cst_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM70-NEXT: // in Loop: Header=BB38_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB38_1;
; SM70-NEXT: $L__BB38_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -1854,7 +1854,7 @@ define i8 @seq_cst_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB39_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB39_1;
; SM70-NEXT: $L__BB39_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -1901,7 +1901,7 @@ define i8 @seq_cst_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB40_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB40_1;
; SM70-NEXT: $L__BB40_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -1948,7 +1948,7 @@ define i8 @seq_cst_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB41_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB41_1;
; SM70-NEXT: $L__BB41_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -1995,7 +1995,7 @@ define i8 @seq_cst_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB42_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB42_1;
; SM70-NEXT: $L__BB42_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -2042,7 +2042,7 @@ define i8 @seq_cst_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB43_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB43_1;
; SM70-NEXT: $L__BB43_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -2089,7 +2089,7 @@ define i8 @seq_cst_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB44_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB44_1;
; SM70-NEXT: $L__BB44_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -2134,7 +2134,7 @@ define i16 @monotonic_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: // in Loop: Header=BB45_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB45_1;
; SM70-NEXT: $L__BB45_3: // %partword.cmpxchg.end
; SM70-NEXT: st.param.b32 [func_retval0], %r14;
@@ -2178,7 +2178,7 @@ define i16 @monotonic_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16
; SM70-NEXT: // in Loop: Header=BB46_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB46_1;
; SM70-NEXT: $L__BB46_3: // %partword.cmpxchg.end
; SM70-NEXT: st.param.b32 [func_retval0], %r14;
@@ -2222,7 +2222,7 @@ define i16 @monotonic_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16
; SM70-NEXT: // in Loop: Header=BB47_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB47_1;
; SM70-NEXT: $L__BB47_3: // %partword.cmpxchg.end
; SM70-NEXT: st.param.b32 [func_retval0], %r14;
@@ -2266,7 +2266,7 @@ define i16 @monotonic_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: // in Loop: Header=BB48_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB48_1;
; SM70-NEXT: $L__BB48_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -2311,7 +2311,7 @@ define i16 @monotonic_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM70-NEXT: // in Loop: Header=BB49_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB49_1;
; SM70-NEXT: $L__BB49_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -2356,7 +2356,7 @@ define i16 @monotonic_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM70-NEXT: // in Loop: Header=BB50_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB50_1;
; SM70-NEXT: $L__BB50_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -2402,7 +2402,7 @@ define i16 @monotonic_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: // in Loop: Header=BB51_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB51_1;
; SM70-NEXT: $L__BB51_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -2448,7 +2448,7 @@ define i16 @monotonic_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM70-NEXT: // in Loop: Header=BB52_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB52_1;
; SM70-NEXT: $L__BB52_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -2494,7 +2494,7 @@ define i16 @monotonic_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM70-NEXT: // in Loop: Header=BB53_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB53_1;
; SM70-NEXT: $L__BB53_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -2539,7 +2539,7 @@ define i16 @acquire_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: // in Loop: Header=BB54_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB54_1;
; SM70-NEXT: $L__BB54_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -2584,7 +2584,7 @@ define i16 @acquire_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM70-NEXT: // in Loop: Header=BB55_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB55_1;
; SM70-NEXT: $L__BB55_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -2629,7 +2629,7 @@ define i16 @acquire_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM70-NEXT: // in Loop: Header=BB56_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB56_1;
; SM70-NEXT: $L__BB56_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -2674,7 +2674,7 @@ define i16 @acquire_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: // in Loop: Header=BB57_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB57_1;
; SM70-NEXT: $L__BB57_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -2719,7 +2719,7 @@ define i16 @acquire_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: // in Loop: Header=BB58_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB58_1;
; SM70-NEXT: $L__BB58_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -2764,7 +2764,7 @@ define i16 @acquire_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: // in Loop: Header=BB59_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB59_1;
; SM70-NEXT: $L__BB59_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -2810,7 +2810,7 @@ define i16 @acquire_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: // in Loop: Header=BB60_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB60_1;
; SM70-NEXT: $L__BB60_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -2856,7 +2856,7 @@ define i16 @acquire_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: // in Loop: Header=BB61_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB61_1;
; SM70-NEXT: $L__BB61_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -2902,7 +2902,7 @@ define i16 @acquire_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: // in Loop: Header=BB62_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB62_1;
; SM70-NEXT: $L__BB62_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -2948,7 +2948,7 @@ define i16 @release_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: // in Loop: Header=BB63_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB63_1;
; SM70-NEXT: $L__BB63_3: // %partword.cmpxchg.end
; SM70-NEXT: st.param.b32 [func_retval0], %r14;
@@ -2993,7 +2993,7 @@ define i16 @release_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM70-NEXT: // in Loop: Header=BB64_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB64_1;
; SM70-NEXT: $L__BB64_3: // %partword.cmpxchg.end
; SM70-NEXT: st.param.b32 [func_retval0], %r14;
@@ -3038,7 +3038,7 @@ define i16 @release_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM70-NEXT: // in Loop: Header=BB65_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB65_1;
; SM70-NEXT: $L__BB65_3: // %partword.cmpxchg.end
; SM70-NEXT: st.param.b32 [func_retval0], %r14;
@@ -3083,7 +3083,7 @@ define i16 @release_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: // in Loop: Header=BB66_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB66_1;
; SM70-NEXT: $L__BB66_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -3129,7 +3129,7 @@ define i16 @release_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: // in Loop: Header=BB67_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB67_1;
; SM70-NEXT: $L__BB67_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -3175,7 +3175,7 @@ define i16 @release_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: // in Loop: Header=BB68_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB68_1;
; SM70-NEXT: $L__BB68_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -3221,7 +3221,7 @@ define i16 @release_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: // in Loop: Header=BB69_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB69_1;
; SM70-NEXT: $L__BB69_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -3267,7 +3267,7 @@ define i16 @release_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: // in Loop: Header=BB70_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB70_1;
; SM70-NEXT: $L__BB70_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -3313,7 +3313,7 @@ define i16 @release_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: // in Loop: Header=BB71_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB71_1;
; SM70-NEXT: $L__BB71_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -3359,7 +3359,7 @@ define i16 @acq_rel_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: // in Loop: Header=BB72_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB72_1;
; SM70-NEXT: $L__BB72_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -3405,7 +3405,7 @@ define i16 @acq_rel_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM70-NEXT: // in Loop: Header=BB73_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB73_1;
; SM70-NEXT: $L__BB73_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -3451,7 +3451,7 @@ define i16 @acq_rel_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM70-NEXT: // in Loop: Header=BB74_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB74_1;
; SM70-NEXT: $L__BB74_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -3497,7 +3497,7 @@ define i16 @acq_rel_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: // in Loop: Header=BB75_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB75_1;
; SM70-NEXT: $L__BB75_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -3543,7 +3543,7 @@ define i16 @acq_rel_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: // in Loop: Header=BB76_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB76_1;
; SM70-NEXT: $L__BB76_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -3589,7 +3589,7 @@ define i16 @acq_rel_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: // in Loop: Header=BB77_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB77_1;
; SM70-NEXT: $L__BB77_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -3635,7 +3635,7 @@ define i16 @acq_rel_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: // in Loop: Header=BB78_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB78_1;
; SM70-NEXT: $L__BB78_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -3681,7 +3681,7 @@ define i16 @acq_rel_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: // in Loop: Header=BB79_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB79_1;
; SM70-NEXT: $L__BB79_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -3727,7 +3727,7 @@ define i16 @acq_rel_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: // in Loop: Header=BB80_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB80_1;
; SM70-NEXT: $L__BB80_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -3773,7 +3773,7 @@ define i16 @seq_cst_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: // in Loop: Header=BB81_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB81_1;
; SM70-NEXT: $L__BB81_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -3819,7 +3819,7 @@ define i16 @seq_cst_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM70-NEXT: // in Loop: Header=BB82_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB82_1;
; SM70-NEXT: $L__BB82_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -3865,7 +3865,7 @@ define i16 @seq_cst_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM70-NEXT: // in Loop: Header=BB83_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB83_1;
; SM70-NEXT: $L__BB83_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -3911,7 +3911,7 @@ define i16 @seq_cst_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: // in Loop: Header=BB84_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB84_1;
; SM70-NEXT: $L__BB84_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -3957,7 +3957,7 @@ define i16 @seq_cst_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: // in Loop: Header=BB85_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB85_1;
; SM70-NEXT: $L__BB85_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -4003,7 +4003,7 @@ define i16 @seq_cst_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: // in Loop: Header=BB86_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB86_1;
; SM70-NEXT: $L__BB86_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -4049,7 +4049,7 @@ define i16 @seq_cst_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: // in Loop: Header=BB87_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB87_1;
; SM70-NEXT: $L__BB87_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -4095,7 +4095,7 @@ define i16 @seq_cst_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: // in Loop: Header=BB88_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB88_1;
; SM70-NEXT: $L__BB88_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -4141,7 +4141,7 @@ define i16 @seq_cst_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM70-NEXT: // in Loop: Header=BB89_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB89_1;
; SM70-NEXT: $L__BB89_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll b/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll
index 5acb275a6f581..6df7b3d695f7d 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg-sm90.ll
@@ -38,7 +38,7 @@ define i8 @monotonic_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB0_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB0_1;
; SM90-NEXT: $L__BB0_3: // %partword.cmpxchg.end
; SM90-NEXT: st.param.b32 [func_retval0], %r13;
@@ -83,7 +83,7 @@ define i8 @monotonic_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %ne
; SM90-NEXT: // in Loop: Header=BB1_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB1_1;
; SM90-NEXT: $L__BB1_3: // %partword.cmpxchg.end
; SM90-NEXT: st.param.b32 [func_retval0], %r13;
@@ -128,7 +128,7 @@ define i8 @monotonic_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %ne
; SM90-NEXT: // in Loop: Header=BB2_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB2_1;
; SM90-NEXT: $L__BB2_3: // %partword.cmpxchg.end
; SM90-NEXT: st.param.b32 [func_retval0], %r13;
@@ -173,7 +173,7 @@ define i8 @monotonic_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB3_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB3_1;
; SM90-NEXT: $L__BB3_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -219,7 +219,7 @@ define i8 @monotonic_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: // in Loop: Header=BB4_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB4_1;
; SM90-NEXT: $L__BB4_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -265,7 +265,7 @@ define i8 @monotonic_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: // in Loop: Header=BB5_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB5_1;
; SM90-NEXT: $L__BB5_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -312,7 +312,7 @@ define i8 @monotonic_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB6_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB6_1;
; SM90-NEXT: $L__BB6_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -359,7 +359,7 @@ define i8 @monotonic_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: // in Loop: Header=BB7_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB7_1;
; SM90-NEXT: $L__BB7_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -406,7 +406,7 @@ define i8 @monotonic_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: // in Loop: Header=BB8_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB8_1;
; SM90-NEXT: $L__BB8_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -452,7 +452,7 @@ define i8 @acquire_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB9_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB9_1;
; SM90-NEXT: $L__BB9_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -498,7 +498,7 @@ define i8 @acquire_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: // in Loop: Header=BB10_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB10_1;
; SM90-NEXT: $L__BB10_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -544,7 +544,7 @@ define i8 @acquire_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: // in Loop: Header=BB11_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB11_1;
; SM90-NEXT: $L__BB11_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -590,7 +590,7 @@ define i8 @acquire_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB12_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB12_1;
; SM90-NEXT: $L__BB12_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -636,7 +636,7 @@ define i8 @acquire_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB13_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB13_1;
; SM90-NEXT: $L__BB13_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -682,7 +682,7 @@ define i8 @acquire_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB14_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB14_1;
; SM90-NEXT: $L__BB14_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -729,7 +729,7 @@ define i8 @acquire_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB15_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB15_1;
; SM90-NEXT: $L__BB15_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -776,7 +776,7 @@ define i8 @acquire_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB16_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB16_1;
; SM90-NEXT: $L__BB16_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -823,7 +823,7 @@ define i8 @acquire_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB17_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB17_1;
; SM90-NEXT: $L__BB17_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -870,7 +870,7 @@ define i8 @release_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB18_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB18_1;
; SM90-NEXT: $L__BB18_3: // %partword.cmpxchg.end
; SM90-NEXT: st.param.b32 [func_retval0], %r13;
@@ -916,7 +916,7 @@ define i8 @release_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: // in Loop: Header=BB19_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB19_1;
; SM90-NEXT: $L__BB19_3: // %partword.cmpxchg.end
; SM90-NEXT: st.param.b32 [func_retval0], %r13;
@@ -962,7 +962,7 @@ define i8 @release_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: // in Loop: Header=BB20_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB20_1;
; SM90-NEXT: $L__BB20_3: // %partword.cmpxchg.end
; SM90-NEXT: st.param.b32 [func_retval0], %r13;
@@ -1008,7 +1008,7 @@ define i8 @release_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB21_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB21_1;
; SM90-NEXT: $L__BB21_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -1055,7 +1055,7 @@ define i8 @release_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB22_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB22_1;
; SM90-NEXT: $L__BB22_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -1102,7 +1102,7 @@ define i8 @release_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB23_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB23_1;
; SM90-NEXT: $L__BB23_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -1149,7 +1149,7 @@ define i8 @release_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB24_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB24_1;
; SM90-NEXT: $L__BB24_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -1196,7 +1196,7 @@ define i8 @release_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB25_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB25_1;
; SM90-NEXT: $L__BB25_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -1243,7 +1243,7 @@ define i8 @release_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB26_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB26_1;
; SM90-NEXT: $L__BB26_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -1290,7 +1290,7 @@ define i8 @acq_rel_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB27_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB27_1;
; SM90-NEXT: $L__BB27_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -1337,7 +1337,7 @@ define i8 @acq_rel_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: // in Loop: Header=BB28_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB28_1;
; SM90-NEXT: $L__BB28_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -1384,7 +1384,7 @@ define i8 @acq_rel_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: // in Loop: Header=BB29_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB29_1;
; SM90-NEXT: $L__BB29_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -1431,7 +1431,7 @@ define i8 @acq_rel_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB30_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB30_1;
; SM90-NEXT: $L__BB30_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -1478,7 +1478,7 @@ define i8 @acq_rel_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB31_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB31_1;
; SM90-NEXT: $L__BB31_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -1525,7 +1525,7 @@ define i8 @acq_rel_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB32_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB32_1;
; SM90-NEXT: $L__BB32_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -1572,7 +1572,7 @@ define i8 @acq_rel_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB33_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB33_1;
; SM90-NEXT: $L__BB33_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -1619,7 +1619,7 @@ define i8 @acq_rel_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB34_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB34_1;
; SM90-NEXT: $L__BB34_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -1666,7 +1666,7 @@ define i8 @acq_rel_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB35_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB35_1;
; SM90-NEXT: $L__BB35_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -1713,7 +1713,7 @@ define i8 @seq_cst_monotonic_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB36_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB36_1;
; SM90-NEXT: $L__BB36_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -1760,7 +1760,7 @@ define i8 @seq_cst_monotonic_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: // in Loop: Header=BB37_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB37_1;
; SM90-NEXT: $L__BB37_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -1807,7 +1807,7 @@ define i8 @seq_cst_monotonic_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new)
; SM90-NEXT: // in Loop: Header=BB38_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB38_1;
; SM90-NEXT: $L__BB38_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -1854,7 +1854,7 @@ define i8 @seq_cst_acquire_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB39_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB39_1;
; SM90-NEXT: $L__BB39_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -1901,7 +1901,7 @@ define i8 @seq_cst_acquire_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB40_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB40_1;
; SM90-NEXT: $L__BB40_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -1948,7 +1948,7 @@ define i8 @seq_cst_acquire_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB41_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB41_1;
; SM90-NEXT: $L__BB41_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -1995,7 +1995,7 @@ define i8 @seq_cst_seq_cst_i8_generic(ptr %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB42_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB42_1;
; SM90-NEXT: $L__BB42_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -2042,7 +2042,7 @@ define i8 @seq_cst_seq_cst_i8_global(ptr addrspace(1) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB43_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB43_1;
; SM90-NEXT: $L__BB43_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -2089,7 +2089,7 @@ define i8 @seq_cst_seq_cst_i8_shared(ptr addrspace(3) %addr, i8 %cmp, i8 %new) {
; SM90-NEXT: // in Loop: Header=BB44_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM90-NEXT: mov.u32 %r20, %r8;
+; SM90-NEXT: mov.b32 %r20, %r8;
; SM90-NEXT: @%p2 bra $L__BB44_1;
; SM90-NEXT: $L__BB44_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -2134,7 +2134,7 @@ define i16 @monotonic_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: // in Loop: Header=BB45_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB45_1;
; SM90-NEXT: $L__BB45_3: // %partword.cmpxchg.end
; SM90-NEXT: st.param.b32 [func_retval0], %r14;
@@ -2178,7 +2178,7 @@ define i16 @monotonic_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16
; SM90-NEXT: // in Loop: Header=BB46_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB46_1;
; SM90-NEXT: $L__BB46_3: // %partword.cmpxchg.end
; SM90-NEXT: st.param.b32 [func_retval0], %r14;
@@ -2222,7 +2222,7 @@ define i16 @monotonic_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16
; SM90-NEXT: // in Loop: Header=BB47_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB47_1;
; SM90-NEXT: $L__BB47_3: // %partword.cmpxchg.end
; SM90-NEXT: st.param.b32 [func_retval0], %r14;
@@ -2266,7 +2266,7 @@ define i16 @monotonic_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: // in Loop: Header=BB48_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB48_1;
; SM90-NEXT: $L__BB48_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -2311,7 +2311,7 @@ define i16 @monotonic_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM90-NEXT: // in Loop: Header=BB49_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB49_1;
; SM90-NEXT: $L__BB49_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -2356,7 +2356,7 @@ define i16 @monotonic_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM90-NEXT: // in Loop: Header=BB50_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB50_1;
; SM90-NEXT: $L__BB50_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -2402,7 +2402,7 @@ define i16 @monotonic_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: // in Loop: Header=BB51_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB51_1;
; SM90-NEXT: $L__BB51_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -2448,7 +2448,7 @@ define i16 @monotonic_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM90-NEXT: // in Loop: Header=BB52_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB52_1;
; SM90-NEXT: $L__BB52_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -2494,7 +2494,7 @@ define i16 @monotonic_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM90-NEXT: // in Loop: Header=BB53_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB53_1;
; SM90-NEXT: $L__BB53_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -2539,7 +2539,7 @@ define i16 @acquire_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: // in Loop: Header=BB54_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB54_1;
; SM90-NEXT: $L__BB54_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -2584,7 +2584,7 @@ define i16 @acquire_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM90-NEXT: // in Loop: Header=BB55_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB55_1;
; SM90-NEXT: $L__BB55_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -2629,7 +2629,7 @@ define i16 @acquire_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM90-NEXT: // in Loop: Header=BB56_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB56_1;
; SM90-NEXT: $L__BB56_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -2674,7 +2674,7 @@ define i16 @acquire_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: // in Loop: Header=BB57_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB57_1;
; SM90-NEXT: $L__BB57_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -2719,7 +2719,7 @@ define i16 @acquire_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: // in Loop: Header=BB58_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB58_1;
; SM90-NEXT: $L__BB58_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -2764,7 +2764,7 @@ define i16 @acquire_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: // in Loop: Header=BB59_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB59_1;
; SM90-NEXT: $L__BB59_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -2810,7 +2810,7 @@ define i16 @acquire_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: // in Loop: Header=BB60_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB60_1;
; SM90-NEXT: $L__BB60_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -2856,7 +2856,7 @@ define i16 @acquire_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: // in Loop: Header=BB61_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB61_1;
; SM90-NEXT: $L__BB61_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -2902,7 +2902,7 @@ define i16 @acquire_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: // in Loop: Header=BB62_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB62_1;
; SM90-NEXT: $L__BB62_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -2948,7 +2948,7 @@ define i16 @release_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: // in Loop: Header=BB63_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB63_1;
; SM90-NEXT: $L__BB63_3: // %partword.cmpxchg.end
; SM90-NEXT: st.param.b32 [func_retval0], %r14;
@@ -2993,7 +2993,7 @@ define i16 @release_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM90-NEXT: // in Loop: Header=BB64_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB64_1;
; SM90-NEXT: $L__BB64_3: // %partword.cmpxchg.end
; SM90-NEXT: st.param.b32 [func_retval0], %r14;
@@ -3038,7 +3038,7 @@ define i16 @release_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM90-NEXT: // in Loop: Header=BB65_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB65_1;
; SM90-NEXT: $L__BB65_3: // %partword.cmpxchg.end
; SM90-NEXT: st.param.b32 [func_retval0], %r14;
@@ -3083,7 +3083,7 @@ define i16 @release_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: // in Loop: Header=BB66_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB66_1;
; SM90-NEXT: $L__BB66_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -3129,7 +3129,7 @@ define i16 @release_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: // in Loop: Header=BB67_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB67_1;
; SM90-NEXT: $L__BB67_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -3175,7 +3175,7 @@ define i16 @release_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: // in Loop: Header=BB68_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB68_1;
; SM90-NEXT: $L__BB68_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -3221,7 +3221,7 @@ define i16 @release_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: // in Loop: Header=BB69_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB69_1;
; SM90-NEXT: $L__BB69_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -3267,7 +3267,7 @@ define i16 @release_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: // in Loop: Header=BB70_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB70_1;
; SM90-NEXT: $L__BB70_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -3313,7 +3313,7 @@ define i16 @release_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: // in Loop: Header=BB71_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB71_1;
; SM90-NEXT: $L__BB71_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -3359,7 +3359,7 @@ define i16 @acq_rel_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: // in Loop: Header=BB72_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB72_1;
; SM90-NEXT: $L__BB72_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -3405,7 +3405,7 @@ define i16 @acq_rel_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM90-NEXT: // in Loop: Header=BB73_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB73_1;
; SM90-NEXT: $L__BB73_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -3451,7 +3451,7 @@ define i16 @acq_rel_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM90-NEXT: // in Loop: Header=BB74_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB74_1;
; SM90-NEXT: $L__BB74_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -3497,7 +3497,7 @@ define i16 @acq_rel_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: // in Loop: Header=BB75_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB75_1;
; SM90-NEXT: $L__BB75_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -3543,7 +3543,7 @@ define i16 @acq_rel_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: // in Loop: Header=BB76_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB76_1;
; SM90-NEXT: $L__BB76_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -3589,7 +3589,7 @@ define i16 @acq_rel_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: // in Loop: Header=BB77_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB77_1;
; SM90-NEXT: $L__BB77_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -3635,7 +3635,7 @@ define i16 @acq_rel_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: // in Loop: Header=BB78_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB78_1;
; SM90-NEXT: $L__BB78_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -3681,7 +3681,7 @@ define i16 @acq_rel_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: // in Loop: Header=BB79_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB79_1;
; SM90-NEXT: $L__BB79_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -3727,7 +3727,7 @@ define i16 @acq_rel_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: // in Loop: Header=BB80_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB80_1;
; SM90-NEXT: $L__BB80_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -3773,7 +3773,7 @@ define i16 @seq_cst_monotonic_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: // in Loop: Header=BB81_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB81_1;
; SM90-NEXT: $L__BB81_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -3819,7 +3819,7 @@ define i16 @seq_cst_monotonic_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %
; SM90-NEXT: // in Loop: Header=BB82_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB82_1;
; SM90-NEXT: $L__BB82_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -3865,7 +3865,7 @@ define i16 @seq_cst_monotonic_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %
; SM90-NEXT: // in Loop: Header=BB83_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB83_1;
; SM90-NEXT: $L__BB83_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -3911,7 +3911,7 @@ define i16 @seq_cst_acquire_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: // in Loop: Header=BB84_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB84_1;
; SM90-NEXT: $L__BB84_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -3957,7 +3957,7 @@ define i16 @seq_cst_acquire_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: // in Loop: Header=BB85_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB85_1;
; SM90-NEXT: $L__BB85_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -4003,7 +4003,7 @@ define i16 @seq_cst_acquire_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: // in Loop: Header=BB86_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB86_1;
; SM90-NEXT: $L__BB86_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -4049,7 +4049,7 @@ define i16 @seq_cst_seq_cst_i16_generic(ptr %addr, i16 %cmp, i16 %new) {
; SM90-NEXT: // in Loop: Header=BB87_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB87_1;
; SM90-NEXT: $L__BB87_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -4095,7 +4095,7 @@ define i16 @seq_cst_seq_cst_i16_global(ptr addrspace(1) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: // in Loop: Header=BB88_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB88_1;
; SM90-NEXT: $L__BB88_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
@@ -4141,7 +4141,7 @@ define i16 @seq_cst_seq_cst_i16_shared(ptr addrspace(3) %addr, i16 %cmp, i16 %ne
; SM90-NEXT: // in Loop: Header=BB89_1 Depth=1
; SM90-NEXT: and.b32 %r8, %r7, %r2;
; SM90-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM90-NEXT: mov.u32 %r19, %r8;
+; SM90-NEXT: mov.b32 %r19, %r8;
; SM90-NEXT: @%p2 bra $L__BB89_1;
; SM90-NEXT: $L__BB89_3: // %partword.cmpxchg.end
; SM90-NEXT: fence.acquire.sys;
diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg.ll b/llvm/test/CodeGen/NVPTX/cmpxchg.ll
index aaea0d2ee25ef..e5f05e49d2fef 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg.ll
@@ -45,7 +45,7 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30-NEXT: // in Loop: Header=BB0_1 Depth=1
; SM30-NEXT: and.b32 %r8, %r7, %r2;
; SM30-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM30-NEXT: mov.u32 %r20, %r8;
+; SM30-NEXT: mov.b32 %r20, %r8;
; SM30-NEXT: @%p2 bra $L__BB0_1;
; SM30-NEXT: $L__BB0_3: // %partword.cmpxchg.end
; SM30-NEXT: st.param.b32 [func_retval0], %r13;
@@ -86,7 +86,7 @@ define i8 @relaxed_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB0_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB0_1;
; SM70-NEXT: $L__BB0_3: // %partword.cmpxchg.end
; SM70-NEXT: st.param.b32 [func_retval0], %r13;
@@ -171,7 +171,7 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30-NEXT: // in Loop: Header=BB1_1 Depth=1
; SM30-NEXT: and.b32 %r8, %r7, %r2;
; SM30-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM30-NEXT: mov.u32 %r20, %r8;
+; SM30-NEXT: mov.b32 %r20, %r8;
; SM30-NEXT: @%p2 bra $L__BB1_1;
; SM30-NEXT: $L__BB1_3: // %partword.cmpxchg.end
; SM30-NEXT: membar.sys;
@@ -213,7 +213,7 @@ define i8 @acquire_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB1_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB1_1;
; SM70-NEXT: $L__BB1_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -301,7 +301,7 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30-NEXT: // in Loop: Header=BB2_1 Depth=1
; SM30-NEXT: and.b32 %r8, %r7, %r2;
; SM30-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM30-NEXT: mov.u32 %r20, %r8;
+; SM30-NEXT: mov.b32 %r20, %r8;
; SM30-NEXT: @%p2 bra $L__BB2_1;
; SM30-NEXT: $L__BB2_3: // %partword.cmpxchg.end
; SM30-NEXT: st.param.b32 [func_retval0], %r13;
@@ -343,7 +343,7 @@ define i8 @release_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB2_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB2_1;
; SM70-NEXT: $L__BB2_3: // %partword.cmpxchg.end
; SM70-NEXT: st.param.b32 [func_retval0], %r13;
@@ -430,7 +430,7 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30-NEXT: // in Loop: Header=BB3_1 Depth=1
; SM30-NEXT: and.b32 %r8, %r7, %r2;
; SM30-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM30-NEXT: mov.u32 %r20, %r8;
+; SM30-NEXT: mov.b32 %r20, %r8;
; SM30-NEXT: @%p2 bra $L__BB3_1;
; SM30-NEXT: $L__BB3_3: // %partword.cmpxchg.end
; SM30-NEXT: membar.sys;
@@ -473,7 +473,7 @@ define i8 @acq_rel_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB3_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB3_1;
; SM70-NEXT: $L__BB3_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -562,7 +562,7 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM30-NEXT: // in Loop: Header=BB4_1 Depth=1
; SM30-NEXT: and.b32 %r8, %r7, %r2;
; SM30-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM30-NEXT: mov.u32 %r20, %r8;
+; SM30-NEXT: mov.b32 %r20, %r8;
; SM30-NEXT: @%p2 bra $L__BB4_1;
; SM30-NEXT: $L__BB4_3: // %partword.cmpxchg.end
; SM30-NEXT: membar.sys;
@@ -605,7 +605,7 @@ define i8 @seq_cst_sys_i8(ptr %addr, i8 %cmp, i8 %new) {
; SM70-NEXT: // in Loop: Header=BB4_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
-; SM70-NEXT: mov.u32 %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
; SM70-NEXT: @%p2 bra $L__BB4_1;
; SM70-NEXT: $L__BB4_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -693,7 +693,7 @@ define i16 @relaxed_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM30-NEXT: // in Loop: Header=BB5_1 Depth=1
; SM30-NEXT: and.b32 %r8, %r7, %r2;
; SM30-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM30-NEXT: mov.u32 %r19, %r8;
+; SM30-NEXT: mov.b32 %r19, %r8;
; SM30-NEXT: @%p2 bra $L__BB5_1;
; SM30-NEXT: $L__BB5_3: // %partword.cmpxchg.end
; SM30-NEXT: st.param.b32 [func_retval0], %r14;
@@ -733,7 +733,7 @@ define i16 @relaxed_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: // in Loop: Header=BB5_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB5_1;
; SM70-NEXT: $L__BB5_3: // %partword.cmpxchg.end
; SM70-NEXT: st.param.b32 [func_retval0], %r14;
@@ -816,7 +816,7 @@ define i16 @acquire_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM30-NEXT: // in Loop: Header=BB6_1 Depth=1
; SM30-NEXT: and.b32 %r8, %r7, %r2;
; SM30-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM30-NEXT: mov.u32 %r19, %r8;
+; SM30-NEXT: mov.b32 %r19, %r8;
; SM30-NEXT: @%p2 bra $L__BB6_1;
; SM30-NEXT: $L__BB6_3: // %partword.cmpxchg.end
; SM30-NEXT: membar.sys;
@@ -857,7 +857,7 @@ define i16 @acquire_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: // in Loop: Header=BB6_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB6_1;
; SM70-NEXT: $L__BB6_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -943,7 +943,7 @@ define i16 @release_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM30-NEXT: // in Loop: Header=BB7_1 Depth=1
; SM30-NEXT: and.b32 %r8, %r7, %r2;
; SM30-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM30-NEXT: mov.u32 %r19, %r8;
+; SM30-NEXT: mov.b32 %r19, %r8;
; SM30-NEXT: @%p2 bra $L__BB7_1;
; SM30-NEXT: $L__BB7_3: // %partword.cmpxchg.end
; SM30-NEXT: st.param.b32 [func_retval0], %r14;
@@ -984,7 +984,7 @@ define i16 @release_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: // in Loop: Header=BB7_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB7_1;
; SM70-NEXT: $L__BB7_3: // %partword.cmpxchg.end
; SM70-NEXT: st.param.b32 [func_retval0], %r14;
@@ -1069,7 +1069,7 @@ define i16 @acq_rel_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM30-NEXT: // in Loop: Header=BB8_1 Depth=1
; SM30-NEXT: and.b32 %r8, %r7, %r2;
; SM30-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM30-NEXT: mov.u32 %r19, %r8;
+; SM30-NEXT: mov.b32 %r19, %r8;
; SM30-NEXT: @%p2 bra $L__BB8_1;
; SM30-NEXT: $L__BB8_3: // %partword.cmpxchg.end
; SM30-NEXT: membar.sys;
@@ -1111,7 +1111,7 @@ define i16 @acq_rel_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: // in Loop: Header=BB8_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB8_1;
; SM70-NEXT: $L__BB8_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
@@ -1199,7 +1199,7 @@ define i16 @seq_cst_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM30-NEXT: // in Loop: Header=BB9_1 Depth=1
; SM30-NEXT: and.b32 %r8, %r7, %r2;
; SM30-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM30-NEXT: mov.u32 %r19, %r8;
+; SM30-NEXT: mov.b32 %r19, %r8;
; SM30-NEXT: @%p2 bra $L__BB9_1;
; SM30-NEXT: $L__BB9_3: // %partword.cmpxchg.end
; SM30-NEXT: membar.sys;
@@ -1241,7 +1241,7 @@ define i16 @seq_cst_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: // in Loop: Header=BB9_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
+; SM70-NEXT: mov.b32 %r19, %r8;
; SM70-NEXT: @%p2 bra $L__BB9_1;
; SM70-NEXT: $L__BB9_3: // %partword.cmpxchg.end
; SM70-NEXT: fence.acq_rel.sys;
diff --git a/llvm/test/CodeGen/NVPTX/disjoint-or-addr.ll b/llvm/test/CodeGen/NVPTX/disjoint-or-addr.ll
index 1b1bb91d5c79e..b0e2082621bff 100644
--- a/llvm/test/CodeGen/NVPTX/disjoint-or-addr.ll
+++ b/llvm/test/CodeGen/NVPTX/disjoint-or-addr.ll
@@ -12,7 +12,7 @@ define i32 @test_disjoint_or_addr(i16 %a) {
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.u64 %rd1, a;
+; CHECK-NEXT: mov.b64 %rd1, a;
; CHECK-NEXT: cvta.global.u64 %rd2, %rd1;
; CHECK-NEXT: ld.u32 %r1, [%rd2+8];
; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
diff --git a/llvm/test/CodeGen/NVPTX/div.ll b/llvm/test/CodeGen/NVPTX/div.ll
index 3d14d36ed599b..4f9d58758ca9e 100644
--- a/llvm/test/CodeGen/NVPTX/div.ll
+++ b/llvm/test/CodeGen/NVPTX/div.ll
@@ -11,10 +11,10 @@ define float @div_full(float %a, float %b) {
; CHECK-NEXT: ld.param.f32 %f1, [div_full_param_0];
; CHECK-NEXT: ld.param.f32 %f2, [div_full_param_1];
; CHECK-NEXT: div.full.f32 %f3, %f1, %f2;
-; CHECK-NEXT: mov.f32 %f4, 0f40400000;
+; CHECK-NEXT: mov.b32 %f4, 0f40400000;
; CHECK-NEXT: div.full.f32 %f5, %f3, %f4;
; CHECK-NEXT: div.full.ftz.f32 %f6, %f5, %f2;
-; CHECK-NEXT: mov.f32 %f7, 0f40800000;
+; CHECK-NEXT: mov.b32 %f7, 0f40800000;
; CHECK-NEXT: div.full.ftz.f32 %f8, %f6, %f7;
; CHECK-NEXT: st.param.f32 [func_retval0], %f8;
; CHECK-NEXT: ret;
diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
index f78cfc3172621..70d1167bbb6e2 100644
--- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
@@ -138,7 +138,7 @@ define half @test_fsub(half %a, half %b) #0 {
; CHECK-F16-FTZ-NEXT: mov.b16 [[Z:%rs[0-9]+]], 0x0000
; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%rs[0-9]+]], [[Z]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
-; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000;
+; CHECK-NOF16-DAG: mov.b32 [[Z:%f[0-9]+]], 0f00000000;
; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[Z]], [[A32]];
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
@@ -646,7 +646,7 @@ else:
; CHECK: ld.param.u64 %[[P1:rd[0-9]+]], [test_phi_param_0];
; CHECK: ld.b16 {{%rs[0-9]+}}, [%[[P1]]];
; CHECK: [[LOOP:\$L__BB[0-9_]+]]:
-; CHECK: mov.u16 [[R:%rs[0-9]+]], [[AB:%rs[0-9]+]];
+; CHECK: mov.b16 [[R:%rs[0-9]+]], [[AB:%rs[0-9]+]];
; CHECK: ld.b16 [[AB:%rs[0-9]+]], [%[[P1]]];
; CHECK: {
; CHECK: st.param.b64 [param0], %[[P1]];
diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
index 1905fec8ab7a8..539e810c83cbd 100644
--- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -260,7 +260,7 @@ define <2 x half> @test_fneg(<2 x half> %a) #0 {
; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fneg_param_0];
; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r1;
; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2;
-; CHECK-NOF16-NEXT: mov.f32 %f2, 0f00000000;
+; CHECK-NOF16-NEXT: mov.b32 %f2, 0f00000000;
; CHECK-NOF16-NEXT: sub.rn.f32 %f3, %f2, %f1;
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %f3;
; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs1;
diff --git a/llvm/test/CodeGen/NVPTX/fma.ll b/llvm/test/CodeGen/NVPTX/fma.ll
index 3416420367beb..90fbd5ba9dfd6 100644
--- a/llvm/test/CodeGen/NVPTX/fma.ll
+++ b/llvm/test/CodeGen/NVPTX/fma.ll
@@ -50,7 +50,7 @@ define ptx_device float @f32_iir(float %x) {
}
define ptx_device float @f32_iii(float %x) {
-; CHECK: mov.f32 %f{{[0-9]+}}, 0f41200000;
+; CHECK: mov.b32 %f{{[0-9]+}}, 0f41200000;
; CHECK: ret;
%r = call float @llvm.fma.f32(float 2.0, float 3.0, float 4.0)
ret float %r
diff --git a/llvm/test/CodeGen/NVPTX/i128.ll b/llvm/test/CodeGen/NVPTX/i128.ll
index ca1b5fdabbf8f..546700c2b0335 100644
--- a/llvm/test/CodeGen/NVPTX/i128.ll
+++ b/llvm/test/CodeGen/NVPTX/i128.ll
@@ -77,7 +77,7 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: setp.gt.s32 %p16, %r10, 63;
; CHECK-NEXT: selp.b64 %rd124, %rd76, %rd75, %p16;
; CHECK-NEXT: shl.b64 %rd123, %rd3, %r10;
-; CHECK-NEXT: mov.u64 %rd114, %rd117;
+; CHECK-NEXT: mov.b64 %rd114, %rd117;
; CHECK-NEXT: @%p15 bra $L__BB0_4;
; CHECK-NEXT: // %bb.1: // %udiv-preheader
; CHECK-NEXT: cvt.u32.u64 %r13, %rd119;
@@ -93,7 +93,7 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: add.cc.s64 %rd35, %rd5, -1;
; CHECK-NEXT: addc.cc.s64 %rd36, %rd6, -1;
; CHECK-NEXT: mov.b64 %rd114, 0;
-; CHECK-NEXT: mov.u64 %rd117, %rd114;
+; CHECK-NEXT: mov.b64 %rd117, %rd114;
; CHECK-NEXT: $L__BB0_2: // %udiv-do-while
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: shr.u64 %rd83, %rd121, 63;
@@ -210,7 +210,7 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: setp.gt.s32 %p14, %r10, 63;
; CHECK-NEXT: selp.b64 %rd110, %rd66, %rd65, %p14;
; CHECK-NEXT: shl.b64 %rd109, %rd41, %r10;
-; CHECK-NEXT: mov.u64 %rd100, %rd103;
+; CHECK-NEXT: mov.b64 %rd100, %rd103;
; CHECK-NEXT: @%p13 bra $L__BB1_4;
; CHECK-NEXT: // %bb.1: // %udiv-preheader
; CHECK-NEXT: cvt.u32.u64 %r13, %rd105;
@@ -226,7 +226,7 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: add.cc.s64 %rd33, %rd3, -1;
; CHECK-NEXT: addc.cc.s64 %rd34, %rd4, -1;
; CHECK-NEXT: mov.b64 %rd100, 0;
-; CHECK-NEXT: mov.u64 %rd103, %rd100;
+; CHECK-NEXT: mov.b64 %rd103, %rd100;
; CHECK-NEXT: $L__BB1_2: // %udiv-do-while
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: shr.u64 %rd73, %rd107, 63;
@@ -386,7 +386,7 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: setp.gt.s32 %p16, %r10, 63;
; CHECK-NEXT: selp.b64 %rd119, %rd77, %rd76, %p16;
; CHECK-NEXT: shl.b64 %rd118, %rd1, %r10;
-; CHECK-NEXT: mov.u64 %rd109, %rd112;
+; CHECK-NEXT: mov.b64 %rd109, %rd112;
; CHECK-NEXT: @%p15 bra $L__BB4_4;
; CHECK-NEXT: // %bb.1: // %udiv-preheader
; CHECK-NEXT: cvt.u32.u64 %r13, %rd114;
@@ -402,7 +402,7 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: add.cc.s64 %rd35, %rd3, -1;
; CHECK-NEXT: addc.cc.s64 %rd36, %rd4, -1;
; CHECK-NEXT: mov.b64 %rd109, 0;
-; CHECK-NEXT: mov.u64 %rd112, %rd109;
+; CHECK-NEXT: mov.b64 %rd112, %rd109;
; CHECK-NEXT: $L__BB4_2: // %udiv-do-while
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: shr.u64 %rd84, %rd116, 63;
@@ -513,7 +513,7 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: setp.gt.s32 %p14, %r10, 63;
; CHECK-NEXT: selp.b64 %rd104, %rd66, %rd65, %p14;
; CHECK-NEXT: shl.b64 %rd103, %rd41, %r10;
-; CHECK-NEXT: mov.u64 %rd94, %rd97;
+; CHECK-NEXT: mov.b64 %rd94, %rd97;
; CHECK-NEXT: @%p13 bra $L__BB5_4;
; CHECK-NEXT: // %bb.1: // %udiv-preheader
; CHECK-NEXT: cvt.u32.u64 %r13, %rd99;
@@ -529,7 +529,7 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: add.cc.s64 %rd33, %rd43, -1;
; CHECK-NEXT: addc.cc.s64 %rd34, %rd44, -1;
; CHECK-NEXT: mov.b64 %rd94, 0;
-; CHECK-NEXT: mov.u64 %rd97, %rd94;
+; CHECK-NEXT: mov.b64 %rd97, %rd94;
; CHECK-NEXT: $L__BB5_2: // %udiv-do-while
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: shr.u64 %rd73, %rd101, 63;
diff --git a/llvm/test/CodeGen/NVPTX/indirect_byval.ll b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
index 3ae6300d8767d..4509fcfd1a9bc 100644
--- a/llvm/test/CodeGen/NVPTX/indirect_byval.ll
+++ b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
@@ -20,7 +20,7 @@ define internal i32 @foo() {
; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
-; CHECK-NEXT: mov.u64 %SPL, __local_depot0;
+; CHECK-NEXT: mov.b64 %SPL, __local_depot0;
; CHECK-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-NEXT: ld.global.u64 %rd1, [ptr];
; CHECK-NEXT: add.u64 %rd3, %SPL, 1;
@@ -63,7 +63,7 @@ define internal i32 @bar() {
; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %entry
-; CHECK-NEXT: mov.u64 %SPL, __local_depot1;
+; CHECK-NEXT: mov.b64 %SPL, __local_depot1;
; CHECK-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-NEXT: ld.global.u64 %rd1, [ptr];
; CHECK-NEXT: add.u64 %rd3, %SPL, 8;
diff --git a/llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll b/llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll
index eaf6cf59dd066..311741f737adc 100644
--- a/llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll
+++ b/llvm/test/CodeGen/NVPTX/inline-asm-b128-test1.ll
@@ -16,7 +16,7 @@ define void @test_b128_input_from_const() {
; CHECK-NEXT: mov.b64 %rd2, 0;
; CHECK-NEXT: mov.b64 %rd3, 42;
; CHECK-NEXT: mov.b128 %rq1, {%rd3, %rd2};
-; CHECK-NEXT: mov.u64 %rd4, value;
+; CHECK-NEXT: mov.b64 %rd4, value;
; CHECK-NEXT: cvta.global.u64 %rd1, %rd4;
; CHECK-NEXT: // begin inline asm
; CHECK-NEXT: { st.b128 [%rd1], %rq1; }
@@ -38,7 +38,7 @@ define void @test_b128_input_from_load(ptr nocapture readonly %data) {
; CHECK-NEXT: ld.global.u64 %rd4, [%rd3+8];
; CHECK-NEXT: ld.global.u64 %rd5, [%rd3];
; CHECK-NEXT: mov.b128 %rq1, {%rd5, %rd4};
-; CHECK-NEXT: mov.u64 %rd6, value;
+; CHECK-NEXT: mov.b64 %rd6, value;
; CHECK-NEXT: cvta.global.u64 %rd1, %rd6;
; CHECK-NEXT: // begin inline asm
; CHECK-NEXT: { st.b128 [%rd1], %rq1; }
@@ -67,7 +67,7 @@ define void @test_b128_input_from_select(ptr nocapture readonly %flag) {
; CHECK-NEXT: selp.b64 %rd4, 24, 42, %p1;
; CHECK-NEXT: mov.b64 %rd5, 0;
; CHECK-NEXT: mov.b128 %rq1, {%rd4, %rd5};
-; CHECK-NEXT: mov.u64 %rd6, value;
+; CHECK-NEXT: mov.b64 %rd6, value;
; CHECK-NEXT: cvta.global.u64 %rd1, %rd6;
; CHECK-NEXT: // begin inline asm
; CHECK-NEXT: { st.b128 [%rd1], %rq1; }
diff --git a/llvm/test/CodeGen/NVPTX/inline-asm-b128-test2.ll b/llvm/test/CodeGen/NVPTX/inline-asm-b128-test2.ll
index 8441c30e4c4d4..8ca863bba5f4a 100644
--- a/llvm/test/CodeGen/NVPTX/inline-asm-b128-test2.ll
+++ b/llvm/test/CodeGen/NVPTX/inline-asm-b128-test2.ll
@@ -25,7 +25,7 @@ define void @test_corner_values() {
; CHECK-NEXT: add.s64 %rd2, %rd1, 8;
; CHECK-NEXT: mov.b64 %rd13, -1;
; CHECK-NEXT: mov.b128 %rq1, {%rd13, %rd13};
-; CHECK-NEXT: mov.u64 %rd14, v_u128_max;
+; CHECK-NEXT: mov.b64 %rd14, v_u128_max;
; CHECK-NEXT: cvta.global.u64 %rd3, %rd14;
; CHECK-NEXT: // begin inline asm
; CHECK-NEXT: {
@@ -42,7 +42,7 @@ define void @test_corner_values() {
; CHECK-NEXT: add.s64 %rd5, %rd15, 24;
; CHECK-NEXT: mov.b64 %rd16, 9223372036854775807;
; CHECK-NEXT: mov.b128 %rq2, {%rd13, %rd16};
-; CHECK-NEXT: mov.u64 %rd17, v_i128_max;
+; CHECK-NEXT: mov.b64 %rd17, v_i128_max;
; CHECK-NEXT: cvta.global.u64 %rd6, %rd17;
; CHECK-NEXT: // begin inline asm
; CHECK-NEXT: {
@@ -60,7 +60,7 @@ define void @test_corner_values() {
; CHECK-NEXT: mov.b64 %rd19, -9223372036854775808;
; CHECK-NEXT: mov.b64 %rd20, 0;
; CHECK-NEXT: mov.b128 %rq3, {%rd20, %rd19};
-; CHECK-NEXT: mov.u64 %rd21, v_i128_min;
+; CHECK-NEXT: mov.b64 %rd21, v_i128_min;
; CHECK-NEXT: cvta.global.u64 %rd9, %rd21;
; CHECK-NEXT: // begin inline asm
; CHECK-NEXT: {
@@ -76,7 +76,7 @@ define void @test_corner_values() {
; CHECK-NEXT: add.s64 %rd10, %rd22, 48;
; CHECK-NEXT: add.s64 %rd11, %rd22, 56;
; CHECK-NEXT: mov.b128 %rq4, {%rd20, %rd20};
-; CHECK-NEXT: mov.u64 %rd23, v_u128_zero;
+; CHECK-NEXT: mov.b64 %rd23, v_u128_zero;
; CHECK-NEXT: cvta.global.u64 %rd12, %rd23;
; CHECK-NEXT: // begin inline asm
; CHECK-NEXT: {
diff --git a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
index 3523ffe6ae3ca..f49053485fa29 100644
--- a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
+++ b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
@@ -15,7 +15,7 @@ define void @foo(i32 %a) {
; PTX32-NEXT: .reg .b32 %r<4>;
; PTX32-EMPTY:
; PTX32-NEXT: // %bb.0:
-; PTX32-NEXT: mov.u32 %SPL, __local_depot0;
+; PTX32-NEXT: mov.b32 %SPL, __local_depot0;
; PTX32-NEXT: ld.param.u32 %r1, [foo_param_0];
; PTX32-NEXT: add.u32 %r3, %SPL, 0;
; PTX32-NEXT: st.local.u32 [%r3], %r1;
@@ -30,7 +30,7 @@ define void @foo(i32 %a) {
; PTX64-NEXT: .reg .b64 %rd<3>;
; PTX64-EMPTY:
; PTX64-NEXT: // %bb.0:
-; PTX64-NEXT: mov.u64 %SPL, __local_depot0;
+; PTX64-NEXT: mov.b64 %SPL, __local_depot0;
; PTX64-NEXT: ld.param.u32 %r1, [foo_param_0];
; PTX64-NEXT: add.u64 %rd2, %SPL, 0;
; PTX64-NEXT: st.local.u32 [%rd2], %r1;
@@ -49,7 +49,7 @@ define ptx_kernel void @foo2(i32 %a) {
; PTX32-NEXT: .reg .b32 %r<4>;
; PTX32-EMPTY:
; PTX32-NEXT: // %bb.0:
-; PTX32-NEXT: mov.u32 %SPL, __local_depot1;
+; PTX32-NEXT: mov.b32 %SPL, __local_depot1;
; PTX32-NEXT: cvta.local.u32 %SP, %SPL;
; PTX32-NEXT: ld.param.u32 %r1, [foo2_param_0];
; PTX32-NEXT: add.u32 %r2, %SP, 0;
@@ -75,7 +75,7 @@ define ptx_kernel void @foo2(i32 %a) {
; PTX64-NEXT: .reg .b64 %rd<3>;
; PTX64-EMPTY:
; PTX64-NEXT: // %bb.0:
-; PTX64-NEXT: mov.u64 %SPL, __local_depot1;
+; PTX64-NEXT: mov.b64 %SPL, __local_depot1;
; PTX64-NEXT: cvta.local.u64 %SP, %SPL;
; PTX64-NEXT: ld.param.u32 %r1, [foo2_param_0];
; PTX64-NEXT: add.u64 %rd1, %SP, 0;
@@ -108,7 +108,7 @@ define void @foo3(i32 %a) {
; PTX32-NEXT: .reg .b32 %r<6>;
; PTX32-EMPTY:
; PTX32-NEXT: // %bb.0:
-; PTX32-NEXT: mov.u32 %SPL, __local_depot2;
+; PTX32-NEXT: mov.b32 %SPL, __local_depot2;
; PTX32-NEXT: ld.param.u32 %r1, [foo3_param_0];
; PTX32-NEXT: add.u32 %r3, %SPL, 0;
; PTX32-NEXT: shl.b32 %r4, %r1, 2;
@@ -125,7 +125,7 @@ define void @foo3(i32 %a) {
; PTX64-NEXT: .reg .b64 %rd<5>;
; PTX64-EMPTY:
; PTX64-NEXT: // %bb.0:
-; PTX64-NEXT: mov.u64 %SPL, __local_depot2;
+; PTX64-NEXT: mov.b64 %SPL, __local_depot2;
; PTX64-NEXT: ld.param.u32 %r1, [foo3_param_0];
; PTX64-NEXT: add.u64 %rd2, %SPL, 0;
; PTX64-NEXT: mul.wide.s32 %rd3, %r1, 4;
@@ -147,7 +147,7 @@ define void @foo4() {
; PTX32-NEXT: .reg .b32 %r<6>;
; PTX32-EMPTY:
; PTX32-NEXT: // %bb.0:
-; PTX32-NEXT: mov.u32 %SPL, __local_depot3;
+; PTX32-NEXT: mov.b32 %SPL, __local_depot3;
; PTX32-NEXT: cvta.local.u32 %SP, %SPL;
; PTX32-NEXT: add.u32 %r1, %SP, 0;
; PTX32-NEXT: add.u32 %r2, %SPL, 0;
@@ -185,7 +185,7 @@ define void @foo4() {
; PTX64-NEXT: .reg .b64 %rd<5>;
; PTX64-EMPTY:
; PTX64-NEXT: // %bb.0:
-; PTX64-NEXT: mov.u64 %SPL, __local_depot3;
+; PTX64-NEXT: mov.b64 %SPL, __local_depot3;
; PTX64-NEXT: cvta.local.u64 %SP, %SPL;
; PTX64-NEXT: add.u64 %rd1, %SP, 0;
; PTX64-NEXT: add.u64 %rd2, %SPL, 0;
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index 90f9306d036cd..e4e1f40d0d8b2 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -33,7 +33,7 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly
; PTX-NEXT: ld.param.u8 %rs1, [non_kernel_function_param_1];
; PTX-NEXT: and.b16 %rs2, %rs1, 1;
; PTX-NEXT: setp.eq.b16 %p1, %rs2, 1;
-; PTX-NEXT: mov.u64 %rd3, gi;
+; PTX-NEXT: mov.b64 %rd3, gi;
; PTX-NEXT: cvta.global.u64 %rd4, %rd3;
; PTX-NEXT: selp.b64 %rd5, %rd2, %rd4, %p1;
; PTX-NEXT: ld.param.s32 %rd6, [non_kernel_function_param_2];
@@ -81,7 +81,6 @@ define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %inpu
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]]
; OPT-NEXT: store i32 [[ADD]], ptr [[OUT3]], align 4
; OPT-NEXT: ret void
-;
%tmp = load i32, ptr %input1, align 4
%add = add i32 %tmp, %input2
store i32 %add, ptr %out
@@ -116,7 +115,6 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]]
; OPT-NEXT: store i32 [[ADD]], ptr [[OUT5]], align 4
; OPT-NEXT: ret void
-;
%gep1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
%gep2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
%int1 = load i32, ptr %gep1
@@ -134,9 +132,9 @@ define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
; PTX-NEXT: mov.b64 %rd2, grid_const_escape_param_0;
-; PTX-NEXT: mov.u64 %rd3, %rd2;
+; PTX-NEXT: mov.b64 %rd3, %rd2;
; PTX-NEXT: cvta.param.u64 %rd4, %rd3;
-; PTX-NEXT: mov.u64 %rd1, escape;
+; PTX-NEXT: mov.b64 %rd1, escape;
; PTX-NEXT: { // callseq 0, 0
; PTX-NEXT: .param .b64 param0;
; PTX-NEXT: st.param.b64 [param0], %rd4;
@@ -157,7 +155,6 @@ define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]])
; OPT-NEXT: ret void
-;
%call = call i32 @escape(ptr %input)
ret void
}
@@ -172,19 +169,19 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
; PTX-NEXT: .reg .b64 %rd<10>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
-; PTX-NEXT: mov.u64 %SPL, __local_depot4;
+; PTX-NEXT: mov.b64 %SPL, __local_depot4;
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
; PTX-NEXT: mov.b64 %rd2, multiple_grid_const_escape_param_0;
; PTX-NEXT: mov.b64 %rd3, multiple_grid_const_escape_param_2;
-; PTX-NEXT: mov.u64 %rd4, %rd3;
+; PTX-NEXT: mov.b64 %rd4, %rd3;
; PTX-NEXT: ld.param.u32 %r1, [multiple_grid_const_escape_param_1];
; PTX-NEXT: cvta.param.u64 %rd5, %rd4;
-; PTX-NEXT: mov.u64 %rd6, %rd2;
+; PTX-NEXT: mov.b64 %rd6, %rd2;
; PTX-NEXT: cvta.param.u64 %rd7, %rd6;
; PTX-NEXT: add.u64 %rd8, %SP, 0;
; PTX-NEXT: add.u64 %rd9, %SPL, 0;
; PTX-NEXT: st.local.u32 [%rd9], %r1;
-; PTX-NEXT: mov.u64 %rd1, escape3;
+; PTX-NEXT: mov.b64 %rd1, escape3;
; PTX-NEXT: { // callseq 1, 0
; PTX-NEXT: .param .b64 param0;
; PTX-NEXT: st.param.b64 [param0], %rd7;
@@ -215,7 +212,6 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
; OPT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape3(ptr [[INPUT_PARAM_GEN]], ptr [[A_ADDR]], ptr [[B_PARAM_GEN]])
; OPT-NEXT: ret void
-;
%a.addr = alloca i32, align 4
store i32 %a, ptr %a.addr, align 4
%call = call i32 @escape3(ptr %input, ptr %a.addr, ptr %b)
@@ -231,7 +227,7 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i
; PTX-NEXT: mov.b64 %rd1, grid_const_memory_escape_param_0;
; PTX-NEXT: ld.param.u64 %rd2, [grid_const_memory_escape_param_1];
; PTX-NEXT: cvta.to.global.u64 %rd3, %rd2;
-; PTX-NEXT: mov.u64 %rd4, %rd1;
+; PTX-NEXT: mov.b64 %rd4, %rd1;
; PTX-NEXT: cvta.param.u64 %rd5, %rd4;
; PTX-NEXT: st.global.u64 [%rd3], %rd5;
; PTX-NEXT: ret;
@@ -243,7 +239,6 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR5]], align 8
; OPT-NEXT: ret void
-;
store ptr %input, ptr %addr, align 8
ret void
}
@@ -257,7 +252,7 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
; PTX-NEXT: mov.b64 %rd4, grid_const_inlineasm_escape_param_0;
; PTX-NEXT: ld.param.u64 %rd5, [grid_const_inlineasm_escape_param_1];
; PTX-NEXT: cvta.to.global.u64 %rd6, %rd5;
-; PTX-NEXT: mov.u64 %rd7, %rd4;
+; PTX-NEXT: mov.b64 %rd7, %rd4;
; PTX-NEXT: cvta.param.u64 %rd2, %rd7;
; PTX-NEXT: add.s64 %rd3, %rd2, 4;
; PTX-NEXT: // begin inline asm
@@ -277,7 +272,6 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
; OPT-NEXT: [[TMP2:%.*]] = call i64 asm "add.s64 $0, $1, $2
; OPT-NEXT: store i64 [[TMP2]], ptr [[RESULT5]], align 8
; OPT-NEXT: ret void
-;
%tmpptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
%tmpptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
%1 = call i64 asm "add.s64 $0, $1, $2;", "=l,l,l"(ptr %tmpptr1, ptr %tmpptr2) #1
@@ -295,12 +289,12 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
; PTX-NEXT: mov.b64 %rd2, grid_const_partial_escape_param_0;
; PTX-NEXT: ld.param.u64 %rd3, [grid_const_partial_escape_param_1];
; PTX-NEXT: cvta.to.global.u64 %rd4, %rd3;
-; PTX-NEXT: mov.u64 %rd5, %rd2;
+; PTX-NEXT: mov.b64 %rd5, %rd2;
; PTX-NEXT: cvta.param.u64 %rd6, %rd5;
; PTX-NEXT: ld.u32 %r1, [%rd6];
; PTX-NEXT: add.s32 %r2, %r1, %r1;
; PTX-NEXT: st.global.u32 [%rd4], %r2;
-; PTX-NEXT: mov.u64 %rd1, escape;
+; PTX-NEXT: mov.b64 %rd1, escape;
; PTX-NEXT: { // callseq 2, 0
; PTX-NEXT: .param .b64 param0;
; PTX-NEXT: st.param.b64 [param0], %rd6;
@@ -326,7 +320,6 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
; OPT-NEXT: store i32 [[TWICE]], ptr [[OUTPUT5]], align 4
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT1_GEN]])
; OPT-NEXT: ret void
-;
%val = load i32, ptr %input
%twice = add i32 %val, %val
store i32 %twice, ptr %output
@@ -344,13 +337,13 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input,
; PTX-NEXT: mov.b64 %rd2, grid_const_partial_escapemem_param_0;
; PTX-NEXT: ld.param.u64 %rd3, [grid_const_partial_escapemem_param_1];
; PTX-NEXT: cvta.to.global.u64 %rd4, %rd3;
-; PTX-NEXT: mov.u64 %rd5, %rd2;
+; PTX-NEXT: mov.b64 %rd5, %rd2;
; PTX-NEXT: cvta.param.u64 %rd6, %rd5;
; PTX-NEXT: ld.u32 %r1, [%rd6];
; PTX-NEXT: ld.u32 %r2, [%rd6+4];
; PTX-NEXT: st.global.u64 [%rd4], %rd6;
; PTX-NEXT: add.s32 %r3, %r1, %r2;
-; PTX-NEXT: mov.u64 %rd1, escape;
+; PTX-NEXT: mov.b64 %rd1, escape;
; PTX-NEXT: { // callseq 3, 0
; PTX-NEXT: .param .b64 param0;
; PTX-NEXT: st.param.b64 [param0], %rd6;
@@ -380,7 +373,6 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input,
; OPT-NEXT: [[ADD:%.*]] = add i32 [[VAL1]], [[VAL2]]
; OPT-NEXT: [[CALL2:%.*]] = call i32 @escape(ptr [[PTR1]])
; OPT-NEXT: ret i32 [[ADD]]
-;
%ptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
%val1 = load i32, ptr %ptr1
%ptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
@@ -402,7 +394,7 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
; PTX-NEXT: mov.b64 %rd5, grid_const_phi_param_0;
; PTX-NEXT: ld.param.u64 %rd6, [grid_const_phi_param_1];
; PTX-NEXT: cvta.to.global.u64 %rd1, %rd6;
-; PTX-NEXT: mov.u64 %rd7, %rd5;
+; PTX-NEXT: mov.b64 %rd7, %rd5;
; PTX-NEXT: cvta.param.u64 %rd8, %rd7;
; PTX-NEXT: ld.global.u32 %r1, [%rd1];
; PTX-NEXT: setp.lt.s32 %p1, %r1, 0;
@@ -433,7 +425,6 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
; OPT-NEXT: ret void
-;
%val = load i32, ptr %inout
%less = icmp slt i32 %val, 0
@@ -463,14 +454,14 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
; PTX-NEXT: mov.b64 %rd6, grid_const_phi_ngc_param_0;
; PTX-NEXT: ld.param.u64 %rd7, [grid_const_phi_ngc_param_2];
; PTX-NEXT: cvta.to.global.u64 %rd1, %rd7;
-; PTX-NEXT: mov.u64 %rd10, %rd6;
+; PTX-NEXT: mov.b64 %rd10, %rd6;
; PTX-NEXT: cvta.param.u64 %rd11, %rd10;
; PTX-NEXT: ld.global.u32 %r1, [%rd1];
; PTX-NEXT: setp.lt.s32 %p1, %r1, 0;
; PTX-NEXT: @%p1 bra $L__BB10_2;
; PTX-NEXT: // %bb.1: // %second
; PTX-NEXT: mov.b64 %rd8, grid_const_phi_ngc_param_1;
-; PTX-NEXT: mov.u64 %rd9, %rd8;
+; PTX-NEXT: mov.b64 %rd9, %rd8;
; PTX-NEXT: cvta.param.u64 %rd2, %rd9;
; PTX-NEXT: add.s64 %rd11, %rd2, 4;
; PTX-NEXT: $L__BB10_2: // %merge
@@ -499,7 +490,6 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
; OPT-NEXT: ret void
-;
%val = load i32, ptr %inout
%less = icmp slt i32 %val, 0
br i1 %less, label %first, label %second
@@ -529,9 +519,9 @@ define ptx_kernel void @grid_const_select(ptr byval(i32) align 4 %input1, ptr by
; PTX-NEXT: ld.param.u64 %rd2, [grid_const_select_param_2];
; PTX-NEXT: cvta.to.global.u64 %rd3, %rd2;
; PTX-NEXT: mov.b64 %rd4, grid_const_select_param_1;
-; PTX-NEXT: mov.u64 %rd5, %rd4;
+; PTX-NEXT: mov.b64 %rd5, %rd4;
; PTX-NEXT: cvta.param.u64 %rd6, %rd5;
-; PTX-NEXT: mov.u64 %rd7, %rd1;
+; PTX-NEXT: mov.b64 %rd7, %rd1;
; PTX-NEXT: cvta.param.u64 %rd8, %rd7;
; PTX-NEXT: ld.global.u32 %r1, [%rd3];
; PTX-NEXT: setp.lt.s32 %p1, %r1, 0;
@@ -553,7 +543,6 @@ define ptx_kernel void @grid_const_select(ptr byval(i32) align 4 %input1, ptr by
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
; OPT-NEXT: ret void
-;
%val = load i32, ptr %inout
%less = icmp slt i32 %val, 0
%ptrnew = select i1 %less, ptr %input1, ptr %input2
@@ -570,7 +559,7 @@ define ptx_kernel i32 @grid_const_ptrtoint(ptr byval(i32) %input) {
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
; PTX-NEXT: mov.b64 %rd1, grid_const_ptrtoint_param_0;
-; PTX-NEXT: mov.u64 %rd2, %rd1;
+; PTX-NEXT: mov.b64 %rd2, %rd1;
; PTX-NEXT: ld.param.u32 %r1, [grid_const_ptrtoint_param_0];
; PTX-NEXT: cvta.param.u64 %rd3, %rd2;
; PTX-NEXT: cvt.u32.u64 %r2, %rd3;
@@ -585,7 +574,6 @@ define ptx_kernel i32 @grid_const_ptrtoint(ptr byval(i32) %input) {
; OPT-NEXT: [[PTRVAL:%.*]] = ptrtoint ptr [[INPUT1]] to i32
; OPT-NEXT: [[KEEPALIVE:%.*]] = add i32 [[INPUT3]], [[PTRVAL]]
; OPT-NEXT: ret i32 [[KEEPALIVE]]
-;
%val = load i32, ptr %input
%ptrval = ptrtoint ptr %input to i32
%keepalive = add i32 %val, %ptrval
diff --git a/llvm/test/CodeGen/NVPTX/no-extra-parens.ll b/llvm/test/CodeGen/NVPTX/no-extra-parens.ll
index 16c397116dc38..214e3a57f912c 100644
--- a/llvm/test/CodeGen/NVPTX/no-extra-parens.ll
+++ b/llvm/test/CodeGen/NVPTX/no-extra-parens.ll
@@ -9,7 +9,7 @@
declare void @str2(ptr %str)
define void @str1() {
entry:
-;; CHECK: mov.u64 %rd{{[0-9]+}}, $str;
+;; CHECK: mov.b64 %rd{{[0-9]+}}, $str;
tail call void @str2(ptr addrspacecast (ptr addrspace(1) @"$str" to ptr))
ret void
}
diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll
index 41372c531de23..885c711d31f01 100644
--- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll
+++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll
@@ -143,15 +143,15 @@ return:
}
; SM_52: .visible .func (.param .b32 func_retval0) phi()
-; SM_52: mov.f32 %[[REG:.+]], 0f00000000;
+; SM_52: mov.b32 %[[REG:.+]], 0f00000000;
; SM_52-NEXT: st.param.f32 [func_retval0], %[[REG]];
; SM_52-NEXT: ret;
; SM_70: .visible .func (.param .b32 func_retval0) phi()
-; SM_70: mov.f32 %[[REG:.+]], 0f00000000;
+; SM_70: mov.b32 %[[REG:.+]], 0f00000000;
; SM_70-NEXT: st.param.f32 [func_retval0], %[[REG]];
; SM_70-NEXT: ret;
; SM_90: .visible .func (.param .b32 func_retval0) phi()
-; SM_90: mov.f32 %[[REG:.+]], 0f00000000;
+; SM_90: mov.b32 %[[REG:.+]], 0f00000000;
; SM_90-NEXT: st.param.f32 [func_retval0], %[[REG]];
; SM_90-NEXT: ret;
define float @phi() {
diff --git a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll
index da1a449c5d51f..b95a3287474c4 100644
--- a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll
+++ b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure-ptx.ll
@@ -110,7 +110,7 @@ define float @check_f32() {
; PTX-DAG: ld.param.f32 [[LD:%f[0-9]+]], [retval0];
; PTX-DAG: } // callseq {{[0-9]+}}
- ; PTX-WITHOUT-DAG: mov.f32 [[PROXY:%f[0-9]+]], [[LD]];
+ ; PTX-WITHOUT-DAG: mov.b32 [[PROXY:%f[0-9]+]], [[LD]];
; PTX-WITHOUT-DAG: st.param.f32 [func_retval0], [[PROXY]];
; PTX-WITH-DAG: st.param.f32 [func_retval0], [[LD]];
@@ -125,7 +125,7 @@ define double @check_f64() {
; PTX-DAG: ld.param.f64 [[LD:%fd[0-9]+]], [retval0];
; PTX-DAG: } // callseq {{[0-9]+}}
- ; PTX-WITHOUT-DAG: mov.f64 [[PROXY:%fd[0-9]+]], [[LD]];
+ ; PTX-WITHOUT-DAG: mov.b64 [[PROXY:%fd[0-9]+]], [[LD]];
; PTX-WITHOUT-DAG: st.param.f64 [func_retval0], [[PROXY]];
; PTX-WITH-DAG: st.param.f64 [func_retval0], [[LD]];
@@ -173,8 +173,8 @@ define <2 x double> @check_vec_f64() {
; PTX-DAG: ld.param.v2.f64 {[[LD0:%fd[0-9]+]], [[LD1:%fd[0-9]+]]}, [retval0];
; PTX-DAG: } // callseq {{[0-9]+}}
- ; PTX-WITHOUT-DAG: mov.f64 [[PROXY0:%fd[0-9]+]], [[LD0]];
- ; PTX-WITHOUT-DAG: mov.f64 [[PROXY1:%fd[0-9]+]], [[LD1]];
+ ; PTX-WITHOUT-DAG: mov.b64 [[PROXY0:%fd[0-9]+]], [[LD0]];
+ ; PTX-WITHOUT-DAG: mov.b64 [[PROXY1:%fd[0-9]+]], [[LD1]];
; PTX-WITHOUT-DAG: st.param.v2.f64 [func_retval0], {[[PROXY0]], [[PROXY1]]};
; PTX-WITH-DAG: st.param.v2.f64 [func_retval0], {[[LD0]], [[LD1]]};
diff --git a/llvm/test/CodeGen/NVPTX/variadics-backend.ll b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
index 5a7e40ce898df..35db4894c1b49 100644
--- a/llvm/test/CodeGen/NVPTX/variadics-backend.ll
+++ b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
@@ -109,7 +109,7 @@ define dso_local i32 @foo() {
; CHECK-PTX-NEXT: .reg .b64 %rd<5>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
-; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot1;
+; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot1;
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: mov.b64 %rd1, 4294967297;
; CHECK-PTX-NEXT: st.u64 [%SP], %rd1;
@@ -156,7 +156,7 @@ define dso_local i32 @variadics2(i32 noundef %first, ...) {
; CHECK-PTX-NEXT: .reg .b64 %rd<9>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
-; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot2;
+; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot2;
; CHECK-PTX-NEXT: ld.param.u32 %r1, [variadics2_param_0];
; CHECK-PTX-NEXT: ld.param.u64 %rd1, [variadics2_param_1];
; CHECK-PTX-NEXT: add.u64 %rd3, %SPL, 0;
@@ -217,7 +217,7 @@ define dso_local i32 @bar() {
; CHECK-PTX-NEXT: .reg .b64 %rd<5>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
-; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot3;
+; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot3;
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: add.u64 %rd2, %SPL, 0;
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs1, [__const_$_bar_$_s1+7];
@@ -308,7 +308,7 @@ define dso_local i32 @baz() {
; CHECK-PTX-NEXT: .reg .b64 %rd<2>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
-; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot5;
+; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot5;
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: mov.b32 %r1, 1;
; CHECK-PTX-NEXT: st.v4.u32 [%SP], {%r1, %r1, %r1, %r1};
@@ -382,7 +382,7 @@ define dso_local void @qux() {
; CHECK-PTX-NEXT: .reg .b64 %rd<9>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
-; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot7;
+; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot7;
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: add.u64 %rd2, %SPL, 0;
; CHECK-PTX-NEXT: ld.global.nc.u64 %rd3, [__const_$_qux_$_s+8];
diff --git a/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll b/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll
index 31517939a4b75..8a9052c6f98f9 100644
--- a/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll
+++ b/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll
@@ -5,7 +5,7 @@
; CHECK: .visible .func use_dbg_declare()
; CHECK: .local .align 8 .b8 __local_depot0[8];
-; CHECK: mov.u64 %SPL, __local_depot0;
+; CHECK: mov.b64 %SPL, __local_depot0;
; CHECK: add.u64 %rd1, %SP, 0;
; CHECK: .loc 1 5 3 // t.c:5:3
; CHECK: { // callseq 0, 0
More information about the llvm-commits mailing list