[llvm] [NVPTX] (PR #129344)
Justin Fargnoli via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 28 16:44:14 PST 2025
https://github.com/justinfargnoli created https://github.com/llvm/llvm-project/pull/129344
None
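Summary of the change (drawn from the diff below): patch 1 folds the individually written register-register and register-immediate mov definitions in NVPTXInstrInfo.td into a single MOV multiclass and switches the printed mnemonics to the bit-size forms (mov.b16/mov.b32/mov.b64), which is why the test expectations change from mov.u16, mov.u32, mov.u64, and mov.f32 to the corresponding mov.bNN forms. As an illustrative sketch only (this expansion is not spelled out in the patch; it follows standard TableGen defm naming, where the defm name is prefixed onto the rr/ri suffixes), a defm such as IMOV32 instantiates roughly:

  // Register-register move: no selection pattern, emitted by copyPhysReg.
  def IMOV32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
                           "mov.b32 \t$dst, $src;", []>;
  // Register-immediate move: materializes an i32 immediate.
  def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
                           "mov.b32 \t$dst, $src;",
                           [(set i32:$dst, imm:$src)]>;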
From 1e9697d78972a8eaa2e818ecb71ab5517146af8b Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Thu, 27 Feb 2025 20:38:38 -0800
Subject: [PATCH 1/3] Initial commit
---
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 63 +++++++------------
llvm/test/CodeGen/NVPTX/atomics-sm70.ll | 8 +--
llvm/test/CodeGen/NVPTX/atomics-sm90.ll | 8 +--
llvm/test/CodeGen/NVPTX/cmpxchg.ll | 36 +++++------
llvm/test/CodeGen/NVPTX/div.ll | 4 +-
llvm/test/CodeGen/NVPTX/f16-instructions.ll | 4 +-
llvm/test/CodeGen/NVPTX/f16x2-instructions.ll | 2 +-
llvm/test/CodeGen/NVPTX/fma.ll | 2 +-
llvm/test/CodeGen/NVPTX/i128.ll | 16 ++---
.../CodeGen/NVPTX/lower-args-gridconstant.ll | 26 ++++----
.../CodeGen/NVPTX/nvvm-reflect-arch-O0.ll | 6 +-
11 files changed, 75 insertions(+), 100 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index f94d7099f1b0e..a853dcd5b8db1 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1961,50 +1961,29 @@ let hasSideEffects = false in {
// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
-let hasSideEffects=0, isAsCheapAsAMove=1 in {
- def IMOV1rr : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
- "mov.pred \t$dst, $sss;", []>;
- def IMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
- "mov.u16 \t$dst, $sss;", []>;
- def IMOV32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
- "mov.u32 \t$dst, $sss;", []>;
- def IMOV64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
- "mov.u64 \t$dst, $sss;", []>;
- def IMOV128rr : NVPTXInst<(outs Int128Regs:$dst), (ins Int128Regs:$sss),
- "mov.b128 \t$dst, $sss;", []>;
-
- def FMOV32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
- "mov.f32 \t$dst, $src;", []>;
- def FMOV64rr : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
- "mov.f64 \t$dst, $src;", []>;
-
- def IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
- "mov.pred \t$dst, $src;",
- [(set i1:$dst, imm:$src)]>;
- def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
- "mov.b16 \t$dst, $src;",
- [(set i16:$dst, imm:$src)]>;
- def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
- "mov.b32 \t$dst, $src;",
- [(set i32:$dst, imm:$src)]>;
- def IMOV64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
- "mov.b64 \t$dst, $src;",
- [(set i64:$dst, imm:$src)]>;
-
- def FMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins f16imm:$src),
- "mov.b16 \t$dst, $src;",
- [(set f16:$dst, fpimm:$src)]>;
- def BFMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins bf16imm:$src),
- "mov.b16 \t$dst, $src;",
- [(set bf16:$dst, fpimm:$src)]>;
- def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
- "mov.f32 \t$dst, $src;",
- [(set f32:$dst, fpimm:$src)]>;
- def FMOV64ri : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
- "mov.f64 \t$dst, $src;",
- [(set f64:$dst, fpimm:$src)]>;
+let hasSideEffects = false, isAsCheapAsAMove = true in {
+ multiclass MOV<RegisterClass RC, string OpStr, ValueType VT, Operand IMMType, SDNode ImmNode> {
+ def rr : NVPTXInst<(outs RC:$dst), (ins RC:$src),
+ "mov." # OpStr # " \t$dst, $src;", []>;
+ let isMoveImm = true in {
+ def ri : NVPTXInst<(outs RC:$dst), (ins IMMType:$src),
+ "mov." # OpStr # " \t$dst, $src;",
+ [(set VT:$dst, ImmNode:$src)]>;
+ }
+ }
}
+defm IMOV1 : MOV<Int1Regs, "pred", i1, i1imm, imm>;
+defm IMOV16 : MOV<Int16Regs, "b16", i16, i16imm, imm>;
+defm IMOV32 : MOV<Int32Regs, "b32", i32, i32imm, imm>;
+defm IMOV64 : MOV<Int64Regs, "b64", i64, i64imm, imm>;
+def IMOV128rr : NVPTXInst<(outs Int128Regs:$dst), (ins Int128Regs:$src),
+ "mov.b128 \t$dst, $src;", []>;
+defm FMOV16 : MOV<Int16Regs, "b16", f16, f16imm, fpimm>;
+defm BFMOV16 : MOV<Int16Regs, "b16", bf16, bf16imm, fpimm>;
+defm FMOV32 : MOV<Float32Regs, "b32", f32, f32imm, fpimm>;
+defm FMOV64 : MOV<Float64Regs, "b64", f64, f64imm, fpimm>;
+
def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64ri texternalsym:$dst)>;
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
index b180928af82a4..b14295020bc0e 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
@@ -72,7 +72,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: or.b32 %r32, %r31, %r30;
; CHECKPTX62-NEXT: atom.cas.b32 %r6, [%r1], %r54, %r32;
; CHECKPTX62-NEXT: setp.ne.s32 %p1, %r6, %r54;
-; CHECKPTX62-NEXT: mov.u32 %r54, %r6;
+; CHECKPTX62-NEXT: mov.b32 %r54, %r6;
; CHECKPTX62-NEXT: @%p1 bra $L__BB0_1;
; CHECKPTX62-NEXT: // %bb.2: // %atomicrmw.end44
; CHECKPTX62-NEXT: ld.u32 %r55, [%r1];
@@ -88,7 +88,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: or.b32 %r37, %r36, %r35;
; CHECKPTX62-NEXT: atom.cas.b32 %r9, [%r1], %r55, %r37;
; CHECKPTX62-NEXT: setp.ne.s32 %p2, %r9, %r55;
-; CHECKPTX62-NEXT: mov.u32 %r55, %r9;
+; CHECKPTX62-NEXT: mov.b32 %r55, %r9;
; CHECKPTX62-NEXT: @%p2 bra $L__BB0_3;
; CHECKPTX62-NEXT: // %bb.4: // %atomicrmw.end26
; CHECKPTX62-NEXT: and.b32 %r10, %r22, -4;
@@ -109,7 +109,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: or.b32 %r45, %r44, %r43;
; CHECKPTX62-NEXT: atom.global.cas.b32 %r15, [%r10], %r56, %r45;
; CHECKPTX62-NEXT: setp.ne.s32 %p3, %r15, %r56;
-; CHECKPTX62-NEXT: mov.u32 %r56, %r15;
+; CHECKPTX62-NEXT: mov.b32 %r56, %r15;
; CHECKPTX62-NEXT: @%p3 bra $L__BB0_5;
; CHECKPTX62-NEXT: // %bb.6: // %atomicrmw.end8
; CHECKPTX62-NEXT: and.b32 %r16, %r23, -4;
@@ -130,7 +130,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: or.b32 %r53, %r52, %r51;
; CHECKPTX62-NEXT: atom.shared.cas.b32 %r21, [%r16], %r57, %r53;
; CHECKPTX62-NEXT: setp.ne.s32 %p4, %r21, %r57;
-; CHECKPTX62-NEXT: mov.u32 %r57, %r21;
+; CHECKPTX62-NEXT: mov.b32 %r57, %r21;
; CHECKPTX62-NEXT: @%p4 bra $L__BB0_7;
; CHECKPTX62-NEXT: // %bb.8: // %atomicrmw.end
; CHECKPTX62-NEXT: ret;
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
index 9027bd6a14780..f27e574724ce4 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
@@ -73,7 +73,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: or.b32 %r32, %r31, %r30;
; CHECKPTX71-NEXT: atom.relaxed.cas.b32 %r6, [%r1], %r54, %r32;
; CHECKPTX71-NEXT: setp.ne.s32 %p1, %r6, %r54;
-; CHECKPTX71-NEXT: mov.u32 %r54, %r6;
+; CHECKPTX71-NEXT: mov.b32 %r54, %r6;
; CHECKPTX71-NEXT: @%p1 bra $L__BB0_1;
; CHECKPTX71-NEXT: // %bb.2: // %atomicrmw.end44
; CHECKPTX71-NEXT: ld.u32 %r55, [%r1];
@@ -89,7 +89,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: or.b32 %r37, %r36, %r35;
; CHECKPTX71-NEXT: atom.relaxed.cas.b32 %r9, [%r1], %r55, %r37;
; CHECKPTX71-NEXT: setp.ne.s32 %p2, %r9, %r55;
-; CHECKPTX71-NEXT: mov.u32 %r55, %r9;
+; CHECKPTX71-NEXT: mov.b32 %r55, %r9;
; CHECKPTX71-NEXT: @%p2 bra $L__BB0_3;
; CHECKPTX71-NEXT: // %bb.4: // %atomicrmw.end26
; CHECKPTX71-NEXT: and.b32 %r10, %r22, -4;
@@ -111,7 +111,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: or.b32 %r45, %r44, %r43;
; CHECKPTX71-NEXT: atom.relaxed.global.cas.b32 %r15, [%r10], %r56, %r45;
; CHECKPTX71-NEXT: setp.ne.s32 %p3, %r15, %r56;
-; CHECKPTX71-NEXT: mov.u32 %r56, %r15;
+; CHECKPTX71-NEXT: mov.b32 %r56, %r15;
; CHECKPTX71-NEXT: @%p3 bra $L__BB0_5;
; CHECKPTX71-NEXT: // %bb.6: // %atomicrmw.end8
; CHECKPTX71-NEXT: and.b32 %r16, %r23, -4;
@@ -133,7 +133,7 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: or.b32 %r53, %r52, %r51;
; CHECKPTX71-NEXT: atom.relaxed.shared.cas.b32 %r21, [%r16], %r57, %r53;
; CHECKPTX71-NEXT: setp.ne.s32 %p4, %r21, %r57;
-; CHECKPTX71-NEXT: mov.u32 %r57, %r21;
+; CHECKPTX71-NEXT: mov.b32 %r57, %r21;
; CHECKPTX71-NEXT: @%p4 bra $L__BB0_7;
; CHECKPTX71-NEXT: // %bb.8: // %atomicrmw.end
; CHECKPTX71-NEXT: ret;
diff --git a/llvm/test/CodeGen/NVPTX/cmpxchg.ll b/llvm/test/CodeGen/NVPTX/cmpxchg.ll
index aaea0d2ee25ef..fd721a1bb0371 100644
--- a/llvm/test/CodeGen/NVPTX/cmpxchg.ll
+++ b/llvm/test/CodeGen/NVPTX/cmpxchg.ll
@@ -1068,12 +1068,11 @@ define i16 @acq_rel_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM30-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM30-NEXT: // in Loop: Header=BB8_1 Depth=1
; SM30-NEXT: and.b32 %r8, %r7, %r2;
-; SM30-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM30-NEXT: mov.u32 %r19, %r8;
-; SM30-NEXT: @%p2 bra $L__BB8_1;
-; SM30-NEXT: $L__BB8_3: // %partword.cmpxchg.end
-; SM30-NEXT: membar.sys;
-; SM30-NEXT: st.param.b32 [func_retval0], %r14;
+; SM30-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM30-NEXT: mov.b32 %r20, %r8;
+; SM30-NEXT: @%p2 bra $L__BB0_1;
+; SM30-NEXT: $L__BB0_3: // %partword.cmpxchg.end
+; SM30-NEXT: st.param.b32 [func_retval0], %r13;
; SM30-NEXT: ret;
;
; SM70-LABEL: acq_rel_sys_i16(
@@ -1110,12 +1109,11 @@ define i16 @acq_rel_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: // %bb.2: // %partword.cmpxchg.failure
; SM70-NEXT: // in Loop: Header=BB8_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
-; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
-; SM70-NEXT: @%p2 bra $L__BB8_1;
-; SM70-NEXT: $L__BB8_3: // %partword.cmpxchg.end
-; SM70-NEXT: fence.acq_rel.sys;
-; SM70-NEXT: st.param.b32 [func_retval0], %r14;
+; SM70-NEXT: setp.ne.s32 %p2, %r20, %r8;
+; SM70-NEXT: mov.b32 %r20, %r8;
+; SM70-NEXT: @%p2 bra $L__BB0_1;
+; SM70-NEXT: $L__BB0_3: // %partword.cmpxchg.end
+; SM70-NEXT: st.param.b32 [func_retval0], %r13;
; SM70-NEXT: ret;
; SM90-LABEL: acq_rel_sys_i16(
; SM90: {
@@ -1199,10 +1197,9 @@ define i16 @seq_cst_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM30-NEXT: // in Loop: Header=BB9_1 Depth=1
; SM30-NEXT: and.b32 %r8, %r7, %r2;
; SM30-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM30-NEXT: mov.u32 %r19, %r8;
-; SM30-NEXT: @%p2 bra $L__BB9_1;
-; SM30-NEXT: $L__BB9_3: // %partword.cmpxchg.end
-; SM30-NEXT: membar.sys;
+; SM30-NEXT: mov.b32 %r19, %r8;
+; SM30-NEXT: @%p2 bra $L__BB1_1;
+; SM30-NEXT: $L__BB1_3: // %partword.cmpxchg.end
; SM30-NEXT: st.param.b32 [func_retval0], %r14;
; SM30-NEXT: ret;
;
@@ -1241,10 +1238,9 @@ define i16 @seq_cst_sys_i16(ptr %addr, i16 %cmp, i16 %new) {
; SM70-NEXT: // in Loop: Header=BB9_1 Depth=1
; SM70-NEXT: and.b32 %r8, %r7, %r2;
; SM70-NEXT: setp.ne.s32 %p2, %r19, %r8;
-; SM70-NEXT: mov.u32 %r19, %r8;
-; SM70-NEXT: @%p2 bra $L__BB9_1;
-; SM70-NEXT: $L__BB9_3: // %partword.cmpxchg.end
-; SM70-NEXT: fence.acq_rel.sys;
+; SM70-NEXT: mov.b32 %r19, %r8;
+; SM70-NEXT: @%p2 bra $L__BB1_1;
+; SM70-NEXT: $L__BB1_3: // %partword.cmpxchg.end
; SM70-NEXT: st.param.b32 [func_retval0], %r14;
; SM70-NEXT: ret;
; SM90-LABEL: seq_cst_sys_i16(
diff --git a/llvm/test/CodeGen/NVPTX/div.ll b/llvm/test/CodeGen/NVPTX/div.ll
index 3d14d36ed599b..4f9d58758ca9e 100644
--- a/llvm/test/CodeGen/NVPTX/div.ll
+++ b/llvm/test/CodeGen/NVPTX/div.ll
@@ -11,10 +11,10 @@ define float @div_full(float %a, float %b) {
; CHECK-NEXT: ld.param.f32 %f1, [div_full_param_0];
; CHECK-NEXT: ld.param.f32 %f2, [div_full_param_1];
; CHECK-NEXT: div.full.f32 %f3, %f1, %f2;
-; CHECK-NEXT: mov.f32 %f4, 0f40400000;
+; CHECK-NEXT: mov.b32 %f4, 0f40400000;
; CHECK-NEXT: div.full.f32 %f5, %f3, %f4;
; CHECK-NEXT: div.full.ftz.f32 %f6, %f5, %f2;
-; CHECK-NEXT: mov.f32 %f7, 0f40800000;
+; CHECK-NEXT: mov.b32 %f7, 0f40800000;
; CHECK-NEXT: div.full.ftz.f32 %f8, %f6, %f7;
; CHECK-NEXT: st.param.f32 [func_retval0], %f8;
; CHECK-NEXT: ret;
diff --git a/llvm/test/CodeGen/NVPTX/f16-instructions.ll b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
index f78cfc3172621..70d1167bbb6e2 100644
--- a/llvm/test/CodeGen/NVPTX/f16-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16-instructions.ll
@@ -138,7 +138,7 @@ define half @test_fsub(half %a, half %b) #0 {
; CHECK-F16-FTZ-NEXT: mov.b16 [[Z:%rs[0-9]+]], 0x0000
; CHECK-F16-FTZ-NEXT: sub.rn.ftz.f16 [[R:%rs[0-9]+]], [[Z]], [[A]];
; CHECK-NOF16-DAG: cvt.f32.f16 [[A32:%f[0-9]+]], [[A]]
-; CHECK-NOF16-DAG: mov.f32 [[Z:%f[0-9]+]], 0f00000000;
+; CHECK-NOF16-DAG: mov.b32 [[Z:%f[0-9]+]], 0f00000000;
; CHECK-NOF16-NEXT: sub.rn.f32 [[R32:%f[0-9]+]], [[Z]], [[A32]];
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%rs[0-9]+]], [[R32]]
; CHECK-NEXT: st.param.b16 [func_retval0], [[R]];
@@ -646,7 +646,7 @@ else:
; CHECK: ld.param.u64 %[[P1:rd[0-9]+]], [test_phi_param_0];
; CHECK: ld.b16 {{%rs[0-9]+}}, [%[[P1]]];
; CHECK: [[LOOP:\$L__BB[0-9_]+]]:
-; CHECK: mov.u16 [[R:%rs[0-9]+]], [[AB:%rs[0-9]+]];
+; CHECK: mov.b16 [[R:%rs[0-9]+]], [[AB:%rs[0-9]+]];
; CHECK: ld.b16 [[AB:%rs[0-9]+]], [%[[P1]]];
; CHECK: {
; CHECK: st.param.b64 [param0], %[[P1]];
diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
index 1905fec8ab7a8..539e810c83cbd 100644
--- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -260,7 +260,7 @@ define <2 x half> @test_fneg(<2 x half> %a) #0 {
; CHECK-NOF16-NEXT: ld.param.b32 %r1, [test_fneg_param_0];
; CHECK-NOF16-NEXT: mov.b32 {%rs1, %rs2}, %r1;
; CHECK-NOF16-NEXT: cvt.f32.f16 %f1, %rs2;
-; CHECK-NOF16-NEXT: mov.f32 %f2, 0f00000000;
+; CHECK-NOF16-NEXT: mov.b32 %f2, 0f00000000;
; CHECK-NOF16-NEXT: sub.rn.f32 %f3, %f2, %f1;
; CHECK-NOF16-NEXT: cvt.rn.f16.f32 %rs3, %f3;
; CHECK-NOF16-NEXT: cvt.f32.f16 %f4, %rs1;
diff --git a/llvm/test/CodeGen/NVPTX/fma.ll b/llvm/test/CodeGen/NVPTX/fma.ll
index 3416420367beb..90fbd5ba9dfd6 100644
--- a/llvm/test/CodeGen/NVPTX/fma.ll
+++ b/llvm/test/CodeGen/NVPTX/fma.ll
@@ -50,7 +50,7 @@ define ptx_device float @f32_iir(float %x) {
}
define ptx_device float @f32_iii(float %x) {
-; CHECK: mov.f32 %f{{[0-9]+}}, 0f41200000;
+; CHECK: mov.b32 %f{{[0-9]+}}, 0f41200000;
; CHECK: ret;
%r = call float @llvm.fma.f32(float 2.0, float 3.0, float 4.0)
ret float %r
diff --git a/llvm/test/CodeGen/NVPTX/i128.ll b/llvm/test/CodeGen/NVPTX/i128.ll
index ca1b5fdabbf8f..546700c2b0335 100644
--- a/llvm/test/CodeGen/NVPTX/i128.ll
+++ b/llvm/test/CodeGen/NVPTX/i128.ll
@@ -77,7 +77,7 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: setp.gt.s32 %p16, %r10, 63;
; CHECK-NEXT: selp.b64 %rd124, %rd76, %rd75, %p16;
; CHECK-NEXT: shl.b64 %rd123, %rd3, %r10;
-; CHECK-NEXT: mov.u64 %rd114, %rd117;
+; CHECK-NEXT: mov.b64 %rd114, %rd117;
; CHECK-NEXT: @%p15 bra $L__BB0_4;
; CHECK-NEXT: // %bb.1: // %udiv-preheader
; CHECK-NEXT: cvt.u32.u64 %r13, %rd119;
@@ -93,7 +93,7 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: add.cc.s64 %rd35, %rd5, -1;
; CHECK-NEXT: addc.cc.s64 %rd36, %rd6, -1;
; CHECK-NEXT: mov.b64 %rd114, 0;
-; CHECK-NEXT: mov.u64 %rd117, %rd114;
+; CHECK-NEXT: mov.b64 %rd117, %rd114;
; CHECK-NEXT: $L__BB0_2: // %udiv-do-while
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: shr.u64 %rd83, %rd121, 63;
@@ -210,7 +210,7 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: setp.gt.s32 %p14, %r10, 63;
; CHECK-NEXT: selp.b64 %rd110, %rd66, %rd65, %p14;
; CHECK-NEXT: shl.b64 %rd109, %rd41, %r10;
-; CHECK-NEXT: mov.u64 %rd100, %rd103;
+; CHECK-NEXT: mov.b64 %rd100, %rd103;
; CHECK-NEXT: @%p13 bra $L__BB1_4;
; CHECK-NEXT: // %bb.1: // %udiv-preheader
; CHECK-NEXT: cvt.u32.u64 %r13, %rd105;
@@ -226,7 +226,7 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: add.cc.s64 %rd33, %rd3, -1;
; CHECK-NEXT: addc.cc.s64 %rd34, %rd4, -1;
; CHECK-NEXT: mov.b64 %rd100, 0;
-; CHECK-NEXT: mov.u64 %rd103, %rd100;
+; CHECK-NEXT: mov.b64 %rd103, %rd100;
; CHECK-NEXT: $L__BB1_2: // %udiv-do-while
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: shr.u64 %rd73, %rd107, 63;
@@ -386,7 +386,7 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: setp.gt.s32 %p16, %r10, 63;
; CHECK-NEXT: selp.b64 %rd119, %rd77, %rd76, %p16;
; CHECK-NEXT: shl.b64 %rd118, %rd1, %r10;
-; CHECK-NEXT: mov.u64 %rd109, %rd112;
+; CHECK-NEXT: mov.b64 %rd109, %rd112;
; CHECK-NEXT: @%p15 bra $L__BB4_4;
; CHECK-NEXT: // %bb.1: // %udiv-preheader
; CHECK-NEXT: cvt.u32.u64 %r13, %rd114;
@@ -402,7 +402,7 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: add.cc.s64 %rd35, %rd3, -1;
; CHECK-NEXT: addc.cc.s64 %rd36, %rd4, -1;
; CHECK-NEXT: mov.b64 %rd109, 0;
-; CHECK-NEXT: mov.u64 %rd112, %rd109;
+; CHECK-NEXT: mov.b64 %rd112, %rd109;
; CHECK-NEXT: $L__BB4_2: // %udiv-do-while
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: shr.u64 %rd84, %rd116, 63;
@@ -513,7 +513,7 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: setp.gt.s32 %p14, %r10, 63;
; CHECK-NEXT: selp.b64 %rd104, %rd66, %rd65, %p14;
; CHECK-NEXT: shl.b64 %rd103, %rd41, %r10;
-; CHECK-NEXT: mov.u64 %rd94, %rd97;
+; CHECK-NEXT: mov.b64 %rd94, %rd97;
; CHECK-NEXT: @%p13 bra $L__BB5_4;
; CHECK-NEXT: // %bb.1: // %udiv-preheader
; CHECK-NEXT: cvt.u32.u64 %r13, %rd99;
@@ -529,7 +529,7 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: add.cc.s64 %rd33, %rd43, -1;
; CHECK-NEXT: addc.cc.s64 %rd34, %rd44, -1;
; CHECK-NEXT: mov.b64 %rd94, 0;
-; CHECK-NEXT: mov.u64 %rd97, %rd94;
+; CHECK-NEXT: mov.b64 %rd97, %rd94;
; CHECK-NEXT: $L__BB5_2: // %udiv-do-while
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: shr.u64 %rd73, %rd101, 63;
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index 90f9306d036cd..010eafdf2f2ac 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -134,7 +134,7 @@ define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
; PTX-NEXT: mov.b64 %rd2, grid_const_escape_param_0;
-; PTX-NEXT: mov.u64 %rd3, %rd2;
+; PTX-NEXT: mov.b64 %rd3, %rd2;
; PTX-NEXT: cvta.param.u64 %rd4, %rd3;
; PTX-NEXT: mov.u64 %rd1, escape;
; PTX-NEXT: { // callseq 0, 0
@@ -176,10 +176,10 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
; PTX-NEXT: mov.b64 %rd2, multiple_grid_const_escape_param_0;
; PTX-NEXT: mov.b64 %rd3, multiple_grid_const_escape_param_2;
-; PTX-NEXT: mov.u64 %rd4, %rd3;
+; PTX-NEXT: mov.b64 %rd4, %rd3;
; PTX-NEXT: ld.param.u32 %r1, [multiple_grid_const_escape_param_1];
; PTX-NEXT: cvta.param.u64 %rd5, %rd4;
-; PTX-NEXT: mov.u64 %rd6, %rd2;
+; PTX-NEXT: mov.b64 %rd6, %rd2;
; PTX-NEXT: cvta.param.u64 %rd7, %rd6;
; PTX-NEXT: add.u64 %rd8, %SP, 0;
; PTX-NEXT: add.u64 %rd9, %SPL, 0;
@@ -231,7 +231,7 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i
; PTX-NEXT: mov.b64 %rd1, grid_const_memory_escape_param_0;
; PTX-NEXT: ld.param.u64 %rd2, [grid_const_memory_escape_param_1];
; PTX-NEXT: cvta.to.global.u64 %rd3, %rd2;
-; PTX-NEXT: mov.u64 %rd4, %rd1;
+; PTX-NEXT: mov.b64 %rd4, %rd1;
; PTX-NEXT: cvta.param.u64 %rd5, %rd4;
; PTX-NEXT: st.global.u64 [%rd3], %rd5;
; PTX-NEXT: ret;
@@ -257,7 +257,7 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
; PTX-NEXT: mov.b64 %rd4, grid_const_inlineasm_escape_param_0;
; PTX-NEXT: ld.param.u64 %rd5, [grid_const_inlineasm_escape_param_1];
; PTX-NEXT: cvta.to.global.u64 %rd6, %rd5;
-; PTX-NEXT: mov.u64 %rd7, %rd4;
+; PTX-NEXT: mov.b64 %rd7, %rd4;
; PTX-NEXT: cvta.param.u64 %rd2, %rd7;
; PTX-NEXT: add.s64 %rd3, %rd2, 4;
; PTX-NEXT: // begin inline asm
@@ -295,7 +295,7 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
; PTX-NEXT: mov.b64 %rd2, grid_const_partial_escape_param_0;
; PTX-NEXT: ld.param.u64 %rd3, [grid_const_partial_escape_param_1];
; PTX-NEXT: cvta.to.global.u64 %rd4, %rd3;
-; PTX-NEXT: mov.u64 %rd5, %rd2;
+; PTX-NEXT: mov.b64 %rd5, %rd2;
; PTX-NEXT: cvta.param.u64 %rd6, %rd5;
; PTX-NEXT: ld.u32 %r1, [%rd6];
; PTX-NEXT: add.s32 %r2, %r1, %r1;
@@ -344,7 +344,7 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input,
; PTX-NEXT: mov.b64 %rd2, grid_const_partial_escapemem_param_0;
; PTX-NEXT: ld.param.u64 %rd3, [grid_const_partial_escapemem_param_1];
; PTX-NEXT: cvta.to.global.u64 %rd4, %rd3;
-; PTX-NEXT: mov.u64 %rd5, %rd2;
+; PTX-NEXT: mov.b64 %rd5, %rd2;
; PTX-NEXT: cvta.param.u64 %rd6, %rd5;
; PTX-NEXT: ld.u32 %r1, [%rd6];
; PTX-NEXT: ld.u32 %r2, [%rd6+4];
@@ -402,7 +402,7 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
; PTX-NEXT: mov.b64 %rd5, grid_const_phi_param_0;
; PTX-NEXT: ld.param.u64 %rd6, [grid_const_phi_param_1];
; PTX-NEXT: cvta.to.global.u64 %rd1, %rd6;
-; PTX-NEXT: mov.u64 %rd7, %rd5;
+; PTX-NEXT: mov.b64 %rd7, %rd5;
; PTX-NEXT: cvta.param.u64 %rd8, %rd7;
; PTX-NEXT: ld.global.u32 %r1, [%rd1];
; PTX-NEXT: setp.lt.s32 %p1, %r1, 0;
@@ -463,14 +463,14 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
; PTX-NEXT: mov.b64 %rd6, grid_const_phi_ngc_param_0;
; PTX-NEXT: ld.param.u64 %rd7, [grid_const_phi_ngc_param_2];
; PTX-NEXT: cvta.to.global.u64 %rd1, %rd7;
-; PTX-NEXT: mov.u64 %rd10, %rd6;
+; PTX-NEXT: mov.b64 %rd10, %rd6;
; PTX-NEXT: cvta.param.u64 %rd11, %rd10;
; PTX-NEXT: ld.global.u32 %r1, [%rd1];
; PTX-NEXT: setp.lt.s32 %p1, %r1, 0;
; PTX-NEXT: @%p1 bra $L__BB10_2;
; PTX-NEXT: // %bb.1: // %second
; PTX-NEXT: mov.b64 %rd8, grid_const_phi_ngc_param_1;
-; PTX-NEXT: mov.u64 %rd9, %rd8;
+; PTX-NEXT: mov.b64 %rd9, %rd8;
; PTX-NEXT: cvta.param.u64 %rd2, %rd9;
; PTX-NEXT: add.s64 %rd11, %rd2, 4;
; PTX-NEXT: $L__BB10_2: // %merge
@@ -529,9 +529,9 @@ define ptx_kernel void @grid_const_select(ptr byval(i32) align 4 %input1, ptr by
; PTX-NEXT: ld.param.u64 %rd2, [grid_const_select_param_2];
; PTX-NEXT: cvta.to.global.u64 %rd3, %rd2;
; PTX-NEXT: mov.b64 %rd4, grid_const_select_param_1;
-; PTX-NEXT: mov.u64 %rd5, %rd4;
+; PTX-NEXT: mov.b64 %rd5, %rd4;
; PTX-NEXT: cvta.param.u64 %rd6, %rd5;
-; PTX-NEXT: mov.u64 %rd7, %rd1;
+; PTX-NEXT: mov.b64 %rd7, %rd1;
; PTX-NEXT: cvta.param.u64 %rd8, %rd7;
; PTX-NEXT: ld.global.u32 %r1, [%rd3];
; PTX-NEXT: setp.lt.s32 %p1, %r1, 0;
@@ -570,7 +570,7 @@ define ptx_kernel i32 @grid_const_ptrtoint(ptr byval(i32) %input) {
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
; PTX-NEXT: mov.b64 %rd1, grid_const_ptrtoint_param_0;
-; PTX-NEXT: mov.u64 %rd2, %rd1;
+; PTX-NEXT: mov.b64 %rd2, %rd1;
; PTX-NEXT: ld.param.u32 %r1, [grid_const_ptrtoint_param_0];
; PTX-NEXT: cvta.param.u64 %rd3, %rd2;
; PTX-NEXT: cvt.u32.u64 %r2, %rd3;
diff --git a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll
index 41372c531de23..885c711d31f01 100644
--- a/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll
+++ b/llvm/test/CodeGen/NVPTX/nvvm-reflect-arch-O0.ll
@@ -143,15 +143,15 @@ return:
}
; SM_52: .visible .func (.param .b32 func_retval0) phi()
-; SM_52: mov.f32 %[[REG:.+]], 0f00000000;
+; SM_52: mov.b32 %[[REG:.+]], 0f00000000;
; SM_52-NEXT: st.param.f32 [func_retval0], %[[REG]];
; SM_52-NEXT: ret;
; SM_70: .visible .func (.param .b32 func_retval0) phi()
-; SM_70: mov.f32 %[[REG:.+]], 0f00000000;
+; SM_70: mov.b32 %[[REG:.+]], 0f00000000;
; SM_70-NEXT: st.param.f32 [func_retval0], %[[REG]];
; SM_70-NEXT: ret;
; SM_90: .visible .func (.param .b32 func_retval0) phi()
-; SM_90: mov.f32 %[[REG:.+]], 0f00000000;
+; SM_90: mov.b32 %[[REG:.+]], 0f00000000;
; SM_90-NEXT: st.param.f32 [func_retval0], %[[REG]];
; SM_90-NEXT: ret;
define float @phi() {
From f7d1a055fd94e2da74cdd7f8536e77fcd7eb9ccc Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Thu, 27 Feb 2025 20:50:39 -0800
Subject: [PATCH 2/3] Revert "Initial commit"
This reverts commit ee5d75a95362675f655bb951240dcc5055f471fe.
---
.../Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h | 14 ++++++--------
llvm/lib/Target/NVPTX/NVPTXInstrFormats.td | 18 ++++++++++--------
2 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
index ef1f5da34cccd..d06e2c00ec3f9 100644
--- a/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
+++ b/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -24,14 +24,12 @@ using namespace NVPTXAS;
namespace NVPTXII {
enum {
// These must be kept in sync with TSFlags in NVPTXInstrFormats.td
- // clang-format off
- IsTexFlag = 0x40,
- IsSuldMask = 0x180,
- IsSuldShift = 0x7,
- IsSustFlag = 0x200,
- IsSurfTexQueryFlag = 0x400,
- IsTexModeUnifiedFlag = 0x800,
- // clang-format on
+ IsTexFlag = 0x80,
+ IsSuldMask = 0x300,
+ IsSuldShift = 8,
+ IsSustFlag = 0x400,
+ IsSurfTexQueryFlag = 0x800,
+ IsTexModeUnifiedFlag = 0x1000
};
} // namespace NVPTXII
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrFormats.td b/llvm/lib/Target/NVPTX/NVPTXInstrFormats.td
index 86dcb4a9384f1..9220f4766d92c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrFormats.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrFormats.td
@@ -31,6 +31,7 @@ class NVPTXInst<dag outs, dag ins, string asmstr, list<dag> pattern>
// TSFlagFields
bits<4> VecInstType = VecNOP.Value;
+ bit IsSimpleMove = false;
bit IsLoad = false;
bit IsStore = false;
@@ -45,12 +46,13 @@ class NVPTXInst<dag outs, dag ins, string asmstr, list<dag> pattern>
// 2**(2-1) = 2.
bits<2> IsSuld = 0;
- let TSFlags{3...0} = VecInstType;
- let TSFlags{4} = IsLoad;
- let TSFlags{5} = IsStore;
- let TSFlags{6} = IsTex;
- let TSFlags{8...7} = IsSuld;
- let TSFlags{9} = IsSust;
- let TSFlags{10} = IsSurfTexQuery;
- let TSFlags{11} = IsTexModeUnified;
+ let TSFlags{3...0} = VecInstType;
+ let TSFlags{4...4} = IsSimpleMove;
+ let TSFlags{5...5} = IsLoad;
+ let TSFlags{6...6} = IsStore;
+ let TSFlags{7} = IsTex;
+ let TSFlags{9...8} = IsSuld;
+ let TSFlags{10} = IsSust;
+ let TSFlags{11} = IsSurfTexQuery;
+ let TSFlags{12} = IsTexModeUnified;
}
From 7a13e4f316f695a0d56f835994bcf677c890ecd0 Mon Sep 17 00:00:00 2001
From: Justin Fargnoli <jfargnoli at nvidia.com>
Date: Fri, 28 Feb 2025 13:19:34 -0800
Subject: [PATCH 3/3] Remove isMoveImm
---
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index a853dcd5b8db1..b967bb6b1dd13 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1965,11 +1965,9 @@ let hasSideEffects = false, isAsCheapAsAMove = true in {
multiclass MOV<RegisterClass RC, string OpStr, ValueType VT, Operand IMMType, SDNode ImmNode> {
def rr : NVPTXInst<(outs RC:$dst), (ins RC:$src),
"mov." # OpStr # " \t$dst, $src;", []>;
- let isMoveImm = true in {
- def ri : NVPTXInst<(outs RC:$dst), (ins IMMType:$src),
- "mov." # OpStr # " \t$dst, $src;",
- [(set VT:$dst, ImmNode:$src)]>;
- }
+ def ri : NVPTXInst<(outs RC:$dst), (ins IMMType:$src),
+ "mov." # OpStr # " \t$dst, $src;",
+ [(set VT:$dst, ImmNode:$src)]>;
}
}