[llvm] [NVPTX] Improve attributes for NVPTX Instructions (PR #93629)
Alex MacLean via llvm-commits
llvm-commits at lists.llvm.org
Tue May 28 18:12:28 PDT 2024
https://github.com/AlexMaclean created https://github.com/llvm/llvm-project/pull/93629
Add more descriptive attributes to some machine instructions to improve the behavior of Machine IR transformation passes.
From 2bb80a7f4b1bde950d630b0b3ecf9489e529269f Mon Sep 17 00:00:00 2001
From: Alex MacLean <amaclean at nvidia.com>
Date: Tue, 28 May 2024 16:12:58 +0000
Subject: [PATCH] [NVPTX] Improve attributes for NVPTX Instructions
---
llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 58 ++++++++--------
llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 85 +++++++++++++-----------
llvm/test/CodeGen/NVPTX/atomics-sm70.ll | 6 +-
llvm/test/CodeGen/NVPTX/atomics-sm90.ll | 6 +-
4 files changed, 82 insertions(+), 73 deletions(-)
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index c4c35a1f74ba9..01505106a6d90 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1946,7 +1946,7 @@ def : Pat<(i16 (sext_inreg (trunc (srl (i64 Int64Regs:$s), (i32 imm:$o))), i8))
// FIXME: This doesn't cover versions of set and setp that combine with a
// boolean predicate, e.g. setp.eq.and.b16.
-let hasSideEffects = false in {
+let hasSideEffects = false, isCompare = true in {
multiclass SETP<string TypeStr, RegisterClass RC, Operand ImmCls> {
def rr :
NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, RC:$b, CmpMode:$cmp),
@@ -2088,7 +2088,7 @@ let hasSideEffects = false in {
// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
-let IsSimpleMove=1, hasSideEffects=0 in {
+let IsSimpleMove = true, hasSideEffects = false, isAsCheapAsAMove = true in {
def IMOV1rr : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
"mov.pred \t$dst, $sss;", []>;
def IMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
@@ -2112,34 +2112,34 @@ let IsSimpleMove=1, hasSideEffects=0 in {
"mov.f32 \t$dst, $src;", []>;
def FMOV64rr : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
"mov.f64 \t$dst, $src;", []>;
-}
-def IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
- "mov.pred \t$dst, $src;",
- [(set Int1Regs:$dst, imm:$src)]>;
-def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
- "mov.u16 \t$dst, $src;",
- [(set Int16Regs:$dst, imm:$src)]>;
-def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
- "mov.u32 \t$dst, $src;",
- [(set (i32 Int32Regs:$dst), imm:$src)]>;
-def IMOV64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
- "mov.u64 \t$dst, $src;",
- [(set Int64Regs:$dst, imm:$src)]>;
-
-def IMOVB16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
- "mov.b16 \t$dst, $src;", []>;
-def IMOVB32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
- "mov.b32 \t$dst, $src;", []>;
-def IMOVB64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
- "mov.b64 \t$dst, $src;", []>;
-
-def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
- "mov.f32 \t$dst, $src;",
- [(set Float32Regs:$dst, fpimm:$src)]>;
-def FMOV64ri : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
- "mov.f64 \t$dst, $src;",
- [(set Float64Regs:$dst, fpimm:$src)]>;
+ def IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),
+ "mov.pred \t$dst, $src;",
+ [(set Int1Regs:$dst, imm:$src)]>;
+ def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
+ "mov.u16 \t$dst, $src;",
+ [(set Int16Regs:$dst, imm:$src)]>;
+ def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
+ "mov.u32 \t$dst, $src;",
+ [(set (i32 Int32Regs:$dst), imm:$src)]>;
+ def IMOV64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
+ "mov.u64 \t$dst, $src;",
+ [(set Int64Regs:$dst, imm:$src)]>;
+
+ def IMOVB16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),
+ "mov.b16 \t$dst, $src;", []>;
+ def IMOVB32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),
+ "mov.b32 \t$dst, $src;", []>;
+ def IMOVB64ri : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),
+ "mov.b64 \t$dst, $src;", []>;
+
+ def FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
+ "mov.f32 \t$dst, $src;",
+ [(set Float32Regs:$dst, fpimm:$src)]>;
+ def FMOV64ri : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
+ "mov.f64 \t$dst, $src;",
+ [(set Float64Regs:$dst, fpimm:$src)]>;
+}
def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
def : Pat<(i64 (Wrapper texternalsym:$dst)), (IMOV64ri texternalsym:$dst)>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index 440af085cb8e9..e66bc58c5dfb4 100644
--- a/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1538,14 +1538,16 @@ multiclass F_ATOMIC_2_imp<ValueType ptrT, NVPTXRegClass ptrclass,
ValueType regT, NVPTXRegClass regclass,
string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
Operand IMMType, SDNode IMM, list<Predicate> Pred> {
- def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
- !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
- [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
- Requires<Pred>;
- def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
- !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
- [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), IMM:$b))]>,
- Requires<!if(!or(!eq(TypeStr, ".f16"), !eq(TypeStr, ".bf16")), [Predicate<"false">], Pred)>;
+ let mayLoad = true, mayStore = true, hasSideEffects = true in {
+ def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;"),
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
+ Requires<Pred>;
+ def imm : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, IMMType:$b),
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b;", ""),
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), IMM:$b))]>,
+ Requires<!if(!or(!eq(TypeStr, ".f16"), !eq(TypeStr, ".bf16")), [Predicate<"false">], Pred)>;
+ }
}
multiclass F_ATOMIC_2<ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr,
string OpcStr, PatFrag IntOp, Operand IMMType, SDNode IMM,
@@ -1561,15 +1563,16 @@ multiclass F_ATOMIC_2_NEG_imp<ValueType ptrT, NVPTXRegClass ptrclass,
ValueType regT, NVPTXRegClass regclass,
string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
list<Predicate> Pred> {
- def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
- !strconcat(
- "{{ \n\t",
- ".reg \t.s", TypeStr, " temp; \n\t",
- "neg.s", TypeStr, " \ttemp, $b; \n\t",
- "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
- "}}"),
- [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
- Requires<Pred>;
+ let mayLoad = true, mayStore = true, hasSideEffects = true in
+ def reg : NVPTXInst<(outs regclass:$dst), (ins ptrclass:$addr, regclass:$b),
+ !strconcat(
+ "{{ \n\t",
+ ".reg \t.s", TypeStr, " temp; \n\t",
+ "neg.s", TypeStr, " \ttemp, $b; \n\t",
+ "atom", SpaceStr, OpcStr, ".u", TypeStr, " \t$dst, [$addr], temp; \n\t",
+ "}}"),
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b)))]>,
+ Requires<Pred>;
}
multiclass F_ATOMIC_2_NEG<ValueType regT, NVPTXRegClass regclass, string SpaceStr,
string TypeStr, string OpcStr, PatFrag IntOp, list<Predicate> Pred = []> {
@@ -1584,29 +1587,31 @@ multiclass F_ATOMIC_3_imp<ValueType ptrT, NVPTXRegClass ptrclass,
ValueType regT, NVPTXRegClass regclass,
string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
Operand IMMType, list<Predicate> Pred> {
- def reg : NVPTXInst<(outs regclass:$dst),
- (ins ptrclass:$addr, regclass:$b, regclass:$c),
- !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
- [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), (regT regclass:$c)))]>,
- Requires<Pred>;
-
- def imm1 : NVPTXInst<(outs regclass:$dst),
- (ins ptrclass:$addr, IMMType:$b, regclass:$c),
- !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
- [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, (regT regclass:$c)))]>,
- Requires<Pred>;
-
- def imm2 : NVPTXInst<(outs regclass:$dst),
- (ins ptrclass:$addr, regclass:$b, IMMType:$c),
- !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
- [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), imm:$c))]>,
- Requires<Pred>;
-
- def imm3 : NVPTXInst<(outs regclass:$dst),
- (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
- !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
- [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>,
- Requires<Pred>;
+ let mayLoad = true, mayStore = true, hasSideEffects = true in {
+ def reg : NVPTXInst<(outs regclass:$dst),
+ (ins ptrclass:$addr, regclass:$b, regclass:$c),
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), (regT regclass:$c)))]>,
+ Requires<Pred>;
+
+ def imm1 : NVPTXInst<(outs regclass:$dst),
+ (ins ptrclass:$addr, IMMType:$b, regclass:$c),
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, (regT regclass:$c)))]>,
+ Requires<Pred>;
+
+ def imm2 : NVPTXInst<(outs regclass:$dst),
+ (ins ptrclass:$addr, regclass:$b, IMMType:$c),
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;", ""),
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), (regT regclass:$b), imm:$c))]>,
+ Requires<Pred>;
+
+ def imm3 : NVPTXInst<(outs regclass:$dst),
+ (ins ptrclass:$addr, IMMType:$b, IMMType:$c),
+ !strconcat("atom", SpaceStr, OpcStr, TypeStr, " \t$dst, [$addr], $b, $c;"),
+ [(set (regT regclass:$dst), (IntOp (ptrT ptrclass:$addr), imm:$b, imm:$c))]>,
+ Requires<Pred>;
+ }
}
multiclass F_ATOMIC_3<ValueType regT, NVPTXRegClass regclass, string SpaceStr, string TypeStr,
string OpcStr, PatFrag IntOp, Operand IMMType, list<Predicate> Pred = []> {
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
index 9cc45fbe313b7..2d42f32fc2242 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
@@ -94,7 +94,8 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: and.b32 %r10, %r22, -4;
; CHECKPTX62-NEXT: shl.b32 %r38, %r22, 3;
; CHECKPTX62-NEXT: and.b32 %r11, %r38, 24;
-; CHECKPTX62-NEXT: shl.b32 %r40, %r26, %r11;
+; CHECKPTX62-NEXT: mov.b32 %r39, 65535;
+; CHECKPTX62-NEXT: shl.b32 %r40, %r39, %r11;
; CHECKPTX62-NEXT: not.b32 %r12, %r40;
; CHECKPTX62-NEXT: ld.global.u32 %r56, [%r10];
; CHECKPTX62-NEXT: $L__BB0_5: // %atomicrmw.start27
@@ -114,7 +115,8 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: and.b32 %r16, %r23, -4;
; CHECKPTX62-NEXT: shl.b32 %r46, %r23, 3;
; CHECKPTX62-NEXT: and.b32 %r17, %r46, 24;
-; CHECKPTX62-NEXT: shl.b32 %r48, %r26, %r17;
+; CHECKPTX62-NEXT: mov.b32 %r47, 65535;
+; CHECKPTX62-NEXT: shl.b32 %r48, %r47, %r17;
; CHECKPTX62-NEXT: not.b32 %r18, %r48;
; CHECKPTX62-NEXT: ld.shared.u32 %r57, [%r16];
; CHECKPTX62-NEXT: $L__BB0_7: // %atomicrmw.start45
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
index 9301ea44c6936..1fb874357c883 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm90.ll
@@ -99,7 +99,8 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: and.b32 %r10, %r22, -4;
; CHECKPTX71-NEXT: shl.b32 %r38, %r22, 3;
; CHECKPTX71-NEXT: and.b32 %r11, %r38, 24;
-; CHECKPTX71-NEXT: shl.b32 %r40, %r26, %r11;
+; CHECKPTX71-NEXT: mov.b32 %r39, 65535;
+; CHECKPTX71-NEXT: shl.b32 %r40, %r39, %r11;
; CHECKPTX71-NEXT: not.b32 %r12, %r40;
; CHECKPTX71-NEXT: ld.global.u32 %r56, [%r10];
; CHECKPTX71-NEXT: $L__BB0_5: // %atomicrmw.start27
@@ -121,7 +122,8 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, bfloat
; CHECKPTX71-NEXT: and.b32 %r16, %r23, -4;
; CHECKPTX71-NEXT: shl.b32 %r46, %r23, 3;
; CHECKPTX71-NEXT: and.b32 %r17, %r46, 24;
-; CHECKPTX71-NEXT: shl.b32 %r48, %r26, %r17;
+; CHECKPTX71-NEXT: mov.b32 %r47, 65535;
+; CHECKPTX71-NEXT: shl.b32 %r48, %r47, %r17;
; CHECKPTX71-NEXT: not.b32 %r18, %r48;
; CHECKPTX71-NEXT: ld.shared.u32 %r57, [%r16];
; CHECKPTX71-NEXT: $L__BB0_7: // %atomicrmw.start45
More information about the llvm-commits
mailing list