[llvm] 6c372da - [AMDGPU] New GFX11 intrinsic llvm.amdgcn.s.sendmsg.rtn
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 10 00:21:22 PDT 2022
Author: Jay Foad
Date: 2022-06-10T08:15:23+01:00
New Revision: 6c372daa84d4c7a40274c4b6de27445cede46fb7
URL: https://github.com/llvm/llvm-project/commit/6c372daa84d4c7a40274c4b6de27445cede46fb7
DIFF: https://github.com/llvm/llvm-project/commit/6c372daa84d4c7a40274c4b6de27445cede46fb7.diff
LOG: [AMDGPU] New GFX11 intrinsic llvm.amdgcn.s.sendmsg.rtn
Add new intrinsic and codegen support for the s_sendmsg_rtn_b32 and
s_sendmsg_rtn_b64 instructions.
Differential Revision: https://reviews.llvm.org/D127315
Added:
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.rtn.ll
Modified:
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
llvm/lib/Target/AMDGPU/SOPInstructions.td
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 59444c2cb423b..354242433b442 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -207,6 +207,11 @@ def int_amdgcn_s_sendmsghalt : GCCBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
Intrinsic <[], [llvm_i32_ty, llvm_i32_ty],
[ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;
+// gfx11 intrinsic
+// The first parameter is s_sendmsg immediate (i16). Return type is i32 or i64.
+def int_amdgcn_s_sendmsg_rtn : Intrinsic <[llvm_anyint_ty], [llvm_i32_ty],
+ [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;
+
def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn]>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index d4e9af0dfb96c..950ffa694f299 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4497,7 +4497,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_s_getreg:
case Intrinsic::amdgcn_s_memtime:
case Intrinsic::amdgcn_s_memrealtime:
- case Intrinsic::amdgcn_s_get_waveid_in_workgroup: {
+ case Intrinsic::amdgcn_s_get_waveid_in_workgroup:
+ case Intrinsic::amdgcn_s_sendmsg_rtn: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 842d8618e4144..96efef2cc0515 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1361,6 +1361,8 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
} else {
switch (Inst.getOpcode()) {
case AMDGPU::S_SENDMSG:
+ case AMDGPU::S_SENDMSG_RTN_B32:
+ case AMDGPU::S_SENDMSG_RTN_B64:
case AMDGPU::S_SENDMSGHALT:
ScoreBrackets->updateByEvent(TII, TRI, MRI, SQ_MESSAGE, Inst);
break;
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 62eaad5641252..37d20045adb52 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -390,11 +390,11 @@ let SubtargetPredicate = isGFX11Plus in {
// is not an SGPR number.
def S_SENDMSG_RTN_B32 : SOP1_Pseudo<
"s_sendmsg_rtn_b32", (outs SReg_32:$sdst), (ins SendMsgImm:$src0),
- "$sdst, $src0"
+ "$sdst, $src0", [(set i32:$sdst, (int_amdgcn_s_sendmsg_rtn timm:$src0))]
>;
def S_SENDMSG_RTN_B64 : SOP1_Pseudo<
"s_sendmsg_rtn_b64", (outs SReg_64:$sdst), (ins SendMsgImm:$src0),
- "$sdst, $src0"
+ "$sdst, $src0", [(set i64:$sdst, (int_amdgcn_s_sendmsg_rtn timm:$src0))]
>;
}
} // End SubtargetPredicate = isGFX11Plus
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.rtn.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.rtn.ll
new file mode 100644
index 0000000000000..2d3742699ab35
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.sendmsg.rtn.ll
@@ -0,0 +1,170 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11,GFX11-SDAG %s
+; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11,GFX11-GISEL %s
+
+define amdgpu_kernel void @test_get_doorbell(i32 addrspace(1)* %out) {
+; GFX11-SDAG-LABEL: test_get_doorbell:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-SDAG-NEXT: s_sendmsg_rtn_b32 s2, sendmsg(MSG_RTN_GET_DOORBELL)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, s2
+; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: test_get_doorbell:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-GISEL-NEXT: s_sendmsg_rtn_b32 s2, sendmsg(MSG_RTN_GET_DOORBELL)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT: s_endpgm
+ %ret = call i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32 128)
+ store i32 %ret, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @test_get_ddid(i32 addrspace(1)* %out) {
+; GFX11-SDAG-LABEL: test_get_ddid:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-SDAG-NEXT: s_sendmsg_rtn_b32 s2, sendmsg(MSG_RTN_GET_DDID)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, s2
+; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: test_get_ddid:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-GISEL-NEXT: s_sendmsg_rtn_b32 s2, sendmsg(MSG_RTN_GET_DDID)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT: s_endpgm
+ %ret = call i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32 129)
+ store i32 %ret, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @test_get_tma(i64 addrspace(1)* %out) {
+; GFX11-LABEL: test_get_tma:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: s_sendmsg_rtn_b64 s[2:3], sendmsg(MSG_RTN_GET_TMA)
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_endpgm
+ %ret = call i64 @llvm.amdgcn.s.sendmsg.rtn.i64(i32 130)
+ store i64 %ret, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @test_get_realtime(i64 addrspace(1)* %out) {
+; GFX11-LABEL: test_get_realtime:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: s_sendmsg_rtn_b64 s[2:3], sendmsg(MSG_RTN_GET_REALTIME)
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_endpgm
+ %ret = call i64 @llvm.amdgcn.s.sendmsg.rtn.i64(i32 131)
+ store i64 %ret, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @test_savewave(i32 addrspace(1)* %out) {
+; GFX11-SDAG-LABEL: test_savewave:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-SDAG-NEXT: s_sendmsg_rtn_b32 s2, sendmsg(MSG_RTN_SAVE_WAVE)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, s2
+; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: test_savewave:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-GISEL-NEXT: s_sendmsg_rtn_b32 s2, sendmsg(MSG_RTN_SAVE_WAVE)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT: s_endpgm
+ %ret = call i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32 132)
+ store i32 %ret, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @test_get_tba(i64 addrspace(1)* %out) {
+; GFX11-LABEL: test_get_tba:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: s_sendmsg_rtn_b64 s[2:3], sendmsg(MSG_RTN_GET_TBA)
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_endpgm
+ %ret = call i64 @llvm.amdgcn.s.sendmsg.rtn.i64(i32 133)
+ store i64 %ret, i64 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @test_get_0_i32(i32 addrspace(1)* %out) {
+; GFX11-SDAG-LABEL: test_get_0_i32:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-SDAG-NEXT: s_sendmsg_rtn_b32 s2, sendmsg(0, 0, 0)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, s2
+; GFX11-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: test_get_0_i32:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-GISEL-NEXT: s_sendmsg_rtn_b32 s2, sendmsg(0, 0, 0)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX11-GISEL-NEXT: s_endpgm
+ %ret = call i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32 0)
+ store i32 %ret, i32 addrspace(1)* %out
+ ret void
+}
+
+define amdgpu_kernel void @test_get_99999_i64(i64 addrspace(1)* %out) {
+; GFX11-LABEL: test_get_99999_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
+; GFX11-NEXT: s_sendmsg_rtn_b64 s[2:3], 99999
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-NEXT: v_mov_b32_e32 v1, s3
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_endpgm
+ %ret = call i64 @llvm.amdgcn.s.sendmsg.rtn.i64(i32 99999)
+ store i64 %ret, i64 addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.amdgcn.s.sendmsg.rtn.i32(i32)
+declare i64 @llvm.amdgcn.s.sendmsg.rtn.i64(i32)
More information about the llvm-commits
mailing list