[llvm] [AMDGPU] Add GFX12 s_sleep_var instruction and intrinsic (PR #75499)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 14 09:28:49 PST 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-ir
@llvm/pr-subscribers-mc
Author: Jay Foad (jayfoad)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/75499.diff
7 Files Affected:
- (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+6)
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+7)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+13)
- (modified) llvm/lib/Target/AMDGPU/SOPInstructions.td (+5)
- (added) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll (+38)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_sop1.s (+6)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt (+6)
``````````diff
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 09e88152e65d2a..b1aefc1777f855 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1715,6 +1715,12 @@ def int_amdgcn_s_sleep :
IntrHasSideEffects]> {
}
+def int_amdgcn_s_sleep_var
+ : ClangBuiltin<"__builtin_amdgcn_s_sleep_var">,
+ Intrinsic<[], [llvm_i32_ty],
+ [IntrNoMem, IntrHasSideEffects, IntrWillReturn]> {
+}
+
def int_amdgcn_s_nop :
DefaultAttrsIntrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem,
IntrHasSideEffects]> {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 03b6d19b2b3c06..d0c1302c3f003c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3101,6 +3101,10 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
applyDefaultMapping(OpdMapper);
constrainOpWithReadfirstlane(B, MI, 8); // M0
return;
+ case Intrinsic::amdgcn_s_sleep_var:
+ assert(OpdMapper.getVRegs(1).empty());
+ constrainOpWithReadfirstlane(B, MI, 1);
+ return;
case Intrinsic::amdgcn_s_barrier_signal_var:
case Intrinsic::amdgcn_s_barrier_join:
case Intrinsic::amdgcn_s_wakeup_barrier:
@@ -4849,6 +4853,9 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
getVGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI); // %data1
break;
}
+ case Intrinsic::amdgcn_s_sleep_var:
+ OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
+ break;
case Intrinsic::amdgcn_s_barrier_signal_var:
case Intrinsic::amdgcn_s_barrier_join:
case Intrinsic::amdgcn_s_wakeup_barrier:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d4746b559d9256..03ffe8e10f4bbd 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6564,6 +6564,19 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
}
}
+ // Legalize s_sleep_var.
+ if (MI.getOpcode() == AMDGPU::S_SLEEP_VAR) {
+ const DebugLoc &DL = MI.getDebugLoc();
+ Register Reg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ int Src0Idx =
+ AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
+ MachineOperand &Src0 = MI.getOperand(Src0Idx);
+ BuildMI(*MI.getParent(), MI, DL, get(AMDGPU::V_READFIRSTLANE_B32), Reg)
+ .add(Src0);
+ Src0.ChangeToRegister(Reg, false);
+ return nullptr;
+ }
+
// Legalize MUBUF instructions.
bool isSoffsetLegal = true;
int SoffsetIdx =
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 50c4d279cfe23d..c51534cdbd3054 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1602,6 +1602,10 @@ def S_SLEEP : SOPP_Pseudo <"s_sleep", (ins i32imm:$simm16),
"$simm16", [(int_amdgcn_s_sleep timm:$simm16)]> {
}
+def S_SLEEP_VAR : SOP1_0_32 <"s_sleep_var", [(int_amdgcn_s_sleep_var SSrc_b32:$src0)]> {
+ let hasSideEffects = 1;
+}
+
def S_SETPRIO : SOPP_Pseudo <"s_setprio", (ins i16imm:$simm16), "$simm16",
[(int_amdgcn_s_setprio timm:$simm16)]> {
}
@@ -1997,6 +2001,7 @@ defm S_GET_BARRIER_STATE_IMM : SOP1_Real_gfx12<0x050>;
defm S_BARRIER_INIT_IMM : SOP1_Real_gfx12<0x051>;
defm S_BARRIER_JOIN_IMM : SOP1_Real_gfx12<0x052>;
defm S_WAKEUP_BARRIER_IMM : SOP1_Real_gfx12<0x057>;
+defm S_SLEEP_VAR : SOP1_Real_gfx12<0x058>;
//===----------------------------------------------------------------------===//
// SOP1 - GFX1150, GFX12
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll
new file mode 100644
index 00000000000000..5ad7ddfbe5fe9d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.sleep.var.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -global-isel=0 < %s | FileCheck -check-prefixes=GCN %s
+; RUN: llc -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs -global-isel=1 < %s | FileCheck -check-prefixes=GCN %s
+
+declare void @llvm.amdgcn.s.sleep.var(i32)
+
+define void @test_s_sleep_var1(i32 %arg) {
+; GCN-LABEL: test_s_sleep_var1:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_readfirstlane_b32 s0, v0
+; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GCN-NEXT: s_sleep_var s0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ call void @llvm.amdgcn.s.sleep.var(i32 %arg)
+ ret void
+}
+
+define void @test_s_sleep_var2() {
+; GCN-LABEL: test_s_sleep_var2:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: s_sleep_var 10
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ call void @llvm.amdgcn.s.sleep.var(i32 10)
+ ret void
+}
+
+define amdgpu_kernel void @test_s_sleep_var3(i32 %arg) {
+; GCN-LABEL: test_s_sleep_var3:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_b32 s0, s[0:1], 0x24
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_sleep_var s0
+; GCN-NEXT: s_endpgm
+ call void @llvm.amdgcn.s.sleep.var(i32 %arg)
+ ret void
+}
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
index 8f2944586ed290..495a2ea78ffef7 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
@@ -1,5 +1,11 @@
// RUN: llvm-mc -arch=amdgcn -show-encoding -mcpu=gfx1200 %s | FileCheck --check-prefix=GFX12 %s
+s_sleep_var 0x1234
+// GFX12: encoding: [0xff,0x58,0x80,0xbe,0x34,0x12,0x00,0x00]
+
+s_sleep_var s1
+// GFX12: encoding: [0x01,0x58,0x80,0xbe]
+
s_cvt_f32_i32 s5, s1
// GFX12: encoding: [0x01,0x64,0x85,0xbe]
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
index 1c31ee1e5dd7fb..d15a329c8eade7 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
@@ -1,5 +1,11 @@
# RUN: llvm-mc -arch=amdgcn -mcpu=gfx1200 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12 %s
+# GFX12: s_sleep_var 0x1234 ; encoding: [0xff,0x58,0x80,0xbe,0x34,0x12,0x00,0x00]
+0xff,0x58,0x80,0xbe,0x34,0x12,0x00,0x00
+
+# GFX12: s_sleep_var s1 ; encoding: [0x01,0x58,0x80,0xbe]
+0x01,0x58,0x80,0xbe
+
# GFX12: s_cvt_f32_i32 s5, s1 ; encoding: [0x01,0x64,0x85,0xbe]
0x01,0x64,0x85,0xbe
``````````
</details>
https://github.com/llvm/llvm-project/pull/75499
More information about the llvm-commits mailing list