[llvm] [AMDGPU] New intrinsic llvm.amdgcn.pops.exiting.wave.id (PR #89612)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Wed May 22 09:13:33 PDT 2024
https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/89612
>From 2dc23c1d7d6f9ed0e9715233eedee8c171679b9a Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Mon, 22 Apr 2024 15:20:23 +0100
Subject: [PATCH] [AMDGPU] New intrinsic llvm.amdgcn.pops.exiting.wave.id
This provides access to the special scalar source value
SRC_POPS_EXITING_WAVE_ID on GFX9 and GFX10.
---
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 5 +++
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 11 ++++++
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h | 1 +
.../AMDGPU/AMDGPUInstructionSelector.cpp | 17 ++++++++++
.../Target/AMDGPU/AMDGPUInstructionSelector.h | 1 +
.../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 2 ++
.../llvm.amdgcn.pops.exiting.wave.id.ll | 34 +++++++++++++++++++
7 files changed, 71 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 0b774b724d0c0..5c001a4dd6247 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2482,6 +2482,11 @@ class AMDGPUGlobalLoadLDS :
"", [SDNPMemOperand]>;
def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS;
+// Use read/write of inaccessible memory to model the fact that this reads a
+// volatile value.
+def int_amdgcn_pops_exiting_wave_id :
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>;
+
//===----------------------------------------------------------------------===//
// GFX10 Intrinsics
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index c11c7a57e0596..c498cd991ba73 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2526,6 +2526,14 @@ void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
}
+void AMDGPUDAGToDAGISel::SelectPOPSExitingWaveID(SDNode *N) {
+ // TODO: Select this with a tablegen pattern. This is tricky because the
+ // intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
+ // mayLoad/mayStore and tablegen complains about the mismatch.
+ SDNode *Reg = CurDAG->getRegister(AMDGPU::SRC_POPS_EXITING_WAVE_ID, MVT::i32);
+ CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, N->getVTList(), Reg);
+}
+
static unsigned gwsIntrinToOpcode(unsigned IntrID) {
switch (IntrID) {
case Intrinsic::amdgcn_ds_gws_init:
@@ -2682,6 +2690,9 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
SelectDSBvhStackIntrinsic(N);
return;
+ case Intrinsic::amdgcn_pops_exiting_wave_id:
+ SelectPOPSExitingWaveID(N);
+ return;
}
SelectCode(N);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index f987b747c0e21..53d25b4cf4ca8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -274,6 +274,7 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
void SelectFP_EXTEND(SDNode *N);
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
void SelectDSBvhStackIntrinsic(SDNode *N);
+ void SelectPOPSExitingWaveID(SDNode *N);
void SelectDS_GWS(SDNode *N, unsigned IntrID);
void SelectInterpP1F16(SDNode *N);
void SelectINTRINSIC_W_CHAIN(SDNode *N);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index b48a09489653a..99109b23a1591 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2079,6 +2079,21 @@ bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
+bool AMDGPUInstructionSelector::selectPOPSExitingWaveID(
+ MachineInstr &MI) const {
+ Register Dst = MI.getOperand(0).getReg();
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineBasicBlock *MBB = MI.getParent();
+
+ // TODO: Select this with a tablegen pattern. This is tricky because the
+ // intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
+ // mayLoad/mayStore and tablegen complains about the mismatch.
+ auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst)
+ .addDef(AMDGPU::SRC_POPS_EXITING_WAVE_ID);
+ MI.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
+}
+
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
MachineInstr &I) const {
unsigned IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
@@ -2129,6 +2144,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
return selectSBarrierSignalIsfirst(I, IntrinsicID);
case Intrinsic::amdgcn_s_barrier_leave:
return selectSBarrierLeave(I);
+ case Intrinsic::amdgcn_pops_exiting_wave_id:
+ return selectPOPSExitingWaveID(I);
}
return selectImpl(I, *CoverageInfo);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index f561d5d29efc4..48f3b18118014 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -125,6 +125,7 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const;
bool selectSBarrier(MachineInstr &MI) const;
bool selectDSBvhStackIntrinsic(MachineInstr &MI) const;
+ bool selectPOPSExitingWaveID(MachineInstr &MI) const;
bool selectImageIntrinsic(MachineInstr &MI,
const AMDGPU::ImageDimIntrinsicInfo *Intr) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 56345d14a331c..dbb42a60f71fe 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -5132,6 +5132,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
break;
}
+ case Intrinsic::amdgcn_pops_exiting_wave_id:
+ return getDefaultMappingSOP(MI);
default:
return getInvalidInstructionMapping();
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
new file mode 100644
index 0000000000000..4927c2ffcdf30
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=SDAG
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=GFX9-GISEL
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefix=SDAG
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefix=GFX10-GISEL
+
+define amdgpu_ps void @test(ptr addrspace(1) inreg %ptr) {
+; SDAG-LABEL: test:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: v_mov_b32_e32 v1, s2
+; SDAG-NEXT: global_store_dword v0, v1, s[0:1]
+; SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: test:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: test:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX10-GISEL-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.pops.exiting.wave.id()
+ store i32 %id, ptr addrspace(1) %ptr
+ ret void
+}
More information about the llvm-commits
mailing list