[llvm] [AMDGPU] New intrinsic llvm.amdgcn.pops.exiting.wave.id (PR #89612)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 23 02:56:45 PDT 2024
https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/89612
>From fbb61b7ee76b64f4f8a5a3ff05fb83f1839ccabf Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Mon, 22 Apr 2024 15:20:23 +0100
Subject: [PATCH 1/2] [AMDGPU] New intrinsic llvm.amdgcn.pops.exiting.wave.id
This provides access to the special scalar source value
SRC_POPS_EXITING_WAVE_ID on GFX9 and GFX10.
---
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 3 ++
.../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 6 +++-
llvm/lib/Target/AMDGPU/SOPInstructions.td | 6 ++++
.../llvm.amdgcn.pops.exiting.wave.id.ll | 34 +++++++++++++++++++
4 files changed, 48 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index ee9a5d7a343980..1ec48650eba7a9 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2485,6 +2485,9 @@ class AMDGPUGlobalLoadLDS : Intrinsic <
"", [SDNPMemOperand]>;
def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS;
+def int_amdgcn_pops_exiting_wave_id :
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
+
//===----------------------------------------------------------------------===//
// GFX10 Intrinsics
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index aa4ec785bf02a3..6f4906a696d3af 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4825,11 +4825,15 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = AMDGPU::getValueMapping(regBankID, OpSize);
break;
}
- case Intrinsic::amdgcn_s_bitreplicate:
+ case Intrinsic::amdgcn_s_bitreplicate: {
Register MaskReg = MI.getOperand(2).getReg();
unsigned MaskBank = getRegBankID(MaskReg, MRI, AMDGPU::SGPRRegBankID);
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, 32);
+ break;
+ }
+ case Intrinsic::amdgcn_pops_exiting_wave_id:
+ return getDefaultMappingSOP(MI);
}
break;
}
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 0b7d45ee8c027d..4525c2345d9e45 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1865,6 +1865,12 @@ let SubtargetPredicate = isNotGFX9Plus in {
def : GetFPModePat<fpmode_mask_gfx6plus>;
}
+let SubtargetPredicate = isGFX9GFX10 in
+def : GCNPat<
+ (int_amdgcn_pops_exiting_wave_id),
+ (S_MOV_B32 (i32 SRC_POPS_EXITING_WAVE_ID))
+>;
+
//===----------------------------------------------------------------------===//
// SOP2 Patterns
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
new file mode 100644
index 00000000000000..4927c2ffcdf30d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=SDAG
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=GFX9-GISEL
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefix=SDAG
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefix=GFX10-GISEL
+
+define amdgpu_ps void @test(ptr addrspace(1) inreg %ptr) {
+; SDAG-LABEL: test:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id
+; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: v_mov_b32_e32 v1, s2
+; SDAG-NEXT: global_store_dword v0, v1, s[0:1]
+; SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: test:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: test:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_mov_b32 s2, src_pops_exiting_wave_id
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX10-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX10-GISEL-NEXT: s_endpgm
+ %id = call i32 @llvm.amdgcn.pops.exiting.wave.id()
+ store i32 %id, ptr addrspace(1) %ptr
+ ret void
+}
>From 3a2f03a2b13aec66df67f71b06bf73e24bc3afd4 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Tue, 23 Apr 2024 10:55:49 +0100
Subject: [PATCH 2/2] Use sideeffects
---
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 2 +-
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 8 +++-----
llvm/lib/Target/AMDGPU/SOPInstructions.td | 7 ++++++-
3 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 1ec48650eba7a9..5c5df57a6ec734 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2486,7 +2486,7 @@ class AMDGPUGlobalLoadLDS : Intrinsic <
def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS;
def int_amdgcn_pops_exiting_wave_id :
- DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrSpeculatable]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrHasSideEffects]>;
//===----------------------------------------------------------------------===//
// GFX10 Intrinsics
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 6f4906a696d3af..525c2bf54d3887 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4825,15 +4825,11 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = AMDGPU::getValueMapping(regBankID, OpSize);
break;
}
- case Intrinsic::amdgcn_s_bitreplicate: {
+ case Intrinsic::amdgcn_s_bitreplicate:
Register MaskReg = MI.getOperand(2).getReg();
unsigned MaskBank = getRegBankID(MaskReg, MRI, AMDGPU::SGPRRegBankID);
OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, 64);
OpdsMapping[2] = AMDGPU::getValueMapping(MaskBank, 32);
- break;
- }
- case Intrinsic::amdgcn_pops_exiting_wave_id:
- return getDefaultMappingSOP(MI);
}
break;
}
@@ -5136,6 +5132,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
break;
}
+ case Intrinsic::amdgcn_pops_exiting_wave_id:
+ return getDefaultMappingSOP(MI);
default:
return getInvalidInstructionMapping();
}
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 4525c2345d9e45..83059377280736 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -213,6 +213,11 @@ let isMoveImm = 1 in {
} // End Uses = [SCC]
} // End isMoveImm = 1
+// Variant of S_MOV_B32 used for reading volatile source values like
+// SRC_POPS_EXITING_WAVE_ID.
+let hasSideEffects = 1 in
+def S_MOV_B32_sideeffects : SOP1_32 <"s_mov_b32">;
+
let Defs = [SCC] in {
def S_NOT_B32 : SOP1_32 <"s_not_b32",
[(set i32:$sdst, (UniformUnaryFrag<not> i32:$src0))]
@@ -1868,7 +1873,7 @@ def : GetFPModePat<fpmode_mask_gfx6plus>;
let SubtargetPredicate = isGFX9GFX10 in
def : GCNPat<
(int_amdgcn_pops_exiting_wave_id),
- (S_MOV_B32 (i32 SRC_POPS_EXITING_WAVE_ID))
+ (S_MOV_B32_sideeffects (i32 SRC_POPS_EXITING_WAVE_ID))
>;
//===----------------------------------------------------------------------===//
More information about the llvm-commits mailing list