[llvm] [AMDGPU] Fix unwanted LICM/CSE of llvm.amdgcn.pops.exiting.wave.id (PR #96190)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 20 06:38:09 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-llvm-ir
Author: Jay Foad (jayfoad)
<details>
<summary>Changes</summary>
Mark both the intrinsic and the selected MachineInstr as having side
effects, so that MachineLICM cannot hoist them and MachineCSE cannot
eliminate them.
---
Full diff: https://github.com/llvm/llvm-project/pull/96190.diff
7 Files Affected:
- (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+2-3)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (-12)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h (-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (-17)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h (-1)
- (modified) llvm/lib/Target/AMDGPU/SOPInstructions.td (+11)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll (+18-4)
``````````diff
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 8a5566ae12020..059449a8d7d2c 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2349,10 +2349,9 @@ class AMDGPUGlobalLoadLDS :
"", [SDNPMemOperand]>;
def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS;
-// Use read/write of inaccessible memory to model the fact that this reads a
-// volatile value.
+// This is IntrHasSideEffects because it reads from a volatile hardware register.
def int_amdgcn_pops_exiting_wave_id :
- DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>;
+ DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrHasSideEffects]>;
//===----------------------------------------------------------------------===//
// GFX10 Intrinsics
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 5b616d8cc77d1..7ab76b7ffaa55 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2549,15 +2549,6 @@ void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
}
-void AMDGPUDAGToDAGISel::SelectPOPSExitingWaveID(SDNode *N) {
- // TODO: Select this with a tablegen pattern. This is tricky because the
- // intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
- // mayLoad/mayStore and tablegen complains about the mismatch.
- SDValue Reg = CurDAG->getRegister(AMDGPU::SRC_POPS_EXITING_WAVE_ID, MVT::i32);
- SDValue Chain = N->getOperand(0);
- CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, N->getVTList(), {Reg, Chain});
-}
-
static unsigned gwsIntrinToOpcode(unsigned IntrID) {
switch (IntrID) {
case Intrinsic::amdgcn_ds_gws_init:
@@ -2714,9 +2705,6 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
SelectDSBvhStackIntrinsic(N);
return;
- case Intrinsic::amdgcn_pops_exiting_wave_id:
- SelectPOPSExitingWaveID(N);
- return;
}
SelectCode(N);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index ae6fdb594a2ef..6d370a8b7782b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -263,7 +263,6 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
void SelectFP_EXTEND(SDNode *N);
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
void SelectDSBvhStackIntrinsic(SDNode *N);
- void SelectPOPSExitingWaveID(SDNode *N);
void SelectDS_GWS(SDNode *N, unsigned IntrID);
void SelectInterpP1F16(SDNode *N);
void SelectINTRINSIC_W_CHAIN(SDNode *N);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 03e2d622dd319..16a2987e7b463 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2079,21 +2079,6 @@ bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
}
-bool AMDGPUInstructionSelector::selectPOPSExitingWaveID(
- MachineInstr &MI) const {
- Register Dst = MI.getOperand(0).getReg();
- const DebugLoc &DL = MI.getDebugLoc();
- MachineBasicBlock *MBB = MI.getParent();
-
- // TODO: Select this with a tablegen pattern. This is tricky because the
- // intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
- // mayLoad/mayStore and tablegen complains about the mismatch.
- auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst)
- .addReg(AMDGPU::SRC_POPS_EXITING_WAVE_ID);
- MI.eraseFromParent();
- return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
-}
-
bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
MachineInstr &I) const {
Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
@@ -2144,8 +2129,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
return selectSBarrierSignalIsfirst(I, IntrinsicID);
case Intrinsic::amdgcn_s_barrier_leave:
return selectSBarrierLeave(I);
- case Intrinsic::amdgcn_pops_exiting_wave_id:
- return selectPOPSExitingWaveID(I);
}
return selectImpl(I, *CoverageInfo);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 48f3b18118014..f561d5d29efc4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -125,7 +125,6 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const;
bool selectSBarrier(MachineInstr &MI) const;
bool selectDSBvhStackIntrinsic(MachineInstr &MI) const;
- bool selectPOPSExitingWaveID(MachineInstr &MI) const;
bool selectImageIntrinsic(MachineInstr &MI,
const AMDGPU::ImageDimIntrinsicInfo *Intr) const;
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index e1253d3ed0500..64f33199545a1 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -215,6 +215,11 @@ let isMoveImm = 1 in {
} // End Uses = [SCC]
} // End isMoveImm = 1
+// Variant of S_MOV_B32 used for reading from volatile registers like
+// SRC_POPS_EXITING_WAVE_ID.
+let hasSideEffects = 1 in
+def S_MOV_B32_sideeffects : SOP1_32 <"s_mov_b32">;
+
let Defs = [SCC] in {
def S_NOT_B32 : SOP1_32 <"s_not_b32",
[(set i32:$sdst, (UniformUnaryFrag<not> i32:$src0))]
@@ -1880,6 +1885,12 @@ let SubtargetPredicate = isNotGFX9Plus in {
def : GetFPModePat<fpmode_mask_gfx6plus>;
}
+let SubtargetPredicate = isGFX9GFX10 in
+def : GCNPat<
+ (int_amdgcn_pops_exiting_wave_id),
+ (S_MOV_B32_sideeffects (i32 SRC_POPS_EXITING_WAVE_ID))
+>;
+
//===----------------------------------------------------------------------===//
// SOP2 Patterns
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
index aaa4e2b3622df..f3c5ac757e22b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
@@ -36,9 +36,9 @@ define amdgpu_ps void @test(ptr addrspace(1) inreg %ptr) {
define amdgpu_ps void @test_loop() {
; SDAG-LABEL: test_loop:
; SDAG: ; %bb.0:
-; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; SDAG-NEXT: .LBB1_1: ; %loop
; SDAG-NEXT: ; =>This Inner Loop Header: Depth=1
+; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; SDAG-NEXT: s_cmp_eq_u32 s0, 0
; SDAG-NEXT: s_cbranch_scc1 .LBB1_1
; SDAG-NEXT: ; %bb.2: ; %exit
@@ -46,9 +46,9 @@ define amdgpu_ps void @test_loop() {
;
; GFX9-GISEL-LABEL: test_loop:
; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX9-GISEL-NEXT: .LBB1_1: ; %loop
; GFX9-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX9-GISEL-NEXT: s_cmp_eq_u32 s0, 0
; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX9-GISEL-NEXT: ; %bb.2: ; %exit
@@ -56,9 +56,9 @@ define amdgpu_ps void @test_loop() {
;
; GFX10-GISEL-LABEL: test_loop:
; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX10-GISEL-NEXT: .LBB1_1: ; %loop
; GFX10-GISEL-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX10-GISEL-NEXT: s_cmp_eq_u32 s0, 0
; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB1_1
; GFX10-GISEL-NEXT: ; %bb.2: ; %exit
@@ -77,14 +77,23 @@ define amdgpu_ps i32 @test_if(i1 inreg %cond) {
; SDAG: ; %bb.0: ; %entry
; SDAG-NEXT: s_bitcmp0_b32 s0, 0
; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
+; SDAG-NEXT: s_cbranch_scc1 .LBB2_2
+; SDAG-NEXT: ; %bb.1: ; %body
+; SDAG-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
+; SDAG-NEXT: .LBB2_2: ; %exit
; SDAG-NEXT: ; return to shader part epilog
;
; GFX9-GISEL-LABEL: test_if:
; GFX9-GISEL: ; %bb.0: ; %entry
; GFX9-GISEL-NEXT: s_mov_b32 s1, s0
-; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
; GFX9-GISEL-NEXT: s_xor_b32 s1, s1, 1
; GFX9-GISEL-NEXT: s_and_b32 s1, s1, 1
+; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
+; GFX9-GISEL-NEXT: s_cmp_lg_u32 s1, 0
+; GFX9-GISEL-NEXT: s_cbranch_scc1 .LBB2_2
+; GFX9-GISEL-NEXT: ; %bb.1: ; %body
+; GFX9-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
+; GFX9-GISEL-NEXT: .LBB2_2: ; %exit
; GFX9-GISEL-NEXT: ; return to shader part epilog
;
; GFX10-GISEL-LABEL: test_if:
@@ -92,6 +101,11 @@ define amdgpu_ps i32 @test_if(i1 inreg %cond) {
; GFX10-GISEL-NEXT: s_xor_b32 s0, s0, 1
; GFX10-GISEL-NEXT: s_and_b32 s1, s0, 1
; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
+; GFX10-GISEL-NEXT: s_cmp_lg_u32 s1, 0
+; GFX10-GISEL-NEXT: s_cbranch_scc1 .LBB2_2
+; GFX10-GISEL-NEXT: ; %bb.1: ; %body
+; GFX10-GISEL-NEXT: s_mov_b32 s0, src_pops_exiting_wave_id
+; GFX10-GISEL-NEXT: .LBB2_2: ; %exit
; GFX10-GISEL-NEXT: ; return to shader part epilog
entry:
%id1 = call i32 @llvm.amdgcn.pops.exiting.wave.id()
``````````
</details>
https://github.com/llvm/llvm-project/pull/96190
More information about the llvm-commits mailing list