[llvm] [AMDGPU] Fix unwanted LICM/CSE of llvm.amdgcn.pops.exiting.wave.id (PR #96190)

via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 20 06:38:09 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu

@llvm/pr-subscribers-llvm-ir

Author: Jay Foad (jayfoad)

<details>
<summary>Changes</summary>

Mark both the intrinsic and the selected MachineInstr as having side
effects to prevent MachineLICM and MachineCSE from moving/removing them.


---
Full diff: https://github.com/llvm/llvm-project/pull/96190.diff


7 Files Affected:

- (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+2-3) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (-12) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h (-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (-17) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h (-1) 
- (modified) llvm/lib/Target/AMDGPU/SOPInstructions.td (+11) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll (+18-4) 


``````````diff
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 8a5566ae12020..059449a8d7d2c 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2349,10 +2349,9 @@ class AMDGPUGlobalLoadLDS :
      "", [SDNPMemOperand]>;
 def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS;
 
-// Use read/write of inaccessible memory to model the fact that this reads a
-// volatile value.
+// This is IntrHasSideEffects because it reads from a volatile hardware register.
 def int_amdgcn_pops_exiting_wave_id :
-  DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>;
+  DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrNoMem, IntrHasSideEffects]>;
 
 //===----------------------------------------------------------------------===//
 // GFX10 Intrinsics
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 5b616d8cc77d1..7ab76b7ffaa55 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -2549,15 +2549,6 @@ void AMDGPUDAGToDAGISel::SelectDSBvhStackIntrinsic(SDNode *N) {
   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
 }
 
-void AMDGPUDAGToDAGISel::SelectPOPSExitingWaveID(SDNode *N) {
-  // TODO: Select this with a tablegen pattern. This is tricky because the
-  // intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
-  // mayLoad/mayStore and tablegen complains about the mismatch.
-  SDValue Reg = CurDAG->getRegister(AMDGPU::SRC_POPS_EXITING_WAVE_ID, MVT::i32);
-  SDValue Chain = N->getOperand(0);
-  CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, N->getVTList(), {Reg, Chain});
-}
-
 static unsigned gwsIntrinToOpcode(unsigned IntrID) {
   switch (IntrID) {
   case Intrinsic::amdgcn_ds_gws_init:
@@ -2714,9 +2705,6 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
   case Intrinsic::amdgcn_ds_bvh_stack_rtn:
     SelectDSBvhStackIntrinsic(N);
     return;
-  case Intrinsic::amdgcn_pops_exiting_wave_id:
-    SelectPOPSExitingWaveID(N);
-    return;
   }
 
   SelectCode(N);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
index ae6fdb594a2ef..6d370a8b7782b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.h
@@ -263,7 +263,6 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
   void SelectFP_EXTEND(SDNode *N);
   void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
   void SelectDSBvhStackIntrinsic(SDNode *N);
-  void SelectPOPSExitingWaveID(SDNode *N);
   void SelectDS_GWS(SDNode *N, unsigned IntrID);
   void SelectInterpP1F16(SDNode *N);
   void SelectINTRINSIC_W_CHAIN(SDNode *N);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 03e2d622dd319..16a2987e7b463 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2079,21 +2079,6 @@ bool AMDGPUInstructionSelector::selectDSBvhStackIntrinsic(
   return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
 }
 
-bool AMDGPUInstructionSelector::selectPOPSExitingWaveID(
-    MachineInstr &MI) const {
-  Register Dst = MI.getOperand(0).getReg();
-  const DebugLoc &DL = MI.getDebugLoc();
-  MachineBasicBlock *MBB = MI.getParent();
-
-  // TODO: Select this with a tablegen pattern. This is tricky because the
-  // intrinsic is IntrReadMem/IntrWriteMem but the instruction is not marked
-  // mayLoad/mayStore and tablegen complains about the mismatch.
-  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), Dst)
-                 .addReg(AMDGPU::SRC_POPS_EXITING_WAVE_ID);
-  MI.eraseFromParent();
-  return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
-}
-
 bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
     MachineInstr &I) const {
   Intrinsic::ID IntrinsicID = cast<GIntrinsic>(I).getIntrinsicID();
@@ -2144,8 +2129,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
     return selectSBarrierSignalIsfirst(I, IntrinsicID);
   case Intrinsic::amdgcn_s_barrier_leave:
     return selectSBarrierLeave(I);
-  case Intrinsic::amdgcn_pops_exiting_wave_id:
-    return selectPOPSExitingWaveID(I);
   }
   return selectImpl(I, *CoverageInfo);
 }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 48f3b18118014..f561d5d29efc4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -125,7 +125,6 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
   bool selectDSAppendConsume(MachineInstr &MI, bool IsAppend) const;
   bool selectSBarrier(MachineInstr &MI) const;
   bool selectDSBvhStackIntrinsic(MachineInstr &MI) const;
-  bool selectPOPSExitingWaveID(MachineInstr &MI) const;
 
   bool selectImageIntrinsic(MachineInstr &MI,
                             const AMDGPU::ImageDimIntrinsicInfo *Intr) const;
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index e1253d3ed0500..64f33199545a1 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -215,6 +215,11 @@ let isMoveImm = 1 in {
   } // End Uses = [SCC]
 } // End isMoveImm = 1
 
+// Variant of S_MOV_B32 used for reading from volatile registers like
+// SRC_POPS_EXITING_WAVE_ID.
+let hasSideEffects = 1 in
+def S_MOV_B32_sideeffects : SOP1_32 <"s_mov_b32">;
+
 let Defs = [SCC] in {
   def S_NOT_B32 : SOP1_32 <"s_not_b32",
     [(set i32:$sdst, (UniformUnaryFrag<not> i32:$src0))]
@@ -1880,6 +1885,12 @@ let SubtargetPredicate = isNotGFX9Plus in {
 def : GetFPModePat<fpmode_mask_gfx6plus>;
 }
 
+let SubtargetPredicate = isGFX9GFX10 in
+def : GCNPat<
+  (int_amdgcn_pops_exiting_wave_id),
+  (S_MOV_B32_sideeffects (i32 SRC_POPS_EXITING_WAVE_ID))
+>;
+
 //===----------------------------------------------------------------------===//
 // SOP2 Patterns
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
index aaa4e2b3622df..f3c5ac757e22b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
@@ -36,9 +36,9 @@ define amdgpu_ps void @test(ptr addrspace(1) inreg %ptr) {
 define amdgpu_ps void @test_loop() {
 ; SDAG-LABEL: test_loop:
 ; SDAG:       ; %bb.0:
-; SDAG-NEXT:    s_mov_b32 s0, src_pops_exiting_wave_id
 ; SDAG-NEXT:  .LBB1_1: ; %loop
 ; SDAG-NEXT:    ; =>This Inner Loop Header: Depth=1
+; SDAG-NEXT:    s_mov_b32 s0, src_pops_exiting_wave_id
 ; SDAG-NEXT:    s_cmp_eq_u32 s0, 0
 ; SDAG-NEXT:    s_cbranch_scc1 .LBB1_1
 ; SDAG-NEXT:  ; %bb.2: ; %exit
@@ -46,9 +46,9 @@ define amdgpu_ps void @test_loop() {
 ;
 ; GFX9-GISEL-LABEL: test_loop:
 ; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_mov_b32 s0, src_pops_exiting_wave_id
 ; GFX9-GISEL-NEXT:  .LBB1_1: ; %loop
 ; GFX9-GISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX9-GISEL-NEXT:    s_mov_b32 s0, src_pops_exiting_wave_id
 ; GFX9-GISEL-NEXT:    s_cmp_eq_u32 s0, 0
 ; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX9-GISEL-NEXT:  ; %bb.2: ; %exit
@@ -56,9 +56,9 @@ define amdgpu_ps void @test_loop() {
 ;
 ; GFX10-GISEL-LABEL: test_loop:
 ; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_mov_b32 s0, src_pops_exiting_wave_id
 ; GFX10-GISEL-NEXT:  .LBB1_1: ; %loop
 ; GFX10-GISEL-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, src_pops_exiting_wave_id
 ; GFX10-GISEL-NEXT:    s_cmp_eq_u32 s0, 0
 ; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB1_1
 ; GFX10-GISEL-NEXT:  ; %bb.2: ; %exit
@@ -77,14 +77,23 @@ define amdgpu_ps i32 @test_if(i1 inreg %cond) {
 ; SDAG:       ; %bb.0: ; %entry
 ; SDAG-NEXT:    s_bitcmp0_b32 s0, 0
 ; SDAG-NEXT:    s_mov_b32 s0, src_pops_exiting_wave_id
+; SDAG-NEXT:    s_cbranch_scc1 .LBB2_2
+; SDAG-NEXT:  ; %bb.1: ; %body
+; SDAG-NEXT:    s_mov_b32 s0, src_pops_exiting_wave_id
+; SDAG-NEXT:  .LBB2_2: ; %exit
 ; SDAG-NEXT:    ; return to shader part epilog
 ;
 ; GFX9-GISEL-LABEL: test_if:
 ; GFX9-GISEL:       ; %bb.0: ; %entry
 ; GFX9-GISEL-NEXT:    s_mov_b32 s1, s0
-; GFX9-GISEL-NEXT:    s_mov_b32 s0, src_pops_exiting_wave_id
 ; GFX9-GISEL-NEXT:    s_xor_b32 s1, s1, 1
 ; GFX9-GISEL-NEXT:    s_and_b32 s1, s1, 1
+; GFX9-GISEL-NEXT:    s_mov_b32 s0, src_pops_exiting_wave_id
+; GFX9-GISEL-NEXT:    s_cmp_lg_u32 s1, 0
+; GFX9-GISEL-NEXT:    s_cbranch_scc1 .LBB2_2
+; GFX9-GISEL-NEXT:  ; %bb.1: ; %body
+; GFX9-GISEL-NEXT:    s_mov_b32 s0, src_pops_exiting_wave_id
+; GFX9-GISEL-NEXT:  .LBB2_2: ; %exit
 ; GFX9-GISEL-NEXT:    ; return to shader part epilog
 ;
 ; GFX10-GISEL-LABEL: test_if:
@@ -92,6 +101,11 @@ define amdgpu_ps i32 @test_if(i1 inreg %cond) {
 ; GFX10-GISEL-NEXT:    s_xor_b32 s0, s0, 1
 ; GFX10-GISEL-NEXT:    s_and_b32 s1, s0, 1
 ; GFX10-GISEL-NEXT:    s_mov_b32 s0, src_pops_exiting_wave_id
+; GFX10-GISEL-NEXT:    s_cmp_lg_u32 s1, 0
+; GFX10-GISEL-NEXT:    s_cbranch_scc1 .LBB2_2
+; GFX10-GISEL-NEXT:  ; %bb.1: ; %body
+; GFX10-GISEL-NEXT:    s_mov_b32 s0, src_pops_exiting_wave_id
+; GFX10-GISEL-NEXT:  .LBB2_2: ; %exit
 ; GFX10-GISEL-NEXT:    ; return to shader part epilog
 entry:
   %id1 = call i32 @llvm.amdgcn.pops.exiting.wave.id()

``````````

</details>


https://github.com/llvm/llvm-project/pull/96190


More information about the llvm-commits mailing list