[llvm] [AMDGPU] New intrinsic llvm.amdgcn.pops.exiting.wave.id (PR #89612)

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Wed May 1 03:21:24 PDT 2024


https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/89612

>From 53979d18ce7c04875abbb51c29c92a6f1a82b22a Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Mon, 22 Apr 2024 15:20:23 +0100
Subject: [PATCH] [AMDGPU] New intrinsic llvm.amdgcn.pops.exiting.wave.id

This provides access to the special scalar source value
SRC_POPS_EXITING_WAVE_ID on GFX9 and GFX10.
---
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td      |  5 +++
 .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  |  2 ++
 llvm/lib/Target/AMDGPU/SOPInstructions.td     | 11 ++++++
 .../llvm.amdgcn.pops.exiting.wave.id.ll       | 34 +++++++++++++++++++
 4 files changed, 52 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index ee9a5d7a343980..101d10c23ef948 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2485,6 +2485,11 @@ class AMDGPUGlobalLoadLDS : Intrinsic <
   "", [SDNPMemOperand]>;
 def int_amdgcn_global_load_lds : AMDGPUGlobalLoadLDS;
 
+// Use read/write of inaccessible memory to model the fact that this reads a
+// volatile value.
+def int_amdgcn_pops_exiting_wave_id :
+  DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>;
+
 //===----------------------------------------------------------------------===//
 // GFX10 Intrinsics
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 56345d14a331ca..dbb42a60f71fe4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -5132,6 +5132,8 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
       break;
     }
+    case Intrinsic::amdgcn_pops_exiting_wave_id:
+      return getDefaultMappingSOP(MI);
     default:
       return getInvalidInstructionMapping();
     }
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 93b7e86b5f2973..7475353a70699c 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -213,6 +213,11 @@ let isMoveImm = 1 in {
   } // End Uses = [SCC]
 } // End isMoveImm = 1
 
+// Variant of S_MOV_B32 used for reading volatile source values like
+// SRC_POPS_EXITING_WAVE_ID.
+let mayLoad = 1, mayStore = 1, maybeAtomic = 0 in
+def S_MOV_B32_loadstore : SOP1_32 <"s_mov_b32">;
+
 let Defs = [SCC] in {
   def S_NOT_B32 : SOP1_32 <"s_not_b32",
     [(set i32:$sdst, (UniformUnaryFrag<not> i32:$src0))]
@@ -1865,6 +1870,12 @@ let SubtargetPredicate = isNotGFX9Plus in {
 def : GetFPModePat<fpmode_mask_gfx6plus>;
 }
 
+let SubtargetPredicate = isGFX9GFX10 in
+def : GCNPat<
+  (int_amdgcn_pops_exiting_wave_id),
+  (S_MOV_B32_loadstore (i32 SRC_POPS_EXITING_WAVE_ID))
+>;
+
 //===----------------------------------------------------------------------===//
 // SOP2 Patterns
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
new file mode 100644
index 00000000000000..4927c2ffcdf30d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.pops.exiting.wave.id.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=SDAG
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefix=GFX9-GISEL
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefix=SDAG
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck %s -check-prefix=GFX10-GISEL
+
+define amdgpu_ps void @test(ptr addrspace(1) inreg %ptr) {
+; SDAG-LABEL: test:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_mov_b32 s2, src_pops_exiting_wave_id
+; SDAG-NEXT:    v_mov_b32_e32 v0, 0
+; SDAG-NEXT:    v_mov_b32_e32 v1, s2
+; SDAG-NEXT:    global_store_dword v0, v1, s[0:1]
+; SDAG-NEXT:    s_endpgm
+;
+; GFX9-GISEL-LABEL: test:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_mov_b32 s2, src_pops_exiting_wave_id
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX9-GISEL-NEXT:    s_endpgm
+;
+; GFX10-GISEL-LABEL: test:
+; GFX10-GISEL:       ; %bb.0:
+; GFX10-GISEL-NEXT:    s_mov_b32 s2, src_pops_exiting_wave_id
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10-GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX10-GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX10-GISEL-NEXT:    s_endpgm
+  %id = call i32 @llvm.amdgcn.pops.exiting.wave.id()
+  store i32 %id, ptr addrspace(1) %ptr
+  ret void
+}



More information about the llvm-commits mailing list