[llvm] [AMDGPU] Set GRANULATED_WAVEFRONT_SGPR_COUNT of compute_pgm_rsrc1 to 0 for gfx10+ (PR #154666)

via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 20 21:17:22 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-globalisel

Author: None (Shoreshen)

<details>
<summary>Changes</summary>

According to `llvm-project/llvm/docs/AMDGPUUsage.rst::L5212` the `GRANULATED_WAVEFRONT_SGPR_COUNT`, which is `compute_pgm_rsrc1[6:9]` has to be 0 for gfx10+ arch

---
Full diff: https://github.com/llvm/llvm-project/pull/154666.diff


3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIProgramInfo.cpp (+7-3) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll (+3-3) 
- (added) llvm/test/CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll (+7) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
index 93ba0a337d7dd..d34970f0790db 100644
--- a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
@@ -170,9 +170,13 @@ const MCExpr *SIProgramInfo::getComputePGMRSrc1(const GCNSubtarget &ST,
                                                 MCContext &Ctx) const {
   uint64_t Reg = getComputePGMRSrc1Reg(*this, ST);
   const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
-  const MCExpr *Res = MCBinaryExpr::createOr(
-      MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
-      MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
+  const MCExpr *Res = nullptr;
+  if (ST.getGeneration() >= AMDGPUSubtarget::GFX10)
+    Res = MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx);
+  else
+    Res = MCBinaryExpr::createOr(
+        MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
+        MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
   return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
index e6e98fb6edf26..206011adf0213 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
@@ -3202,7 +3202,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
 ; GFX10-NEXT:     kernel_code_entry_byte_offset = 256
 ; GFX10-NEXT:     kernel_code_prefetch_byte_size = 0
 ; GFX10-NEXT:     granulated_workitem_vgpr_count = 0
-; GFX10-NEXT:     granulated_wavefront_sgpr_count = 1
+; GFX10-NEXT:     granulated_wavefront_sgpr_count = 0
 ; GFX10-NEXT:     priority = 0
 ; GFX10-NEXT:     float_mode = 240
 ; GFX10-NEXT:     priv = 0
@@ -4206,7 +4206,7 @@ define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(ptr addrspace(1) %out, i32 %s
 ; GFX10-NEXT:     kernel_code_entry_byte_offset = 256
 ; GFX10-NEXT:     kernel_code_prefetch_byte_size = 0
 ; GFX10-NEXT:     granulated_workitem_vgpr_count = 0
-; GFX10-NEXT:     granulated_wavefront_sgpr_count = 1
+; GFX10-NEXT:     granulated_wavefront_sgpr_count = 0
 ; GFX10-NEXT:     priority = 0
 ; GFX10-NEXT:     float_mode = 240
 ; GFX10-NEXT:     priv = 0
@@ -4560,7 +4560,7 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %s
 ; GFX10-NEXT:     kernel_code_entry_byte_offset = 256
 ; GFX10-NEXT:     kernel_code_prefetch_byte_size = 0
 ; GFX10-NEXT:     granulated_workitem_vgpr_count = 0
-; GFX10-NEXT:     granulated_wavefront_sgpr_count = 1
+; GFX10-NEXT:     granulated_wavefront_sgpr_count = 0
 ; GFX10-NEXT:     priority = 0
 ; GFX10-NEXT:     float_mode = 240
 ; GFX10-NEXT:     priv = 0
diff --git a/llvm/test/CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll b/llvm/test/CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll
new file mode 100644
index 0000000000000..5fc8205fd9e68
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll
@@ -0,0 +1,7 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -filetype=obj < %s | llvm-objdump -d --section=.rodata - | FileCheck %s
+
+
+; CHECK-NOT: error decoding test.kd: kernel descriptor COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT reserved bits in range (9:6) set, must be zero on gfx10+
+define amdgpu_kernel void @test(i128 inreg) { 
+    ret void 
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/154666


More information about the llvm-commits mailing list