[llvm] [AMDGPU] Set GRANULATED_WAVEFRONT_SGPR_COUNT of compute_pgm_rsrc1 to 0 for gfx10+ (PR #154666)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 25 20:57:37 PDT 2025
https://github.com/Shoreshen updated https://github.com/llvm/llvm-project/pull/154666
>From 85c04c2fc5eedab75bab208fe5db54eb242107bc Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Thu, 21 Aug 2025 12:14:16 +0800
Subject: [PATCH 1/4] set GRANULATED_WAVEFRONT_SGPR_COUNT of compute_pgm_rsrc1 to 0 for gfx10+ arch
---
llvm/lib/Target/AMDGPU/SIProgramInfo.cpp | 10 +++++++---
llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll | 6 +++---
.../CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll | 7 +++++++
3 files changed, 17 insertions(+), 6 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll
diff --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
index 93ba0a337d7dd..d34970f0790db 100644
--- a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
@@ -170,9 +170,13 @@ const MCExpr *SIProgramInfo::getComputePGMRSrc1(const GCNSubtarget &ST,
MCContext &Ctx) const {
uint64_t Reg = getComputePGMRSrc1Reg(*this, ST);
const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
- const MCExpr *Res = MCBinaryExpr::createOr(
- MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
- MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
+ const MCExpr *Res = nullptr;
+ if (ST.getGeneration() >= AMDGPUSubtarget::GFX10)
+ Res = MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx);
+ else
+ Res = MCBinaryExpr::createOr(
+ MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
+ MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
index e6e98fb6edf26..206011adf0213 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement.ll
@@ -3202,7 +3202,7 @@ define amdgpu_kernel void @dyn_extract_v5f64_s_s(ptr addrspace(1) %out, i32 %sel
; GFX10-NEXT: kernel_code_entry_byte_offset = 256
; GFX10-NEXT: kernel_code_prefetch_byte_size = 0
; GFX10-NEXT: granulated_workitem_vgpr_count = 0
-; GFX10-NEXT: granulated_wavefront_sgpr_count = 1
+; GFX10-NEXT: granulated_wavefront_sgpr_count = 0
; GFX10-NEXT: priority = 0
; GFX10-NEXT: float_mode = 240
; GFX10-NEXT: priv = 0
@@ -4206,7 +4206,7 @@ define amdgpu_kernel void @dyn_extract_v4f32_s_s_s(ptr addrspace(1) %out, i32 %s
; GFX10-NEXT: kernel_code_entry_byte_offset = 256
; GFX10-NEXT: kernel_code_prefetch_byte_size = 0
; GFX10-NEXT: granulated_workitem_vgpr_count = 0
-; GFX10-NEXT: granulated_wavefront_sgpr_count = 1
+; GFX10-NEXT: granulated_wavefront_sgpr_count = 0
; GFX10-NEXT: priority = 0
; GFX10-NEXT: float_mode = 240
; GFX10-NEXT: priv = 0
@@ -4560,7 +4560,7 @@ define amdgpu_kernel void @dyn_extract_v4f64_s_s_s(ptr addrspace(1) %out, i32 %s
; GFX10-NEXT: kernel_code_entry_byte_offset = 256
; GFX10-NEXT: kernel_code_prefetch_byte_size = 0
; GFX10-NEXT: granulated_workitem_vgpr_count = 0
-; GFX10-NEXT: granulated_wavefront_sgpr_count = 1
+; GFX10-NEXT: granulated_wavefront_sgpr_count = 0
; GFX10-NEXT: priority = 0
; GFX10-NEXT: float_mode = 240
; GFX10-NEXT: priv = 0
diff --git a/llvm/test/CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll b/llvm/test/CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll
new file mode 100644
index 0000000000000..5fc8205fd9e68
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll
@@ -0,0 +1,7 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -filetype=obj < %s | llvm-objdump -d --section=.rodata - | FileCheck %s
+
+
+; CHECK-NOT: error decoding test.kd: kernel descriptor COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT reserved bits in range (9:6) set, must be zero on gfx10+
+define amdgpu_kernel void @test(i128 inreg) {
+ ret void
+}
>From f12eda3617784ae27180aea5257789912592d75b Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Fri, 22 Aug 2025 09:27:41 +0800
Subject: [PATCH 2/4] fix comments
---
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 8 +++++---
llvm/lib/Target/AMDGPU/SIProgramInfo.cpp | 10 +++-------
.../CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll | 8 ++++++--
3 files changed, 14 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 36c0d1cbcea22..ac5b7e4eca19f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1143,9 +1143,11 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
return SubGPR;
};
-
- ProgInfo.SGPRBlocks = GetNumGPRBlocks(ProgInfo.NumSGPRsForWavesPerEU,
- IsaInfo::getSGPREncodingGranule(&STM));
+ if (STM.getGeneration() >= AMDGPUSubtarget::GFX10)
+ ProgInfo.SGPRBlocks = CreateExpr(0ul);
+ else
+ ProgInfo.SGPRBlocks = GetNumGPRBlocks(
+ ProgInfo.NumSGPRsForWavesPerEU, IsaInfo::getSGPREncodingGranule(&STM));
ProgInfo.VGPRBlocks = GetNumGPRBlocks(ProgInfo.NumVGPRsForWavesPerEU,
IsaInfo::getVGPREncodingGranule(&STM));
diff --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
index d34970f0790db..93ba0a337d7dd 100644
--- a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
@@ -170,13 +170,9 @@ const MCExpr *SIProgramInfo::getComputePGMRSrc1(const GCNSubtarget &ST,
MCContext &Ctx) const {
uint64_t Reg = getComputePGMRSrc1Reg(*this, ST);
const MCExpr *RegExpr = MCConstantExpr::create(Reg, Ctx);
- const MCExpr *Res = nullptr;
- if (ST.getGeneration() >= AMDGPUSubtarget::GFX10)
- Res = MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx);
- else
- Res = MCBinaryExpr::createOr(
- MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
- MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
+ const MCExpr *Res = MCBinaryExpr::createOr(
+ MaskShift(VGPRBlocks, /*Mask=*/0x3F, /*Shift=*/0, Ctx),
+ MaskShift(SGPRBlocks, /*Mask=*/0xF, /*Shift=*/6, Ctx), Ctx);
return MCBinaryExpr::createOr(RegExpr, Res, Ctx);
}
diff --git a/llvm/test/CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll b/llvm/test/CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll
index 5fc8205fd9e68..f0dbcf4ac65ed 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll
@@ -1,5 +1,9 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -filetype=obj < %s | llvm-objdump -d --section=.rodata - | FileCheck %s
-
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -filetype=obj < %s 2>&1 | llvm-objdump -d --section=.rodata - | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -filetype=obj < %s 2>&1 | llvm-objdump -d --section=.rodata - | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -filetype=obj < %s 2>&1 | llvm-objdump -d --section=.rodata - | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -filetype=obj < %s 2>&1 | llvm-objdump -d --section=.rodata - | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -filetype=obj < %s 2>&1 | llvm-objdump -d --section=.rodata - | FileCheck %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -filetype=obj < %s 2>&1 | llvm-objdump -d --section=.rodata - | FileCheck %s
; CHECK-NOT: error decoding test.kd: kernel descriptor COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT reserved bits in range (9:6) set, must be zero on gfx10+
define amdgpu_kernel void @test(i128 inreg) {
>From 5438cd505893913915b0d98b2114ace25d1159d4 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Mon, 25 Aug 2025 11:28:58 +0800
Subject: [PATCH 3/4] fix comments
---
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 6 ++++--
llvm/test/CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll | 4 ++--
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index ac5b7e4eca19f..b1a514739ad05 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1143,11 +1143,13 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
const MCExpr *SubGPR = MCBinaryExpr::createSub(DivGPR, OneConst, Ctx);
return SubGPR;
};
- if (STM.getGeneration() >= AMDGPUSubtarget::GFX10)
+ // GFX10+ will always allocate 128 SGPRs and this field must be 0
+ if (STM.getGeneration() >= AMDGPUSubtarget::GFX10) {
ProgInfo.SGPRBlocks = CreateExpr(0ul);
- else
+ } else {
ProgInfo.SGPRBlocks = GetNumGPRBlocks(
ProgInfo.NumSGPRsForWavesPerEU, IsaInfo::getSGPREncodingGranule(&STM));
+ }
ProgInfo.VGPRBlocks = GetNumGPRBlocks(ProgInfo.NumVGPRsForWavesPerEU,
IsaInfo::getVGPREncodingGranule(&STM));
diff --git a/llvm/test/CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll b/llvm/test/CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll
index f0dbcf4ac65ed..632e89d92ca69 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll
@@ -5,7 +5,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -filetype=obj < %s 2>&1 | llvm-objdump -d --section=.rodata - | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -filetype=obj < %s 2>&1 | llvm-objdump -d --section=.rodata - | FileCheck %s
-; CHECK-NOT: error decoding test.kd: kernel descriptor COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT reserved bits in range (9:6) set, must be zero on gfx10+
+; CHECK-NOT: {{[Ee]rror.*COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT}}
define amdgpu_kernel void @test(i128 inreg) {
- ret void
+ ret void
}
>From b60dad87a77ea00063806cddbed2717ae2d8093e Mon Sep 17 00:00:00 2001
From: Shoreshen <372660931 at qq.com>
Date: Tue, 26 Aug 2025 11:57:29 +0800
Subject: [PATCH 4/4] Update llvm/test/CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll
Co-authored-by: Matt Arsenault <Matthew.Arsenault at amd.com>
---
llvm/test/CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll b/llvm/test/CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll
index 632e89d92ca69..1826a51868ecf 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx10plus-wavefront-sgpr-count.ll
@@ -5,7 +5,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -filetype=obj < %s 2>&1 | llvm-objdump -d --section=.rodata - | FileCheck %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 -filetype=obj < %s 2>&1 | llvm-objdump -d --section=.rodata - | FileCheck %s
-; CHECK-NOT: {{[Ee]rror.*COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT}}
+; CHECK-NOT: error
define amdgpu_kernel void @test(i128 inreg) {
ret void
}
More information about the llvm-commits mailing list