[llvm] [AMDGPU] Do not propagate vgpr count in dVGPR mode (PR #187078)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 17 10:53:06 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Mirko Brkušanin (mbrkusanin)
<details>
<summary>Changes</summary>
When dVGPR mode is enabled, simply set .vgpr_count to the function's local
number of VGPRs instead of the module-wide maximum.
Keep the maximum for retry_vgpr_alloc and first_retry_wrapper.
---
Full diff: https://github.com/llvm/llvm-project/pull/187078.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp (+21-10)
- (added) llvm/test/CodeGen/AMDGPU/dvgpr-vgpr-count-propagation.ll (+62)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
index 8186c329c4daf..92c148f268a60 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCResourceInfo.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUMCResourceInfo.h"
+#include "SIMachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -298,9 +299,27 @@ void MCResourceInfo::gatherResourceInfo(
}
};
+ auto SetToLocal = [&](int64_t LocalValue, ResourceInfoKind RIK) {
+ MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext, IsLocal);
+ LLVM_DEBUG(
+ dbgs() << "MCResUse: " << Sym->getName() << ": Adding " << LocalValue
+ << ", no further propagation as indirect callee found within\n");
+ Sym->setVariableValue(MCConstantExpr::create(LocalValue, OutContext));
+ };
+
+ // When DynamicVGPR is enabled do not propagate VGPR counts from callees.
+ bool SkipVGPRPropagation =
+ MF.getInfo<SIMachineFunctionInfo>()->isDynamicVGPREnabled() &&
+ MF.getFunction().getCallingConv() == CallingConv::AMDGPU_CS_Chain;
+
LLVM_DEBUG(dbgs() << "MCResUse: " << FnSym->getName() << '\n');
- SetMaxReg(MaxVGPRSym, FRI.NumVGPR, RIK_NumVGPR);
- SetMaxReg(MaxAGPRSym, FRI.NumAGPR, RIK_NumAGPR);
+ if (SkipVGPRPropagation) {
+ SetToLocal(FRI.NumVGPR, RIK_NumVGPR);
+ SetToLocal(FRI.NumAGPR, RIK_NumAGPR);
+ } else {
+ SetMaxReg(MaxVGPRSym, FRI.NumVGPR, RIK_NumVGPR);
+ SetMaxReg(MaxAGPRSym, FRI.NumAGPR, RIK_NumAGPR);
+ }
SetMaxReg(MaxSGPRSym, FRI.NumExplicitSGPR, RIK_NumSGPR);
SetMaxReg(MaxNamedBarrierSym, FRI.NumNamedBarrier, RIK_NumNamedBarrier);
@@ -355,14 +374,6 @@ void MCResourceInfo::gatherResourceInfo(
Sym->setVariableValue(localConstExpr);
}
- auto SetToLocal = [&](int64_t LocalValue, ResourceInfoKind RIK) {
- MCSymbol *Sym = getSymbol(FnSym->getName(), RIK, OutContext, IsLocal);
- LLVM_DEBUG(
- dbgs() << "MCResUse: " << Sym->getName() << ": Adding " << LocalValue
- << ", no further propagation as indirect callee found within\n");
- Sym->setVariableValue(MCConstantExpr::create(LocalValue, OutContext));
- };
-
if (!FRI.HasIndirectCall) {
assignResourceInfoExpr(FRI.UsesVCC, ResourceInfoKind::RIK_UsesVCC,
AMDGPUMCExpr::AGVK_Or, MF, FRI.Callees, OutContext);
diff --git a/llvm/test/CodeGen/AMDGPU/dvgpr-vgpr-count-propagation.ll b/llvm/test/CodeGen/AMDGPU/dvgpr-vgpr-count-propagation.ll
new file mode 100644
index 0000000000000..2b3ca42eb04d8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/dvgpr-vgpr-count-propagation.ll
@@ -0,0 +1,62 @@
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=DVGPR %s
+; RUN: sed 's/"amdgpu-dynamic-vgpr-block-size"="16"/nounwind/' %s \
+; RUN: | llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 | FileCheck -check-prefix=NODVGPR %s
+
+; DVGPR-DAG: .set func.0.num_vgpr, 15
+; DVGPR-DAG: .set func.0.has_indirect_call, 1
+; DVGPR-DAG: .set func.1.num_vgpr, 55
+; DVGPR-DAG: .set func.1.has_indirect_call, 1
+; DVGPR-DAG: .set retry_vgpr_alloc.num_vgpr, max(11, amdgpu.max_num_vgpr)
+; DVGPR-DAG: .set retry_vgpr_alloc.has_indirect_call, 1
+; DVGPR-DAG: .set first_retry_wrapper.num_vgpr, max(11, amdgpu.max_num_vgpr)
+; DVGPR-DAG: .set first_retry_wrapper.has_indirect_call, 1
+; DVGPR-DAG: .set amdgpu.max_num_vgpr, 55
+
+; DVGPR: .shader_functions:
+; DVGPR: func.0:
+; DVGPR: .vgpr_count: 0xf
+; DVGPR: func.1:
+; DVGPR: .vgpr_count: 0x37
+
+; NODVGPR-DAG: .set func.0.num_vgpr, max(15, amdgpu.max_num_vgpr)
+; NODVGPR-DAG: .set func.0.has_indirect_call, 1
+; NODVGPR-DAG: .set func.1.num_vgpr, max(55, amdgpu.max_num_vgpr)
+; NODVGPR-DAG: .set func.1.has_indirect_call, 1
+; NODVGPR-DAG: .set retry_vgpr_alloc.num_vgpr, max(11, amdgpu.max_num_vgpr)
+; NODVGPR-DAG: .set retry_vgpr_alloc.has_indirect_call, 1
+; NODVGPR-DAG: .set first_retry_wrapper.num_vgpr, max(11, amdgpu.max_num_vgpr)
+; NODVGPR-DAG: .set first_retry_wrapper.has_indirect_call, 1
+; NODVGPR-DAG: .set amdgpu.max_num_vgpr, 55
+
+; NODVGPR: .shader_functions:
+; NODVGPR: func.0:
+; NODVGPR: .vgpr_count: 0x37
+; NODVGPR: func.1:
+; NODVGPR: .vgpr_count: 0x37
+
+define amdgpu_cs_chain void @func.0(<3 x i32> inreg %sgprs, <3 x i32> %vgprs) #0 {
+ call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}"()
+ %fptr = load ptr, ptr inttoptr(i64 0 to ptr)
+ call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr inreg %fptr, i32 inreg 0, <3 x i32> inreg %sgprs, <3 x i32> %vgprs, i32 1, i32 0, i32 -1, ptr @first_retry_wrapper)
+ unreachable
+}
+
+define amdgpu_cs_chain void @func.1(<3 x i32> inreg %sgprs, <3 x i32> %vgprs) #0 {
+ call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39},~{v40},~{v41},~{v42},~{v43},~{v44},~{v45},~{v46},~{v47},~{v48},~{v49}"()
+ %fptr = load ptr, ptr inttoptr(i64 0 to ptr)
+ call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr inreg %fptr, i32 inreg 0, <3 x i32> inreg %sgprs, <3 x i32> %vgprs, i32 1, i32 0, i32 -1, ptr @first_retry_wrapper)
+ unreachable
+}
+
+define amdgpu_cs_chain_preserve void @retry_vgpr_alloc(<3 x i32> inreg %sgprs) #0 {
+ %fptr = load ptr, ptr inttoptr(i64 0 to ptr)
+ call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr inreg %fptr, i32 inreg 0, <3 x i32> inreg %sgprs, <3 x i32> zeroinitializer, i32 1, i32 0, i32 -1, ptr @retry_vgpr_alloc)
+ unreachable
+}
+
+define amdgpu_cs_chain_preserve void @first_retry_wrapper(<3 x i32> inreg %sgprs) #0 {
+ call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr inreg @retry_vgpr_alloc, i32 inreg 0, <3 x i32> inreg %sgprs, <3 x i32> zeroinitializer, i32 1, i32 0, i32 -1, ptr @retry_vgpr_alloc)
+ unreachable
+}
+
+attributes #0 = { "amdgpu-dynamic-vgpr-block-size"="16" }
``````````
</details>
https://github.com/llvm/llvm-project/pull/187078
More information about the llvm-commits
mailing list