[llvm] 82380f3 - [AMDGPU] Prioritize allocation of low 256 VGPR classes (#167978)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 19 16:00:50 PST 2025
Author: Stanislav Mekhanoshin
Date: 2025-11-19T16:00:46-08:00
New Revision: 82380f33de0ef22e645cf53ba4bf859e38df6623
URL: https://github.com/llvm/llvm-project/commit/82380f33de0ef22e645cf53ba4bf859e38df6623
DIFF: https://github.com/llvm/llvm-project/commit/82380f33de0ef22e645cf53ba4bf859e38df6623.diff
LOG: [AMDGPU] Prioritize allocation of low 256 VGPR classes (#167978)
If we have 1024 VGPRs available, we need to give priority to the
allocation of register classes whose operands can only use the low 256
VGPRs, notably the scale operands of V_WMMA_SCALE instructions.
Otherwise, large tuples will be allocated first and take all the low
registers, so we would have to spill to make room for these
scale registers.
Allocation priority by itself does not eliminate spilling completely
in large kernels, although it helps to some degree. Increasing the spill
weight of a restricted class on top of it helps.
Added:
Modified:
llvm/lib/Target/AMDGPU/SIRegisterInfo.h
llvm/lib/Target/AMDGPU/SIRegisterInfo.td
llvm/test/CodeGen/AMDGPU/regalloc-spill-wmma-scale.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 1402291539ff8..bb8a80f811d4c 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -496,6 +496,17 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
SmallVector<StringLiteral>
getVRegFlagsOfReg(Register Reg, const MachineFunction &MF) const override;
+
+ float
+ getSpillWeightScaleFactor(const TargetRegisterClass *RC) const override {
+ // Prioritize VGPR_32_Lo256 over other classes which may occupy registers
+ // beyond v256.
+ return AMDGPUGenRegisterInfo::getSpillWeightScaleFactor(RC) *
+ ((RC == &AMDGPU::VGPR_32_Lo256RegClass ||
+ RC == &AMDGPU::VReg_64_Lo256_Align2RegClass)
+ ? 2.0
+ : 1.0);
+ }
};
namespace AMDGPU {
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index abe12c17ae76c..5cff5f2248b02 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -644,7 +644,7 @@ def VGPR_32_Lo128 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg1
// Identical to VGPR_32 except it only contains the low 256 (Lo256) registers.
def VGPR_32_Lo256 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types.types), 32,
(add (sequence "VGPR%u", 0, 255))> {
- let AllocationPriority = 0;
+ let AllocationPriority = !add(3, !mul(BaseClassPriority, BaseClassScaleFactor));
let GeneratePressureSet = 0;
let Size = 32;
let Weight = 1;
diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-spill-wmma-scale.ll b/llvm/test/CodeGen/AMDGPU/regalloc-spill-wmma-scale.ll
index 1ac3da3b930f9..eafe54ebc98f8 100644
--- a/llvm/test/CodeGen/AMDGPU/regalloc-spill-wmma-scale.ll
+++ b/llvm/test/CodeGen/AMDGPU/regalloc-spill-wmma-scale.ll
@@ -1,9 +1,8 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1250 < %s | FileCheck %s
-; FIXME: Scale operands of WMMA are limited to low 256 VGPRs
-; currently we are spilling it because all low VGPRs are occupied even though our budget is higher.
+; Scale operands of WMMA are limited to low 256 VGPRs
; Make sure we do not spill scale operands because of the low 256 restriction.
-; CHECK: ; ScratchSize: 12
+; CHECK: ; ScratchSize: 0
; CHECK: ; Occupancy: 1
define amdgpu_kernel void @spill_scale_test(float %arg, float %arg1, float %arg2, float %arg3, float %arg4, float %arg5, float %arg6, float %arg7, <16 x i32> %arg8, float %arg9, <16 x i32> %arg10, float %arg11, <16 x i8> %arg12) #0 {
More information about the llvm-commits
mailing list