[llvm] [Draft][AMDGPU] Rematerialize VGPR candidates when SGPR spills results in VGPR Excess (PR #168079)

Juan Manuel Martinez Caamaño via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 18 03:05:32 PST 2025


================
@@ -0,0 +1,266 @@
+; RUN: llc -O3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1201 %s -o - | FileCheck %s
+; CHECK: Occupancy: 16
+
+%struct.CIntersectionData.26.38.62.146.157.294.312.816.1410.2265.15360.15392.15432.15440.15448.15464.15480.15780.16380.16398.16422.16440.16458.16464.16476.16494.16536.16542.16554.16572.16590 = type { i32, i32, float, float, i32, i32, i32 }
+%struct.HIP_vector_type.30.42.66.150.161.298.316.820.1414.2269.15364.15396.15436.15444.15452.15468.15484.15782.16382.16400.16424.16442.16460.16466.16478.16496.16538.16544.16556.16574.16592 = type { %struct.HIP_vector_base.29.41.65.149.160.297.315.819.1413.2268.15363.15395.15435.15443.15451.15467.15483.15781.16381.16399.16423.16441.16459.16465.16477.16495.16537.16543.16555.16573.16591 }
+%struct.HIP_vector_base.29.41.65.149.160.297.315.819.1413.2268.15363.15395.15435.15443.15451.15467.15483.15781.16381.16399.16423.16441.16459.16465.16477.16495.16537.16543.16555.16573.16591 = type { float, float, float, float }
+%struct.RSArrayTextureObject1D.31.43.67.151.162.299.317.821.1415.2270.15365.15397.15437.15445.15453.15469.15485.15783.16383.16401.16425.16443.16461.16467.16479.16497.16539.16545.16557.16575.16593 = type { ptr }
+%struct.CTextureHWSampler.33.45.69.153.164.301.319.823.1417.2272.15367.15399.15439.15447.15455.15471.15487.15785.16385.16403.16427.16445.16463.16469.16481.16499.16541.16547.16559.16577.16595 = type { %struct.RSTextureObject2D.32.44.68.152.163.300.318.822.1416.2271.15366.15398.15438.15446.15454.15470.15486.15784.16384.16402.16426.16444.16462.16468.16480.16498.16540.16546.16558.16576.16594 }
+%struct.RSTextureObject2D.32.44.68.152.163.300.318.822.1416.2271.15366.15398.15438.15446.15454.15470.15486.15784.16384.16402.16426.16444.16462.16468.16480.16498.16540.16546.16558.16576.16594 = type { ptr }
+
+define amdgpu_kernel void @the_kernel(i32 %0, i32 %rem.i925, i32 %notmask.i, i8 %coerce.sroa.9.0.copyload, i32 %coerce.sroa.11788.0.copyload, i32 %and120.i, i1 %cmp121.i, i1 %loadedv89.i, ptr addrspace(1) %1, i32 %traceSetPaletteOffset.2.i, i1 %hasAlreadyBeenQueuedToUnfinishedList.3.off0.i, i32 %2, i1 %3, i1 %.not, i1 %cmp16.i, i1 %brmerge.not, ptr addrspace(1) %4, ptr addrspace(1) %5, ptr addrspace(1) %arrayidx107.i, ptr addrspace(1) %6, ptr addrspace(1) %arrayidx114.i, ptr addrspace(1) %7, ptr addrspace(1) %arrayidx238.i, ptr addrspace(1) %8, ptr addrspace(1) %arrayidx256.i, ptr addrspace(1) %9, ptr addrspace(1) %10, ptr addrspace(1) %arrayidx274.i, ptr addrspace(1) %11, ptr addrspace(1) %arrayidx281.i, ptr addrspace(1) %12, ptr addrspace(1) %arrayidx289.i, float %13, float %spec.select4288.i, float %RAYDATA_CLOSESTINTERSECTION.i.10, float %RAYDATA_CLOSESTINTERSECTION.i.11, float %14, ptr addrspace(1) %15, ptr addrspace(1) %arrayidx607.i, ptr addrspace(1) %origin604.sroa.4.0.arrayidx607.sroa_idx.i, <4 x i32> %16, float %17, <4 x i32> %18, ptr addrspace(1) %19, ptr addrspace(1) %20, i1 %cmp.i10516.i) {
+entry:
+  %21 = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %arrayidx10.i = getelementptr %struct.CIntersectionData.26.38.62.146.157.294.312.816.1410.2265.15360.15392.15432.15440.15448.15464.15480.15780.16380.16398.16422.16440.16458.16464.16476.16494.16536.16542.16554.16572.16590, ptr addrspace(3) null, i32 %21
+  %22 = addrspacecast ptr addrspace(3) %arrayidx10.i to ptr
+  %arrayidx24.i = getelementptr i8, ptr addrspace(3) null, i32 %21
+  %23 = addrspacecast ptr addrspace(3) %arrayidx24.i to ptr
+  %arrayidx50.i = getelementptr i32, ptr addrspace(3) null, i32 %21
+  %24 = addrspacecast ptr addrspace(3) %arrayidx50.i to ptr
+  %instanceID.i = getelementptr i8, ptr addrspace(3) %arrayidx10.i, i32 20
+  %25 = addrspacecast ptr addrspace(3) %instanceID.i to ptr
+  %primID900.i = getelementptr i8, ptr addrspace(3) %arrayidx10.i, i32 4
+  %26 = addrspacecast ptr addrspace(3) %primID900.i to ptr
+  %matID_relID901.i = getelementptr i8, ptr addrspace(3) %arrayidx10.i, i32 16
+  %27 = addrspacecast ptr addrspace(3) %matID_relID901.i to ptr
+  %barycentricV1469.i = getelementptr i8, ptr addrspace(3) %arrayidx10.i, i32 12
+  %28 = addrspacecast ptr addrspace(3) %barycentricV1469.i to ptr
----------------
jmmartinez wrote:

Fixed (I've completely updated the test and run the `metarenamer` pass on it).

https://github.com/llvm/llvm-project/pull/168079


More information about the llvm-commits mailing list