[llvm] [Draft][AMDGPU] Rematerialize VGPR candidates when SGPR spills results in VGPR Excess (PR #168079)

Juan Manuel Martinez Caamaño via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 18 01:21:48 PST 2025


================
@@ -0,0 +1,266 @@
+; RUN: llc -O3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1201 %s -o - | FileCheck %s
+; CHECK: Occupancy: 16
+
+%struct.CIntersectionData.26.38.62.146.157.294.312.816.1410.2265.15360.15392.15432.15440.15448.15464.15480.15780.16380.16398.16422.16440.16458.16464.16476.16494.16536.16542.16554.16572.16590 = type { i32, i32, float, float, i32, i32, i32 }
+%struct.HIP_vector_type.30.42.66.150.161.298.316.820.1414.2269.15364.15396.15436.15444.15452.15468.15484.15782.16382.16400.16424.16442.16460.16466.16478.16496.16538.16544.16556.16574.16592 = type { %struct.HIP_vector_base.29.41.65.149.160.297.315.819.1413.2268.15363.15395.15435.15443.15451.15467.15483.15781.16381.16399.16423.16441.16459.16465.16477.16495.16537.16543.16555.16573.16591 }
+%struct.HIP_vector_base.29.41.65.149.160.297.315.819.1413.2268.15363.15395.15435.15443.15451.15467.15483.15781.16381.16399.16423.16441.16459.16465.16477.16495.16537.16543.16555.16573.16591 = type { float, float, float, float }
+%struct.RSArrayTextureObject1D.31.43.67.151.162.299.317.821.1415.2270.15365.15397.15437.15445.15453.15469.15485.15783.16383.16401.16425.16443.16461.16467.16479.16497.16539.16545.16557.16575.16593 = type { ptr }
+%struct.CTextureHWSampler.33.45.69.153.164.301.319.823.1417.2272.15367.15399.15439.15447.15455.15471.15487.15785.16385.16403.16427.16445.16463.16469.16481.16499.16541.16547.16559.16577.16595 = type { %struct.RSTextureObject2D.32.44.68.152.163.300.318.822.1416.2271.15366.15398.15438.15446.15454.15470.15486.15784.16384.16402.16426.16444.16462.16468.16480.16498.16540.16546.16558.16576.16594 }
+%struct.RSTextureObject2D.32.44.68.152.163.300.318.822.1416.2271.15366.15398.15438.15446.15454.15470.15486.15784.16384.16402.16426.16444.16462.16468.16480.16498.16540.16546.16558.16576.16594 = type { ptr }
+
+define amdgpu_kernel void @the_kernel(i32 %0, i32 %rem.i925, i32 %notmask.i, i8 %coerce.sroa.9.0.copyload, i32 %coerce.sroa.11788.0.copyload, i32 %and120.i, i1 %cmp121.i, i1 %loadedv89.i, ptr addrspace(1) %1, i32 %traceSetPaletteOffset.2.i, i1 %hasAlreadyBeenQueuedToUnfinishedList.3.off0.i, i32 %2, i1 %3, i1 %.not, i1 %cmp16.i, i1 %brmerge.not, ptr addrspace(1) %4, ptr addrspace(1) %5, ptr addrspace(1) %arrayidx107.i, ptr addrspace(1) %6, ptr addrspace(1) %arrayidx114.i, ptr addrspace(1) %7, ptr addrspace(1) %arrayidx238.i, ptr addrspace(1) %8, ptr addrspace(1) %arrayidx256.i, ptr addrspace(1) %9, ptr addrspace(1) %10, ptr addrspace(1) %arrayidx274.i, ptr addrspace(1) %11, ptr addrspace(1) %arrayidx281.i, ptr addrspace(1) %12, ptr addrspace(1) %arrayidx289.i, float %13, float %spec.select4288.i, float %RAYDATA_CLOSESTINTERSECTION.i.10, float %RAYDATA_CLOSESTINTERSECTION.i.11, float %14, ptr addrspace(1) %15, ptr addrspace(1) %arrayidx607.i, ptr addrspace(1) %origin604.sroa.4.0.arrayidx607.sroa_idx.i, <4 x i32> %16, float %17, <4 x i32> %18, ptr addrspace(1) %19, ptr addrspace(1) %20, i1 %cmp.i10516.i) {
+entry:
+  %21 = tail call i32 @llvm.amdgcn.workitem.id.x()
+  %arrayidx10.i = getelementptr %struct.CIntersectionData.26.38.62.146.157.294.312.816.1410.2265.15360.15392.15432.15440.15448.15464.15480.15780.16380.16398.16422.16440.16458.16464.16476.16494.16536.16542.16554.16572.16590, ptr addrspace(3) null, i32 %21
+  %22 = addrspacecast ptr addrspace(3) %arrayidx10.i to ptr
+  %arrayidx24.i = getelementptr i8, ptr addrspace(3) null, i32 %21
+  %23 = addrspacecast ptr addrspace(3) %arrayidx24.i to ptr
+  %arrayidx50.i = getelementptr i32, ptr addrspace(3) null, i32 %21
+  %24 = addrspacecast ptr addrspace(3) %arrayidx50.i to ptr
+  %instanceID.i = getelementptr i8, ptr addrspace(3) %arrayidx10.i, i32 20
+  %25 = addrspacecast ptr addrspace(3) %instanceID.i to ptr
+  %primID900.i = getelementptr i8, ptr addrspace(3) %arrayidx10.i, i32 4
+  %26 = addrspacecast ptr addrspace(3) %primID900.i to ptr
+  %matID_relID901.i = getelementptr i8, ptr addrspace(3) %arrayidx10.i, i32 16
+  %27 = addrspacecast ptr addrspace(3) %matID_relID901.i to ptr
+  %barycentricV1469.i = getelementptr i8, ptr addrspace(3) %arrayidx10.i, i32 12
+  %28 = addrspacecast ptr addrspace(3) %barycentricV1469.i to ptr
+  %barycentricU_or_hairM1468.i = getelementptr i8, ptr addrspace(3) %arrayidx10.i, i32 8
+  %29 = addrspacecast ptr addrspace(3) %barycentricU_or_hairM1468.i to ptr
+  %booleanMeshID.i = getelementptr i8, ptr addrspace(3) %arrayidx10.i, i32 24
+  %30 = addrspacecast ptr addrspace(3) %booleanMeshID.i to ptr
+  br label %while.cond.i
+
+while.cond.i:                                     ; preds = %if.end1706.i, %entry
+  %v.i.0 = phi float [ 0.000000e+00, %entry ], [ %v.i.2, %if.end1706.i ]
+  %t1122.i.052 = phi float [ 0.000000e+00, %entry ], [ %spec.select4288.i, %if.end1706.i ]
+  %time.0.i = phi float [ 0.000000e+00, %entry ], [ %time.2.i, %if.end1706.i ]
+  %materialCullingMask.0.i = phi i32 [ 0, %entry ], [ %materialCullingMask.8.i, %if.end1706.i ]
+  br i1 %.not, label %_Z13__ballot_syncIyEyT_i.exit, label %do.body13.i
+
+do.body13.i:                                      ; preds = %while.cond.i
+  %31 = tail call i32 @llvm.amdgcn.ballot.i32(i1 false)
+  %cmp15.i = icmp eq i32 0, %31
+  ret void
+
+_Z13__ballot_syncIyEyT_i.exit:                    ; preds = %while.cond.i
+  br i1 %cmp16.i, label %if.then17.i, label %if.end352.i
+
+if.then17.i:                                      ; preds = %_Z13__ballot_syncIyEyT_i.exit
+  br i1 %3, label %land.lhs.true.i, label %if.end28.i
+
+land.lhs.true.i:                                  ; preds = %if.then17.i
+  %32 = load i8, ptr %23, align 1
+  %loadedv.i = trunc i8 %32 to i1
+  br i1 %loadedv.i, label %_ZL5TracePV17CIntersectionDataPVbPVibbbjbRK17RayTraceArguments.exit, label %if.end28.i
+
+if.end28.i:                                       ; preds = %land.lhs.true.i, %if.then17.i
+  br i1 %brmerge.not, label %if.then44.i, label %if.end65.i
+
+if.then44.i:                                      ; preds = %if.end28.i
+  %33 = atomicrmw add ptr addrspace(1) %4, i32 0 monotonic, align 4
+  store i8 1, ptr null, align 1
+  br label %if.end65.i
+
+if.end65.i:                                       ; preds = %if.then44.i, %if.end28.i
+  br i1 %cmp121.i, label %if.then72.i, label %if.end352.i
+
+if.then72.i:                                      ; preds = %if.end65.i
+  %34 = load volatile i32, ptr %24, align 4
----------------
jmmartinez wrote:

Sadly, there are. I'll ask around to find out why, and whether we can remove them.

https://github.com/llvm/llvm-project/pull/168079


More information about the llvm-commits mailing list