[llvm-branch-commits] [llvm] [AMDGPU] Physical register tracking in GCN trackers. (PR #184275)

Dhruva Chakrabarti via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Mar 24 23:10:28 PDT 2026


================
@@ -0,0 +1,599 @@
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=GCN-DEBUG %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=0 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=GENERIC-DEBUG %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -amdgpu-trackers-physical-register-tracking=0 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=GCN-NOPHYS-DEBUG %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck --check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=0 < %s | FileCheck --check-prefix=NO-GCN %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -amdgpu-trackers-physical-register-tracking=0 < %s | FileCheck --check-prefix=GCN-NOPHYS %s
+; REQUIRES: asserts
+
+; Test that GCN trackers correctly track physical register pressure from inline asm
+
+; GCN-DEBUG-LABEL: test_single_physreg
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+
+; GENERIC-DEBUG-LABEL: test_single_physreg
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+
+; GCN-NOPHYS-DEBUG-LABEL: test_single_physreg
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+
+; Kernel with one inline asm call whose i32 output is constrained to the
+; physical register s10 ("={s10}"). The asm result is stored to %out.
+define amdgpu_kernel void @test_single_physreg(ptr addrspace(1) %out) {
+entry:
+  %val = call i32 asm sideeffect "s_mov_b32 $0, 0", "={s10}"()
+  store i32 %val, ptr addrspace(1) %out
+  ret void
+}
+
+; Test multiple physical registers
+
+; GCN-DEBUG-LABEL: test_multiple_physregs
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 6
+
+; GENERIC-DEBUG-LABEL: test_multiple_physregs
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 6
+
+; GCN-NOPHYS-DEBUG-LABEL: test_multiple_physregs
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 6
+
+; Kernel with one inline asm call that produces two i32 outputs, constrained
+; to the physical registers s10 and s11 ("={s10},={s11}"). Both outputs are
+; extracted from the returned struct, added, and the sum is stored to %out.
+define amdgpu_kernel void @test_multiple_physregs(ptr addrspace(1) %out) {
+entry:
+  %result = call { i32, i32 } asm sideeffect "s_mov_b32 $0, 0; s_mov_b32 $1, 1", "={s10},={s11}"()
+  %r0 = extractvalue { i32, i32 } %result, 0
+  %r1 = extractvalue { i32, i32 } %result, 1
+  %sum = add i32 %r0, %r1
+  store i32 %sum, ptr addrspace(1) %out
+  ret void
+}
+
+; Test physical register with virtual registers
+
+; GCN-DEBUG-LABEL: test_physreg_with_vreg
+; GCN-DEBUG: Region register pressure: VGPRs: 2 AGPRs: 0, SGPRs: 10, LVGPR WT: 0, LSGPR WT: 12
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 2 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 12
+
+; GENERIC-DEBUG-LABEL: test_physreg_with_vreg
+; GENERIC-DEBUG: Region register pressure: VGPRs: 2 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 12
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 2 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 12
+
+; GCN-NOPHYS-DEBUG-LABEL: test_physreg_with_vreg
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 2 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 12
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 2 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 12
+
+; Kernel that combines an inline asm output constrained to the physical
+; register s10 ("={s10}") with an ordinary value loaded from %in. The two
+; values are added and the sum is stored to %out.
+define amdgpu_kernel void @test_physreg_with_vreg(ptr addrspace(1) %in, ptr addrspace(1) %out) {
+entry:
+  %asm_val = call i32 asm sideeffect "s_mov_b32 $0, 0", "={s10}"()
+  %val = load i32, ptr addrspace(1) %in
+  %sum = add i32 %asm_val, %val
+  store i32 %sum, ptr addrspace(1) %out
+  ret void
+}
+
+; Test early-clobber constraint
+
+; GCN-DEBUG-LABEL: test_early_clobber
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+
+; GENERIC-DEBUG-LABEL: test_early_clobber
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+
+; GCN-NOPHYS-DEBUG-LABEL: test_early_clobber
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+
+define amdgpu_kernel void @test_early_clobber(ptr addrspace(1) %out) {
+entry:
+  %val = call i32 asm sideeffect "s_mov_b32 $0, 0", "=&{s10}"()
----------------
dhruvachak wrote:

Added. But note that the physical register pressure (RP) tracking functionality does not take early-clobber into account, so the reported pressure will be the same with or without it.

https://github.com/llvm/llvm-project/pull/184275


More information about the llvm-branch-commits mailing list