[llvm-branch-commits] [llvm] [AMDGPU] Physical register tracking in GCN trackers. (PR #184275)
Dhruva Chakrabarti via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Mar 24 23:10:48 PDT 2026
================
@@ -0,0 +1,599 @@
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=GCN-DEBUG %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=0 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=GENERIC-DEBUG %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -amdgpu-trackers-physical-register-tracking=0 -debug-only=machine-scheduler < %s 2>&1 | FileCheck --check-prefix=GCN-NOPHYS-DEBUG %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 < %s | FileCheck --check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=0 < %s | FileCheck --check-prefix=NO-GCN %s
+; RUN: llc -mtriple=amdgcn -mcpu=tahiti -amdgpu-use-amdgpu-trackers=1 -amdgpu-trackers-physical-register-tracking=0 < %s | FileCheck --check-prefix=GCN-NOPHYS %s
+; REQUIRES: asserts
+
+; Test that GCN trackers correctly track physical register pressure from inline asm
+
+; GCN-DEBUG-LABEL: test_single_physreg
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+
+; GENERIC-DEBUG-LABEL: test_single_physreg
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+
+; GCN-NOPHYS-DEBUG-LABEL: test_single_physreg
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+
+define amdgpu_kernel void @test_single_physreg(ptr addrspace(1) %out) {
+entry:
+ %val = call i32 asm sideeffect "s_mov_b32 $0, 0", "={s10}"()
+ store i32 %val, ptr addrspace(1) %out
+ ret void
+}
+
+; Test multiple physical registers
+
+; GCN-DEBUG-LABEL: test_multiple_physregs
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 6
+
+; GENERIC-DEBUG-LABEL: test_multiple_physregs
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 6
+
+; GCN-NOPHYS-DEBUG-LABEL: test_multiple_physregs
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 6
+
+define amdgpu_kernel void @test_multiple_physregs(ptr addrspace(1) %out) {
+entry:
+ %result = call { i32, i32 } asm sideeffect "s_mov_b32 $0, 0; s_mov_b32 $1, 1", "={s10},={s11}"()
+ %r0 = extractvalue { i32, i32 } %result, 0
+ %r1 = extractvalue { i32, i32 } %result, 1
+ %sum = add i32 %r0, %r1
+ store i32 %sum, ptr addrspace(1) %out
+ ret void
+}
+
+; Test physical register with virtual registers
+
+; GCN-DEBUG-LABEL: test_physreg_with_vreg
+; GCN-DEBUG: Region register pressure: VGPRs: 2 AGPRs: 0, SGPRs: 10, LVGPR WT: 0, LSGPR WT: 12
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 2 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 12
+
+; GENERIC-DEBUG-LABEL: test_physreg_with_vreg
+; GENERIC-DEBUG: Region register pressure: VGPRs: 2 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 12
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 2 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 12
+
+; GCN-NOPHYS-DEBUG-LABEL: test_physreg_with_vreg
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 2 AGPRs: 0, SGPRs: 9, LVGPR WT: 0, LSGPR WT: 12
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 2 AGPRs: 0, SGPRs: 7, LVGPR WT: 0, LSGPR WT: 12
+
+define amdgpu_kernel void @test_physreg_with_vreg(ptr addrspace(1) %in, ptr addrspace(1) %out) {
+entry:
+ %asm_val = call i32 asm sideeffect "s_mov_b32 $0, 0", "={s10}"()
+ %val = load i32, ptr addrspace(1) %in
+ %sum = add i32 %asm_val, %val
+ store i32 %sum, ptr addrspace(1) %out
+ ret void
+}
+
+; Test early-clobber constraint
+
+; GCN-DEBUG-LABEL: test_early_clobber
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+
+; GENERIC-DEBUG-LABEL: test_early_clobber
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+
+; GCN-NOPHYS-DEBUG-LABEL: test_early_clobber
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+
+define amdgpu_kernel void @test_early_clobber(ptr addrspace(1) %out) {
+entry:
+ %val = call i32 asm sideeffect "s_mov_b32 $0, 0", "=&{s10}"()
+ store i32 %val, ptr addrspace(1) %out
+ ret void
+}
+
+; Test physical register input
+
+; GCN-DEBUG-LABEL: test_physreg_input
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+
+; GENERIC-DEBUG-LABEL: test_physreg_input
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+
+; GCN-NOPHYS-DEBUG-LABEL: test_physreg_input
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 5, LVGPR WT: 0, LSGPR WT: 6
+
+define amdgpu_kernel void @test_physreg_input(ptr addrspace(1) %out) {
+entry:
+ %val = call i32 asm sideeffect "s_mov_b32 s10, 5; s_add_u32 $0, s10, 1", "={s11}"()
+ store i32 %val, ptr addrspace(1) %out
+ ret void
+}
+
+; Test physical register pressure for tuple (64-bit) registers.
+; GCN tracker counts the 2 SGPRs.
+
+; GCN-DEBUG-LABEL: test_tuple_physreg
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 8
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 8
+
+; GENERIC-DEBUG-LABEL: test_tuple_physreg
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+
+; GCN-NOPHYS-DEBUG-LABEL: test_tuple_physreg
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 6, LVGPR WT: 0, LSGPR WT: 6
+
+define amdgpu_kernel void @test_tuple_physreg(ptr addrspace(1) %out) {
+entry:
+ %val = call i64 asm sideeffect "s_mov_b64 $0, 0", "={s[10:11]}"()
+ %lo = trunc i64 %val to i32
+ store i32 %lo, ptr addrspace(1) %out
+ ret void
+}
+
+; Test physical register pressure for 128-bit tuple.
+; GCN tracker counts the 4 SGPRs.
+
+; GCN-DEBUG-LABEL: test_tuple128_physreg
+; GCN-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 12
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 12
+
+; GENERIC-DEBUG-LABEL: test_tuple128_physreg
+; GENERIC-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 8
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 8
+
+; GCN-NOPHYS-DEBUG-LABEL: test_tuple128_physreg
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 8
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 1 AGPRs: 0, SGPRs: 8, LVGPR WT: 0, LSGPR WT: 8
+
+define amdgpu_kernel void @test_tuple128_physreg(ptr addrspace(1) %out) {
+entry:
+ %val = call i128 asm sideeffect "s_mov_b64 $0, 0; s_mov_b64 $0+2, 0", "={s[8:11]}"()
+ %lo = trunc i128 %val to i32
+ store i32 %lo, ptr addrspace(1) %out
+ ret void
+}
+
+; Test virtual and physical register overlap
+
+; GCN-DEBUG-LABEL: test_vreg_and_physreg_live_range_overlap
+; GCN-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 16, LVGPR WT: 0, LSGPR WT: 16
+; GCN-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 14, LVGPR WT: 0, LSGPR WT: 16
+
+; GENERIC-DEBUG-LABEL: test_vreg_and_physreg_live_range_overlap
+; GENERIC-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 14, LVGPR WT: 0, LSGPR WT: 16
+; GENERIC-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 16
+
+; GCN-NOPHYS-DEBUG-LABEL: test_vreg_and_physreg_live_range_overlap
+; GCN-NOPHYS-DEBUG: Region register pressure: VGPRs: 3 AGPRs: 0, SGPRs: 14, LVGPR WT: 0, LSGPR WT: 16
+; GCN-NOPHYS-DEBUG: Pressure after scheduling: VGPRs: 3 AGPRs: 0, SGPRs: 12, LVGPR WT: 0, LSGPR WT: 16
+
+define amdgpu_kernel void @test_vreg_and_physreg_live_range_overlap(ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(1) %out) {
+entry:
+ %result = call { i32, i32 } asm sideeffect "s_mov_b32 $0, 0; s_mov_b32 $1, 1", "={s10},={s11}"()
+ %val1 = load i32, ptr addrspace(1) %in1
+ %val2 = load i32, ptr addrspace(1) %in2
+ %sum = add i32 %val1, %val2
+ %r0 = extractvalue { i32, i32 } %result, 0
+ %r1 = extractvalue { i32, i32 } %result, 1
+ %with_asm = add i32 %sum, %r0
+ %final = add i32 %with_asm, %r1
+ store i32 %final, ptr addrspace(1) %out
+ ret void
+}
+
+; Verify assembly output for GCN trackers
+; GCN-LABEL: test_single_physreg:
----------------
dhruvachak wrote:
Done.
https://github.com/llvm/llvm-project/pull/184275
More information about the llvm-branch-commits
mailing list