[llvm] 9e1d656 - AMDGPU: Remove MIMG special case in adjustAllocatableRegClass (#158184)

via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 12 02:02:27 PDT 2025


Author: Matt Arsenault
Date: 2025-09-12T09:02:24Z
New Revision: 9e1d656c682cd0bf6f123fba2064ffdb8861f790

URL: https://github.com/llvm/llvm-project/commit/9e1d656c682cd0bf6f123fba2064ffdb8861f790
DIFF: https://github.com/llvm/llvm-project/commit/9e1d656c682cd0bf6f123fba2064ffdb8861f790.diff

LOG: AMDGPU: Remove MIMG special case in adjustAllocatableRegClass (#158184)

I have no idea why this was here. MIMG atomics use tied operands
for the input and output, so AV classes should always have worked.
We have poor test coverage for AGPRs with atomics, so add a partial
set. Everything seems to work, although image cmpswap always uses
VGPRs unnecessarily.
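
For a concrete picture, the new tests pin values to AGPRs with inline asm
"a" constraints around the image atomic intrinsics. The simplest added
case, taken from the test file in this diff:

  define amdgpu_ps void @atomic_swap_1d_agpr(<8 x i32> inreg %rsrc, i32 %s) {
    %data = call i32 asm "; def $0", "=a"()
    %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
    call void asm "; use $0", "a"(i32 %v)
    ret void
  }

Because the atomic's data input and result are tied to one operand, llc at
-mcpu=gfx90a selects image_atomic_swap directly on a0 with no v_accvgpr
copies, as the generated checks show; the cmpswap cases are the ones that
still bounce through VGPRs.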

Added: 
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 23a124fecddad..5c3340703ba3b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5977,8 +5977,7 @@ static const TargetRegisterClass *
 adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI,
                           const MCInstrDesc &TID, unsigned RCID) {
   if (!ST.hasGFX90AInsts() && (((TID.mayLoad() || TID.mayStore()) &&
-                                !(TID.TSFlags & SIInstrFlags::Spill)) ||
-                               (TID.TSFlags & SIInstrFlags::MIMG))) {
+                                !(TID.TSFlags & SIInstrFlags::Spill)))) {
     switch (RCID) {
     case AMDGPU::AV_32RegClassID:
       RCID = AMDGPU::VGPR_32RegClassID;
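
For context, a rough sketch of the helper after this change. Only the
condition, the signature, and the AV_32 case are taken from the hunk above;
the wider AV cases and the final lookup are assumptions extrapolated from
that pattern, not copied from SIInstrInfo.cpp:

  // Pre-gfx90a subtargets cannot use AGPRs on VALU memory instructions
  // (spills excepted), so narrow AV superclasses to VGPR-only classes.
  static const TargetRegisterClass *
  adjustAllocatableRegClass(const GCNSubtarget &ST, const SIRegisterInfo &RI,
                            const MCInstrDesc &TID, unsigned RCID) {
    if (!ST.hasGFX90AInsts() && (TID.mayLoad() || TID.mayStore()) &&
        !(TID.TSFlags & SIInstrFlags::Spill)) {
      switch (RCID) {
      case AMDGPU::AV_32RegClassID:
        RCID = AMDGPU::VGPR_32RegClassID;
        break;
      case AMDGPU::AV_64RegClassID: // assumed: follows the AV_32 pattern
        RCID = AMDGPU::VReg_64RegClassID;
        break;
      // ... analogous cases for the wider AV classes (assumed) ...
      default:
        break;
      }
    }
    return RI.getRegClass(RCID); // assumed: resolve the possibly-adjusted ID
  }

Note the !ST.hasGFX90AInsts() guard: on gfx90a and newer nothing is narrowed
either before or after this patch, so AV classes survive to allocation and
the new tests below can select a0 operands directly.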

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll
new file mode 100644
index 0000000000000..49607e320bd0a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.atomic.dim.gfx90a.ll
@@ -0,0 +1,170 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
+
+define amdgpu_ps void @atomic_swap_1d_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_swap_1d_agpr:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a0
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    image_atomic_swap a0, v0, s[0:7] dmask:0x1 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %data = call i32 asm "; def $0", "=a"()
+  %v = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm "; use $0", "a"(i32 %v)
+  ret void
+}
+
+define amdgpu_ps void @atomic_add_2d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+; GFX90A-LABEL: atomic_add_2d_agpr:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a0
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    image_atomic_add a0, v[0:1], s[0:7] dmask:0x1 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %data = call i32 asm "; def $0", "=a"()
+  %v = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm "; use $0", "a"(i32 %v)
+  ret void
+}
+
+; FIXME: This should directly use the AGPRs
+define amdgpu_ps void @atomic_cmpswap_1d_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_cmpswap_1d_agpr:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a0
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a1
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT:    v_accvgpr_read_b32 v3, a1
+; GFX90A-NEXT:    image_atomic_cmpswap v[2:3], v0, s[0:7] dmask:0x3 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %cmp = call i32 asm "; def $0", "=a"()
+  %swap = call i32 asm "; def $0", "=a"()
+  %v = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm "; use $0", "a"(i32 %v)
+  ret void
+}
+
+define amdgpu_ps void @atomic_swap_1d_i64_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_swap_1d_i64_agpr:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a[0:1]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    image_atomic_swap a[0:1], v0, s[0:7] dmask:0x3 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %data = call i64 asm "; def $0", "=a"()
+  %v = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm "; use $0", "a"(i64 %v)
+  ret void
+}
+
+define amdgpu_ps void @atomic_cmpswap_1d_64_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_cmpswap_1d_64_agpr:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a[0:1]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v3, a1
+; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a[0:1]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v5, a1
+; GFX90A-NEXT:    v_accvgpr_read_b32 v4, a0
+; GFX90A-NEXT:    image_atomic_cmpswap v[2:5], v0, s[0:7] dmask:0xf unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %cmp = call i64 asm "; def $0", "=a"()
+  %swap = call i64 asm "; def $0", "=a"()
+  %v = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm "; use $0", "a"(i64 %v)
+  ret void
+}
+
+define amdgpu_ps void @atomic_swap_1d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_swap_1d_agpr_noret:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a0
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v1, a0
+; GFX90A-NEXT:    image_atomic_swap v1, v0, s[0:7] dmask:0x1 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %data = call i32 asm "; def $0", "=a"()
+  %unused = call i32 @llvm.amdgcn.image.atomic.swap.1d.i32.i32(i32 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_add_2d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+; GFX90A-LABEL: atomic_add_2d_agpr_noret:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a0
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT:    image_atomic_add v2, v[0:1], s[0:7] dmask:0x1 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %data = call i32 asm "; def $0", "=a"()
+  %unused = call i32 @llvm.amdgcn.image.atomic.add.2d.i32.i32(i32 %data, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_cmpswap_1d_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_cmpswap_1d_agpr_noret:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a0
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a1
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT:    v_accvgpr_read_b32 v3, a1
+; GFX90A-NEXT:    image_atomic_cmpswap v[2:3], v0, s[0:7] dmask:0x3 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %cmp = call i32 asm "; def $0", "=a"()
+  %swap = call i32 asm "; def $0", "=a"()
+  %unused = call i32 @llvm.amdgcn.image.atomic.cmpswap.1d.i32.i32(i32 %cmp, i32 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_swap_1d_i64_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_swap_1d_i64_agpr_noret:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a[0:1]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v3, a1
+; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT:    image_atomic_swap v[2:3], v0, s[0:7] dmask:0x3 unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %data = call i64 asm "; def $0", "=a"()
+  %unused = call i64 @llvm.amdgcn.image.atomic.swap.1d.i64.i32(i64 %data, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @atomic_cmpswap_1d_64_agpr_noret(<8 x i32> inreg %rsrc, i32 %s) {
+; GFX90A-LABEL: atomic_cmpswap_1d_64_agpr_noret:
+; GFX90A:       ; %bb.0:
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a[0:1]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v3, a1
+; GFX90A-NEXT:    v_accvgpr_read_b32 v2, a0
+; GFX90A-NEXT:    ;;#ASMSTART
+; GFX90A-NEXT:    ; def a[0:1]
+; GFX90A-NEXT:    ;;#ASMEND
+; GFX90A-NEXT:    v_accvgpr_read_b32 v5, a1
+; GFX90A-NEXT:    v_accvgpr_read_b32 v4, a0
+; GFX90A-NEXT:    image_atomic_cmpswap v[2:5], v0, s[0:7] dmask:0xf unorm glc
+; GFX90A-NEXT:    s_endpgm
+  %cmp = call i64 asm "; def $0", "=a"()
+  %swap = call i64 asm "; def $0", "=a"()
+  %unused = call i64 @llvm.amdgcn.image.atomic.cmpswap.1d.i64.i32(i64 %cmp, i64 %swap, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
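
(Per the UTC_ARGS note at the top of the file, the CHECK lines above are
autogenerated; rerunning utils/update_llc_test_checks.py on the test
regenerates them if codegen changes again.)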

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll
index dcac419f8591d..bb4a607fc62d0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.gfx90a.ll
@@ -418,6 +418,114 @@ main_body:
   ret <4 x float> %v
 }
 
+define amdgpu_ps void @load_1d_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GCN-LABEL: load_1d_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    image_load a[0:3], v0, s[0:7] dmask:0xf unorm
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; use a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_endpgm
+  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm sideeffect "; use $0", "a"(<4 x float> %v)
+  ret void
+}
+
+define amdgpu_ps void @load_2d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+; GCN-LABEL: load_2d_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    image_load a[0:3], v[0:1], s[0:7] dmask:0xf unorm
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; use a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_endpgm
+  %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm sideeffect "; use $0", "a"(<4 x float> %v)
+  ret void
+}
+
+define amdgpu_ps void @load_3d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
+; GCN-LABEL: load_3d_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    image_load a[0:3], v[0:2], s[0:7] dmask:0xf unorm
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; use a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_endpgm
+  %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm sideeffect "; use $0", "a"(<4 x float> %v)
+  ret void
+}
+
+define amdgpu_ps void @load_cube_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
+; GCN-LABEL: load_cube_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    image_load a[0:3], v[0:2], s[0:7] dmask:0xf unorm da
+; GCN-NEXT:    s_waitcnt vmcnt(0)
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; use a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    s_endpgm
+  %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+  call void asm sideeffect "; use $0", "a"(<4 x float> %v)
+  ret void
+}
+
+define amdgpu_ps void @store_1d_agpr(<8 x i32> inreg %rsrc, i32 %s) {
+; GCN-LABEL: store_1d_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    image_store a[0:3], v0, s[0:7] dmask:0xf unorm
+; GCN-NEXT:    s_endpgm
+  %vdata = call <4 x float> asm "; def $0", "=a"()
+  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @store_2d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
+; GCN-LABEL: store_2d_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    image_store a[0:3], v[0:1], s[0:7] dmask:0xf unorm
+; GCN-NEXT:    s_endpgm
+  %vdata = call <4 x float> asm "; def $0", "=a"()
+  call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @store_3d_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
+; GCN-LABEL: store_3d_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    image_store a[0:3], v[0:2], s[0:7] dmask:0xf unorm
+; GCN-NEXT:    s_endpgm
+  %vdata = call <4 x float> asm "; def $0", "=a"()
+  call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
+define amdgpu_ps void @store_cube_agpr(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
+; GCN-LABEL: store_cube_agpr:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    ;;#ASMSTART
+; GCN-NEXT:    ; def a[0:3]
+; GCN-NEXT:    ;;#ASMEND
+; GCN-NEXT:    image_store a[0:3], v[0:2], s[0:7] dmask:0xf unorm da
+; GCN-NEXT:    s_endpgm
+  %vdata = call <4 x float> asm "; def $0", "=a"()
+  call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
+  ret void
+}
+
 declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
 declare {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
 declare {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
