[llvm] AMDGPU: Figure out required AGPR count for inline asm (PR #150910)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 7 08:12:58 PDT 2025


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/150910

>From dbc8af175d99478f553263a8690662ac35f0ff37 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 28 Jul 2025 16:11:28 +0900
Subject: [PATCH 1/6] AMDGPU: Figure out required AGPR count for inline asm

For now just try to compute the minimum number of AGPRs required
to allocate the asm. Leave the attributor changes to turn this
into an integer value for later.
---
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp   |  59 +++++-
 .../AMDGPU/amdgpu-attributor-no-agpr.ll       | 199 ++++++++++++++++++
 2 files changed, 251 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index cb49936871e74..12190db0e8edd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1211,16 +1211,61 @@ AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
   llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
 }
 
-static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
-  for (const auto &CI : IA->ParseConstraints()) {
+/// Compute the minimum number of AGPRs required to allocate the inline asm.
+static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
+                                             const CallBase &Call) {
+  unsigned ArgNo = 0;
+  unsigned ResNo = 0;
+  unsigned AGPRDefCount = 0;
+  unsigned AGPRUseCount = 0;
+  unsigned MaxPhysReg = 0;
+  const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();
+
+  for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
+    Type *Ty = nullptr;
+    switch (CI.Type) {
+    case InlineAsm::isOutput: {
+      Ty = Call.getType();
+      if (auto *STy = dyn_cast<StructType>(Ty))
+        Ty = STy->getElementType(ResNo);
+      ++ResNo;
+      break;
+    }
+    case InlineAsm::isInput: {
+      Ty = Call.getArgOperand(ArgNo++)->getType();
+      break;
+    }
+    case InlineAsm::isLabel:
+      continue;
+    case InlineAsm::isClobber:
+      // Parse the physical register reference.
+      break;
+    }
+
     for (StringRef Code : CI.Codes) {
-      Code.consume_front("{");
-      if (Code.starts_with("a"))
-        return true;
+      if (Code.starts_with("a")) {
+        // Virtual register, compute number of registers based on the type.
+        //
+        // We ought to be going through TargetLowering to get the number of
+        // registers, but we should avoid the dependence on CodeGen here.
+        unsigned RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
+        if (CI.Type == InlineAsm::isOutput) {
+          AGPRDefCount += RegCount;
+          if (CI.isEarlyClobber)
+            AGPRUseCount += RegCount;
+        } else
+          AGPRUseCount += RegCount;
+      } else {
+        // Physical register reference
+        auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
+        if (Kind == 'a')
+          MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
+      }
     }
   }
 
-  return false;
+  unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);
+  return std::min(MaxVirtReg + MaxPhysReg, 256u);
 }
 
 // TODO: Migrate to range merge of amdgpu-agpr-alloc.
@@ -1259,7 +1304,7 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
       const Function *Callee = dyn_cast<Function>(CalleeOp);
       if (!Callee) {
         if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
-          return !inlineAsmUsesAGPRs(IA);
+          return inlineAsmGetNumRequiredAGPRs(IA, CB) == 0;
         return false;
       }
 
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index 664dfa21759cf..a67f6cd955612 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -251,6 +251,205 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
   ret void
 }
 
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
+; CHECK-NEXT:    ret void
+;
+  %def = call {i32, i32} asm sideeffect "; def $0", "=a,=a"()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
+; CHECK-NEXT:    ret void
+;
+  %def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=a"()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
+; CHECK-NEXT:    ret void
+;
+  %def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=v"()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0", "a"(ptr poison)
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call ptr asm sideeffect "
+; CHECK-NEXT:    ret void
+;
+  %def = call ptr asm sideeffect "; def $0", "=a"()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call <2 x ptr> asm sideeffect "
+; CHECK-NEXT:    ret void
+;
+  %def = call <2 x ptr> asm sideeffect "; def $0", "=a"()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
+; CHECK-NEXT:    ret void
+;
+  %def = call {i32, i32} asm sideeffect "; def $0", "={a0},={a[4:5]}"()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; clobber $0", "~{a4}"()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber_tuple() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_tuple(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; clobber $0", "~{a[10:13]}"()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber_oob() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_oob(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; clobber $0", "~{a256}"()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber_max() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_max(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; clobber $0", "~{a255}"()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_physreg_oob() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_oob(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0", "{a256}"(i32 poison)
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call <32 x i32> asm sideeffect "
+; CHECK-NEXT:    ret void
+;
+  %def = call <32 x i32> asm sideeffect "; def $0", "=a"()
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0", "a"(<32 x i32> poison)
+  ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call <32 x i32> asm sideeffect "
+; CHECK-NEXT:    ret void
+;
+  %def = call <32 x i32> asm sideeffect "; use $0", "=a,a"(<32 x i32> poison)
+  ret void
+}
+
+define amdgpu_kernel void @vreg_use_exceeds_register_file() {
+; CHECK-LABEL: define amdgpu_kernel void @vreg_use_exceeds_register_file(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0", "a"(<257 x i32> poison)
+  ret void
+}
+
+define amdgpu_kernel void @vreg_def_exceeds_register_file() {
+; CHECK-LABEL: define amdgpu_kernel void @vreg_def_exceeds_register_file(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call <257 x i32> asm sideeffect "
+; CHECK-NEXT:    ret void
+;
+  %def = call <257 x i32> asm sideeffect "; def $0", "=a"()
+  ret void
+}
+
+define amdgpu_kernel void @multiple() {
+; CHECK-LABEL: define amdgpu_kernel void @multiple(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call { <16 x i32>, <8 x i32>, <8 x i32> } asm sideeffect "
+; CHECK-NEXT:    ret void
+;
+  %def = call {<16 x i32>, <8 x i32>, <8 x i32>} asm sideeffect "; def $0", "=a,=a,=a,a,a,a"(<4 x i32> splat (i32 0), <8 x i32> splat (i32 1), i64 999)
+  ret void
+}
+
+define amdgpu_kernel void @earlyclobber_0() {
+; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_0(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call <8 x i32> asm sideeffect "
+; CHECK-NEXT:    ret void
+;
+  %def = call <8 x i32> asm sideeffect "; def $0", "=&a,a"(i32 0)
+  ret void
+}
+
+define amdgpu_kernel void @earlyclobber_1() {
+; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_1(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    [[DEF:%.*]] = call { <8 x i32>, <16 x i32> } asm sideeffect "
+; CHECK-NEXT:    ret void
+;
+  %def = call { <8 x i32>, <16 x i32 > } asm sideeffect "; def $0, $1", "=&a,=&a,a,a"(i32 0, <16 x i32> splat (i32 1))
+  ret void
+}
 
 attributes #0 = { "amdgpu-agpr-alloc"="0" }
 ;.

>From d408587eb765259900a64b8baaab76effab86a2f Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sat, 4 Oct 2025 19:34:08 +0900
Subject: [PATCH 2/6] reduce test noise

---
 .../AMDGPU/amdgpu-attributor-no-agpr.ll       | 232 ++++++++++++++----
 1 file changed, 180 insertions(+), 52 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index a67f6cd955612..49bcc9de2b644 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -1,103 +1,166 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 4
 ; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s
 
+; Shrink result attribute list by preventing use of most attributes.
+define internal void @use_most() {
+; CHECK-LABEL: define internal void @use_most(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [256 x i8], align 1, addrspace(5)
+; CHECK-NEXT:    [[ALLOCA_CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
+; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
+; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
+; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.amdgcn.cluster.id.x()
+; CHECK-NEXT:    [[TMP11:%.*]] = call i32 @llvm.amdgcn.cluster.id.y()
+; CHECK-NEXT:    [[TMP12:%.*]] = call i32 @llvm.amdgcn.cluster.id.z()
+; CHECK-NEXT:    [[TMP7:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+; CHECK-NEXT:    [[TMP8:%.*]] = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
+; CHECK-NEXT:    [[TMP9:%.*]] = call i64 @llvm.amdgcn.dispatch.id()
+; CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT:    [[IMPLICIT_ARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p4.i64(ptr [[ALLOCA_CAST]], ptr addrspace(4) [[IMPLICIT_ARG_PTR]], i64 256, i1 false)
+; CHECK-NEXT:    ret void
+;
+  %alloca = alloca [256 x i8], addrspace(5)
+  %alloca.cast = addrspacecast ptr addrspace(5) %alloca to ptr
+  call i32 @llvm.amdgcn.workitem.id.x()
+  call i32 @llvm.amdgcn.workitem.id.y()
+  call i32 @llvm.amdgcn.workitem.id.z()
+  call i32 @llvm.amdgcn.workgroup.id.x()
+  call i32 @llvm.amdgcn.workgroup.id.y()
+  call i32 @llvm.amdgcn.workgroup.id.z()
+  call i32 @llvm.amdgcn.cluster.id.x()
+  call i32 @llvm.amdgcn.cluster.id.y()
+  call i32 @llvm.amdgcn.cluster.id.z()
+  call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+  call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
+  call i64 @llvm.amdgcn.dispatch.id()
+  call i32 @llvm.amdgcn.lds.kernel.id()
+  %implicit.arg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+  call void @llvm.memcpy.p0.p4(ptr %alloca.cast, ptr addrspace(4) %implicit.arg.ptr, i64 256, i1 false)
+  ret void
+}
+
 define amdgpu_kernel void @kernel_uses_asm_virtreg() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg(
-; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; use $0", "a"(i32 poison)
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_virtreg_def() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call i32 asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %def = call i32 asm sideeffect "; def $0", "=a"()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call i64 asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %def = call i64 asm sideeffect "; def $0", "={a[0:1]}"()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_virtreg_second_arg() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_second_arg(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; use $0", "v,a"(i32 poison, i32 poison)
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_non_agpr_asm() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_non_agpr_asm(
-; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; use $0", "v"(i32 poison)
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_physreg() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; use $0", "{a0}"(i32 poison)
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_physreg_tuple() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_tuple(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; use $0", "{a[0:1]}"(i64 poison)
+  call void @use_most()
   ret void
 }
 
 define void @func_uses_asm_virtreg_agpr() {
 ; CHECK-LABEL: define void @func_uses_asm_virtreg_agpr(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; use $0", "a"(i32 poison)
+  call void @use_most()
   ret void
 }
 
 define void @func_uses_asm_physreg_agpr() {
 ; CHECK-LABEL: define void @func_uses_asm_physreg_agpr(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; use $0", "{a0}"(i32 poison)
+  call void @use_most()
   ret void
 }
 
 define void @func_uses_asm_physreg_agpr_tuple() {
 ; CHECK-LABEL: define void @func_uses_asm_physreg_agpr_tuple(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; use $0", "{a[0:1]}"(i64 poison)
+  call void @use_most()
   ret void
 }
 
@@ -105,99 +168,119 @@ declare void @unknown()
 
 define amdgpu_kernel void @kernel_calls_extern() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern(
-; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void @unknown()
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void @unknown()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
-; CHECK-SAME: ) #[[ATTR2]] {
-; CHECK-NEXT:    call void @unknown() #[[ATTR6:[0-9]+]]
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    call void @unknown() #[[ATTR5:[0-9]+]]
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void @unknown() #0
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect(
-; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] {
+; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:    call void [[INDIRECT]]()
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void %indirect()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
-; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT:    call void [[INDIRECT]]() #[[ATTR6]]
+; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    call void [[INDIRECT]]() #[[ATTR5]]
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void %indirect() #0
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_transitively_uses_agpr_asm() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_transitively_uses_agpr_asm(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void @func_uses_asm_physreg_agpr()
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void @func_uses_asm_physreg_agpr()
+  call void @use_most()
   ret void
 }
 
 define void @empty() {
 ; CHECK-LABEL: define void @empty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
+  call void @use_most()
   ret void
 }
 
 define void @also_empty() {
 ; CHECK-LABEL: define void @also_empty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_calls_empty() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_empty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @empty()
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void @empty()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void @empty()
 ; CHECK-NEXT:    call void @func_uses_asm_physreg_agpr()
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void @empty()
   call void @func_uses_asm_physreg_agpr()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_calls_generic_intrinsic(ptr %ptr0, ptr %ptr1, i64 %size) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_generic_intrinsic(
-; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr [[PTR0]], ptr [[PTR1]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void @llvm.memcpy.p0.p0.i64(ptr %ptr0, ptr %ptr1, i64 %size, i1 false)
+  call void @use_most()
   ret void
 }
 
@@ -205,31 +288,35 @@ declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>
 
 define amdgpu_kernel void @kernel_calls_mfma.f32.32x32x1f32(ptr addrspace(1) %out, float %a, float %b, <32 x float> %c) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_mfma.f32.32x32x1f32(
-; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]], <32 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]], <32 x float> [[C:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[RESULT:%.*]] = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float [[A]], float [[B]], <32 x float> [[C]], i32 0, i32 0, i32 0)
 ; CHECK-NEXT:    store <32 x float> [[RESULT]], ptr addrspace(1) [[OUT]], align 128
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %result = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %a, float %b, <32 x float> %c, i32 0, i32 0, i32 0)
   store <32 x float> %result, ptr addrspace(1) %out
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_calls_workitem_id_x(ptr addrspace(1) %out) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_workitem_id_x(
-; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
 ; CHECK-NEXT:    store i32 [[RESULT]], ptr addrspace(1) [[OUT]], align 4
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %result = call i32 @llvm.amdgcn.workitem.id.x()
   store i32 %result, ptr addrspace(1) %out
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
 ; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr(
-; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:    [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]]
@@ -244,220 +331,261 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
 ; CHECK:       5:
 ; CHECK-NEXT:    unreachable
 ; CHECK:       6:
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %fptr = select i1 %cond, ptr @empty, ptr @also_empty
   call void %fptr()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %def = call {i32, i32} asm sideeffect "; def $0", "=a,=a"()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=a"()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=v"()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; use $0", "a"(ptr poison)
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call ptr asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %def = call ptr asm sideeffect "; def $0", "=a"()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call <2 x ptr> asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %def = call <2 x ptr> asm sideeffect "; def $0", "=a"()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %def = call {i32, i32} asm sideeffect "; def $0", "={a0},={a[4:5]}"()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_clobber() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; clobber $0", "~{a4}"()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_clobber_tuple() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_tuple(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; clobber $0", "~{a[10:13]}"()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_clobber_oob() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_oob(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; clobber $0", "~{a256}"()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_clobber_max() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_max(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; clobber $0", "~{a255}"()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_physreg_oob() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_oob(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; use $0", "{a256}"(i32 poison)
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call <32 x i32> asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %def = call <32 x i32> asm sideeffect "; def $0", "=a"()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; use $0", "a"(<32 x i32> poison)
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call <32 x i32> asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %def = call <32 x i32> asm sideeffect "; use $0", "=a,a"(<32 x i32> poison)
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @vreg_use_exceeds_register_file() {
 ; CHECK-LABEL: define amdgpu_kernel void @vreg_use_exceeds_register_file(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   call void asm sideeffect "; use $0", "a"(<257 x i32> poison)
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @vreg_def_exceeds_register_file() {
 ; CHECK-LABEL: define amdgpu_kernel void @vreg_def_exceeds_register_file(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call <257 x i32> asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %def = call <257 x i32> asm sideeffect "; def $0", "=a"()
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @multiple() {
 ; CHECK-LABEL: define amdgpu_kernel void @multiple(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call { <16 x i32>, <8 x i32>, <8 x i32> } asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %def = call {<16 x i32>, <8 x i32>, <8 x i32>} asm sideeffect "; def $0", "=a,=a,=a,a,a,a"(<4 x i32> splat (i32 0), <8 x i32> splat (i32 1), i64 999)
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @earlyclobber_0() {
 ; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_0(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call <8 x i32> asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %def = call <8 x i32> asm sideeffect "; def $0", "=&a,a"(i32 0)
+  call void @use_most()
   ret void
 }
 
 define amdgpu_kernel void @earlyclobber_1() {
 ; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_1(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
 ; CHECK-NEXT:    [[DEF:%.*]] = call { <8 x i32>, <16 x i32> } asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
   %def = call { <8 x i32>, <16 x i32 > } asm sideeffect "; def $0, $1", "=&a,=&a,a,a"(i32 0, <16 x i32> splat (i32 1))
+  call void @use_most()
   ret void
 }
 
 attributes #0 = { "amdgpu-agpr-alloc"="0" }
 ;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR6]] = { "amdgpu-agpr-alloc"="0" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" }
 ;.

>From ef56e0f86eaad7f271244ab46cc93dd6dfaf1f69 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sat, 4 Oct 2025 19:39:35 +0900
Subject: [PATCH 3/6] More tests

---
 .../AMDGPU/amdgpu-attributor-no-agpr.ll       | 36 +++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index 49bcc9de2b644..f19d563067eb2 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -580,6 +580,42 @@ define amdgpu_kernel void @earlyclobber_1() {
   ret void
 }
 
+define amdgpu_kernel void @physreg_a32__vreg_a256__vreg_a512() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_a32__vreg_a256__vreg_a512(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0, $1, $2", "{a16},a,a"(i32 poison, <8 x i32> poison, <16 x i32> poison)
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @physreg_def_a32__def_vreg_a256__def_vreg_a512() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_def_a32__def_vreg_a256__def_vreg_a512(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i32, <8 x i32>, <16 x i32> } asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call {i32, <8 x i32>, <16 x i32>} asm sideeffect "; def $0, $1, $2", "={a16},=a,=a"()
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call { i32, <16 x i32> } asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call {i32, <16 x i32>} asm sideeffect "; def $0, $1, $2", "={a16},=a,a"(<8 x i32> poison)
+  call void @use_most()
+  ret void
+}
+
 attributes #0 = { "amdgpu-agpr-alloc"="0" }
 ;.
 ; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }

>From 9579debf5b31d631b6dfe713069713cf19473bf3 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sat, 4 Oct 2025 19:53:08 +0900
Subject: [PATCH 4/6] Rework accounting of mixed physical and virtual registers

---
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 23 +++++++++++++--------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 12190db0e8edd..155b1a6a380dd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1243,29 +1243,34 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
     }
 
     for (StringRef Code : CI.Codes) {
+      unsigned RegCount = 0;
+
       if (Code.starts_with("a")) {
         // Virtual register, compute number of registers based on the type.
         //
         // We ought to be going through TargetLowering to get the number of
         // registers, but we should avoid the dependence on CodeGen here.
-        unsigned RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
-        if (CI.Type == InlineAsm::isOutput) {
-          AGPRDefCount += RegCount;
-          if (CI.isEarlyClobber)
-            AGPRUseCount += RegCount;
-        } else
-          AGPRUseCount += RegCount;
+        RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
       } else {
         // Physical register reference
         auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
-        if (Kind == 'a')
+        if (Kind == 'a') {
+          RegCount = NumRegs;
           MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
+        }
       }
+
+      if (CI.Type == InlineAsm::isOutput) {
+        AGPRDefCount += RegCount;
+        if (CI.isEarlyClobber)
+          AGPRUseCount += RegCount;
+      } else
+        AGPRUseCount += RegCount;
     }
   }
 
   unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);
-  return std::min(MaxVirtReg + MaxPhysReg, 256u);
+  return std::min(std::max(MaxVirtReg, MaxPhysReg), 256u);
 }
 
 // TODO: Migrate to range merge of amdgpu-agpr-alloc.

>From 3cafb045fd10fd12bd7d616f1d3e58b7e773d4fd Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 7 Oct 2025 22:00:37 +0900
Subject: [PATCH 5/6] Add another test

---
 .../AMDGPU/amdgpu-attributor-no-agpr.ll       | 24 +++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index f19d563067eb2..afe2e3289aea4 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -616,6 +616,30 @@ define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256() {
   ret void
 }
 
+define amdgpu_kernel void @mixed_physreg_vreg_tuples_0() {
+; CHECK-LABEL: define amdgpu_kernel void @mixed_physreg_vreg_tuples_0(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0, $1", "{a[1:4]},a"(<4 x i32> poison, <4 x i32> poison)
+  call void @use_most()
+  ret void
+}
+
+define amdgpu_kernel void @mixed_physreg_vreg_tuples_1() {
+; CHECK-LABEL: define amdgpu_kernel void @mixed_physreg_vreg_tuples_1(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0, $1", "a,{a[0:3]}"(<4 x i32> poison, <4 x i32> poison)
+  call void @use_most()
+  ret void
+}
+
 attributes #0 = { "amdgpu-agpr-alloc"="0" }
 ;.
 ; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }

>From 43894ee90dc3ecd0a038638bc20e8a691b455af8 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 8 Oct 2025 00:06:45 +0900
Subject: [PATCH 6/6] Another test

---
 .../test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index afe2e3289aea4..df4cbd85000d4 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -640,6 +640,18 @@ define amdgpu_kernel void @mixed_physreg_vreg_tuples_1() {
   ret void
 }
 
+define amdgpu_kernel void @physreg_raises_limit() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_raises_limit(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT:    call void asm sideeffect "
+; CHECK-NEXT:    call void @use_most()
+; CHECK-NEXT:    ret void
+;
+  call void asm sideeffect "; use $0, $1", "a,{a[5:8]}"(<4 x i32> poison, <4 x i32> poison)
+  call void @use_most()
+  ret void
+}
+
 attributes #0 = { "amdgpu-agpr-alloc"="0" }
 ;.
 ; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }



More information about the llvm-commits mailing list