[llvm] AMDGPU: Figure out required AGPR count for inline asm (PR #150910)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 7 09:28:38 PDT 2025
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/150910
>From b9b321450d6d94dd217444bdc328d3610862555a Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 28 Jul 2025 16:11:28 +0900
Subject: [PATCH 01/10] AMDGPU: Figure out required AGPR count for inline asm
For now, just compute the minimum number of AGPRs required to
allocate the inline asm. The attributor changes needed to turn
this into an integer value are left for later.
---
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 59 +++++-
.../AMDGPU/amdgpu-attributor-no-agpr.ll | 199 ++++++++++++++++++
2 files changed, 251 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 65d049ed9a0aa..add336c97f2dd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1211,16 +1211,61 @@ AAAMDWavesPerEU &AAAMDWavesPerEU::createForPosition(const IRPosition &IRP,
llvm_unreachable("AAAMDWavesPerEU is only valid for function position");
}
-static bool inlineAsmUsesAGPRs(const InlineAsm *IA) {
- for (const auto &CI : IA->ParseConstraints()) {
+/// Compute the minimum number of AGPRs required to allocate the inline asm.
+static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
+ const CallBase &Call) {
+ unsigned ArgNo = 0;
+ unsigned ResNo = 0;
+ unsigned AGPRDefCount = 0;
+ unsigned AGPRUseCount = 0;
+ unsigned MaxPhysReg = 0;
+ const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();
+
+ for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
+ Type *Ty = nullptr;
+ switch (CI.Type) {
+ case InlineAsm::isOutput: {
+ Ty = Call.getType();
+ if (auto *STy = dyn_cast<StructType>(Ty))
+ Ty = STy->getElementType(ResNo);
+ ++ResNo;
+ break;
+ }
+ case InlineAsm::isInput: {
+ Ty = Call.getArgOperand(ArgNo++)->getType();
+ break;
+ }
+ case InlineAsm::isLabel:
+ continue;
+ case InlineAsm::isClobber:
+ // Parse the physical register reference.
+ break;
+ }
+
for (StringRef Code : CI.Codes) {
- Code.consume_front("{");
- if (Code.starts_with("a"))
- return true;
+ if (Code.starts_with("a")) {
+ // Virtual register, compute number of registers based on the type.
+ //
+ // We ought to be going through TargetLowering to get the number of
+ // registers, but we should avoid the dependence on CodeGen here.
+ unsigned RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
+ if (CI.Type == InlineAsm::isOutput) {
+ AGPRDefCount += RegCount;
+ if (CI.isEarlyClobber)
+ AGPRUseCount += RegCount;
+ } else
+ AGPRUseCount += RegCount;
+ } else {
+ // Physical register reference
+ auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
+ if (Kind == 'a')
+ MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
+ }
}
}
- return false;
+ unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);
+ return std::min(MaxVirtReg + MaxPhysReg, 256u);
}
// TODO: Migrate to range merge of amdgpu-agpr-alloc.
@@ -1259,7 +1304,7 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
const Function *Callee = dyn_cast<Function>(CalleeOp);
if (!Callee) {
if (const InlineAsm *IA = dyn_cast<InlineAsm>(CalleeOp))
- return !inlineAsmUsesAGPRs(IA);
+ return inlineAsmGetNumRequiredAGPRs(IA, CB) == 0;
return false;
}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index 664dfa21759cf..a67f6cd955612 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -251,6 +251,205 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
ret void
}
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
+; CHECK-NEXT: ret void
+;
+ %def = call {i32, i32} asm sideeffect "; def $0", "=a,=a"()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
+; CHECK-NEXT: ret void
+;
+ %def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=a"()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
+; CHECK-NEXT: ret void
+;
+ %def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=v"()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a"(ptr poison)
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call ptr asm sideeffect "
+; CHECK-NEXT: ret void
+;
+ %def = call ptr asm sideeffect "; def $0", "=a"()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <2 x ptr> asm sideeffect "
+; CHECK-NEXT: ret void
+;
+ %def = call <2 x ptr> asm sideeffect "; def $0", "=a"()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
+; CHECK-NEXT: ret void
+;
+ %def = call {i32, i32} asm sideeffect "; def $0", "={a0},={a[4:5]}"()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; clobber $0", "~{a4}"()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber_tuple() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_tuple(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; clobber $0", "~{a[10:13]}"()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber_oob() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_oob(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; clobber $0", "~{a256}"()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber_max() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_max(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; clobber $0", "~{a255}"()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_physreg_oob() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_oob(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "{a256}"(i32 poison)
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <32 x i32> asm sideeffect "
+; CHECK-NEXT: ret void
+;
+ %def = call <32 x i32> asm sideeffect "; def $0", "=a"()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a"(<32 x i32> poison)
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <32 x i32> asm sideeffect "
+; CHECK-NEXT: ret void
+;
+ %def = call <32 x i32> asm sideeffect "; use $0", "=a,a"(<32 x i32> poison)
+ ret void
+}
+
+define amdgpu_kernel void @vreg_use_exceeds_register_file() {
+; CHECK-LABEL: define amdgpu_kernel void @vreg_use_exceeds_register_file(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a"(<257 x i32> poison)
+ ret void
+}
+
+define amdgpu_kernel void @vreg_def_exceeds_register_file() {
+; CHECK-LABEL: define amdgpu_kernel void @vreg_def_exceeds_register_file(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <257 x i32> asm sideeffect "
+; CHECK-NEXT: ret void
+;
+ %def = call <257 x i32> asm sideeffect "; def $0", "=a"()
+ ret void
+}
+
+define amdgpu_kernel void @multiple() {
+; CHECK-LABEL: define amdgpu_kernel void @multiple(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { <16 x i32>, <8 x i32>, <8 x i32> } asm sideeffect "
+; CHECK-NEXT: ret void
+;
+ %def = call {<16 x i32>, <8 x i32>, <8 x i32>} asm sideeffect "; def $0", "=a,=a,=a,a,a,a"(<4 x i32> splat (i32 0), <8 x i32> splat (i32 1), i64 999)
+ ret void
+}
+
+define amdgpu_kernel void @earlyclobber_0() {
+; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_0(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <8 x i32> asm sideeffect "
+; CHECK-NEXT: ret void
+;
+ %def = call <8 x i32> asm sideeffect "; def $0", "=&a,a"(i32 0)
+ ret void
+}
+
+define amdgpu_kernel void @earlyclobber_1() {
+; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_1(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { <8 x i32>, <16 x i32> } asm sideeffect "
+; CHECK-NEXT: ret void
+;
+ %def = call { <8 x i32>, <16 x i32 > } asm sideeffect "; def $0, $1", "=&a,=&a,a,a"(i32 0, <16 x i32> splat (i32 1))
+ ret void
+}
attributes #0 = { "amdgpu-agpr-alloc"="0" }
;.
>From 5b77442b940aefd24e270616286df87dd7b28f2d Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sat, 4 Oct 2025 19:34:08 +0900
Subject: [PATCH 02/10] reduce test noise
---
.../AMDGPU/amdgpu-attributor-no-agpr.ll | 232 ++++++++++++++----
1 file changed, 180 insertions(+), 52 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index a67f6cd955612..49bcc9de2b644 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -1,103 +1,166 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 4
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s
+; Shrink result attribute list by preventing use of most attributes.
+define internal void @use_most() {
+; CHECK-LABEL: define internal void @use_most(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [256 x i8], align 1, addrspace(5)
+; CHECK-NEXT: [[ALLOCA_CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
+; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.cluster.id.x()
+; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.cluster.id.y()
+; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.cluster.id.z()
+; CHECK-NEXT: [[TMP7:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+; CHECK-NEXT: [[TMP8:%.*]] = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
+; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.dispatch.id()
+; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT: [[IMPLICIT_ARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; CHECK-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr [[ALLOCA_CAST]], ptr addrspace(4) [[IMPLICIT_ARG_PTR]], i64 256, i1 false)
+; CHECK-NEXT: ret void
+;
+ %alloca = alloca [256 x i8], addrspace(5)
+ %alloca.cast = addrspacecast ptr addrspace(5) %alloca to ptr
+ call i32 @llvm.amdgcn.workitem.id.x()
+ call i32 @llvm.amdgcn.workitem.id.y()
+ call i32 @llvm.amdgcn.workitem.id.z()
+ call i32 @llvm.amdgcn.workgroup.id.x()
+ call i32 @llvm.amdgcn.workgroup.id.y()
+ call i32 @llvm.amdgcn.workgroup.id.z()
+ call i32 @llvm.amdgcn.cluster.id.x()
+ call i32 @llvm.amdgcn.cluster.id.y()
+ call i32 @llvm.amdgcn.cluster.id.z()
+ call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+ call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
+ call i64 @llvm.amdgcn.dispatch.id()
+ call i32 @llvm.amdgcn.lds.kernel.id()
+ %implicit.arg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+ call void @llvm.memcpy.p0.p4(ptr %alloca.cast, ptr addrspace(4) %implicit.arg.ptr, i64 256, i1 false)
+ ret void
+}
+
define amdgpu_kernel void @kernel_uses_asm_virtreg() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg(
-; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; use $0", "a"(i32 poison)
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_virtreg_def() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: [[DEF:%.*]] = call i32 asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%def = call i32 asm sideeffect "; def $0", "=a"()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: [[DEF:%.*]] = call i64 asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%def = call i64 asm sideeffect "; def $0", "={a[0:1]}"()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_virtreg_second_arg() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_second_arg(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; use $0", "v,a"(i32 poison, i32 poison)
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_non_agpr_asm() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_non_agpr_asm(
-; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; use $0", "v"(i32 poison)
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_physreg() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; use $0", "{a0}"(i32 poison)
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_physreg_tuple() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_tuple(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; use $0", "{a[0:1]}"(i64 poison)
+ call void @use_most()
ret void
}
define void @func_uses_asm_virtreg_agpr() {
; CHECK-LABEL: define void @func_uses_asm_virtreg_agpr(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; use $0", "a"(i32 poison)
+ call void @use_most()
ret void
}
define void @func_uses_asm_physreg_agpr() {
; CHECK-LABEL: define void @func_uses_asm_physreg_agpr(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; use $0", "{a0}"(i32 poison)
+ call void @use_most()
ret void
}
define void @func_uses_asm_physreg_agpr_tuple() {
; CHECK-LABEL: define void @func_uses_asm_physreg_agpr_tuple(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; use $0", "{a[0:1]}"(i64 poison)
+ call void @use_most()
ret void
}
@@ -105,99 +168,119 @@ declare void @unknown()
define amdgpu_kernel void @kernel_calls_extern() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern(
-; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void @unknown()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
-; CHECK-SAME: ) #[[ATTR2]] {
-; CHECK-NEXT: call void @unknown() #[[ATTR6:[0-9]+]]
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void @unknown() #[[ATTR5:[0-9]+]]
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void @unknown() #0
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect(
-; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] {
+; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: call void [[INDIRECT]]()
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void %indirect()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
-; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR6]]
+; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR5]]
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void %indirect() #0
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_transitively_uses_agpr_asm() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_transitively_uses_agpr_asm(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void @func_uses_asm_physreg_agpr()
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void @func_uses_asm_physreg_agpr()
+ call void @use_most()
ret void
}
define void @empty() {
; CHECK-LABEL: define void @empty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
+ call void @use_most()
ret void
}
define void @also_empty() {
; CHECK-LABEL: define void @also_empty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_calls_empty() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_empty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: call void @empty()
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void @empty()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void @empty()
; CHECK-NEXT: call void @func_uses_asm_physreg_agpr()
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void @empty()
call void @func_uses_asm_physreg_agpr()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_calls_generic_intrinsic(ptr %ptr0, ptr %ptr1, i64 %size) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_generic_intrinsic(
-; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[PTR0]], ptr [[PTR1]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void @llvm.memcpy.p0.p0.i64(ptr %ptr0, ptr %ptr1, i64 %size, i1 false)
+ call void @use_most()
ret void
}
@@ -205,31 +288,35 @@ declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>
define amdgpu_kernel void @kernel_calls_mfma.f32.32x32x1f32(ptr addrspace(1) %out, float %a, float %b, <32 x float> %c) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_mfma.f32.32x32x1f32(
-; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]], <32 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]], <32 x float> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RESULT:%.*]] = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float [[A]], float [[B]], <32 x float> [[C]], i32 0, i32 0, i32 0)
; CHECK-NEXT: store <32 x float> [[RESULT]], ptr addrspace(1) [[OUT]], align 128
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%result = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %a, float %b, <32 x float> %c, i32 0, i32 0, i32 0)
store <32 x float> %result, ptr addrspace(1) %out
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_calls_workitem_id_x(ptr addrspace(1) %out) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_workitem_id_x(
-; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: store i32 [[RESULT]], ptr addrspace(1) [[OUT]], align 4
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%result = call i32 @llvm.amdgcn.workitem.id.x()
store i32 %result, ptr addrspace(1) %out
+ call void @use_most()
ret void
}
define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr(
-; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty
; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]]
@@ -244,220 +331,261 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
; CHECK: 5:
; CHECK-NEXT: unreachable
; CHECK: 6:
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%fptr = select i1 %cond, ptr @empty, ptr @also_empty
call void %fptr()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%def = call {i32, i32} asm sideeffect "; def $0", "=a,=a"()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=a"()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=v"()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; use $0", "a"(ptr poison)
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: [[DEF:%.*]] = call ptr asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%def = call ptr asm sideeffect "; def $0", "=a"()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: [[DEF:%.*]] = call <2 x ptr> asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%def = call <2 x ptr> asm sideeffect "; def $0", "=a"()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%def = call {i32, i32} asm sideeffect "; def $0", "={a0},={a[4:5]}"()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_clobber() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; clobber $0", "~{a4}"()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_clobber_tuple() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_tuple(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; clobber $0", "~{a[10:13]}"()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_clobber_oob() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_oob(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; clobber $0", "~{a256}"()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_clobber_max() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_max(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; clobber $0", "~{a255}"()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_physreg_oob() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_oob(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; use $0", "{a256}"(i32 poison)
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: [[DEF:%.*]] = call <32 x i32> asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%def = call <32 x i32> asm sideeffect "; def $0", "=a"()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; use $0", "a"(<32 x i32> poison)
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: [[DEF:%.*]] = call <32 x i32> asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%def = call <32 x i32> asm sideeffect "; use $0", "=a,a"(<32 x i32> poison)
+ call void @use_most()
ret void
}
define amdgpu_kernel void @vreg_use_exceeds_register_file() {
; CHECK-LABEL: define amdgpu_kernel void @vreg_use_exceeds_register_file(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; use $0", "a"(<257 x i32> poison)
+ call void @use_most()
ret void
}
define amdgpu_kernel void @vreg_def_exceeds_register_file() {
; CHECK-LABEL: define amdgpu_kernel void @vreg_def_exceeds_register_file(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: [[DEF:%.*]] = call <257 x i32> asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%def = call <257 x i32> asm sideeffect "; def $0", "=a"()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @multiple() {
; CHECK-LABEL: define amdgpu_kernel void @multiple(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: [[DEF:%.*]] = call { <16 x i32>, <8 x i32>, <8 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%def = call {<16 x i32>, <8 x i32>, <8 x i32>} asm sideeffect "; def $0", "=a,=a,=a,a,a,a"(<4 x i32> splat (i32 0), <8 x i32> splat (i32 1), i64 999)
+ call void @use_most()
ret void
}
define amdgpu_kernel void @earlyclobber_0() {
; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_0(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: [[DEF:%.*]] = call <8 x i32> asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%def = call <8 x i32> asm sideeffect "; def $0", "=&a,a"(i32 0)
+ call void @use_most()
ret void
}
define amdgpu_kernel void @earlyclobber_1() {
; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_1(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: [[DEF:%.*]] = call { <8 x i32>, <16 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%def = call { <8 x i32>, <16 x i32 > } asm sideeffect "; def $0, $1", "=&a,=&a,a,a"(i32 0, <16 x i32> splat (i32 1))
+ call void @use_most()
ret void
}
attributes #0 = { "amdgpu-agpr-alloc"="0" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR6]] = { "amdgpu-agpr-alloc"="0" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" }
;.
>From 43b0045d7e9562b5aeb58053ba43de8a9054ff74 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sat, 4 Oct 2025 19:39:35 +0900
Subject: [PATCH 03/10] More tests
---
.../AMDGPU/amdgpu-attributor-no-agpr.ll | 36 +++++++++++++++++++
1 file changed, 36 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index 49bcc9de2b644..f19d563067eb2 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -580,6 +580,42 @@ define amdgpu_kernel void @earlyclobber_1() {
ret void
}
+define amdgpu_kernel void @physreg_a32__vreg_a256__vreg_a512() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_a32__vreg_a256__vreg_a512(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1, $2", "{a16},a,a"(i32 poison, <8 x i32> poison, <16 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @physreg_def_a32__def_vreg_a256__def_vreg_a512() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_def_a32__def_vreg_a256__def_vreg_a512(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, <8 x i32>, <16 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call {i32, <8 x i32>, <16 x i32>} asm sideeffect "; def $0, $1, $2", "={a16},=a,=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, <16 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call {i32, <16 x i32>} asm sideeffect "; def $0, $1, $2", "={a16},=a,a"(<8 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
attributes #0 = { "amdgpu-agpr-alloc"="0" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
>From 3db31b542790806417d1ed3fea80e06661fe280a Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Sat, 4 Oct 2025 19:53:08 +0900
Subject: [PATCH 04/10] Rework accounting of mixed physical and virtual
registers
---
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 23 +++++++++++++--------
1 file changed, 14 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index add336c97f2dd..d28370e125014 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1243,29 +1243,34 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
}
for (StringRef Code : CI.Codes) {
+ unsigned RegCount = 0;
+
if (Code.starts_with("a")) {
// Virtual register, compute number of registers based on the type.
//
// We ought to be going through TargetLowering to get the number of
// registers, but we should avoid the dependence on CodeGen here.
- unsigned RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
- if (CI.Type == InlineAsm::isOutput) {
- AGPRDefCount += RegCount;
- if (CI.isEarlyClobber)
- AGPRUseCount += RegCount;
- } else
- AGPRUseCount += RegCount;
+ RegCount = divideCeil(DL.getTypeSizeInBits(Ty), 32);
} else {
// Physical register reference
auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
- if (Kind == 'a')
+ if (Kind == 'a') {
+ RegCount = NumRegs;
MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
+ }
}
+
+ if (CI.Type == InlineAsm::isOutput) {
+ AGPRDefCount += RegCount;
+ if (CI.isEarlyClobber)
+ AGPRUseCount += RegCount;
+ } else
+ AGPRUseCount += RegCount;
}
}
unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);
- return std::min(MaxVirtReg + MaxPhysReg, 256u);
+ return std::min(std::max(MaxVirtReg, MaxPhysReg), 256u);
}
// TODO: Migrate to range merge of amdgpu-agpr-alloc.
>From a6fa5fd521af477b9a732bd706a3ef41c299cd80 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 7 Oct 2025 22:00:37 +0900
Subject: [PATCH 05/10] Add another test
---
.../AMDGPU/amdgpu-attributor-no-agpr.ll | 24 +++++++++++++++++++
1 file changed, 24 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index f19d563067eb2..afe2e3289aea4 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -616,6 +616,30 @@ define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256() {
ret void
}
+define amdgpu_kernel void @mixed_physreg_vreg_tuples_0() {
+; CHECK-LABEL: define amdgpu_kernel void @mixed_physreg_vreg_tuples_0(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "{a[1:4]},a"(<4 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @mixed_physreg_vreg_tuples_1() {
+; CHECK-LABEL: define amdgpu_kernel void @mixed_physreg_vreg_tuples_1(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,{a[0:3]}"(<4 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
attributes #0 = { "amdgpu-agpr-alloc"="0" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
>From 352ee93f21264c5c85de146a1197650cde537c74 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 8 Oct 2025 00:06:45 +0900
Subject: [PATCH 06/10] Another test
---
.../test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index afe2e3289aea4..df4cbd85000d4 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -640,6 +640,18 @@ define amdgpu_kernel void @mixed_physreg_vreg_tuples_1() {
ret void
}
+define amdgpu_kernel void @physreg_raises_limit() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_raises_limit(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,{a[5:8]}"(<4 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
attributes #0 = { "amdgpu-agpr-alloc"="0" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
>From ccd6a1f2d291a498b5118f8a3157bab300964b3a Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 8 Oct 2025 00:59:29 +0900
Subject: [PATCH 07/10] Add todos
---
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index d28370e125014..c782b2cb4b90e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1221,6 +1221,8 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
unsigned MaxPhysReg = 0;
const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();
+ // TODO: Underestimates due to not accounting for tuple alignment requirements
+ // TODO: Overestimates due to not accounting for tied operands
for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
Type *Ty = nullptr;
switch (CI.Type) {
>From 9e03860d25053e48aadc155f55cb25d9d1fc1e95 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 8 Oct 2025 01:01:36 +0900
Subject: [PATCH 08/10] Add test
---
.../CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index df4cbd85000d4..6d292f1a25bf2 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -652,6 +652,19 @@ define amdgpu_kernel void @physreg_raises_limit() {
ret void
}
+; FIXME: This should require 9. We cannot allocate an a128 at a0.
+define amdgpu_kernel void @physreg_tuple_alignment_raises_limit() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_tuple_alignment_raises_limit(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,{a[1:4]}"(<4 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
attributes #0 = { "amdgpu-agpr-alloc"="0" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
>From 85a507b7a6176267b80ffc9d31a7e6c29709b4b1 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 8 Oct 2025 01:04:05 +0900
Subject: [PATCH 09/10] Be very conservative with physregs
---
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 12 +++++--
.../AMDGPU/amdgpu-attributor-no-agpr.ll | 36 +++++++++++++++++++
2 files changed, 46 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index c782b2cb4b90e..2dae919bebcc1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1221,7 +1221,6 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
unsigned MaxPhysReg = 0;
const DataLayout &DL = Call.getFunction()->getParent()->getDataLayout();
- // TODO: Underestimates due to not accounting for tuple alignment requirements
// TODO: Overestimates due to not accounting for tied operands
for (const InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
Type *Ty = nullptr;
@@ -1258,6 +1257,11 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
if (Kind == 'a') {
RegCount = NumRegs;
+
+ // Apply physreg alignment requirement
+ //
+ // TODO: This is more conservative than necessary.
+ MaxPhysReg = alignTo(MaxPhysReg, NumRegs);
MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
}
}
@@ -1272,7 +1276,11 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
}
unsigned MaxVirtReg = std::max(AGPRUseCount, AGPRDefCount);
- return std::min(std::max(MaxVirtReg, MaxPhysReg), 256u);
+
+ // TODO: This is overly conservative. If there are any physical registers,
+ // allocate any virtual registers after them so we don't have to solve optimal
+ // packing.
+ return std::min(MaxVirtReg + MaxPhysReg, 256u);
}
// TODO: Migrate to range merge of amdgpu-agpr-alloc.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index 6d292f1a25bf2..b8126caa8b80c 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -665,6 +665,42 @@ define amdgpu_kernel void @physreg_tuple_alignment_raises_limit() {
ret void
}
+define amdgpu_kernel void @align3_virtreg() {
+; CHECK-LABEL: define amdgpu_kernel void @align3_virtreg(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,a"(<3 x i32> poison, <3 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @align3_align4_virtreg() {
+; CHECK-LABEL: define amdgpu_kernel void @align3_align4_virtreg(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,a"(<3 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @align2_align4_virtreg() {
+; CHECK-LABEL: define amdgpu_kernel void @align2_align4_virtreg(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,a"(<2 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
attributes #0 = { "amdgpu-agpr-alloc"="0" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
>From 7234f29417d92e65f775639f8fc4e631bd34e1d3 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 8 Oct 2025 01:15:28 +0900
Subject: [PATCH 10/10] Try to apply alignment requirement
---
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 21 +++++++++++++--------
1 file changed, 13 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 2dae919bebcc1..a52c79f4302d1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1245,7 +1245,6 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
for (StringRef Code : CI.Codes) {
unsigned RegCount = 0;
-
if (Code.starts_with("a")) {
// Virtual register, compute number of registers based on the type.
//
@@ -1257,21 +1256,27 @@ static unsigned inlineAsmGetNumRequiredAGPRs(const InlineAsm *IA,
auto [Kind, RegIdx, NumRegs] = AMDGPU::parseAsmConstraintPhysReg(Code);
if (Kind == 'a') {
RegCount = NumRegs;
-
- // Apply physreg alignment requirement
- //
- // TODO: This is more conservative than necessary.
- MaxPhysReg = alignTo(MaxPhysReg, NumRegs);
MaxPhysReg = std::max(MaxPhysReg, std::min(RegIdx + NumRegs, 256u));
}
+
+ continue;
}
if (CI.Type == InlineAsm::isOutput) {
+ // Apply tuple alignment requirement
+ //
+ // TODO: This is more conservative than necessary.
+ AGPRDefCount = alignTo(AGPRDefCount, RegCount);
+
AGPRDefCount += RegCount;
- if (CI.isEarlyClobber)
+ if (CI.isEarlyClobber) {
+ AGPRUseCount = alignTo(AGPRUseCount, RegCount);
AGPRUseCount += RegCount;
- } else
+ }
+ } else {
+ AGPRUseCount = alignTo(AGPRUseCount, RegCount);
AGPRUseCount += RegCount;
+ }
}
}
More information about the llvm-commits
mailing list