[llvm] AMDGPU: Do not infer implicit inputs for intrinsics with !nocallback (PR #175230)
Akash Dutta via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 21 10:05:27 PST 2026
https://github.com/akadutta updated https://github.com/llvm/llvm-project/pull/175230
>From 7cfb1078f1b97d1d0f529b1ee78d3e734133ffc7 Mon Sep 17 00:00:00 2001
From: akadutta <Akash.Dutta at amd.com>
Date: Fri, 9 Jan 2026 13:51:31 -0600
Subject: [PATCH 1/5] do not infer implicit args for missing nocallback
intrinsics;add trap like functions to whitelist
---
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 61 ++++++++++++++--
...attributor-intrinsic-missing-nocallback.ll | 31 ++++++++
...amdgpu-attributor-nocallback-intrinsics.ll | 73 +++++++++++++++++++
.../AMDGPU/amdgpu-attributor-trap-leaf.ll | 38 ++++++++++
4 files changed, 197 insertions(+), 6 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 821a7198e38c8..98130630b4a98 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -38,9 +38,10 @@ enum ImplicitArgumentPositions {
#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
enum ImplicitArgumentMask {
- NOT_IMPLICIT_INPUT = 0,
+ UNKNOWN_INTRINSIC = 0,
#include "AMDGPUAttributes.def"
- ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
+ ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1,
+ NOT_IMPLICIT_INPUT
};
#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
@@ -115,7 +116,7 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
return QUEUE_PTR;
default:
- return NOT_IMPLICIT_INPUT;
+ return UNKNOWN_INTRINSIC;
}
}
@@ -140,6 +141,26 @@ static bool hasSanitizerAttributes(const Function &F) {
F.hasFnAttribute(Attribute::SanitizeMemTag);
}
+/// Returns true if F looks like a leaf-style trap intrinsic that cannot
+/// possibly callback into user code.
+static bool isTrapLikeLeafIntrinsic(const Function &F) {
+ if (!F.isIntrinsic())
+ return false;
+
+ // If we already know this intrinsic is nocallback, there is nothing left to
+ // classify.
+ if (F.hasFnAttribute(Attribute::NoCallback))
+ return true;
+
+ if (!F.hasFnAttribute(Attribute::NoReturn))
+ return false;
+
+ if (F.doesNotAccessMemory())
+ return true;
+
+ return F.onlyAccessesInaccessibleMemory();
+}
+
namespace {
class AMDGPUInformationCache : public InformationCache {
public:
@@ -525,6 +546,22 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
ImplicitArgumentMask AttrMask =
intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
HasApertureRegs, SupportsGetDoorbellID, COV);
+
+ if (AttrMask == UNKNOWN_INTRINSIC) {
+ // Assume not-nocallback intrinsics may invoke a function which accesses
+ // implicit arguments.
+ //
+ // FIXME: This isn't really the correct check. We want to ensure it
+ // isn't calling any function that may use implicit arguments regardless
+ // of whether it's internal to the module or not.
+ //
+ // TODO: Ignoring callsite attributes.
+ if (!isTrapLikeLeafIntrinsic(*Callee) &&
+ !Callee->hasFnAttribute(Attribute::NoCallback))
+ return indicatePessimisticFixpoint();
+ continue;
+ }
+
if (AttrMask != NOT_IMPLICIT_INPUT) {
if ((IsNonEntryFunc || !NonKernelOnly))
removeAssumedBits(AttrMask);
@@ -1345,10 +1382,22 @@ struct AAAMDGPUMinAGPRAlloc
return true;
}
- default:
+ case Intrinsic::debugtrap:
+ // llvm.debugtrap currently lacks the attributes required for
+ // isTrapLikeLeafIntrinsic, so kept explicitly whitelisted.
+ return true;
+ default: {
// Some intrinsics may use AGPRs, but if we have a choice, we are not
// required to use AGPRs.
- return true;
+
+ if (const Function *CalledFunc = CB.getCalledFunction())
+ if (isTrapLikeLeafIntrinsic(*CalledFunc))
+ return true;
+
+ // Assume !nocallback intrinsics may call a function which requires
+ // AGPRs.
+ return CB.hasFnAttr(Attribute::NoCallback);
+ }
}
// TODO: Handle callsite attributes
@@ -1584,7 +1633,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
AC.DefaultInitializeLiveInternals = false;
AC.IndirectCalleeSpecializationCallback =
[](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
- Function &Callee, unsigned NumAssumedCallees) {
+ Function &Callee, unsigned NumAssumedCallees) {
return !AMDGPU::isEntryFunctionCC(Callee.getCallingConv()) &&
(NumAssumedCallees <= IndirectCallSpecializationThreshold);
};
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll
new file mode 100644
index 0000000000000..892bfa12140d4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s
+
+; Make sure we do not infer anything about implicit inputs through an
+; intrinsic call which is not nocallback.
+
+declare zeroext i32 @return_i32()
+
+define i32 @test_i32_return() gc "statepoint-example" {
+; CHECK-LABEL: define i32 @test_i32_return(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] gc "statepoint-example" {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[SAFEPOINT_TOKEN:%.*]] = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0)
+; CHECK-NEXT: [[CALL1:%.*]] = call zeroext i32 @llvm.experimental.gc.result.i32(token [[SAFEPOINT_TOKEN]])
+; CHECK-NEXT: ret i32 [[CALL1]]
+;
+entry:
+ %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0)
+ %call1 = call zeroext i32 @llvm.experimental.gc.result.i32(token %safepoint_token)
+ ret i32 %call1
+}
+
+declare token @llvm.experimental.gc.statepoint.p0(i64 immarg, i32 immarg, ptr, i32 immarg, i32 immarg, ...)
+declare i32 @llvm.experimental.gc.result.i32(token) #0
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
+;.
+; CHECK: attributes #[[ATTR0]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
new file mode 100644
index 0000000000000..81fd84dce31d8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-attributor -mcpu=gfx90a %s | FileCheck %s
+
+; Make sure we infer no inputs are used through some intrinsics
+
+define void @use_fake_use(i32 %arg) {
+; CHECK-LABEL: define void @use_fake_use(
+; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: call void (...) @llvm.fake.use(i32 [[ARG]])
+; CHECK-NEXT: ret void
+;
+ call void (...) @llvm.fake.use(i32 %arg)
+ ret void
+}
+
+define void @use_donothing() {
+; CHECK-LABEL: define void @use_donothing(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: ret void
+;
+ call void @llvm.donothing()
+ ret void
+}
+
+define void @use_assume(i1 %arg) {
+; CHECK-LABEL: define void @use_assume(
+; CHECK-SAME: i1 [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.assume(i1 [[ARG]])
+; CHECK-NEXT: ret void
+;
+ call void @llvm.assume(i1 %arg)
+ ret void
+}
+
+define void @use_trap() {
+; CHECK-LABEL: define void @use_trap(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.trap()
+; CHECK-NEXT: ret void
+;
+ call void @llvm.trap()
+ ret void
+}
+
+define void @use_debugtrap() {
+; CHECK-LABEL: define void @use_debugtrap(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.debugtrap()
+; CHECK-NEXT: ret void
+;
+ call void @llvm.debugtrap()
+ ret void
+}
+
+define void @use_ubsantrap() {
+; CHECK-LABEL: define void @use_ubsantrap(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.ubsantrap(i8 0)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.ubsantrap(i8 0)
+ ret void
+}
+
+;.
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll
new file mode 100644
index 0000000000000..620b232f58e97
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s
+
+; Trap-like intrinsics such as llvm.trap and llvm.debugtrap do not have the
+; nocallback attribute, so the AMDGPU attributor used to conservatively drop
+; all implicitly-known inputs and AGPR allocation information. Make sure we
+; still infer that no implicit inputs are required and that the AGPR allocation
+; stays at zero.
+
+declare void @llvm.trap()
+
+declare void @llvm.debugtrap()
+
+define amdgpu_kernel void @trap_kernel() {
+; CHECK-LABEL: define amdgpu_kernel void @trap_kernel(
+; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: call void @llvm.trap()
+; CHECK-NEXT: ret void
+;
+ call void @llvm.trap()
+ ret void
+}
+
+define amdgpu_kernel void @debugtrap_kernel() {
+; CHECK-LABEL: define amdgpu_kernel void @debugtrap_kernel(
+; CHECK-SAME: ) #[[ATTR2]] {
+; CHECK-NEXT: call void @llvm.debugtrap()
+; CHECK-NEXT: ret void
+;
+ call void @llvm.debugtrap()
+ ret void
+}
+
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+;.
>From 798ef42c878649b762a7cd04c624f11f73b3abca Mon Sep 17 00:00:00 2001
From: akadutta <Akash.Dutta at amd.com>
Date: Fri, 9 Jan 2026 15:23:10 -0600
Subject: [PATCH 2/5] fix format issue
---
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 98130630b4a98..fb8afd6356bbb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1633,7 +1633,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
AC.DefaultInitializeLiveInternals = false;
AC.IndirectCalleeSpecializationCallback =
[](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
- Function &Callee, unsigned NumAssumedCallees) {
+ Function &Callee, unsigned NumAssumedCallees) {
return !AMDGPU::isEntryFunctionCC(Callee.getCallingConv()) &&
(NumAssumedCallees <= IndirectCallSpecializationThreshold);
};
>From 391e06a841e683dee16f4872e226ae6cbf6282d0 Mon Sep 17 00:00:00 2001
From: akadutta <Akash.Dutta at amd.com>
Date: Fri, 16 Jan 2026 13:25:36 -0600
Subject: [PATCH 3/5] fix incorrect merge
---
...amdgpu-attributor-nocallback-intrinsics.ll | 20 -------------------
1 file changed, 20 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
index b87cc2747c582..71c509afa8e64 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
@@ -35,11 +35,7 @@ define void @use_assume(i1 %arg) {
define void @use_trap() {
; CHECK-LABEL: define void @use_trap(
-<<<<<<< HEAD
-; CHECK-SAME: ) #[[ATTR0]] {
-=======
; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
->>>>>>> main
; CHECK-NEXT: call void @llvm.trap()
; CHECK-NEXT: ret void
;
@@ -49,11 +45,7 @@ define void @use_trap() {
define void @use_debugtrap() {
; CHECK-LABEL: define void @use_debugtrap(
-<<<<<<< HEAD
-; CHECK-SAME: ) #[[ATTR0]] {
-=======
; CHECK-SAME: ) #[[ATTR1]] {
->>>>>>> main
; CHECK-NEXT: call void @llvm.debugtrap()
; CHECK-NEXT: ret void
;
@@ -63,11 +55,7 @@ define void @use_debugtrap() {
define void @use_ubsantrap() {
; CHECK-LABEL: define void @use_ubsantrap(
-<<<<<<< HEAD
-; CHECK-SAME: ) #[[ATTR0]] {
-=======
; CHECK-SAME: ) #[[ATTR1]] {
->>>>>>> main
; CHECK-NEXT: call void @llvm.ubsantrap(i8 0)
; CHECK-NEXT: ret void
;
@@ -77,18 +65,10 @@ define void @use_ubsantrap() {
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-<<<<<<< HEAD
-; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR5:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
-=======
; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR6:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
->>>>>>> main
;.
>From 40cdc1d942154bc254a0503300fa25e2aabdb66f Mon Sep 17 00:00:00 2001
From: akadutta <Akash.Dutta at amd.com>
Date: Mon, 19 Jan 2026 11:17:29 -0600
Subject: [PATCH 4/5] add handling for trap-func-name
---
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 39 ++++++++++-----
.../amdgpu-attributor-min-agpr-alloc.ll | 50 +++++++++----------
...amdgpu-attributor-nocallback-intrinsics.ll | 32 ++++++++----
.../AMDGPU/amdgpu-attributor-trap-leaf.ll | 17 ++++++-
4 files changed, 90 insertions(+), 48 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index bd6071c22d619..ced4c13eaa39d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -42,8 +42,6 @@ enum ImplicitArgumentMask {
#include "AMDGPUAttributes.def"
ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1,
NOT_IMPLICIT_INPUT
- ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1,
- NOT_IMPLICIT_INPUT
};
#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
@@ -518,6 +516,17 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
// The current assumed state used to determine a change.
auto OrigAssumed = getAssumed();
+ SmallPtrSet<const Function *, 4> TrapHandlers;
+ for (Instruction &I : instructions(F)) {
+ auto *CB = dyn_cast<CallBase>(&I);
+ if (!CB || !CB->hasFnAttr("trap-func-name"))
+ continue;
+
+ if (const Function *CalledFunc = CB->getCalledFunction())
+ if (isTrapLikeLeafIntrinsic(*CalledFunc))
+ TrapHandlers.insert(CalledFunc);
+ }
+
// Check for Intrinsics and propagate attributes.
const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
*this, this->getIRPosition(), DepClassTy::REQUIRED);
@@ -549,6 +558,12 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
HasApertureRegs, SupportsGetDoorbellID, COV);
+ if (TrapHandlers.contains(Callee)) {
+ if (!Callee->hasFnAttribute(Attribute::NoCallback))
+ return indicatePessimisticFixpoint();
+ continue;
+ }
+
if (AttrMask == UNKNOWN_INTRINSIC) {
// Assume not-nocallback intrinsics may invoke a function which accesses
// implicit arguments.
@@ -558,7 +573,8 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
// of whether it's internal to the module or not.
//
// TODO: Ignoring callsite attributes.
- if (!isTrapLikeLeafIntrinsic(*Callee) &&
+ const bool IsTrapLikeTrapIntrinsic = isTrapLikeLeafIntrinsic(*Callee);
+ if (!IsTrapLikeTrapIntrinsic &&
!Callee->hasFnAttribute(Attribute::NoCallback))
return indicatePessimisticFixpoint();
continue;
@@ -1367,6 +1383,14 @@ struct AAAMDGPUMinAGPRAlloc
return true;
}
+ if (const Function *CalledFunc = CB.getCalledFunction()) {
+ if (isTrapLikeLeafIntrinsic(*CalledFunc)) {
+ if (CB.hasFnAttr("trap-func-name"))
+ return CB.hasFnAttr(Attribute::NoCallback);
+ return true;
+ }
+ }
+
switch (CB.getIntrinsicID()) {
case Intrinsic::not_intrinsic:
break;
@@ -1384,18 +1408,9 @@ struct AAAMDGPUMinAGPRAlloc
return true;
}
- case Intrinsic::debugtrap:
- // llvm.debugtrap currently lacks the attributes required for
- // isTrapLikeLeafIntrinsic, so kept explicitly whitelisted.
- return true;
default: {
// Some intrinsics may use AGPRs, but if we have a choice, we are not
// required to use AGPRs.
-
- if (const Function *CalledFunc = CB.getCalledFunction())
- if (isTrapLikeLeafIntrinsic(*CalledFunc))
- return true;
-
// Assume !nocallback intrinsics may call a function which requires
// AGPRs.
return CB.hasFnAttr(Attribute::NoCallback);
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll
index 4db668e05cb21..3684f0df77780 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll
@@ -181,7 +181,7 @@ define amdgpu_kernel void @kernel_calls_extern() {
define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
; CHECK-SAME: ) #[[ATTR3]] {
-; CHECK-NEXT: call void @unknown() #[[ATTR29:[0-9]+]]
+; CHECK-NEXT: call void @unknown() #[[ATTR27:[0-9]+]]
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
@@ -205,7 +205,7 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR3]] {
-; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR29]]
+; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR27]]
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
@@ -724,7 +724,7 @@ define amdgpu_kernel void @kernel_uses_write_register_v55() {
define amdgpu_kernel void @kernel_uses_write_register_a55_57() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55_57(
-; CHECK-SAME: ) #[[ATTR18:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: call void @llvm.write_register.i96(metadata [[META2:![0-9]+]], i96 0)
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -736,7 +736,7 @@ define amdgpu_kernel void @kernel_uses_write_register_a55_57() {
define amdgpu_kernel void @kernel_uses_read_register_a55(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a55(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR19:[0-9]+]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_register.i32(metadata [[META0]])
; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: call void @use_most()
@@ -750,7 +750,7 @@ define amdgpu_kernel void @kernel_uses_read_register_a55(ptr addrspace(1) %ptr)
define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR19]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR18:[0-9]+]] {
; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_volatile_register.i32(metadata [[META0]])
; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: call void @use_most()
@@ -764,7 +764,7 @@ define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr addrspace(
define amdgpu_kernel void @kernel_uses_read_register_a56_59(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a56_59(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR20:[0-9]+]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[REG:%.*]] = call i128 @llvm.read_register.i128(metadata [[META3:![0-9]+]])
; CHECK-NEXT: store i128 [[REG]], ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: call void @use_most()
@@ -778,7 +778,7 @@ define amdgpu_kernel void @kernel_uses_read_register_a56_59(ptr addrspace(1) %pt
define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256(
-; CHECK-SAME: ) #[[ATTR9]] {
+; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: call void @llvm.write_register.i32(metadata [[META4:![0-9]+]], i32 0)
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -897,7 +897,7 @@ define void @kernel_max_callgraph(i1 %cond) {
define amdgpu_kernel void @kernel_uses_all_virtregs() #1 {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_all_virtregs(
-; CHECK-SAME: ) #[[ATTR21:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR19:[0-9]+]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -909,7 +909,7 @@ define amdgpu_kernel void @kernel_uses_all_virtregs() #1 {
define amdgpu_kernel void @kernel_uses_all_virtregs_plus_1() #1 {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_all_virtregs_plus_1(
-; CHECK-SAME: ) #[[ATTR21]] {
+; CHECK-SAME: ) #[[ATTR19]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -921,7 +921,7 @@ define amdgpu_kernel void @kernel_uses_all_virtregs_plus_1() #1 {
define void @recursive() {
; CHECK-LABEL: define void @recursive(
-; CHECK-SAME: ) #[[ATTR22:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR20:[0-9]+]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: call void @recursive()
@@ -935,7 +935,7 @@ define void @recursive() {
define void @indirect_0() {
; CHECK-LABEL: define void @indirect_0(
-; CHECK-SAME: ) #[[ATTR22]] {
+; CHECK-SAME: ) #[[ATTR20]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -947,7 +947,7 @@ define void @indirect_0() {
define void @indirect_1() {
; CHECK-LABEL: define void @indirect_1(
-; CHECK-SAME: ) #[[ATTR23:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR21:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i32> asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -959,7 +959,7 @@ define void @indirect_1() {
define amdgpu_kernel void @knowable_indirect_call(i1 %cond) {
; CHECK-LABEL: define amdgpu_kernel void @knowable_indirect_call(
-; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR22]] {
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR20]] {
; CHECK-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @indirect_0, ptr @indirect_1
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @indirect_1
; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]]
@@ -1044,19 +1044,17 @@ attributes #1 = { "amdgpu-waves-per-eu"="1,1" }
; CHECK: attributes #[[ATTR14]] = { "amdgpu-agpr-alloc"="33" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR15]] = { "amdgpu-agpr-alloc"="8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR16]] = { "amdgpu-agpr-alloc"="13" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR17]] = { "amdgpu-agpr-alloc"="56" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR18]] = { "amdgpu-agpr-alloc"="58" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR19]] = { "amdgpu-agpr-alloc"="56" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR20]] = { "amdgpu-agpr-alloc"="60" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR21]] = { "amdgpu-agpr-alloc"="256" "amdgpu-waves-per-eu"="1,1" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR22]] = { "amdgpu-agpr-alloc"="7" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR23]] = { "amdgpu-agpr-alloc"="3" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR24:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR25:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR26:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR27:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR28:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR29]] = { "amdgpu-agpr-alloc"="0" }
+; CHECK: attributes #[[ATTR17]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR18]] = { "amdgpu-agpr-alloc"="56" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR19]] = { "amdgpu-agpr-alloc"="256" "amdgpu-waves-per-eu"="1,1" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR20]] = { "amdgpu-agpr-alloc"="7" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR21]] = { "amdgpu-agpr-alloc"="3" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR22:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR23:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR24:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR25:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR26:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR27]] = { "amdgpu-agpr-alloc"="0" }
;.
; CHECK: [[META0]] = !{!"a55"}
; CHECK: [[META1]] = !{!"v55"}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
index 71c509afa8e64..bd723f696558b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
@@ -35,7 +35,7 @@ define void @use_assume(i1 %arg) {
define void @use_trap() {
; CHECK-LABEL: define void @use_trap(
-; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: call void @llvm.trap()
; CHECK-NEXT: ret void
;
@@ -43,9 +43,19 @@ define void @use_trap() {
ret void
}
+define void @use_trap_with_handler() {
+; CHECK-LABEL: define void @use_trap_with_handler(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: call void @llvm.trap() #[[ATTR8:[0-9]+]]
+; CHECK-NEXT: ret void
+;
+ call void @llvm.trap() #0
+ ret void
+}
+
define void @use_debugtrap() {
; CHECK-LABEL: define void @use_debugtrap(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: call void @llvm.debugtrap()
; CHECK-NEXT: ret void
;
@@ -55,7 +65,7 @@ define void @use_debugtrap() {
define void @use_ubsantrap() {
; CHECK-LABEL: define void @use_ubsantrap(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: call void @llvm.ubsantrap(i8 0)
; CHECK-NEXT: ret void
;
@@ -63,12 +73,16 @@ define void @use_ubsantrap() {
ret void
}
+
+attributes #0 = { "trap-func-name"="handler" }
;.
; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR6:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR1]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR7:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR8]] = { "trap-func-name"="handler" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll
index 620b232f58e97..0010b7ba3e28c 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll
@@ -21,9 +21,19 @@ define amdgpu_kernel void @trap_kernel() {
ret void
}
+define amdgpu_kernel void @trap_kernel_with_handler() {
+; CHECK-LABEL: define amdgpu_kernel void @trap_kernel_with_handler(
+; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT: call void @llvm.trap() #[[ATTR5:[0-9]+]]
+; CHECK-NEXT: ret void
+;
+ call void @llvm.trap() #0
+ ret void
+}
+
define amdgpu_kernel void @debugtrap_kernel() {
; CHECK-LABEL: define amdgpu_kernel void @debugtrap_kernel(
-; CHECK-SAME: ) #[[ATTR2]] {
+; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
; CHECK-NEXT: call void @llvm.debugtrap()
; CHECK-NEXT: ret void
;
@@ -31,8 +41,13 @@ define amdgpu_kernel void @debugtrap_kernel() {
ret void
}
+
+attributes #0 = { "trap-func-name"="handler" }
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5]] = { "trap-func-name"="handler" }
;.
>From 1a38625c824225667642cc18ad0d63947dc89cd5 Mon Sep 17 00:00:00 2001
From: akadutta <Akash.Dutta at amd.com>
Date: Wed, 21 Jan 2026 12:05:02 -0600
Subject: [PATCH 5/5] code cleanup
---
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index ced4c13eaa39d..a257f9db9dfd6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -573,8 +573,7 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
// of whether it's internal to the module or not.
//
// TODO: Ignoring callsite attributes.
- const bool IsTrapLikeTrapIntrinsic = isTrapLikeLeafIntrinsic(*Callee);
- if (!IsTrapLikeTrapIntrinsic &&
+ if (!isTrapLikeLeafIntrinsic(*Callee) &&
!Callee->hasFnAttribute(Attribute::NoCallback))
return indicatePessimisticFixpoint();
continue;
@@ -1408,14 +1407,13 @@ struct AAAMDGPUMinAGPRAlloc
return true;
}
- default: {
+ default:
// Some intrinsics may use AGPRs, but if we have a choice, we are not
// required to use AGPRs.
// Assume !nocallback intrinsics may call a function which requires
// AGPRs.
return CB.hasFnAttr(Attribute::NoCallback);
}
- }
// TODO: Handle callsite attributes
auto *CBEdges = A.getAAFor<AACallEdges>(
More information about the llvm-commits
mailing list