[llvm] AMDGPU: Do not infer implicit inputs for intrinsics with !nocallback (PR #175230)

Akash Dutta via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 21 10:05:27 PST 2026


https://github.com/akadutta updated https://github.com/llvm/llvm-project/pull/175230

>From 7cfb1078f1b97d1d0f529b1ee78d3e734133ffc7 Mon Sep 17 00:00:00 2001
From: akadutta <Akash.Dutta at amd.com>
Date: Fri, 9 Jan 2026 13:51:31 -0600
Subject: [PATCH 1/5] do not infer implicit args for missing nocallback
 intrinsics;add trap like functions to whitelist

---
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp   | 61 ++++++++++++++--
 ...attributor-intrinsic-missing-nocallback.ll | 31 ++++++++
 ...amdgpu-attributor-nocallback-intrinsics.ll | 73 +++++++++++++++++++
 .../AMDGPU/amdgpu-attributor-trap-leaf.ll     | 38 ++++++++++
 4 files changed, 197 insertions(+), 6 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 821a7198e38c8..98130630b4a98 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -38,9 +38,10 @@ enum ImplicitArgumentPositions {
 #define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
 
 enum ImplicitArgumentMask {
-  NOT_IMPLICIT_INPUT = 0,
+  UNKNOWN_INTRINSIC = 0,
 #include "AMDGPUAttributes.def"
-  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
+  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1,
+  NOT_IMPLICIT_INPUT
 };
 
 #define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
@@ -115,7 +116,7 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
     NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
     return QUEUE_PTR;
   default:
-    return NOT_IMPLICIT_INPUT;
+    return UNKNOWN_INTRINSIC;
   }
 }
 
@@ -140,6 +141,26 @@ static bool hasSanitizerAttributes(const Function &F) {
          F.hasFnAttribute(Attribute::SanitizeMemTag);
 }
 
+/// Returns true if F looks like a leaf-style trap intrinsic that cannot
+/// possibly callback into user code.
+static bool isTrapLikeLeafIntrinsic(const Function &F) {
+  if (!F.isIntrinsic())
+    return false;
+
+  // If we already know this intrinsic is nocallback, there is nothing left to
+  // classify.
+  if (F.hasFnAttribute(Attribute::NoCallback))
+    return true;
+
+  if (!F.hasFnAttribute(Attribute::NoReturn))
+    return false;
+
+  if (F.doesNotAccessMemory())
+    return true;
+
+  return F.onlyAccessesInaccessibleMemory();
+}
+
 namespace {
 class AMDGPUInformationCache : public InformationCache {
 public:
@@ -525,6 +546,22 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
       ImplicitArgumentMask AttrMask =
           intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
                               HasApertureRegs, SupportsGetDoorbellID, COV);
+
+      if (AttrMask == UNKNOWN_INTRINSIC) {
+        // Assume not-nocallback intrinsics may invoke a function which accesses
+        // implicit arguments.
+        //
+        // FIXME: This isn't really the correct check. We want to ensure it
+        // isn't calling any function that may use implicit arguments regardless
+        // of whether it's internal to the module or not.
+        //
+        // TODO: Ignoring callsite attributes.
+        if (!isTrapLikeLeafIntrinsic(*Callee) &&
+            !Callee->hasFnAttribute(Attribute::NoCallback))
+          return indicatePessimisticFixpoint();
+        continue;
+      }
+
       if (AttrMask != NOT_IMPLICIT_INPUT) {
         if ((IsNonEntryFunc || !NonKernelOnly))
           removeAssumedBits(AttrMask);
@@ -1345,10 +1382,22 @@ struct AAAMDGPUMinAGPRAlloc
 
         return true;
       }
-      default:
+      case Intrinsic::debugtrap:
+        // llvm.debugtrap currently lacks the attributes required for
+        // isTrapLikeLeafIntrinsic, so kept explicitly whitelisted.
+        return true;
+      default: {
         // Some intrinsics may use AGPRs, but if we have a choice, we are not
         // required to use AGPRs.
-        return true;
+
+        if (const Function *CalledFunc = CB.getCalledFunction())
+          if (isTrapLikeLeafIntrinsic(*CalledFunc))
+            return true;
+
+        // Assume !nocallback intrinsics may call a function which requires
+        // AGPRs.
+        return CB.hasFnAttr(Attribute::NoCallback);
+      }
       }
 
       // TODO: Handle callsite attributes
@@ -1584,7 +1633,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
   AC.DefaultInitializeLiveInternals = false;
   AC.IndirectCalleeSpecializationCallback =
       [](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
-         Function &Callee, unsigned NumAssumedCallees) {
+            Function &Callee, unsigned NumAssumedCallees) {
         return !AMDGPU::isEntryFunctionCC(Callee.getCallingConv()) &&
                (NumAssumedCallees <= IndirectCallSpecializationThreshold);
       };
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll
new file mode 100644
index 0000000000000..892bfa12140d4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s
+
+; Make sure we do not infer anything about implicit inputs through an
+; intrinsic call which is not nocallback.
+
+declare zeroext i32 @return_i32()
+
+define i32 @test_i32_return() gc "statepoint-example" {
+; CHECK-LABEL: define i32 @test_i32_return(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] gc "statepoint-example" {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[SAFEPOINT_TOKEN:%.*]] = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0)
+; CHECK-NEXT:    [[CALL1:%.*]] = call zeroext i32 @llvm.experimental.gc.result.i32(token [[SAFEPOINT_TOKEN]])
+; CHECK-NEXT:    ret i32 [[CALL1]]
+;
+entry:
+  %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0)
+  %call1 = call zeroext i32 @llvm.experimental.gc.result.i32(token %safepoint_token)
+  ret i32 %call1
+}
+
+declare token @llvm.experimental.gc.statepoint.p0(i64 immarg, i32 immarg, ptr, i32 immarg, i32 immarg, ...)
+declare i32 @llvm.experimental.gc.result.i32(token) #0
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
+;.
+; CHECK: attributes #[[ATTR0]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
new file mode 100644
index 0000000000000..81fd84dce31d8
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-attributor -mcpu=gfx90a %s | FileCheck %s
+
+; Make sure we infer no inputs are used through some intrinsics
+
+define void @use_fake_use(i32 %arg) {
+; CHECK-LABEL: define void @use_fake_use(
+; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    call void (...) @llvm.fake.use(i32 [[ARG]])
+; CHECK-NEXT:    ret void
+;
+  call void (...) @llvm.fake.use(i32 %arg)
+  ret void
+}
+
+define void @use_donothing() {
+; CHECK-LABEL: define void @use_donothing(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.donothing()
+  ret void
+}
+
+define void @use_assume(i1 %arg) {
+; CHECK-LABEL: define void @use_assume(
+; CHECK-SAME: i1 [[ARG:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    call void @llvm.assume(i1 [[ARG]])
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.assume(i1 %arg)
+  ret void
+}
+
+define void @use_trap() {
+; CHECK-LABEL: define void @use_trap(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.trap()
+  ret void
+}
+
+define void @use_debugtrap() {
+; CHECK-LABEL: define void @use_debugtrap(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void @llvm.debugtrap()
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.debugtrap()
+  ret void
+}
+
+define void @use_ubsantrap() {
+; CHECK-LABEL: define void @use_ubsantrap(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void @llvm.ubsantrap(i8 0)
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.ubsantrap(i8 0)
+  ret void
+}
+
+;.
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll
new file mode 100644
index 0000000000000..620b232f58e97
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s
+
+; Trap-like intrinsics such as llvm.trap and llvm.debugtrap do not have the
+; nocallback attribute, so the AMDGPU attributor used to conservatively drop
+; all implicitly-known inputs and AGPR allocation information. Make sure we
+; still infer that no implicit inputs are required and that the AGPR allocation
+; stays at zero.
+
+declare void @llvm.trap()
+
+declare void @llvm.debugtrap()
+
+define amdgpu_kernel void @trap_kernel() {
+; CHECK-LABEL: define amdgpu_kernel void @trap_kernel(
+; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.trap()
+  ret void
+}
+
+define amdgpu_kernel void @debugtrap_kernel() {
+; CHECK-LABEL: define amdgpu_kernel void @debugtrap_kernel(
+; CHECK-SAME: ) #[[ATTR2]] {
+; CHECK-NEXT:    call void @llvm.debugtrap()
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.debugtrap()
+  ret void
+}
+
+;.
+; CHECK: attributes #[[ATTR0:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+;.

>From 798ef42c878649b762a7cd04c624f11f73b3abca Mon Sep 17 00:00:00 2001
From: akadutta <Akash.Dutta at amd.com>
Date: Fri, 9 Jan 2026 15:23:10 -0600
Subject: [PATCH 2/5] fix format issue

---
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 98130630b4a98..fb8afd6356bbb 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1633,7 +1633,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
   AC.DefaultInitializeLiveInternals = false;
   AC.IndirectCalleeSpecializationCallback =
       [](Attributor &A, const AbstractAttribute &AA, CallBase &CB,
-            Function &Callee, unsigned NumAssumedCallees) {
+         Function &Callee, unsigned NumAssumedCallees) {
         return !AMDGPU::isEntryFunctionCC(Callee.getCallingConv()) &&
                (NumAssumedCallees <= IndirectCallSpecializationThreshold);
       };

>From 391e06a841e683dee16f4872e226ae6cbf6282d0 Mon Sep 17 00:00:00 2001
From: akadutta <Akash.Dutta at amd.com>
Date: Fri, 16 Jan 2026 13:25:36 -0600
Subject: [PATCH 3/5] fix incorrect merge

---
 ...amdgpu-attributor-nocallback-intrinsics.ll | 20 -------------------
 1 file changed, 20 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
index b87cc2747c582..71c509afa8e64 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
@@ -35,11 +35,7 @@ define void @use_assume(i1 %arg) {
 
 define void @use_trap() {
 ; CHECK-LABEL: define void @use_trap(
-<<<<<<< HEAD
-; CHECK-SAME: ) #[[ATTR0]] {
-=======
 ; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
->>>>>>> main
 ; CHECK-NEXT:    call void @llvm.trap()
 ; CHECK-NEXT:    ret void
 ;
@@ -49,11 +45,7 @@ define void @use_trap() {
 
 define void @use_debugtrap() {
 ; CHECK-LABEL: define void @use_debugtrap(
-<<<<<<< HEAD
-; CHECK-SAME: ) #[[ATTR0]] {
-=======
 ; CHECK-SAME: ) #[[ATTR1]] {
->>>>>>> main
 ; CHECK-NEXT:    call void @llvm.debugtrap()
 ; CHECK-NEXT:    ret void
 ;
@@ -63,11 +55,7 @@ define void @use_debugtrap() {
 
 define void @use_ubsantrap() {
 ; CHECK-LABEL: define void @use_ubsantrap(
-<<<<<<< HEAD
-; CHECK-SAME: ) #[[ATTR0]] {
-=======
 ; CHECK-SAME: ) #[[ATTR1]] {
->>>>>>> main
 ; CHECK-NEXT:    call void @llvm.ubsantrap(i8 0)
 ; CHECK-NEXT:    ret void
 ;
@@ -77,18 +65,10 @@ define void @use_ubsantrap() {
 
 ;.
 ; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-<<<<<<< HEAD
-; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR5:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
-=======
 ; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
 ; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
 ; CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
 ; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
 ; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) "target-cpu"="gfx90a" }
 ; CHECK: attributes #[[ATTR6:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
->>>>>>> main
 ;.

>From 40cdc1d942154bc254a0503300fa25e2aabdb66f Mon Sep 17 00:00:00 2001
From: akadutta <Akash.Dutta at amd.com>
Date: Mon, 19 Jan 2026 11:17:29 -0600
Subject: [PATCH 4/5] add handling for trap-func-name

---
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp   | 39 ++++++++++-----
 .../amdgpu-attributor-min-agpr-alloc.ll       | 50 +++++++++----------
 ...amdgpu-attributor-nocallback-intrinsics.ll | 32 ++++++++----
 .../AMDGPU/amdgpu-attributor-trap-leaf.ll     | 17 ++++++-
 4 files changed, 90 insertions(+), 48 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index bd6071c22d619..ced4c13eaa39d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -42,8 +42,6 @@ enum ImplicitArgumentMask {
 #include "AMDGPUAttributes.def"
   ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1,
   NOT_IMPLICIT_INPUT
-  ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1,
-  NOT_IMPLICIT_INPUT
 };
 
 #define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
@@ -518,6 +516,17 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
     // The current assumed state used to determine a change.
     auto OrigAssumed = getAssumed();
 
+    SmallPtrSet<const Function *, 4> TrapHandlers;
+    for (Instruction &I : instructions(F)) {
+      auto *CB = dyn_cast<CallBase>(&I);
+      if (!CB || !CB->hasFnAttr("trap-func-name"))
+        continue;
+
+      if (const Function *CalledFunc = CB->getCalledFunction())
+        if (isTrapLikeLeafIntrinsic(*CalledFunc))
+          TrapHandlers.insert(CalledFunc);
+    }
+
     // Check for Intrinsics and propagate attributes.
     const AACallEdges *AAEdges = A.getAAFor<AACallEdges>(
         *this, this->getIRPosition(), DepClassTy::REQUIRED);
@@ -549,6 +558,12 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
           intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
                               HasApertureRegs, SupportsGetDoorbellID, COV);
 
+      if (TrapHandlers.contains(Callee)) {
+        if (!Callee->hasFnAttribute(Attribute::NoCallback))
+          return indicatePessimisticFixpoint();
+        continue;
+      }
+
       if (AttrMask == UNKNOWN_INTRINSIC) {
         // Assume not-nocallback intrinsics may invoke a function which accesses
         // implicit arguments.
@@ -558,7 +573,8 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
         // of whether it's internal to the module or not.
         //
         // TODO: Ignoring callsite attributes.
-        if (!isTrapLikeLeafIntrinsic(*Callee) &&
+        const bool IsTrapLikeTrapIntrinsic = isTrapLikeLeafIntrinsic(*Callee);
+        if (!IsTrapLikeTrapIntrinsic &&
             !Callee->hasFnAttribute(Attribute::NoCallback))
           return indicatePessimisticFixpoint();
         continue;
@@ -1367,6 +1383,14 @@ struct AAAMDGPUMinAGPRAlloc
         return true;
       }
 
+      if (const Function *CalledFunc = CB.getCalledFunction()) {
+        if (isTrapLikeLeafIntrinsic(*CalledFunc)) {
+          if (CB.hasFnAttr("trap-func-name"))
+            return CB.hasFnAttr(Attribute::NoCallback);
+          return true;
+        }
+      }
+
       switch (CB.getIntrinsicID()) {
       case Intrinsic::not_intrinsic:
         break;
@@ -1384,18 +1408,9 @@ struct AAAMDGPUMinAGPRAlloc
 
         return true;
       }
-      case Intrinsic::debugtrap:
-        // llvm.debugtrap currently lacks the attributes required for
-        // isTrapLikeLeafIntrinsic, so kept explicitly whitelisted.
-        return true;
       default: {
         // Some intrinsics may use AGPRs, but if we have a choice, we are not
         // required to use AGPRs.
-
-        if (const Function *CalledFunc = CB.getCalledFunction())
-          if (isTrapLikeLeafIntrinsic(*CalledFunc))
-            return true;
-
         // Assume !nocallback intrinsics may call a function which requires
         // AGPRs.
         return CB.hasFnAttr(Attribute::NoCallback);
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll
index 4db668e05cb21..3684f0df77780 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll
@@ -181,7 +181,7 @@ define amdgpu_kernel void @kernel_calls_extern() {
 define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
 ; CHECK-SAME: ) #[[ATTR3]] {
-; CHECK-NEXT:    call void @unknown() #[[ATTR29:[0-9]+]]
+; CHECK-NEXT:    call void @unknown() #[[ATTR27:[0-9]+]]
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
@@ -205,7 +205,7 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
 define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
 ; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR3]] {
-; CHECK-NEXT:    call void [[INDIRECT]]() #[[ATTR29]]
+; CHECK-NEXT:    call void [[INDIRECT]]() #[[ATTR27]]
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
 ;
@@ -724,7 +724,7 @@ define amdgpu_kernel void @kernel_uses_write_register_v55() {
 
 define amdgpu_kernel void @kernel_uses_write_register_a55_57() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55_57(
-; CHECK-SAME: ) #[[ATTR18:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @llvm.write_register.i96(metadata [[META2:![0-9]+]], i96 0)
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -736,7 +736,7 @@ define amdgpu_kernel void @kernel_uses_write_register_a55_57() {
 
 define amdgpu_kernel void @kernel_uses_read_register_a55(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a55(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR19:[0-9]+]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[REG:%.*]] = call i32 @llvm.read_register.i32(metadata [[META0]])
 ; CHECK-NEXT:    store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
 ; CHECK-NEXT:    call void @use_most()
@@ -750,7 +750,7 @@ define amdgpu_kernel void @kernel_uses_read_register_a55(ptr addrspace(1) %ptr)
 
 define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR19]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR18:[0-9]+]] {
 ; CHECK-NEXT:    [[REG:%.*]] = call i32 @llvm.read_volatile_register.i32(metadata [[META0]])
 ; CHECK-NEXT:    store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
 ; CHECK-NEXT:    call void @use_most()
@@ -764,7 +764,7 @@ define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr addrspace(
 
 define amdgpu_kernel void @kernel_uses_read_register_a56_59(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a56_59(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR20:[0-9]+]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT:    [[REG:%.*]] = call i128 @llvm.read_register.i128(metadata [[META3:![0-9]+]])
 ; CHECK-NEXT:    store i128 [[REG]], ptr addrspace(1) [[PTR]], align 8
 ; CHECK-NEXT:    call void @use_most()
@@ -778,7 +778,7 @@ define amdgpu_kernel void @kernel_uses_read_register_a56_59(ptr addrspace(1) %pt
 
 define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256(
-; CHECK-SAME: ) #[[ATTR9]] {
+; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @llvm.write_register.i32(metadata [[META4:![0-9]+]], i32 0)
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -897,7 +897,7 @@ define void @kernel_max_callgraph(i1 %cond) {
 
 define amdgpu_kernel void @kernel_uses_all_virtregs() #1 {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_all_virtregs(
-; CHECK-SAME: ) #[[ATTR21:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR19:[0-9]+]] {
 ; CHECK-NEXT:    call void asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -909,7 +909,7 @@ define amdgpu_kernel void @kernel_uses_all_virtregs() #1 {
 
 define amdgpu_kernel void @kernel_uses_all_virtregs_plus_1() #1 {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_all_virtregs_plus_1(
-; CHECK-SAME: ) #[[ATTR21]] {
+; CHECK-SAME: ) #[[ATTR19]] {
 ; CHECK-NEXT:    call void asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -921,7 +921,7 @@ define amdgpu_kernel void @kernel_uses_all_virtregs_plus_1() #1 {
 
 define void @recursive() {
 ; CHECK-LABEL: define void @recursive(
-; CHECK-SAME: ) #[[ATTR22:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR20:[0-9]+]] {
 ; CHECK-NEXT:    call void asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    call void @recursive()
@@ -935,7 +935,7 @@ define void @recursive() {
 
 define void @indirect_0() {
 ; CHECK-LABEL: define void @indirect_0(
-; CHECK-SAME: ) #[[ATTR22]] {
+; CHECK-SAME: ) #[[ATTR20]] {
 ; CHECK-NEXT:    call void asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -947,7 +947,7 @@ define void @indirect_0() {
 
 define void @indirect_1() {
 ; CHECK-LABEL: define void @indirect_1(
-; CHECK-SAME: ) #[[ATTR23:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR21:[0-9]+]] {
 ; CHECK-NEXT:    [[TMP1:%.*]] = call <3 x i32> asm sideeffect "
 ; CHECK-NEXT:    call void @use_most()
 ; CHECK-NEXT:    ret void
@@ -959,7 +959,7 @@ define void @indirect_1() {
 
 define amdgpu_kernel void @knowable_indirect_call(i1 %cond) {
 ; CHECK-LABEL: define amdgpu_kernel void @knowable_indirect_call(
-; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR22]] {
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR20]] {
 ; CHECK-NEXT:    [[FPTR:%.*]] = select i1 [[COND]], ptr @indirect_0, ptr @indirect_1
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @indirect_1
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]]
@@ -1044,19 +1044,17 @@ attributes #1 = { "amdgpu-waves-per-eu"="1,1" }
 ; CHECK: attributes #[[ATTR14]] = { "amdgpu-agpr-alloc"="33" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
 ; CHECK: attributes #[[ATTR15]] = { "amdgpu-agpr-alloc"="8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
 ; CHECK: attributes #[[ATTR16]] = { "amdgpu-agpr-alloc"="13" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR17]] = { "amdgpu-agpr-alloc"="56" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR18]] = { "amdgpu-agpr-alloc"="58" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR19]] = { "amdgpu-agpr-alloc"="56" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR20]] = { "amdgpu-agpr-alloc"="60" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR21]] = { "amdgpu-agpr-alloc"="256" "amdgpu-waves-per-eu"="1,1" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR22]] = { "amdgpu-agpr-alloc"="7" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR23]] = { "amdgpu-agpr-alloc"="3" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR24:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR25:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR26:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR27:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR28:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR29]] = { "amdgpu-agpr-alloc"="0" }
+; CHECK: attributes #[[ATTR17]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR18]] = { "amdgpu-agpr-alloc"="56" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR19]] = { "amdgpu-agpr-alloc"="256" "amdgpu-waves-per-eu"="1,1" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR20]] = { "amdgpu-agpr-alloc"="7" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR21]] = { "amdgpu-agpr-alloc"="3" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR22:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR23:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR24:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR25:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR26:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR27]] = { "amdgpu-agpr-alloc"="0" }
 ;.
 ; CHECK: [[META0]] = !{!"a55"}
 ; CHECK: [[META1]] = !{!"v55"}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
index 71c509afa8e64..bd723f696558b 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
@@ -35,7 +35,7 @@ define void @use_assume(i1 %arg) {
 
 define void @use_trap() {
 ; CHECK-LABEL: define void @use_trap(
-; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @llvm.trap()
 ; CHECK-NEXT:    ret void
 ;
@@ -43,9 +43,19 @@ define void @use_trap() {
   ret void
 }
 
+define void @use_trap_with_handler() {
+; CHECK-LABEL: define void @use_trap_with_handler(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    call void @llvm.trap() #[[ATTR8:[0-9]+]]
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.trap() #0
+  ret void
+}
+
 define void @use_debugtrap() {
 ; CHECK-LABEL: define void @use_debugtrap(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
 ; CHECK-NEXT:    call void @llvm.debugtrap()
 ; CHECK-NEXT:    ret void
 ;
@@ -55,7 +65,7 @@ define void @use_debugtrap() {
 
 define void @use_ubsantrap() {
 ; CHECK-LABEL: define void @use_ubsantrap(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
 ; CHECK-NEXT:    call void @llvm.ubsantrap(i8 0)
 ; CHECK-NEXT:    ret void
 ;
@@ -63,12 +73,16 @@ define void @use_ubsantrap() {
   ret void
 }
 
+
+attributes #0 = { "trap-func-name"="handler" }
 ;.
 ; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR6:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR1]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR7:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR8]] = { "trap-func-name"="handler" }
 ;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll
index 620b232f58e97..0010b7ba3e28c 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll
@@ -21,9 +21,19 @@ define amdgpu_kernel void @trap_kernel() {
   ret void
 }
 
+define amdgpu_kernel void @trap_kernel_with_handler() {
+; CHECK-LABEL: define amdgpu_kernel void @trap_kernel_with_handler(
+; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT:    call void @llvm.trap() #[[ATTR5:[0-9]+]]
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.trap() #0
+  ret void
+}
+
 define amdgpu_kernel void @debugtrap_kernel() {
 ; CHECK-LABEL: define amdgpu_kernel void @debugtrap_kernel(
-; CHECK-SAME: ) #[[ATTR2]] {
+; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
 ; CHECK-NEXT:    call void @llvm.debugtrap()
 ; CHECK-NEXT:    ret void
 ;
@@ -31,8 +41,13 @@ define amdgpu_kernel void @debugtrap_kernel() {
   ret void
 }
 
+
+attributes #0 = { "trap-func-name"="handler" }
 ;.
 ; CHECK: attributes #[[ATTR0:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
 ; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
 ; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5]] = { "trap-func-name"="handler" }
 ;.

>From 1a38625c824225667642cc18ad0d63947dc89cd5 Mon Sep 17 00:00:00 2001
From: akadutta <Akash.Dutta at amd.com>
Date: Wed, 21 Jan 2026 12:05:02 -0600
Subject: [PATCH 5/5] code cleanup

---
 llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index ced4c13eaa39d..a257f9db9dfd6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -573,8 +573,7 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
         // of whether it's internal to the module or not.
         //
         // TODO: Ignoring callsite attributes.
-        const bool IsTrapLikeTrapIntrinsic = isTrapLikeLeafIntrinsic(*Callee);
-        if (!IsTrapLikeTrapIntrinsic &&
+        if (!isTrapLikeLeafIntrinsic(*Callee) &&
             !Callee->hasFnAttribute(Attribute::NoCallback))
           return indicatePessimisticFixpoint();
         continue;
@@ -1408,14 +1407,13 @@ struct AAAMDGPUMinAGPRAlloc
 
         return true;
       }
-      default: {
+      default:
         // Some intrinsics may use AGPRs, but if we have a choice, we are not
         // required to use AGPRs.
         // Assume !nocallback intrinsics may call a function which requires
         // AGPRs.
         return CB.hasFnAttr(Attribute::NoCallback);
       }
-      }
 
       // TODO: Handle callsite attributes
       auto *CBEdges = A.getAAFor<AACallEdges>(



More information about the llvm-commits mailing list