[llvm] Revert "AMDGPU: Do not infer implicit inputs for !nocallback intrinsics" (PR #174224)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 2 09:18:41 PST 2026
https://github.com/ronlieb created https://github.com/llvm/llvm-project/pull/174224
Reverts llvm/llvm-project#131759
Seeing regressions in PyTorch unit tests: 8 test cases failed in the "test_ops" test suite.
From 674620670e09d0c9ff044ee76aa5f50311e82212 Mon Sep 17 00:00:00 2001
From: theRonShark <rlieberm at amd.com>
Date: Fri, 2 Jan 2026 12:17:40 -0500
Subject: [PATCH] Revert "AMDGPU: Do not infer implicit inputs for !nocallback
intrinsics"
This reverts commit 849038cad16f18d77b5cd277980c93e8efbf1bbc.
---
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 27 +------
...attributor-intrinsic-missing-nocallback.ll | 31 --------
...amdgpu-attributor-nocallback-intrinsics.ll | 74 -------------------
3 files changed, 4 insertions(+), 128 deletions(-)
delete mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll
delete mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 0b2ee6371da06..821a7198e38c8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -38,10 +38,9 @@ enum ImplicitArgumentPositions {
#define AMDGPU_ATTRIBUTE(Name, Str) Name = 1 << Name##_POS,
enum ImplicitArgumentMask {
- UNKNOWN_INTRINSIC = 0,
+ NOT_IMPLICIT_INPUT = 0,
#include "AMDGPUAttributes.def"
- ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1,
- NOT_IMPLICIT_INPUT
+ ALL_ARGUMENT_MASK = (1 << LAST_ARG_POS) - 1
};
#define AMDGPU_ATTRIBUTE(Name, Str) {Name, Str},
@@ -116,7 +115,7 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
NeedsImplicit = (CodeObjectVersion >= AMDGPU::AMDHSA_COV5);
return QUEUE_PTR;
default:
- return UNKNOWN_INTRINSIC;
+ return NOT_IMPLICIT_INPUT;
}
}
@@ -526,21 +525,6 @@ struct AAAMDAttributesFunction : public AAAMDAttributes {
ImplicitArgumentMask AttrMask =
intrinsicToAttrMask(IID, NonKernelOnly, NeedsImplicit,
HasApertureRegs, SupportsGetDoorbellID, COV);
-
- if (AttrMask == UNKNOWN_INTRINSIC) {
- // Assume not-nocallback intrinsics may invoke a function which accesses
- // implicit arguments.
- //
- // FIXME: This isn't really the correct check. We want to ensure it
- // isn't calling any function that may use implicit arguments regardless
- // of whether it's internal to the module or not.
- //
- // TODO: Ignoring callsite attributes.
- if (!Callee->hasFnAttribute(Attribute::NoCallback))
- return indicatePessimisticFixpoint();
- continue;
- }
-
if (AttrMask != NOT_IMPLICIT_INPUT) {
if ((IsNonEntryFunc || !NonKernelOnly))
removeAssumedBits(AttrMask);
@@ -1364,10 +1348,7 @@ struct AAAMDGPUMinAGPRAlloc
default:
// Some intrinsics may use AGPRs, but if we have a choice, we are not
// required to use AGPRs.
-
- // Assume !nocallback intrinsics may call a function which requires
- // AGPRs.
- return CB.hasFnAttr(Attribute::NoCallback);
+ return true;
}
// TODO: Handle callsite attributes
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll
deleted file mode 100644
index d7d623ac89146..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-intrinsic-missing-nocallback.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5
-; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s
-
-; Make sure we do not infer anything about implicit inputs through an
-; intrinsic call which is not nocallback.
-
-declare zeroext i32 @return_i32()
-
-define i32 @test_i32_return() gc "statepoint-example" {
-; CHECK-LABEL: define i32 @test_i32_return(
-; CHECK-SAME: ) #[[ATTR0:[0-9]+]] gc "statepoint-example" {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[SAFEPOINT_TOKEN:%.*]] = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0)
-; CHECK-NEXT: [[CALL1:%.*]] = call zeroext i32 @llvm.experimental.gc.result.i32(token [[SAFEPOINT_TOKEN]])
-; CHECK-NEXT: ret i32 [[CALL1]]
-;
-entry:
- %safepoint_token = tail call token (i64, i32, ptr, i32, i32, ...) @llvm.experimental.gc.statepoint.p0(i64 0, i32 0, ptr elementtype(i32 ()) @return_i32, i32 0, i32 0, i32 0, i32 0)
- %call1 = call zeroext i32 @llvm.experimental.gc.result.i32(token %safepoint_token)
- ret i32 %call1
-}
-
-declare token @llvm.experimental.gc.statepoint.p0(i64 immarg, i32 immarg, ptr, i32 immarg, i32 immarg, ...)
-declare i32 @llvm.experimental.gc.result.i32(token) #0
-
-attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
-;.
-; CHECK: attributes #[[ATTR0]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1:[0-9]+]] = { "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
-;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
deleted file mode 100644
index 71c509afa8e64..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
+++ /dev/null
@@ -1,74 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
-; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-attributor -mcpu=gfx90a %s | FileCheck %s
-
-; Make sure we infer no inputs are used through some intrinsics
-
-define void @use_fake_use(i32 %arg) {
-; CHECK-LABEL: define void @use_fake_use(
-; CHECK-SAME: i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: call void (...) @llvm.fake.use(i32 [[ARG]])
-; CHECK-NEXT: ret void
-;
- call void (...) @llvm.fake.use(i32 %arg)
- ret void
-}
-
-define void @use_donothing() {
-; CHECK-LABEL: define void @use_donothing(
-; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: ret void
-;
- call void @llvm.donothing()
- ret void
-}
-
-define void @use_assume(i1 %arg) {
-; CHECK-LABEL: define void @use_assume(
-; CHECK-SAME: i1 [[ARG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: call void @llvm.assume(i1 [[ARG]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.assume(i1 %arg)
- ret void
-}
-
-define void @use_trap() {
-; CHECK-LABEL: define void @use_trap(
-; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT: call void @llvm.trap()
-; CHECK-NEXT: ret void
-;
- call void @llvm.trap()
- ret void
-}
-
-define void @use_debugtrap() {
-; CHECK-LABEL: define void @use_debugtrap(
-; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT: call void @llvm.debugtrap()
-; CHECK-NEXT: ret void
-;
- call void @llvm.debugtrap()
- ret void
-}
-
-define void @use_ubsantrap() {
-; CHECK-LABEL: define void @use_ubsantrap(
-; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT: call void @llvm.ubsantrap(i8 0)
-; CHECK-NEXT: ret void
-;
- call void @llvm.ubsantrap(i8 0)
- ret void
-}
-
-;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR6:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
-;.
More information about the llvm-commits
mailing list