[llvm] 1f6e13a - Revert "[AMDGPU][Attributor] Infer inreg attribute in `AMDGPUAttributor` (#146720)"
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 18 20:00:18 PDT 2025
Author: Shilei Tian
Date: 2025-08-18T22:59:52-04:00
New Revision: 1f6e13a161abe25e00e4410c550e5163abee2290
URL: https://github.com/llvm/llvm-project/commit/1f6e13a161abe25e00e4410c550e5163abee2290
DIFF: https://github.com/llvm/llvm-project/commit/1f6e13a161abe25e00e4410c550e5163abee2290.diff
LOG: Revert "[AMDGPU][Attributor] Infer inreg attribute in `AMDGPUAttributor` (#146720)"
This reverts commit 84ab301554f8b8b16b94263a57b091b07e9204f2 because it breaks
several AMDGPU test bots.
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll
Removed:
llvm/test/CodeGen/AMDGPU/aa-inreg-inference.ll
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index cad9a14661bf4..59cc1df292f46 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -13,7 +13,6 @@
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "Utils/AMDGPUBaseInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
@@ -1297,116 +1296,6 @@ struct AAAMDGPUNoAGPR
const char AAAMDGPUNoAGPR::ID = 0;
-struct AAAMDGPUUniform : public StateWrapper<BooleanState, AbstractAttribute> {
- using Base = StateWrapper<BooleanState, AbstractAttribute>;
- AAAMDGPUUniform(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
-
- /// Create an abstract attribute view for the position \p IRP.
- static AAAMDGPUUniform &createForPosition(const IRPosition &IRP,
- Attributor &A);
-
- /// See AbstractAttribute::getName()
- StringRef getName() const override { return "AAAMDGPUUniform"; }
-
- const std::string getAsStr(Attributor *A) const override {
- return getAssumed() ? "uniform" : "divergent";
- }
-
- void trackStatistics() const override {}
-
- /// See AbstractAttribute::getIdAddr()
- const char *getIdAddr() const override { return &ID; }
-
- /// This function should return true if the type of the \p AA is
- /// AAAMDGPUUniform
- static bool classof(const AbstractAttribute *AA) {
- return (AA->getIdAddr() == &ID);
- }
-
- /// Unique ID (due to the unique address)
- static const char ID;
-};
-
-const char AAAMDGPUUniform::ID = 0;
-
-/// This AA is to infer the inreg attribute for a function argument.
-struct AAAMDGPUUniformArgument : public AAAMDGPUUniform {
- AAAMDGPUUniformArgument(const IRPosition &IRP, Attributor &A)
- : AAAMDGPUUniform(IRP, A) {}
-
- void initialize(Attributor &A) override {
- Argument *Arg = getAssociatedArgument();
- CallingConv::ID CC = Arg->getParent()->getCallingConv();
- if (Arg->hasAttribute(Attribute::InReg)) {
- indicateOptimisticFixpoint();
- return;
- }
-
- if (AMDGPU::isEntryFunctionCC(CC)) {
- // We only use isArgPassedInSGPR on kernel entry function argument, so
- // even if we will use SPGR for non-uniform i1 argument passing, it will
- // not affect this.
- if (AMDGPU::isArgPassedInSGPR(Arg))
- indicateOptimisticFixpoint();
- else
- indicatePessimisticFixpoint();
- }
- }
-
- ChangeStatus updateImpl(Attributor &A) override {
- unsigned ArgNo = getAssociatedArgument()->getArgNo();
-
- auto isUniform = [&](AbstractCallSite ACS) -> bool {
- CallBase *CB = ACS.getInstruction();
- Value *V = CB->getArgOperand(ArgNo);
- if (isa<Constant>(V))
- return true;
- if (auto *Arg = dyn_cast<Argument>(V)) {
- auto *AA = A.getOrCreateAAFor<AAAMDGPUUniform>(
- IRPosition::argument(*Arg), this, DepClassTy::REQUIRED);
- return AA && AA->isValidState();
- }
- const TargetTransformInfo *TTI =
- A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(
- *CB->getFunction());
- return TTI->isAlwaysUniform(V);
- };
-
- bool UsedAssumedInformation = true;
- if (!A.checkForAllCallSites(isUniform, *this, /*RequireAllCallSites=*/true,
- UsedAssumedInformation))
- return indicatePessimisticFixpoint();
-
- if (!UsedAssumedInformation)
- return indicateOptimisticFixpoint();
-
- return ChangeStatus::UNCHANGED;
- }
-
- ChangeStatus manifest(Attributor &A) override {
- Argument *Arg = getAssociatedArgument();
- // If the argument already has inreg attribute, we will not do anything
- // about it.
- if (Arg->hasAttribute(Attribute::InReg))
- return ChangeStatus::UNCHANGED;
- if (AMDGPU::isEntryFunctionCC(Arg->getParent()->getCallingConv()))
- return ChangeStatus::UNCHANGED;
- LLVMContext &Ctx = Arg->getContext();
- return A.manifestAttrs(getIRPosition(),
- {Attribute::get(Ctx, Attribute::InReg)});
- }
-};
-
-AAAMDGPUUniform &AAAMDGPUUniform::createForPosition(const IRPosition &IRP,
- Attributor &A) {
- switch (IRP.getPositionKind()) {
- case IRPosition::IRP_ARGUMENT:
- return *new (A.Allocator) AAAMDGPUUniformArgument(IRP, A);
- default:
- llvm_unreachable("not a valid position for AAAMDGPUUniform");
- }
-}
-
/// Performs the final check and updates the 'amdgpu-waves-per-eu' attribute
/// based on the finalized 'amdgpu-flat-work-group-size' attribute.
/// Both attributes start with narrow ranges that expand during iteration.
@@ -1493,7 +1382,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
&AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
&AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
&AAUnderlyingObjects::ID, &AANoAliasAddrSpace::ID, &AAAddressSpace::ID,
- &AAIndirectCallInfo::ID, &AAAMDGPUUniform::ID});
+ &AAIndirectCallInfo::ID});
AttributorConfig AC(CGUpdater);
AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1546,11 +1435,6 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
A.getOrCreateAAFor<AAAddressSpace>(IRPosition::value(*Ptr));
A.getOrCreateAAFor<AANoAliasAddrSpace>(IRPosition::value(*Ptr));
}
-
- if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
- for (auto &Arg : F->args())
- A.getOrCreateAAFor<AAAMDGPUUniform>(IRPosition::argument(Arg));
- }
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/aa-inreg-inference.ll b/llvm/test/CodeGen/AMDGPU/aa-inreg-inference.ll
deleted file mode 100644
index 22cfd4827e5da..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/aa-inreg-inference.ll
+++ /dev/null
@@ -1,85 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s -o - | FileCheck %s
-
-@g1 = protected addrspace(1) externally_initialized global i32 0, align 4
-@g2 = protected addrspace(1) externally_initialized global i32 0, align 4
-@g3 = protected addrspace(1) externally_initialized global i32 0, align 4
-
-define internal void @callee_with_always_uniform_argument(ptr addrspace(1) %x, i32 %y) {
-; CHECK-LABEL: define internal void @callee_with_always_uniform_argument(
-; CHECK-SAME: ptr addrspace(1) inreg [[X:%.*]], i32 inreg [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[X_VAL:%.*]] = load i32, ptr addrspace(1) [[X]], align 4
-; CHECK-NEXT: store i32 [[X_VAL]], ptr addrspace(1) @g2, align 4
-; CHECK-NEXT: store i32 [[Y]], ptr addrspace(1) @g3, align 4
-; CHECK-NEXT: ret void
-;
-entry:
- %x.val = load i32, ptr addrspace(1) %x, align 4
- store i32 %x.val, ptr addrspace(1) @g2, align 4
- store i32 %y, ptr addrspace(1) @g3, align 4
- ret void
-}
-
-define amdgpu_kernel void @kernel_with_readfirstlane(ptr addrspace(1) %p, i32 %x) {
-; CHECK-LABEL: define amdgpu_kernel void @kernel_with_readfirstlane(
-; CHECK-SAME: ptr addrspace(1) [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[P0:%.*]] = call ptr addrspace(1) @llvm.amdgcn.readfirstlane.p1(ptr addrspace(1) [[P]])
-; CHECK-NEXT: call void @callee_with_always_uniform_argument(ptr addrspace(1) [[P0]], i32 [[X]])
-; CHECK-NEXT: ret void
-;
-entry:
- %p0 = call ptr addrspace(1) @llvm.amdgcn.readfirstlane.p1(ptr addrspace(1) %p)
- call void @callee_with_always_uniform_argument(ptr addrspace(1) %p0, i32 %x)
- ret void
-}
-
-define amdgpu_kernel void @kernel_with_constant(i32 %x) {
-; CHECK-LABEL: define amdgpu_kernel void @kernel_with_constant(
-; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: call void @callee_with_always_uniform_argument(ptr addrspace(1) @g1, i32 [[X]])
-; CHECK-NEXT: ret void
-;
-entry:
- call void @callee_with_always_uniform_argument(ptr addrspace(1) @g1, i32 %x)
- ret void
-}
-
-define internal void @callee_without_always_uniform_argument(ptr addrspace(1) %x, i32 %y) {
-; CHECK-LABEL: define internal void @callee_without_always_uniform_argument(
-; CHECK-SAME: ptr addrspace(1) [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[X_VAL:%.*]] = load i32, ptr addrspace(1) [[X]], align 4
-; CHECK-NEXT: store i32 [[X_VAL]], ptr addrspace(1) @g2, align 4
-; CHECK-NEXT: store i32 [[Y]], ptr addrspace(1) @g3, align 4
-; CHECK-NEXT: ret void
-;
-entry:
- %x.val = load i32, ptr addrspace(1) %x, align 4
- store i32 %x.val, ptr addrspace(1) @g2, align 4
- store i32 %y, ptr addrspace(1) @g3, align 4
- ret void
-}
-
-define amdgpu_kernel void @kernel_with_divergent_callsite_argument(ptr addrspace(1) %p, i32 %x) {
-; CHECK-LABEL: define amdgpu_kernel void @kernel_with_divergent_callsite_argument(
-; CHECK-SAME: ptr addrspace(1) [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[ENTRY:.*:]]
-; CHECK-NEXT: [[ID_X:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr addrspace(1) [[P]], i32 [[ID_X]]
-; CHECK-NEXT: [[D:%.*]] = load i32, ptr addrspace(1) [[GEP]], align 4
-; CHECK-NEXT: call void @callee_without_always_uniform_argument(ptr addrspace(1) [[GEP]], i32 [[D]])
-; CHECK-NEXT: ret void
-;
-entry:
- %id.x = call i32 @llvm.amdgcn.workitem.id.x()
- %gep = getelementptr i32, ptr addrspace(1) %p, i32 %id.x
- %d = load i32, ptr addrspace(1) %gep
- call void @callee_without_always_uniform_argument(ptr addrspace(1) %gep, i32 %d)
- ret void
-}
-
-declare ptr addrspace(1) @llvm.amdgcn.readfirstlane.p1(ptr addrspace(1))
-declare noundef i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll b/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll
index d4e213fecddf8..d91b2117c7ad9 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-noalias-addrspace.ll
@@ -480,7 +480,7 @@ bb.2.end:
define internal void @callee_no_alias_addr_space_select(ptr %ptr1, ptr %ptr2, ptr %ptr3, i1 %cond1, i1 %cond2, i32 %val) #0 {
; CHECK-LABEL: define internal void @callee_no_alias_addr_space_select(
-; CHECK-SAME: ptr [[PTR1:%.*]], ptr [[PTR2:%.*]], ptr [[PTR3:%.*]], i1 inreg [[COND1:%.*]], i1 inreg [[COND2:%.*]], i32 inreg [[VAL:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-SAME: ptr [[PTR1:%.*]], ptr [[PTR2:%.*]], ptr [[PTR3:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[PTR4:%.*]] = select i1 [[COND1]], ptr addrspacecast (ptr addrspace(1) @gptr to ptr), ptr addrspacecast (ptr addrspace(4) @gptr2 to ptr)
; CHECK-NEXT: [[PTR5:%.*]] = select i1 [[COND2]], ptr [[PTR4]], ptr addrspacecast (ptr addrspace(3) @gptr3 to ptr)
; CHECK-NEXT: store i32 [[VAL]], ptr [[PTR5]], align 4, !noalias.addrspace [[META1:![0-9]+]]
@@ -516,7 +516,7 @@ define internal void @callee_no_alias_addr_space_select(ptr %ptr1, ptr %ptr2, pt
define internal void @callee_alias_addr_space_branch(ptr %ptr1, ptr %ptr2, ptr %ptr3, i1 %cond1, i1 %cond2, i32 %val) #0 {
; CHECK-LABEL: define internal void @callee_alias_addr_space_branch(
-; CHECK-SAME: ptr [[PTR1:%.*]], ptr [[PTR2:%.*]], ptr [[PTR3:%.*]], i1 inreg [[COND1:%.*]], i1 inreg [[COND2:%.*]], i32 inreg [[VAL:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: ptr [[PTR1:%.*]], ptr [[PTR2:%.*]], ptr [[PTR3:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]], i32 [[VAL:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: br i1 [[COND1]], label %[[BB_1_TRUE:.*]], label %[[BB_1_FALSE:.*]]
; CHECK: [[BB_1_TRUE]]:
; CHECK-NEXT: br label %[[BB_1_END:.*]]
More information about the llvm-commits
mailing list