[llvm] [AMDGPU] Add alignment attr & propagate alignment through make.buffer.rsrc inst (PR #145278)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 23 22:04:47 PDT 2025
https://github.com/Shoreshen updated https://github.com/llvm/llvm-project/pull/145278
>From 888df5412b37bd3f232bdb38c9f89786d042fe75 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Mon, 23 Jun 2025 14:12:15 +0800
Subject: [PATCH 1/2] Add alignment attr & propagate alignment through
make.buffer.rsrc inst
---
llvm/include/llvm/Transforms/IPO/Attributor.h | 22 ++++++++++
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 37 ++++++++++++++++-
.../Transforms/IPO/AttributorAttributes.cpp | 14 ++++++-
llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll | 40 +++++++++++++++++++
4 files changed, 110 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index e6eb756df987d..64285c2114976 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1355,6 +1355,12 @@ struct InformationCache {
/// Return the flat address space if the associated target has.
LLVM_ABI std::optional<unsigned> getFlatAddressSpace() const;
/// Target-overridable hook: return true if the Attributor should keep
/// tracking uses of \p AssociatedValue through the user instruction \p I
/// while deducing attribute \p QueryingAA (consulted from
/// getKnownAlignForUse, which sets TrackUse when this returns true).
/// The default implementation tracks nothing; targets override it to look
/// through target-specific intrinsics.
virtual bool shouldTrackUse(const AbstractAttribute *QueryingAA,
                            Value &AssociatedValue, const Use *U,
                            const Instruction *I) const {
  return false;
}
+
private:
struct FunctionInfo {
LLVM_ABI ~FunctionInfo();
@@ -2042,6 +2048,19 @@ struct Attributor {
SimplificationCallbacks[IRP].emplace_back(CB);
}
/// Type of a callback that supplies the values an alignment (AAAlign)
/// deduction at a given position should be computed from, bypassing the
/// generic simplified-values query.
using AlignmentCallbackTy =
    std::function<void(const IRPosition &, const AbstractAttribute *,
                       SmallVectorImpl<AA::ValueAndContext> &)>;

/// Register \p CB as an alignment callback for the position \p IRP.
void registerAlignmentCallback(const IRPosition &IRP,
                               const AlignmentCallbackTy &CB) {
  AlignmentCallBacks[IRP].emplace_back(CB);
}

/// Return all alignment callbacks registered for \p IRP (empty if none).
/// NOTE(review): DenseMap::lookup returns the mapped value by value, so
/// every query copies a whole vector of std::functions; consider returning
/// a const reference / ArrayRef into the map instead.
SmallVector<AlignmentCallbackTy, 1>
getAlignmentCallback(const IRPosition &IRP) {
  return AlignmentCallBacks.lookup(IRP);
}
+
/// Return true if there is a simplification callback for \p IRP.
bool hasSimplificationCallback(const IRPosition &IRP) {
return SimplificationCallbacks.count(IRP);
@@ -2093,6 +2112,9 @@ struct Attributor {
DenseMap<IRPosition, SmallVector<SimplifictionCallbackTy, 1>>
SimplificationCallbacks;
+ /// The vector with AAAlign callbacks registered by outside AAs.
+ DenseMap<IRPosition, SmallVector<AlignmentCallbackTy, 1>> AlignmentCallBacks;
+
/// The vector with all simplification callbacks for global variables
/// registered by outside AAs.
DenseMap<const GlobalVariable *,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index f4d3a014f9921..0731dcfbcd05c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -138,6 +138,18 @@ static bool funcRequiresHostcallPtr(const Function &F) {
F.hasFnAttribute(Attribute::SanitizeMemTag);
}
+static bool isAlignAndMakeBuffer(const AbstractAttribute *AA,
+ const Instruction *I) {
+ if (isa<AAAlign>(AA)) {
+ if (const auto *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
+ return true;
+ }
+ }
+
+ return false;
+}
+
namespace {
class AMDGPUInformationCache : public InformationCache {
public:
@@ -235,6 +247,12 @@ class AMDGPUInformationCache : public InformationCache {
return ST.getMaxWavesPerEU();
}
/// AMDGPU override of InformationCache::shouldTrackUse: ask the Attributor
/// to keep following uses through llvm.amdgcn.make.buffer.rsrc calls when
/// deducing alignment (AAAlign), so alignment can be related between the
/// source pointer and the buffer fat pointer. \p AssociatedValue and \p U
/// are intentionally unused; the decision depends only on the attribute
/// kind and the user instruction.
bool shouldTrackUse(const AbstractAttribute *QueryingAA,
                    Value &AssociatedValue, const Use *U,
                    const Instruction *I) const override {
  return isAlignAndMakeBuffer(QueryingAA, I);
}
+
private:
/// Check if the ConstantExpr \p CE uses an addrspacecast from private or
/// local to flat. These casts may require the queue pointer.
@@ -1381,7 +1399,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
&AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
&AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
&AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
- &AAInstanceInfo::ID});
+ &AAInstanceInfo::ID, &AAAlign::ID});
AttributorConfig AC(CGUpdater);
AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1432,6 +1450,23 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
} else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I)) {
A.getOrCreateAAFor<AAAddressSpace>(
IRPosition::value(*CmpX->getPointerOperand()));
+ } else if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc) {
+ IRPosition IRP = IRPosition::inst(*II);
+
+ Attributor::AlignmentCallbackTy ACB =
+ [](const IRPosition &IRP, const AbstractAttribute *AA,
+ SmallVectorImpl<AA::ValueAndContext> &Values) {
+ if (auto *I = dyn_cast<Instruction>(&IRP.getAssociatedValue()))
+ if (isAlignAndMakeBuffer(AA, I)) {
+ Values.push_back(
+ AA::ValueAndContext{*I->getOperand(0), nullptr});
+ }
+ };
+ A.registerAlignmentCallback(IRP, ACB);
+
+ A.getOrCreateAAFor<AAAlign>(IRP);
+ }
}
}
}
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 3799a696f67af..cca03b30e75c7 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -5202,6 +5202,10 @@ static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,
TrackUse = true;
return 0;
}
+ if (A.getInfoCache().shouldTrackUse(&QueryingAA, AssociatedValue, U, I)) {
+ TrackUse = true;
+ return 0;
+ }
MaybeAlign MA;
if (const auto *CB = dyn_cast<CallBase>(I)) {
@@ -5369,8 +5373,14 @@ struct AAAlignFloating : AAAlignImpl {
bool Stripped;
bool UsedAssumedInformation = false;
SmallVector<AA::ValueAndContext> Values;
- if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values,
- AA::AnyScope, UsedAssumedInformation)) {
+ const auto &AligmentCBs = A.getAlignmentCallback(getIRPosition());
+ if (!AligmentCBs.empty()) {
+ for (const auto &CB : AligmentCBs) {
+ CB(getIRPosition(), this, Values);
+ }
+ } else if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values,
+ AA::AnyScope,
+ UsedAssumedInformation)) {
Values.push_back({getAssociatedValue(), getCtxI()});
Stripped = false;
} else {
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll
new file mode 100644
index 0000000000000..85f77735bf2b6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s -o - | FileCheck %s
+
; Backward propagation: the load through the buffer fat pointer is align 8,
; so the attributor raises the align attribute on %x from 4 to 8 through
; llvm.amdgcn.make.buffer.rsrc (see the CHECK-SAME line: align 8 on [[X]]).
define float @align_back_prop(ptr addrspace(1) align 4 %x) {
; CHECK-LABEL: define float @align_back_prop(
; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[FAT_PTR:%.*]] = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i32 256, i32 0)
; CHECK-NEXT: [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
; CHECK-NEXT: ret float [[Y]]
;
  %fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i32 256, i32 0)
  %y = load float, ptr addrspace(7) %fat.ptr, align 8
  ret float %y
}
+
; Forward propagation: %x is declared align 8, so the load through the fat
; pointer produced by llvm.amdgcn.make.buffer.rsrc is raised from align 4
; to align 8 (see the second CHECK-NEXT line).
; NOTE(review): "foward" in the function name is a typo for "forward";
; renaming requires updating the CHECK-LABEL/CHECK-SAME lines to match.
define float @align_foward_prop(ptr addrspace(1) align 8 %x) {
; CHECK-LABEL: define float @align_foward_prop(
; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[FAT_PTR:%.*]] = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i32 256, i32 0)
; CHECK-NEXT: [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
; CHECK-NEXT: ret float [[Y]]
;
  %fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i32 256, i32 0)
  %y = load float, ptr addrspace(7) %fat.ptr, align 4
  ret float %y
}
+
; Mixed propagation: the align-8 load directly through %x raises the
; argument's alignment from 4 to 8, which then propagates forward through
; llvm.amdgcn.make.buffer.rsrc, raising the fat-pointer load from align 2
; to align 8 (both loads CHECK as align 8).
define float @align_mix_prop(ptr addrspace(1) align 4 %x) {
; CHECK-LABEL: define float @align_mix_prop(
; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[FAT_PTR:%.*]] = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i32 256, i32 0)
; CHECK-NEXT: [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
; CHECK-NEXT: [[Z:%.*]] = load float, ptr addrspace(1) [[X]], align 8
; CHECK-NEXT: ret float [[Z]]
;
  %fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i32 256, i32 0)
  %y = load float, ptr addrspace(7) %fat.ptr, align 2
  %z = load float, ptr addrspace(1) %x, align 8
  ret float %z
}
>From 598533bd714f22ee0811ba241d19b68163c27ef2 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 24 Jun 2025 13:04:31 +0800
Subject: [PATCH 2/2] fix shilei's comment
---
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 3dc733dee8dae..6d440f34c9721 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1457,11 +1457,14 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
Attributor::AlignmentCallbackTy ACB =
[](const IRPosition &IRP, const AbstractAttribute *AA,
SmallVectorImpl<AA::ValueAndContext> &Values) {
- if (auto *I = dyn_cast<Instruction>(&IRP.getAssociatedValue()))
- if (isAlignAndMakeBuffer(AA, I)) {
+ Instruction *I = IRP.getCtxI();
+ if (!I)
+ return;
+ if (auto *II = dyn_cast<IntrinsicInst>(I))
+ if (II->getIntrinsicID() ==
+ Intrinsic::amdgcn_make_buffer_rsrc)
Values.push_back(
AA::ValueAndContext{*I->getOperand(0), nullptr});
- }
};
A.registerAlignmentCallback(IRP, ACB);
More information about the llvm-commits
mailing list