[llvm] [AMDGPU] Add alignment attr & propagate alignment through make.buffer.rsrc inst (PR #145278)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 23 22:04:47 PDT 2025
https://github.com/Shoreshen updated https://github.com/llvm/llvm-project/pull/145278
>From 888df5412b37bd3f232bdb38c9f89786d042fe75 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Mon, 23 Jun 2025 14:12:15 +0800
Subject: [PATCH 1/2] Add alignment attr & propagate alignment through
make.buffer.rsrc inst
---
llvm/include/llvm/Transforms/IPO/Attributor.h | 22 ++++++++++
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 37 ++++++++++++++++-
.../Transforms/IPO/AttributorAttributes.cpp | 14 ++++++-
llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll | 40 +++++++++++++++++++
4 files changed, 110 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index e6eb756df987d..64285c2114976 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1355,6 +1355,12 @@ struct InformationCache {
/// Return the flat address space if the associated target has.
LLVM_ABI std::optional<unsigned> getFlatAddressSpace() const;
/// Target-overridable hook: return true if the Attributor should keep
/// tracking uses of \p AssociatedValue through the user instruction \p I
/// while deducing attribute \p QueryingAA (consulted from
/// getKnownAlignForUse, which sets TrackUse when this returns true).
/// The default implementation tracks nothing; targets override it to look
/// through target-specific intrinsics.
virtual bool shouldTrackUse(const AbstractAttribute *QueryingAA,
                            Value &AssociatedValue, const Use *U,
                            const Instruction *I) const {
  return false;
}
+
private:
struct FunctionInfo {
LLVM_ABI ~FunctionInfo();
@@ -2042,6 +2048,19 @@ struct Attributor {
SimplificationCallbacks[IRP].emplace_back(CB);
}
/// Type of a callback that supplies the values an alignment (AAAlign)
/// deduction at a given position should be computed from, bypassing the
/// generic simplified-values query.
using AlignmentCallbackTy =
    std::function<void(const IRPosition &, const AbstractAttribute *,
                       SmallVectorImpl<AA::ValueAndContext> &)>;

/// Register \p CB as an alignment callback for the position \p IRP.
void registerAlignmentCallback(const IRPosition &IRP,
                               const AlignmentCallbackTy &CB) {
  AlignmentCallBacks[IRP].emplace_back(CB);
}

/// Return all alignment callbacks registered for \p IRP (empty if none).
/// NOTE(review): DenseMap::lookup returns the mapped value by value, so
/// every query copies a whole vector of std::functions; consider returning
/// a const reference / ArrayRef into the map instead.
SmallVector<AlignmentCallbackTy, 1>
getAlignmentCallback(const IRPosition &IRP) {
  return AlignmentCallBacks.lookup(IRP);
}
+
/// Return true if there is a simplification callback for \p IRP.
bool hasSimplificationCallback(const IRPosition &IRP) {
return SimplificationCallbacks.count(IRP);
@@ -2093,6 +2112,9 @@ struct Attributor {
DenseMap<IRPosition, SmallVector<SimplifictionCallbackTy, 1>>
SimplificationCallbacks;
+ /// The vector with AAAlign callbacks registered by outside AAs.
+ DenseMap<IRPosition, SmallVector<AlignmentCallbackTy, 1>> AlignmentCallBacks;
+
/// The vector with all simplification callbacks for global variables
/// registered by outside AAs.
DenseMap<const GlobalVariable *,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index f4d3a014f9921..0731dcfbcd05c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -138,6 +138,18 @@ static bool funcRequiresHostcallPtr(const Function &F) {
F.hasFnAttribute(Attribute::SanitizeMemTag);
}
+static bool isAlignAndMakeBuffer(const AbstractAttribute *AA,
+ const Instruction *I) {
+ if (isa<AAAlign>(AA)) {
+ if (const auto *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc)
+ return true;
+ }
+ }
+
+ return false;
+}
+
namespace {
class AMDGPUInformationCache : public InformationCache {
public:
@@ -235,6 +247,12 @@ class AMDGPUInformationCache : public InformationCache {
return ST.getMaxWavesPerEU();
}
/// AMDGPU override of InformationCache::shouldTrackUse: ask the Attributor
/// to keep following uses through llvm.amdgcn.make.buffer.rsrc calls when
/// deducing alignment (AAAlign), so alignment can be related between the
/// source pointer and the buffer fat pointer. \p AssociatedValue and \p U
/// are intentionally unused; the decision depends only on the attribute
/// kind and the user instruction.
bool shouldTrackUse(const AbstractAttribute *QueryingAA,
                    Value &AssociatedValue, const Use *U,
                    const Instruction *I) const override {
  return isAlignAndMakeBuffer(QueryingAA, I);
}
+
private:
/// Check if the ConstantExpr \p CE uses an addrspacecast from private or
/// local to flat. These casts may require the queue pointer.
@@ -1381,7 +1399,7 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
&AAAMDMaxNumWorkgroups::ID, &AAAMDWavesPerEU::ID, &AAAMDGPUNoAGPR::ID,
&AACallEdges::ID, &AAPointerInfo::ID, &AAPotentialConstantValues::ID,
&AAUnderlyingObjects::ID, &AAAddressSpace::ID, &AAIndirectCallInfo::ID,
- &AAInstanceInfo::ID});
+ &AAInstanceInfo::ID, &AAAlign::ID});
AttributorConfig AC(CGUpdater);
AC.IsClosedWorldModule = Options.IsClosedWorld;
@@ -1432,6 +1450,23 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
} else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I)) {
A.getOrCreateAAFor<AAAddressSpace>(
IRPosition::value(*CmpX->getPointerOperand()));
+ } else if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ if (II->getIntrinsicID() == Intrinsic::amdgcn_make_buffer_rsrc) {
+ IRPosition IRP = IRPosition::inst(*II);
+
+ Attributor::AlignmentCallbackTy ACB =
+ [](const IRPosition &IRP, const AbstractAttribute *AA,
+ SmallVectorImpl<AA::ValueAndContext> &Values) {
+ if (auto *I = dyn_cast<Instruction>(&IRP.getAssociatedValue()))
+ if (isAlignAndMakeBuffer(AA, I)) {
+ Values.push_back(
+ AA::ValueAndContext{*I->getOperand(0), nullptr});
+ }
+ };
+ A.registerAlignmentCallback(IRP, ACB);
+
+ A.getOrCreateAAFor<AAAlign>(IRP);
+ }
}
}
}
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 3799a696f67af..cca03b30e75c7 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -5202,6 +5202,10 @@ static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,
TrackUse = true;
return 0;
}
+ if (A.getInfoCache().shouldTrackUse(&QueryingAA, AssociatedValue, U, I)) {
+ TrackUse = true;
+ return 0;
+ }
MaybeAlign MA;
if (const auto *CB = dyn_cast<CallBase>(I)) {
@@ -5369,8 +5373,14 @@ struct AAAlignFloating : AAAlignImpl {
bool Stripped;
bool UsedAssumedInformation = false;
SmallVector<AA::ValueAndContext> Values;
- if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values,
- AA::AnyScope, UsedAssumedInformation)) {
+ const auto &AligmentCBs = A.getAlignmentCallback(getIRPosition());
+ if (!AligmentCBs.empty()) {
+ for (const auto &CB : AligmentCBs) {
+ CB(getIRPosition(), this, Values);
+ }
+ } else if (!A.getAssumedSimplifiedValues(getIRPosition(), *this, Values,
+ AA::AnyScope,
+ UsedAssumedInformation)) {
Values.push_back({getAssociatedValue(), getCtxI()});
Stripped = false;
} else {
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll
new file mode 100644
index 0000000000000..85f77735bf2b6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-align.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s -o - | FileCheck %s
+
; Backward propagation: the load through the buffer fat pointer is align 8,
; so the attributor raises the align attribute on %x from 4 to 8 through
; llvm.amdgcn.make.buffer.rsrc (see the CHECK-SAME line: align 8 on [[X]]).
define float @align_back_prop(ptr addrspace(1) align 4 %x) {
; CHECK-LABEL: define float @align_back_prop(
; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[FAT_PTR:%.*]] = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i32 256, i32 0)
; CHECK-NEXT: [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
; CHECK-NEXT: ret float [[Y]]
;
  %fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i32 256, i32 0)
  %y = load float, ptr addrspace(7) %fat.ptr, align 8
  ret float %y
}
+
; Forward propagation: %x is declared align 8, so the load through the fat
; pointer produced by llvm.amdgcn.make.buffer.rsrc is raised from align 4
; to align 8 (see the second CHECK-NEXT line).
; NOTE(review): "foward" in the function name is a typo for "forward";
; renaming requires updating the CHECK-LABEL/CHECK-SAME lines to match.
define float @align_foward_prop(ptr addrspace(1) align 8 %x) {
; CHECK-LABEL: define float @align_foward_prop(
; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[FAT_PTR:%.*]] = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i32 256, i32 0)
; CHECK-NEXT: [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
; CHECK-NEXT: ret float [[Y]]
;
  %fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i32 256, i32 0)
  %y = load float, ptr addrspace(7) %fat.ptr, align 4
  ret float %y
}
+
; Mixed propagation: the align-8 load directly through %x raises the
; argument's alignment from 4 to 8, which then propagates forward through
; llvm.amdgcn.make.buffer.rsrc, raising the fat-pointer load from align 2
; to align 8 (both loads CHECK as align 8).
define float @align_mix_prop(ptr addrspace(1) align 4 %x) {
; CHECK-LABEL: define float @align_mix_prop(
; CHECK-SAME: ptr addrspace(1) align 8 [[X:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[FAT_PTR:%.*]] = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) [[X]], i16 0, i32 256, i32 0)
; CHECK-NEXT: [[Y:%.*]] = load float, ptr addrspace(7) [[FAT_PTR]], align 8
; CHECK-NEXT: [[Z:%.*]] = load float, ptr addrspace(1) [[X]], align 8
; CHECK-NEXT: ret float [[Z]]
;
  %fat.ptr = call ptr addrspace(7) @llvm.amdgcn.make.buffer.rsrc.p7.p1(ptr addrspace(1) %x, i16 0, i32 256, i32 0)
  %y = load float, ptr addrspace(7) %fat.ptr, align 2
  %z = load float, ptr addrspace(1) %x, align 8
  ret float %z
}
>From 598533bd714f22ee0811ba241d19b68163c27ef2 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 24 Jun 2025 13:04:31 +0800
Subject: [PATCH 2/2] fix shilei's comment
---
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 3dc733dee8dae..6d440f34c9721 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1457,11 +1457,14 @@ static bool runImpl(Module &M, AnalysisGetter &AG, TargetMachine &TM,
Attributor::AlignmentCallbackTy ACB =
[](const IRPosition &IRP, const AbstractAttribute *AA,
SmallVectorImpl<AA::ValueAndContext> &Values) {
- if (auto *I = dyn_cast<Instruction>(&IRP.getAssociatedValue()))
- if (isAlignAndMakeBuffer(AA, I)) {
+ Instruction *I = IRP.getCtxI();
+ if (!I)
+ return;
+ if (auto *II = dyn_cast<IntrinsicInst>(I))
+ if (II->getIntrinsicID() ==
+ Intrinsic::amdgcn_make_buffer_rsrc)
Values.push_back(
AA::ValueAndContext{*I->getOperand(0), nullptr});
- }
};
A.registerAlignmentCallback(IRP, ACB);
More information about the llvm-commits
mailing list