[llvm] [AMDGPU] Add buffer.fat.ptr.load.lds intrinsic wrapping raw rsrc version (PR #133015)

Krzysztof Drewniak via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 1 16:46:17 PDT 2025


https://github.com/krzysz00 updated https://github.com/llvm/llvm-project/pull/133015

>From 2fd82272459f63fb3e367d18b9a385a30e1609ae Mon Sep 17 00:00:00 2001
From: Krzysztof Drewniak <Krzysztof.Drewniak at amd.com>
Date: Tue, 25 Mar 2025 22:58:17 +0000
Subject: [PATCH 1/2] [AMDGPU] Add buffer.fat.ptr.load.lds intrinsic wrapping
 raw rsrc version

Add a buffer_fat_ptr_load_lds intrinsic, by analogy with
global_load_lds, which enables using `ptr addrspace(7)` to set the
rsrc and offset arguments to raw_ptr_buffer_load_lds.
---
 llvm/include/llvm/IR/IntrinsicsAMDGPU.td      | 21 +++++++++++++++++++
 .../AMDGPU/AMDGPULowerBufferFatPointers.cpp   | 21 +++++++++++++++++++
 .../lower-buffer-fat-pointers-mem-transfer.ll | 18 ++++++++++++++++
 3 files changed, 60 insertions(+)

diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index f53016f62abbe..15094ca37e550 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1939,6 +1939,27 @@ def int_amdgcn_s_buffer_prefetch_data : DefaultAttrsIntrinsic <
 
 } // defset AMDGPUBufferIntrinsics
 
+// A wrapper around raw_ptr_buffer_load_lds that takes the global offset
+// from the addrspace(7) pointer argument.
+def int_amdgcn_buffer_fat_ptr_load_lds : Intrinsic <
+  [],
+  [LLVMQualPointerType<7>,    // buffer fat pointer (SGPRx4 + VGPR)
+   LLVMQualPointerType<3>,    // LDS base offset
+   llvm_i32_ty,               // Data byte size: 1/2/4 (/12/16 for gfx950)
+   llvm_i32_ty,               // imm offset(imm, included in bounds checking and swizzling)
+   llvm_i32_ty],              // auxiliary/cachepolicy(imm):
+                              //                bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
+                              //                bit 3 = swz, bit 4 = scc (gfx90a)
+                              //        gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+                              //        gfx12+: bits [0-2] = th, bits [3-4] = scope,
+                              //                bit 6 = swz
+                              //           all: volatile op (bit 31, stripped at lowering)
+  [IntrWillReturn, IntrArgMemOnly,
+   ReadOnly<ArgIndex<0>>, NoCapture<ArgIndex<0>>,
+   WriteOnly<ArgIndex<1>>, NoCapture<ArgIndex<1>>,
+   ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>,
+   ImmArg<ArgIndex<4>>, IntrNoCallback, IntrNoFree], "", [SDNPMemOperand]>;
+
 // Uses that do not set the done bit should set IntrWriteMem on the
 // call site.
 def int_amdgcn_exp : DefaultAttrsIntrinsic <[], [
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
index 5dd1fe14e5626..0ed4bab4d34da 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
@@ -2157,6 +2157,7 @@ static bool isRemovablePointerIntrinsic(Intrinsic::ID IID) {
   case Intrinsic::memset:
   case Intrinsic::memset_inline:
   case Intrinsic::experimental_memset_pattern:
+  case Intrinsic::amdgcn_buffer_fat_ptr_load_lds:
     return true;
   }
 }
@@ -2245,6 +2246,26 @@ PtrParts SplitPtrStructs::visitIntrinsicInst(IntrinsicInst &I) {
     SplitUsers.insert(&I);
     return {NewRsrc, Off};
   }
+  case Intrinsic::amdgcn_buffer_fat_ptr_load_lds: {
+    Value *BufferPtr = I.getArgOperand(0);
+    assert(isSplitFatPtr(BufferPtr->getType()) &&
+           "amdgcn.buffer.fat.pointer.load.lds has a buffer fat pointer as "
+           "argument 0");
+    IRB.SetInsertPoint(&I);
+    auto [Rsrc, Off] = getPtrParts(BufferPtr);
+    Value *LDSPtr = I.getArgOperand(1);
+    Value *LoadSize = I.getArgOperand(2);
+    Value *ImmOff = I.getArgOperand(3);
+    Value *Aux = I.getArgOperand(4);
+    Value *SOffset = IRB.getInt32(0);
+    Instruction *NewLoad = IRB.CreateIntrinsic(
+        Intrinsic::amdgcn_raw_ptr_buffer_load_lds, {},
+        {Rsrc, LDSPtr, LoadSize, Off, SOffset, ImmOff, Aux});
+    copyMetadata(NewLoad, &I);
+    SplitUsers.insert(&I);
+    I.replaceAllUsesWith(NewLoad);
+    return {nullptr, nullptr};
+  }
   }
   return {nullptr, nullptr};
 }
diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-mem-transfer.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-mem-transfer.ll
index e6c2d1907068f..1d019d737bde5 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-mem-transfer.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-mem-transfer.ll
@@ -1728,3 +1728,21 @@ define void @memset_pattern_unknown(ptr addrspace(7) inreg %ptr, i32 inreg %leng
   call void @llvm.experimental.memset.pattern.p7.i32.i32(ptr addrspace(7) %ptr, i32 1, i32 %length, i1 false)
   ret void
 }
+
+;;; Buffer load to LDS
+
+declare void @llvm.amdgcn.buffer.fat.ptr.load.lds(ptr addrspace(7), ptr addrspace(3), i32 immarg, i32 immarg, i32 immarg)
+
+define void @llvm_amdgcn_buffer_fat_ptr_load_lds(ptr addrspace(7) inreg %p, ptr addrspace(3) inreg %l, i32 %idx) {
+; CHECK-LABEL: define void @llvm_amdgcn_buffer_fat_ptr_load_lds(
+; CHECK-SAME: { ptr addrspace(8), i32 } inreg [[P:%.*]], ptr addrspace(3) inreg [[L:%.*]], i32 [[IDX:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[P_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[P]], 0
+; CHECK-NEXT:    [[P_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[P]], 1
+; CHECK-NEXT:    [[Q:%.*]] = add i32 [[P_OFF]], [[IDX]]
+; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.load.lds(ptr addrspace(8) [[P_RSRC]], ptr addrspace(3) [[L]], i32 4, i32 [[Q]], i32 0, i32 16, i32 0)
+; CHECK-NEXT:    ret void
+;
+  %q = getelementptr i8, ptr addrspace(7) %p, i32 %idx
+  call void @llvm.amdgcn.buffer.fat.ptr.load.lds(ptr addrspace(7) %q, ptr addrspace(3) %l, i32 4, i32 16, i32 0)
+  ret void
+}

>From 2bce633dd6497f610e8df71249b464f3c7589fe3 Mon Sep 17 00:00:00 2001
From: Krzysztof Drewniak <Krzysztof.Drewniak at amd.com>
Date: Tue, 1 Apr 2025 23:46:00 +0000
Subject: [PATCH 2/2] Clean up assert error

---
 llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
index 0ed4bab4d34da..5f684afe55b59 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
@@ -2249,8 +2249,8 @@ PtrParts SplitPtrStructs::visitIntrinsicInst(IntrinsicInst &I) {
   case Intrinsic::amdgcn_buffer_fat_ptr_load_lds: {
     Value *BufferPtr = I.getArgOperand(0);
     assert(isSplitFatPtr(BufferPtr->getType()) &&
-           "amdgcn.buffer.fat.pointer.load.lds has a buffer fat pointer as "
-           "argument 0");
+           "amdgcn.buffer.fat.pointer.load.lds must have a buffer fat pointer "
+           "as argument 0");
     IRB.SetInsertPoint(&I);
     auto [Rsrc, Off] = getPtrParts(BufferPtr);
     Value *LDSPtr = I.getArgOperand(1);



More information about the llvm-commits mailing list