[llvm-branch-commits] [llvm] AMDGPU: Fix buffer load/store of pointers (PR #95379)

Matt Arsenault via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Thu Jun 13 02:17:28 PDT 2024


https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/95379

Make sure we test all the address spaces since this support isn't
free in gisel.

>From b05179ed684e289ce31f7aee8b57939c7bf2809c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Wed, 12 Jun 2024 10:10:20 +0200
Subject: [PATCH] AMDGPU: Fix buffer load/store of pointers

Make sure we test all the address spaces since this support isn't
free in gisel.
---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  31 +-
 .../AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll | 596 ++++++++++++++++++
 .../llvm.amdgcn.raw.ptr.buffer.store.ll       | 144 +++++
 3 files changed, 759 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 81098201e9c0f..7a36c88b892c8 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1112,29 +1112,33 @@ unsigned SITargetLowering::getVectorTypeBreakdownForCallingConv(
     Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
 }
 
-static EVT memVTFromLoadIntrData(Type *Ty, unsigned MaxNumLanes) {
+static EVT memVTFromLoadIntrData(const SITargetLowering &TLI,
+                                 const DataLayout &DL, Type *Ty,
+                                 unsigned MaxNumLanes) {
   assert(MaxNumLanes != 0);
 
+  LLVMContext &Ctx = Ty->getContext();
   if (auto *VT = dyn_cast<FixedVectorType>(Ty)) {
     unsigned NumElts = std::min(MaxNumLanes, VT->getNumElements());
-    return EVT::getVectorVT(Ty->getContext(),
-                            EVT::getEVT(VT->getElementType()),
+    return EVT::getVectorVT(Ctx, TLI.getValueType(DL, VT->getElementType()),
                             NumElts);
   }
 
-  return EVT::getEVT(Ty);
+  return TLI.getValueType(DL, Ty);
 }
 
 // Peek through TFE struct returns to only use the data size.
-static EVT memVTFromLoadIntrReturn(Type *Ty, unsigned MaxNumLanes) {
+static EVT memVTFromLoadIntrReturn(const SITargetLowering &TLI,
+                                   const DataLayout &DL, Type *Ty,
+                                   unsigned MaxNumLanes) {
   auto *ST = dyn_cast<StructType>(Ty);
   if (!ST)
-    return memVTFromLoadIntrData(Ty, MaxNumLanes);
+    return memVTFromLoadIntrData(TLI, DL, Ty, MaxNumLanes);
 
   // TFE intrinsics return an aggregate type.
   assert(ST->getNumContainedTypes() == 2 &&
          ST->getContainedType(1)->isIntegerTy(32));
-  return memVTFromLoadIntrData(ST->getContainedType(0), MaxNumLanes);
+  return memVTFromLoadIntrData(TLI, DL, ST->getContainedType(0), MaxNumLanes);
 }
 
 /// Map address space 7 to MVT::v5i32 because that's its in-memory
@@ -1219,10 +1223,12 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
           MaxNumLanes = DMask == 0 ? 1 : llvm::popcount(DMask);
         }
 
-        Info.memVT = memVTFromLoadIntrReturn(CI.getType(), MaxNumLanes);
+        Info.memVT = memVTFromLoadIntrReturn(*this, MF.getDataLayout(),
+                                             CI.getType(), MaxNumLanes);
       } else {
-        Info.memVT = memVTFromLoadIntrReturn(
-            CI.getType(), std::numeric_limits<unsigned>::max());
+        Info.memVT =
+            memVTFromLoadIntrReturn(*this, MF.getDataLayout(), CI.getType(),
+                                    std::numeric_limits<unsigned>::max());
       }
 
       // FIXME: What does alignment mean for an image?
@@ -1235,9 +1241,10 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
       if (RsrcIntr->IsImage) {
         unsigned DMask = cast<ConstantInt>(CI.getArgOperand(1))->getZExtValue();
         unsigned DMaskLanes = DMask == 0 ? 1 : llvm::popcount(DMask);
-        Info.memVT = memVTFromLoadIntrData(DataTy, DMaskLanes);
+        Info.memVT = memVTFromLoadIntrData(*this, MF.getDataLayout(), DataTy,
+                                           DMaskLanes);
       } else
-        Info.memVT = EVT::getEVT(DataTy);
+        Info.memVT = getValueType(MF.getDataLayout(), DataTy);
 
       Info.flags |= MachineMemOperand::MOStore;
     } else {
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll
index 3e3371091ef72..4d557c76dc4d0 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.load.ll
@@ -1280,6 +1280,602 @@ define <2 x i64> @buffer_load_v2i64__voffset_add(ptr addrspace(8) inreg %rsrc, i
   ret <2 x i64> %data
 }
 
+define ptr @buffer_load_p0__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; PREGFX10-LABEL: buffer_load_p0__voffset_add:
+; PREGFX10:       ; %bb.0:
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; PREGFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: buffer_load_p0__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_p0__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call ptr @llvm.amdgcn.raw.ptr.buffer.load.p0(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret ptr %data
+}
+
+define <2 x ptr> @buffer_load_v2p0__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; PREGFX10-LABEL: buffer_load_v2p0__voffset_add:
+; PREGFX10:       ; %bb.0:
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: buffer_load_v2p0__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_v2p0__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call <2 x ptr> @llvm.amdgcn.raw.ptr.buffer.load.p0(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret <2 x ptr> %data
+}
+
+define ptr addrspace(1) @buffer_load_p1__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; PREGFX10-LABEL: buffer_load_p1__voffset_add:
+; PREGFX10:       ; %bb.0:
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; PREGFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: buffer_load_p1__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_p1__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call ptr addrspace(1) @llvm.amdgcn.raw.ptr.buffer.load.p1(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret ptr addrspace(1) %data
+}
+
+define <2 x ptr addrspace(1)> @buffer_load_v2p1__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; PREGFX10-LABEL: buffer_load_v2p1__voffset_add:
+; PREGFX10:       ; %bb.0:
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: buffer_load_v2p1__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_v2p1__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call <2 x ptr addrspace(1)> @llvm.amdgcn.raw.ptr.buffer.load.p1(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret <2 x ptr addrspace(1)> %data
+}
+
+define ptr addrspace(4) @buffer_load_p4__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; PREGFX10-LABEL: buffer_load_p4__voffset_add:
+; PREGFX10:       ; %bb.0:
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; PREGFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: buffer_load_p4__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_p4__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call ptr addrspace(4) @llvm.amdgcn.raw.ptr.buffer.load.p4(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret ptr addrspace(4) %data
+}
+
+define <2 x ptr addrspace(4)> @buffer_load_v2p4__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; PREGFX10-LABEL: buffer_load_v2p4__voffset_add:
+; PREGFX10:       ; %bb.0:
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: buffer_load_v2p4__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_v2p4__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call <2 x ptr addrspace(4)> @llvm.amdgcn.raw.ptr.buffer.load.p4(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret <2 x ptr addrspace(4)> %data
+}
+
+define ptr addrspace(999) @buffer_load_p999__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; PREGFX10-LABEL: buffer_load_p999__voffset_add:
+; PREGFX10:       ; %bb.0:
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; PREGFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: buffer_load_p999__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_p999__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call ptr addrspace(999) @llvm.amdgcn.raw.ptr.buffer.load.p999(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret ptr addrspace(999) %data
+}
+
+define <2 x ptr addrspace(999)> @buffer_load_v2p999__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; PREGFX10-LABEL: buffer_load_v2p999__voffset_add:
+; PREGFX10:       ; %bb.0:
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: buffer_load_v2p999__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_v2p999__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call <2 x ptr addrspace(999)> @llvm.amdgcn.raw.ptr.buffer.load.p999(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret <2 x ptr addrspace(999)> %data
+}
+
+define ptr addrspace(2) @buffer_load_p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; PREGFX10-LABEL: buffer_load_p2__voffset_add:
+; PREGFX10:       ; %bb.0:
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; PREGFX10-NEXT:    buffer_load_dword v0, v0, s[4:7], 0 offen offset:60
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: buffer_load_p2__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dword v0, v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_p2__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call ptr addrspace(2) @llvm.amdgcn.raw.ptr.buffer.load.p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret ptr addrspace(2) %data
+}
+
+define <2 x ptr addrspace(2)> @buffer_load_v2p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; PREGFX10-LABEL: buffer_load_v2p2__voffset_add:
+; PREGFX10:       ; %bb.0:
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; PREGFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: buffer_load_v2p2__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_v2p2__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call <2 x ptr addrspace(2)> @llvm.amdgcn.raw.ptr.buffer.load.v2p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret <2 x ptr addrspace(2)> %data
+}
+
+define <3 x ptr addrspace(2)> @buffer_load_v3p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; GFX10-LABEL: buffer_load_v3p2__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dwordx3 v[0:2], v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_v3p2__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call <3 x ptr addrspace(2)> @llvm.amdgcn.raw.ptr.buffer.load.v3p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret <3 x ptr addrspace(2)> %data
+}
+
+define <4 x ptr addrspace(2)> @buffer_load_v4p2__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; PREGFX10-LABEL: buffer_load_v4p2__voffset_add:
+; PREGFX10:       ; %bb.0:
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: buffer_load_v4p2__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_v4p2__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call <4 x ptr addrspace(2)> @llvm.amdgcn.raw.ptr.buffer.load.v4p2(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret <4 x ptr addrspace(2)> %data
+}
+
+define ptr addrspace(3) @buffer_load_p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; PREGFX10-LABEL: buffer_load_p3__voffset_add:
+; PREGFX10:       ; %bb.0:
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; PREGFX10-NEXT:    buffer_load_dword v0, v0, s[4:7], 0 offen offset:60
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: buffer_load_p3__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dword v0, v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_p3__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call ptr addrspace(3) @llvm.amdgcn.raw.ptr.buffer.load.p3(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret ptr addrspace(3) %data
+}
+
+define <2 x ptr addrspace(3)> @buffer_load_v2p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; PREGFX10-LABEL: buffer_load_v2p3__voffset_add:
+; PREGFX10:       ; %bb.0:
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; PREGFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: buffer_load_v2p3__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_v2p3__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call <2 x ptr addrspace(3)> @llvm.amdgcn.raw.ptr.buffer.load.v2p3(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret <2 x ptr addrspace(3)> %data
+}
+
+define <3 x ptr addrspace(3)> @buffer_load_v3p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; GFX10-LABEL: buffer_load_v3p3__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dwordx3 v[0:2], v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_v3p3__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call <3 x ptr addrspace(3)> @llvm.amdgcn.raw.ptr.buffer.load.v3p3(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret <3 x ptr addrspace(3)> %data
+}
+
+define <4 x ptr addrspace(3)> @buffer_load_v4p3__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; PREGFX10-LABEL: buffer_load_v4p3__voffset_add:
+; PREGFX10:       ; %bb.0:
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: buffer_load_v4p3__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_v4p3__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call <4 x ptr addrspace(3)> @llvm.amdgcn.raw.ptr.buffer.load.v4p3(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret <4 x ptr addrspace(3)> %data
+}
+
+define ptr addrspace(5) @buffer_load_p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; PREGFX10-LABEL: buffer_load_p5__voffset_add:
+; PREGFX10:       ; %bb.0:
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; PREGFX10-NEXT:    buffer_load_dword v0, v0, s[4:7], 0 offen offset:60
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: buffer_load_p5__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dword v0, v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_p5__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call ptr addrspace(5) @llvm.amdgcn.raw.ptr.buffer.load.p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret ptr addrspace(5) %data
+}
+
+define <2 x ptr addrspace(5)> @buffer_load_v2p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; PREGFX10-LABEL: buffer_load_v2p5__voffset_add:
+; PREGFX10:       ; %bb.0:
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; PREGFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: buffer_load_v2p5__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_v2p5__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call <2 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v2p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret <2 x ptr addrspace(5)> %data
+}
+
+define <3 x ptr addrspace(5)> @buffer_load_v3p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; GFX10-LABEL: buffer_load_v3p5__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dwordx3 v[0:2], v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_v3p5__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call <3 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v3p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret <3 x ptr addrspace(5)> %data
+}
+
+define <4 x ptr addrspace(5)> @buffer_load_v4p5__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; PREGFX10-LABEL: buffer_load_v4p5__voffset_add:
+; PREGFX10:       ; %bb.0:
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: buffer_load_v4p5__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_v4p5__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call <4 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v4p5(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret <4 x ptr addrspace(5)> %data
+}
+
+define ptr addrspace(6) @buffer_load_p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; PREGFX10-LABEL: buffer_load_p6__voffset_add:
+; PREGFX10:       ; %bb.0:
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; PREGFX10-NEXT:    buffer_load_dword v0, v0, s[4:7], 0 offen offset:60
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: buffer_load_p6__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dword v0, v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_p6__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b32 v0, v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call ptr addrspace(6) @llvm.amdgcn.raw.ptr.buffer.load.p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret ptr addrspace(6) %data
+}
+
+define <2 x ptr addrspace(6)> @buffer_load_v2p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; PREGFX10-LABEL: buffer_load_v2p6__voffset_add:
+; PREGFX10:       ; %bb.0:
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; PREGFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: buffer_load_v2p6__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dwordx2 v[0:1], v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_v2p6__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b64 v[0:1], v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call <2 x ptr addrspace(6)> @llvm.amdgcn.raw.ptr.buffer.load.v2p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret <2 x ptr addrspace(6)> %data
+}
+
+define <3 x ptr addrspace(6)> @buffer_load_v3p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; GFX10-LABEL: buffer_load_v3p6__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dwordx3 v[0:2], v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_v3p6__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b96 v[0:2], v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call <3 x ptr addrspace(6)> @llvm.amdgcn.raw.ptr.buffer.load.v3p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret <3 x ptr addrspace(6)> %data
+}
+
+define <4 x ptr addrspace(6)> @buffer_load_v4p6__voffset_add(ptr addrspace(8) inreg %rsrc, i32 %voffset) {
+; PREGFX10-LABEL: buffer_load_v4p6__voffset_add:
+; PREGFX10:       ; %bb.0:
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; PREGFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
+; PREGFX10-NEXT:    s_waitcnt vmcnt(0)
+; PREGFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: buffer_load_v4p6__voffset_add:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    buffer_load_dwordx4 v[0:3], v0, s[4:7], 0 offen offset:60
+; GFX10-NEXT:    s_waitcnt vmcnt(0)
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: buffer_load_v4p6__voffset_add:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT:    buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:60
+; GFX11-NEXT:    s_waitcnt vmcnt(0)
+; GFX11-NEXT:    s_setpc_b64 s[30:31]
+  %voffset.add = add i32 %voffset, 60
+  %data = call <4 x ptr addrspace(6)> @llvm.amdgcn.raw.ptr.buffer.load.v4p6(ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret <4 x ptr addrspace(6)> %data
+}
+
 declare float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8), i32, i32, i32) #0
 declare <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32) #0
 declare <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8), i32, i32, i32) #0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.ll
index 8dd9b4ab61d4f..201d07e76b1ff 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.ll
@@ -424,6 +424,150 @@ define void @buffer_store_v2i64__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x
   ret void
 }
 
+define void @buffer_store_p0__voffset_add(ptr addrspace(8) inreg %rsrc, ptr %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.p0(ptr %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
+define void @buffer_store_v2p0__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr> %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v2p0(<2 x ptr> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
+define void @buffer_store_p1__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(1) %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.p1(ptr addrspace(1) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
+define void @buffer_store_v2p1__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(1)> %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v2p1(<2 x ptr addrspace(1)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
+define void @buffer_store_p4__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(4) %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.p4(ptr addrspace(4) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
+define void @buffer_store_v2p4__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(4)> %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v2p4(<2 x ptr addrspace(4)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
+define void @buffer_store_p999__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(999) %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.p999(ptr addrspace(999) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
+define void @buffer_store_v2p999__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(999)> %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v2p999(<2 x ptr addrspace(999)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
+define void @buffer_store_p2__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(2) %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.p2(ptr addrspace(2) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
+define void @buffer_store_v2p2__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(2)> %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v2p2(<2 x ptr addrspace(2)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
+define void @buffer_store_v3p2__voffset_add(ptr addrspace(8) inreg %rsrc, <3 x ptr addrspace(2)> %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v3p2(<3 x ptr addrspace(2)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
+define void @buffer_store_v4p2__voffset_add(ptr addrspace(8) inreg %rsrc, <4 x ptr addrspace(2)> %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v4p2(<4 x ptr addrspace(2)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
+define void @buffer_store_p3__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(3) %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.p3(ptr addrspace(3) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
+define void @buffer_store_v2p3__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(3)> %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v2p3(<2 x ptr addrspace(3)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
+define void @buffer_store_v3p3__voffset_add(ptr addrspace(8) inreg %rsrc, <3 x ptr addrspace(3)> %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v3p3(<3 x ptr addrspace(3)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
+define void @buffer_store_v4p3__voffset_add(ptr addrspace(8) inreg %rsrc, <4 x ptr addrspace(3)> %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v4p3(<4 x ptr addrspace(3)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
+define void @buffer_store_p5__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(5) %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.p5(ptr addrspace(5) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
+define void @buffer_store_v2p5__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(5)> %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v2p5(<2 x ptr addrspace(5)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
+define void @buffer_store_v3p5__voffset_add(ptr addrspace(8) inreg %rsrc, <3 x ptr addrspace(5)> %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v3p5(<3 x ptr addrspace(5)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
+define void @buffer_store_v4p5__voffset_add(ptr addrspace(8) inreg %rsrc, <4 x ptr addrspace(5)> %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v4p5(<4 x ptr addrspace(5)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
+define void @buffer_store_p6__voffset_add(ptr addrspace(8) inreg %rsrc, ptr addrspace(6) %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.p6(ptr addrspace(6) %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
+define void @buffer_store_v2p6__voffset_add(ptr addrspace(8) inreg %rsrc, <2 x ptr addrspace(6)> %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v2p6(<2 x ptr addrspace(6)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
+define void @buffer_store_v3p6__voffset_add(ptr addrspace(8) inreg %rsrc, <3 x ptr addrspace(6)> %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v3p6(<3 x ptr addrspace(6)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
+define void @buffer_store_v4p6__voffset_add(ptr addrspace(8) inreg %rsrc, <4 x ptr addrspace(6)> %data, i32 %voffset) #0 {
+  %voffset.add = add i32 %voffset, 60
+  call void @llvm.amdgcn.raw.ptr.buffer.store.v4p6(<4 x ptr addrspace(6)> %data, ptr addrspace(8) %rsrc, i32 %voffset.add, i32 0, i32 0)
+  ret void
+}
+
 declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32) #0
 declare void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float>, ptr addrspace(8), i32, i32, i32) #0
 declare void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float>, ptr addrspace(8), i32, i32, i32) #0



More information about the llvm-branch-commits mailing list