[llvm] [LowerBufferFatPointers] Fix support for GEP T, p7, <N x T> idxs (PR #126126)

Krzysztof Drewniak via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 11 12:33:35 PST 2025


https://github.com/krzysz00 updated https://github.com/llvm/llvm-project/pull/126126

>From 86460fade22064a87d271a9a19408158c72f3fef Mon Sep 17 00:00:00 2001
From: Krzysztof Drewniak <Krzysztof.Drewniak at amd.com>
Date: Thu, 6 Feb 2025 04:06:58 +0000
Subject: [PATCH 1/3] [LowerBufferFatPointers] Fix support for GEP T, p7, <N x
 T> idxs

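The lowering for GEP did not properly support the case where a scalar
pointer operand is implicitly broadcast because the indices are a
vector, e.g.
`getelementptr i8, ptr addrspace(7) %p, <2 x i32> %idxs`.
Fix that by deriving the vector shape from the GEP's result type and
splatting the resource and offset parts of the pointer to the result's
element count in that case.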
---
 .../AMDGPU/AMDGPULowerBufferFatPointers.cpp    | 18 +++++++++++++++---
 .../lower-buffer-fat-pointers-pointer-ops.ll   | 18 ++++++++++++++++++
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
index ccb874e6a934e..8ef491f657c15 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
@@ -1804,14 +1804,26 @@ PtrParts SplitPtrStructs::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   bool IsNUW = GEP.hasNoUnsignedWrap();
   bool IsNUSW = GEP.hasNoUnsignedSignedWrap();
 
+  Type *ResTy = GEP.getType();
+  std::optional<ElementCount> ResEC;
+  if (auto *ResVT = dyn_cast<VectorType>(ResTy->getStructElementType(0)))
+    ResEC = ResVT->getElementCount();
+  bool HasPtrVecIn = isa<VectorType>(Off->getType());
+  bool BroadcastsPtr = ResEC.has_value() && !HasPtrVecIn;
+
   // In order to call emitGEPOffset() and thus not have to reimplement it,
   // we need the GEP result to have ptr addrspace(7) type.
   Type *FatPtrTy = IRB.getPtrTy(AMDGPUAS::BUFFER_FAT_POINTER);
-  if (auto *VT = dyn_cast<VectorType>(Off->getType()))
-    FatPtrTy = VectorType::get(FatPtrTy, VT->getElementCount());
+  if (ResEC.has_value())
+    FatPtrTy = VectorType::get(FatPtrTy, *ResEC);
   GEP.mutateType(FatPtrTy);
   Value *OffAccum = emitGEPOffset(&IRB, DL, &GEP);
-  GEP.mutateType(Ptr->getType());
+  GEP.mutateType(ResTy);
+
+  if (BroadcastsPtr) {
+    Rsrc = IRB.CreateVectorSplat(*ResEC, Rsrc, Rsrc->getName());
+    Off = IRB.CreateVectorSplat(*ResEC, Off, Off->getName());
+  }
   if (match(OffAccum, m_Zero())) { // Constant-zero offset
     SplitUsers.insert(&GEP);
     return {Rsrc, Off};
diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll
index e7589690cd670..99fcbc595ff7f 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll
@@ -59,6 +59,24 @@ define <2 x ptr addrspace(7)> @gep_vector_scalar(<2 x ptr addrspace(7)> %in, i64
   ret <2 x ptr addrspace(7)> %ret
 }
 
+define <2 x ptr addrspace(7)> @gep_scalar_vector(ptr addrspace(7) %in, <2 x i32> %idxs) {
+; CHECK-LABEL: define { <2 x ptr addrspace(8)>, <2 x i32> } @gep_scalar_vector
+; CHECK-SAME: ({ ptr addrspace(8), i32 } [[IN:%.*]], <2 x i32> [[IDXS:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[IN_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[IN]], 0
+; CHECK-NEXT:    [[IN_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[IN]], 1
+; CHECK-NEXT:    [[IN_RSRC_SPLATINSERT:%.*]] = insertelement <2 x ptr addrspace(8)> poison, ptr addrspace(8) [[IN_RSRC]], i64 0
+; CHECK-NEXT:    [[IN_RSRC_SPLAT:%.*]] = shufflevector <2 x ptr addrspace(8)> [[IN_RSRC_SPLATINSERT]], <2 x ptr addrspace(8)> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[IN_OFF_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[IN_OFF]], i64 0
+; CHECK-NEXT:    [[IN_OFF_SPLAT:%.*]] = shufflevector <2 x i32> [[IN_OFF_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[RET:%.*]] = add <2 x i32> [[IN_OFF_SPLAT]], [[IDXS]]
+; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } poison, <2 x ptr addrspace(8)> [[IN_RSRC_SPLAT]], 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP1]], <2 x i32> [[RET]], 1
+; CHECK-NEXT:    ret { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP2]]
+;
+  %ret = getelementptr inbounds i8, ptr addrspace(7) %in, <2 x i32> %idxs
+  ret <2 x ptr addrspace(7)> %ret
+}
+
 define ptr addrspace(7) @simple_gep(ptr addrspace(7) %ptr, i32 %off) {
 ; CHECK-LABEL: define { ptr addrspace(8), i32 } @simple_gep
 ; CHECK-SAME: ({ ptr addrspace(8), i32 } [[PTR:%.*]], i32 [[OFF:%.*]]) #[[ATTR0]] {

>From 28a28d89b36fb7bfa48004ea4a8bec0a3f0a9b81 Mon Sep 17 00:00:00 2001
From: Krzysztof Drewniak <Krzysztof.Drewniak at amd.com>
Date: Mon, 10 Feb 2025 18:59:08 +0000
Subject: [PATCH 2/3] Review feedback

---
 .../AMDGPU/AMDGPULowerBufferFatPointers.cpp   | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
index 8ef491f657c15..6a5243d2e9ea4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
@@ -1804,25 +1804,25 @@ PtrParts SplitPtrStructs::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   bool IsNUW = GEP.hasNoUnsignedWrap();
   bool IsNUSW = GEP.hasNoUnsignedSignedWrap();
 
-  Type *ResTy = GEP.getType();
-  std::optional<ElementCount> ResEC;
-  if (auto *ResVT = dyn_cast<VectorType>(ResTy->getStructElementType(0)))
-    ResEC = ResVT->getElementCount();
+  StructType *ResTy = cast<StructType>(GEP.getType());
+  Type *ResRsrcTy = ResTy->getElementType(0);
+  VectorType *ResRsrcVecTy = dyn_cast<VectorType>(ResRsrcTy);
   bool HasPtrVecIn = isa<VectorType>(Off->getType());
-  bool BroadcastsPtr = ResEC.has_value() && !HasPtrVecIn;
+  bool BroadcastsPtr = ResRsrcVecTy && !HasPtrVecIn;
 
   // In order to call emitGEPOffset() and thus not have to reimplement it,
   // we need the GEP result to have ptr addrspace(7) type.
-  Type *FatPtrTy = IRB.getPtrTy(AMDGPUAS::BUFFER_FAT_POINTER);
-  if (ResEC.has_value())
-    FatPtrTy = VectorType::get(FatPtrTy, *ResEC);
+  Type *FatPtrTy =
+      ResRsrcTy->getWithNewType(IRB.getPtrTy(AMDGPUAS::BUFFER_FAT_POINTER));
   GEP.mutateType(FatPtrTy);
   Value *OffAccum = emitGEPOffset(&IRB, DL, &GEP);
   GEP.mutateType(ResTy);
 
   if (BroadcastsPtr) {
-    Rsrc = IRB.CreateVectorSplat(*ResEC, Rsrc, Rsrc->getName());
-    Off = IRB.CreateVectorSplat(*ResEC, Off, Off->getName());
+    Rsrc = IRB.CreateVectorSplat(ResRsrcVecTy->getElementCount(), Rsrc,
+                                 Rsrc->getName());
+    Off = IRB.CreateVectorSplat(ResRsrcVecTy->getElementCount(), Off,
+                                Off->getName());
   }
   if (match(OffAccum, m_Zero())) { // Constant-zero offset
     SplitUsers.insert(&GEP);

>From 33fbcc53f339d30bea11ff7750fd473efa8c1b7e Mon Sep 17 00:00:00 2001
From: Krzysztof Drewniak <Krzysztof.Drewniak at amd.com>
Date: Tue, 11 Feb 2025 14:33:26 -0600
Subject: [PATCH 3/3] Update
 llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp

Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
 llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
index 6a5243d2e9ea4..b0b6c4df8e982 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
@@ -1807,8 +1807,7 @@ PtrParts SplitPtrStructs::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   StructType *ResTy = cast<StructType>(GEP.getType());
   Type *ResRsrcTy = ResTy->getElementType(0);
   VectorType *ResRsrcVecTy = dyn_cast<VectorType>(ResRsrcTy);
-  bool HasPtrVecIn = isa<VectorType>(Off->getType());
-  bool BroadcastsPtr = ResRsrcVecTy && !HasPtrVecIn;
+  bool BroadcastsPtr = ResRsrcVecTy && !isa<VectorType>(Off->getType());
 
   // In order to call emitGEPOffset() and thus not have to reimplement it,
   // we need the GEP result to have ptr addrspace(7) type.



More information about the llvm-commits mailing list