[llvm] [LowerBufferFatPointers] Fix support for GEP T, p7, <N x T> idxs (PR #126126)
Krzysztof Drewniak via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 10 10:59:23 PST 2025
https://github.com/krzysz00 updated https://github.com/llvm/llvm-project/pull/126126
>From 86460fade22064a87d271a9a19408158c72f3fef Mon Sep 17 00:00:00 2001
From: Krzysztof Drewniak <Krzysztof.Drewniak at amd.com>
Date: Thu, 6 Feb 2025 04:06:58 +0000
Subject: [PATCH 1/2] [LowerBufferFatPointers] Fix support for GEP T, p7, <N x
T> idxs
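The lowering for GEP didn't properly support the case where a scalar
pointer argument is implicitly broadcast because the indices are a
vector, so that the GEP produces a vector of pointers. Fix that by
splatting the resource and offset parts of the pointer to the element
count of the result.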
---
.../AMDGPU/AMDGPULowerBufferFatPointers.cpp | 18 +++++++++++++++---
.../lower-buffer-fat-pointers-pointer-ops.ll | 18 ++++++++++++++++++
2 files changed, 33 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
index ccb874e6a934e70..8ef491f657c1569 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
@@ -1804,14 +1804,26 @@ PtrParts SplitPtrStructs::visitGetElementPtrInst(GetElementPtrInst &GEP) {
bool IsNUW = GEP.hasNoUnsignedWrap();
bool IsNUSW = GEP.hasNoUnsignedSignedWrap();
+ Type *ResTy = GEP.getType();
+ std::optional<ElementCount> ResEC;
+ if (auto *ResVT = dyn_cast<VectorType>(ResTy->getStructElementType(0)))
+ ResEC = ResVT->getElementCount();
+ bool HasPtrVecIn = isa<VectorType>(Off->getType());
+ bool BroadcastsPtr = ResEC.has_value() && !HasPtrVecIn;
+
// In order to call emitGEPOffset() and thus not have to reimplement it,
// we need the GEP result to have ptr addrspace(7) type.
Type *FatPtrTy = IRB.getPtrTy(AMDGPUAS::BUFFER_FAT_POINTER);
- if (auto *VT = dyn_cast<VectorType>(Off->getType()))
- FatPtrTy = VectorType::get(FatPtrTy, VT->getElementCount());
+ if (ResEC.has_value())
+ FatPtrTy = VectorType::get(FatPtrTy, *ResEC);
GEP.mutateType(FatPtrTy);
Value *OffAccum = emitGEPOffset(&IRB, DL, &GEP);
- GEP.mutateType(Ptr->getType());
+ GEP.mutateType(ResTy);
+
+ if (BroadcastsPtr) {
+ Rsrc = IRB.CreateVectorSplat(*ResEC, Rsrc, Rsrc->getName());
+ Off = IRB.CreateVectorSplat(*ResEC, Off, Off->getName());
+ }
if (match(OffAccum, m_Zero())) { // Constant-zero offset
SplitUsers.insert(&GEP);
return {Rsrc, Off};
diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll
index e7589690cd6702c..99fcbc595ff7f37 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll
@@ -59,6 +59,24 @@ define <2 x ptr addrspace(7)> @gep_vector_scalar(<2 x ptr addrspace(7)> %in, i64
ret <2 x ptr addrspace(7)> %ret
}
+define <2 x ptr addrspace(7)> @gep_scalar_vector(ptr addrspace(7) %in, <2 x i32> %idxs) {
+; CHECK-LABEL: define { <2 x ptr addrspace(8)>, <2 x i32> } @gep_scalar_vector
+; CHECK-SAME: ({ ptr addrspace(8), i32 } [[IN:%.*]], <2 x i32> [[IDXS:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[IN_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[IN]], 0
+; CHECK-NEXT: [[IN_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[IN]], 1
+; CHECK-NEXT: [[IN_RSRC_SPLATINSERT:%.*]] = insertelement <2 x ptr addrspace(8)> poison, ptr addrspace(8) [[IN_RSRC]], i64 0
+; CHECK-NEXT: [[IN_RSRC_SPLAT:%.*]] = shufflevector <2 x ptr addrspace(8)> [[IN_RSRC_SPLATINSERT]], <2 x ptr addrspace(8)> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[IN_OFF_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[IN_OFF]], i64 0
+; CHECK-NEXT: [[IN_OFF_SPLAT:%.*]] = shufflevector <2 x i32> [[IN_OFF_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[RET:%.*]] = add <2 x i32> [[IN_OFF_SPLAT]], [[IDXS]]
+; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } poison, <2 x ptr addrspace(8)> [[IN_RSRC_SPLAT]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP1]], <2 x i32> [[RET]], 1
+; CHECK-NEXT: ret { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP2]]
+;
+ %ret = getelementptr inbounds i8, ptr addrspace(7) %in, <2 x i32> %idxs
+ ret <2 x ptr addrspace(7)> %ret
+}
+
define ptr addrspace(7) @simple_gep(ptr addrspace(7) %ptr, i32 %off) {
; CHECK-LABEL: define { ptr addrspace(8), i32 } @simple_gep
; CHECK-SAME: ({ ptr addrspace(8), i32 } [[PTR:%.*]], i32 [[OFF:%.*]]) #[[ATTR0]] {
>From 28a28d89b36fb7bfa48004ea4a8bec0a3f0a9b81 Mon Sep 17 00:00:00 2001
From: Krzysztof Drewniak <Krzysztof.Drewniak at amd.com>
Date: Mon, 10 Feb 2025 18:59:08 +0000
Subject: [PATCH 2/2] Review feedback
---
.../AMDGPU/AMDGPULowerBufferFatPointers.cpp | 20 +++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
index 8ef491f657c1569..6a5243d2e9ea426 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
@@ -1804,25 +1804,25 @@ PtrParts SplitPtrStructs::visitGetElementPtrInst(GetElementPtrInst &GEP) {
bool IsNUW = GEP.hasNoUnsignedWrap();
bool IsNUSW = GEP.hasNoUnsignedSignedWrap();
- Type *ResTy = GEP.getType();
- std::optional<ElementCount> ResEC;
- if (auto *ResVT = dyn_cast<VectorType>(ResTy->getStructElementType(0)))
- ResEC = ResVT->getElementCount();
+ StructType *ResTy = cast<StructType>(GEP.getType());
+ Type *ResRsrcTy = ResTy->getElementType(0);
+ VectorType *ResRsrcVecTy = dyn_cast<VectorType>(ResRsrcTy);
bool HasPtrVecIn = isa<VectorType>(Off->getType());
- bool BroadcastsPtr = ResEC.has_value() && !HasPtrVecIn;
+ bool BroadcastsPtr = ResRsrcVecTy && !HasPtrVecIn;
// In order to call emitGEPOffset() and thus not have to reimplement it,
// we need the GEP result to have ptr addrspace(7) type.
- Type *FatPtrTy = IRB.getPtrTy(AMDGPUAS::BUFFER_FAT_POINTER);
- if (ResEC.has_value())
- FatPtrTy = VectorType::get(FatPtrTy, *ResEC);
+ Type *FatPtrTy =
+ ResRsrcTy->getWithNewType(IRB.getPtrTy(AMDGPUAS::BUFFER_FAT_POINTER));
GEP.mutateType(FatPtrTy);
Value *OffAccum = emitGEPOffset(&IRB, DL, &GEP);
GEP.mutateType(ResTy);
if (BroadcastsPtr) {
- Rsrc = IRB.CreateVectorSplat(*ResEC, Rsrc, Rsrc->getName());
- Off = IRB.CreateVectorSplat(*ResEC, Off, Off->getName());
+ Rsrc = IRB.CreateVectorSplat(ResRsrcVecTy->getElementCount(), Rsrc,
+ Rsrc->getName());
+ Off = IRB.CreateVectorSplat(ResRsrcVecTy->getElementCount(), Off,
+ Off->getName());
}
if (match(OffAccum, m_Zero())) { // Constant-zero offset
SplitUsers.insert(&GEP);
More information about the llvm-commits
mailing list