[llvm] [AMDGPU] Extend promotion of alloca to vectors (PR #127973)
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 20 02:42:27 PST 2025
================
@@ -735,23 +768,48 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
Type *AllocaTy = Alloca.getAllocatedType();
auto *VectorTy = dyn_cast<FixedVectorType>(AllocaTy);
if (auto *ArrayTy = dyn_cast<ArrayType>(AllocaTy)) {
- if (VectorType::isValidElementType(ArrayTy->getElementType()) &&
- ArrayTy->getNumElements() > 0)
- VectorTy = FixedVectorType::get(ArrayTy->getElementType(),
- ArrayTy->getNumElements());
+ uint64_t NumElems = 1;
+ Type *ElemTy;
+ do {
+ NumElems *= ArrayTy->getNumElements();
+ ElemTy = ArrayTy->getElementType();
+ } while ((ArrayTy = dyn_cast<ArrayType>(ElemTy)));
+
+ // Check for array of vectors
+ auto *InnerVectorTy = dyn_cast<FixedVectorType>(ElemTy);
+ if (InnerVectorTy) {
+ NumElems *= InnerVectorTy->getNumElements();
+ ElemTy = InnerVectorTy->getElementType();
+ }
+
+ if (VectorType::isValidElementType(ElemTy) && NumElems > 0) {
+ unsigned ElementSize = DL->getTypeSizeInBits(ElemTy) / 8;
+ unsigned AllocaSize = DL->getTypeStoreSize(AllocaTy);
+ // Expand vector if required to match padding of inner type,
+ // i.e. odd size subvectors.
+ // Storage size of new vector must match that of alloca for correct
+ // behaviour of byte offsets and GEP computation.
+ if (NumElems * ElementSize != AllocaSize)
+ NumElems = AllocaSize / ElementSize;
+ if (NumElems > 0 && (AllocaSize % ElementSize) == 0)
+ VectorTy = FixedVectorType::get(ElemTy, NumElems);
+ }
}
- // FIXME: There is no reason why we can't support larger arrays, we
- // are just being conservative for now.
- // FIXME: We also reject alloca's of the form [ 2 x [ 2 x i32 ]] or
- // equivalent. Potentially these could also be promoted but we don't currently
- // handle this case
if (!VectorTy) {
LLVM_DEBUG(dbgs() << " Cannot convert type to vector\n");
return false;
}
- if (VectorTy->getNumElements() > 16 || VectorTy->getNumElements() < 2) {
+ const unsigned MaxElements =
+ PromoteAllocaToVectorMaxElements.getNumOccurrences()
+ ? PromoteAllocaToVectorMaxElements
+ : Alloca.getParent()->getParent()->getFnAttributeAsParsedInteger(
----------------
Pierre-vh wrote:
same here
https://github.com/llvm/llvm-project/pull/127973
More information about the llvm-commits mailing list