[llvm] [AMDGPU] Extend promotion of alloca to vectors (PR #127973)

Thu Feb 20 02:42:27 PST 2025

================
@@ -735,23 +768,48 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
   Type *AllocaTy = Alloca.getAllocatedType();
   auto *VectorTy = dyn_cast<FixedVectorType>(AllocaTy);
   if (auto *ArrayTy = dyn_cast<ArrayType>(AllocaTy)) {
-    if (VectorType::isValidElementType(ArrayTy->getElementType()) &&
-        ArrayTy->getNumElements() > 0)
-      VectorTy = FixedVectorType::get(ArrayTy->getElementType(),
-                                      ArrayTy->getNumElements());
+    uint64_t NumElems = 1;
+    Type *ElemTy;
+    do {
+      NumElems *= ArrayTy->getNumElements();
+      ElemTy = ArrayTy->getElementType();
+    } while ((ArrayTy = dyn_cast<ArrayType>(ElemTy)));
+
+    // Check for array of vectors
+    auto *InnerVectorTy = dyn_cast<FixedVectorType>(ElemTy);
+    if (InnerVectorTy) {
+      NumElems *= InnerVectorTy->getNumElements();
+      ElemTy = InnerVectorTy->getElementType();
+    }
+
+    if (VectorType::isValidElementType(ElemTy) && NumElems > 0) {
+      unsigned ElementSize = DL->getTypeSizeInBits(ElemTy) / 8;
+      unsigned AllocaSize = DL->getTypeStoreSize(AllocaTy);
+      // Expand vector if required to match padding of inner type,
+      // i.e. odd size subvectors.
+      // Storage size of new vector must match that of alloca for correct
+      // behaviour of byte offsets and GEP computation.
+      if (NumElems * ElementSize != AllocaSize)
+        NumElems = AllocaSize / ElementSize;
+      if (NumElems > 0 && (AllocaSize % ElementSize) == 0)
+        VectorTy = FixedVectorType::get(ElemTy, NumElems);
+    }
   }
 
-  // FIXME: There is no reason why we can't support larger arrays, we
-  // are just being conservative for now.
-  // FIXME: We also reject alloca's of the form [ 2 x [ 2 x i32 ]] or
-  // equivalent. Potentially these could also be promoted but we don't currently
-  // handle this case
   if (!VectorTy) {
     LLVM_DEBUG(dbgs() << "  Cannot convert type to vector\n");
     return false;
   }
 
-  if (VectorTy->getNumElements() > 16 || VectorTy->getNumElements() < 2) {
+  const unsigned MaxElements =
+      PromoteAllocaToVectorMaxElements.getNumOccurrences()
+          ? PromoteAllocaToVectorMaxElements
+          : Alloca.getParent()->getParent()->getFnAttributeAsParsedInteger(
----------------
Pierre-vh wrote:

same here

https://github.com/llvm/llvm-project/pull/127973