[llvm] [AMDGPU] Update PromoteAlloca to handle GEPs with variable offset. (PR #122342)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 24 08:06:50 PST 2025


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/122342

From 1093642dcc9cfaf7b50bf314f18467df58ced7ab Mon Sep 17 00:00:00 2001
From: Sumanth Gundapaneni <sumanth.gundapaneni at amd.com>
Date: Thu, 9 Jan 2025 10:59:05 -0800
Subject: [PATCH 1/5] [AMDGPU] Update PromoteAlloca to handle GEPs with
 variable offset.

When a GEP has a variable offset that can be optimized out, PromoteAlloca
is updated to use the refreshed index, avoiding an assertion failure.

Issue found by fuzzer.
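
For context, the refresh relies on GetElementPtrInst::collectOffset to
re-derive the index from the current IR. A minimal sketch of that
decomposition (illustrative only, not the exact patch code; refreshIndex is
a made-up helper name) might look like:

  // Illustrative sketch: re-derive the vector index of a GEP whose cached
  // index value may have been simplified away. Assumes a single variable
  // offset and a zero constant offset; otherwise the caller keeps the
  // cached entry.
  #include "llvm/ADT/APInt.h"
  #include "llvm/ADT/MapVector.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/Instructions.h"
  using namespace llvm;

  static Value *refreshIndex(GetElementPtrInst *GEP, const DataLayout &DL) {
    unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
    SmallMapVector<Value *, APInt, 4> VarOffsets;
    APInt ConstOffset(BW, 0);
    // collectOffset splits the GEP into variable offsets (value -> scale)
    // plus a constant byte offset; it can fail for unsupported GEPs.
    if (!GEP->collectOffset(DL, BW, VarOffsets, ConstOffset))
      return nullptr;
    if (VarOffsets.size() == 1 && ConstOffset.isZero())
      return VarOffsets.front().first; // current (possibly updated) index
    return nullptr;
  }

The patch below applies the same idea inside calculateVectorIndex, and falls
back to the cached entry when the GEP has no variable offset.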
---
 .../lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 41 ++++++++++++++++---
 .../AMDGPU/promote-alloca-array-aggregate.ll  | 28 +++++++++++++
 2 files changed, 64 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index e27ef71c1c088..1e32743c3dfee 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -385,13 +385,42 @@ static bool isSupportedMemset(MemSetInst *I, AllocaInst *AI,
          match(I->getOperand(2), m_SpecificInt(Size)) && !I->isVolatile();
 }
 
+static bool hasVariableOffset(GetElementPtrInst *GEP) {
+  // Iterate over all operands starting from the first index (index 0 is the
+  // base pointer).
+  for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i) {
+    Value *Op = GEP->getOperand(i);
+    // Check if the operand is not a constant integer value
+    if (!isa<ConstantInt>(Op)) {
+      return true;
+    }
+  }
+  return false;
+}
+
 static Value *
-calculateVectorIndex(Value *Ptr,
-                     const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
+calculateVectorIndex(Value *Ptr, std::map<GetElementPtrInst *, Value *> &GEPIdx,
+                     const DataLayout &DL) {
   auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts());
   if (!GEP)
     return ConstantInt::getNullValue(Type::getInt32Ty(Ptr->getContext()));
 
+  // If the index of this GEP is a variable that might be deleted,
+  // update the index with its latest value. We've already handled any GEPs
+  // with unsupported index types (in GEPToVectorIndex) at this point.
+  if (hasVariableOffset(GEP)) {
+    unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
+    SmallMapVector<Value *, APInt, 4> VarOffsets;
+    APInt ConstOffset(BW, 0);
+    if (GEP->collectOffset(DL, BW, VarOffsets, ConstOffset)) {
+      if (VarOffsets.size() == 1 && ConstOffset.isZero()) {
+        auto *UpdatedValue = VarOffsets.front().first;
+        GEPIdx[GEP] = UpdatedValue;
+        return UpdatedValue;
+      }
+    }
+  }
+
   auto I = GEPIdx.find(GEP);
   assert(I != GEPIdx.end() && "Must have entry for GEP!");
   return I->second;
@@ -496,7 +525,7 @@ static Value *promoteAllocaUserToVector(
     }
 
     Value *Index = calculateVectorIndex(
-        cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx);
+        cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx, DL);
 
     // We're loading the full vector.
     Type *AccessTy = Inst->getType();
@@ -552,7 +581,8 @@ static Value *promoteAllocaUserToVector(
     // to know the current value. If this is a store of a single element, we
     // need to know the value.
     StoreInst *SI = cast<StoreInst>(Inst);
-    Value *Index = calculateVectorIndex(SI->getPointerOperand(), GEPVectorIdx);
+    Value *Index =
+        calculateVectorIndex(SI->getPointerOperand(), GEPVectorIdx, DL);
     Value *Val = SI->getValueOperand();
 
     // We're storing the full vector, we can handle this without knowing CurVal.
@@ -850,7 +880,8 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
         if (Ptr != &Alloca && !GEPVectorIdx.count(GEP))
           return nullptr;
 
-        return dyn_cast<ConstantInt>(calculateVectorIndex(Ptr, GEPVectorIdx));
+        return dyn_cast<ConstantInt>(
+            calculateVectorIndex(Ptr, GEPVectorIdx, *DL));
       };
 
       unsigned OpNum = U->getOperandNo();
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
index 05c727201bbf1..9db416041a5bc 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
@@ -122,6 +122,34 @@ define amdgpu_vs void @promote_load_from_store_aggr() #0 {
   ret void
 }
 
+%Block4 = type { [2 x i32], i32 }
+@block4 = external addrspace(1) global %Block4
+%gl_PV = type { <4 x i32>, i32, [1 x i32], [1 x i32] }
+@pv1 = external addrspace(1) global %gl_PV
+
+; This should not crash on variable offset that can be
+; optimized out (variable foo4 in the test)
+define amdgpu_vs void @promote_load_from_store_aggr_varoff() local_unnamed_addr {
+; CHECK-LABEL: @promote_load_from_store_aggr_varoff(
+; CHECK-NEXT:    [[FOO3_UNPACK2:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <3 x i32> undef, i32 [[FOO3_UNPACK2]], i32 2
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <3 x i32> [[TMP1]], i32 [[FOO3_UNPACK2]]
+; CHECK-NEXT:    [[FOO12:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 3
+; CHECK-NEXT:    store <4 x i32> [[FOO12]], ptr addrspace(1) @pv1, align 16
+; CHECK-NEXT:    ret void
+;
+  %f1 = alloca [3 x i32], align 4, addrspace(5)
+  %G1 = getelementptr inbounds i8, ptr addrspace(5) %f1, i32 8
+  %foo3.unpack2 = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
+  store i32 %foo3.unpack2, ptr addrspace(5) %G1, align 4
+  %foo4 = load i32, ptr addrspace(5) %G1, align 4
+  %foo5 = getelementptr [3 x i32], ptr addrspace(5) %f1, i32 0, i32 %foo4
+  %foo6 = load i32, ptr addrspace(5) %foo5, align 4
+  %foo12 = insertelement <4 x i32> poison, i32 %foo6, i64 3
+  store <4 x i32> %foo12, ptr addrspace(1) @pv1, align 16
+  ret void
+}
+
 define amdgpu_vs void @promote_memmove_aggr() #0 {
 ; CHECK-LABEL: @promote_memmove_aggr(
 ; CHECK-NEXT:    store float 1.000000e+00, ptr addrspace(1) @pv, align 4

From 9b7403a60b7772469dd786fc26c02702ade92f7b Mon Sep 17 00:00:00 2001
From: Sumanth Gundapaneni <sumanth.gundapaneni at amd.com>
Date: Thu, 30 Jan 2025 13:42:59 -0800
Subject: [PATCH 2/5] [AMDGPU] Update PromoteAlloca to handle updated GEP
 Indexes.

The PromoteAlloca pass was using outdated cached GEP indices in some cases,
leading to an assertion failure when encountering aliased GEP indices that could
be optimized out. This commit fixes the issue by refreshing the cached index
before use.

Issue found by fuzzer.
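
As a rough sketch of the approach in this revision (illustrative only;
lookupIndex and recomputeIndex are placeholder names, with recomputeIndex
standing in for the pass's GEPToVectorIndex helper): if the cached index
Value is no longer an operand of the GEP, it has been folded away, so the
entry is recomputed before use.

  // Illustrative sketch of the cache-refresh check in this revision: if the
  // cached index is no longer one of the GEP's operands, it was simplified
  // away, so recompute it from the current IR.
  #include "llvm/ADT/STLFunctionalExtras.h"
  #include "llvm/IR/Instructions.h"
  #include <map>
  using namespace llvm;

  static Value *
  lookupIndex(GetElementPtrInst *GEP,
              std::map<GetElementPtrInst *, Value *> &GEPVectorIdx,
              function_ref<Value *(GetElementPtrInst *)> recomputeIndex) {
    Value *&Cached = GEPVectorIdx[GEP];
    bool StillAnOperand = false;
    for (Value *Op : GEP->operands())
      StillAnOperand |= (Op == Cached);
    // A constant cached index may never appear as a GEP operand; in that
    // case this simply recomputes the same constant.
    if (!StillAnOperand)
      Cached = recomputeIndex(GEP);
    return Cached;
  }

This approach is later replaced by value handles in the next revision of
the patch series.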
---
 .../lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 95 +++++++++----------
 .../AMDGPU/promote-alloca-array-aggregate.ll  | 14 +--
 2 files changed, 51 insertions(+), 58 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 1e32743c3dfee..49b872506ccfc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -385,57 +385,20 @@ static bool isSupportedMemset(MemSetInst *I, AllocaInst *AI,
          match(I->getOperand(2), m_SpecificInt(Size)) && !I->isVolatile();
 }
 
-static bool hasVariableOffset(GetElementPtrInst *GEP) {
-  // Iterate over all operands starting from the first index (index 0 is the
-  // base pointer).
-  for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i) {
-    Value *Op = GEP->getOperand(i);
-    // Check if the operand is not a constant integer value
-    if (!isa<ConstantInt>(Op)) {
-      return true;
-    }
-  }
-  return false;
-}
-
-static Value *
-calculateVectorIndex(Value *Ptr, std::map<GetElementPtrInst *, Value *> &GEPIdx,
-                     const DataLayout &DL) {
-  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts());
-  if (!GEP)
-    return ConstantInt::getNullValue(Type::getInt32Ty(Ptr->getContext()));
-
-  // If the index of this GEP is a variable that might be deleted,
-  // update the index with its latest value. We've already handled any GEPs
-  // with unsupported index types (in GEPToVectorIndex) at this point.
-  if (hasVariableOffset(GEP)) {
-    unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
-    SmallMapVector<Value *, APInt, 4> VarOffsets;
-    APInt ConstOffset(BW, 0);
-    if (GEP->collectOffset(DL, BW, VarOffsets, ConstOffset)) {
-      if (VarOffsets.size() == 1 && ConstOffset.isZero()) {
-        auto *UpdatedValue = VarOffsets.front().first;
-        GEPIdx[GEP] = UpdatedValue;
-        return UpdatedValue;
-      }
-    }
-  }
-
-  auto I = GEPIdx.find(GEP);
-  assert(I != GEPIdx.end() && "Must have entry for GEP!");
-  return I->second;
-}
-
-static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
-                               Type *VecElemTy, const DataLayout &DL) {
+static Value *GEPToVectorIndex(GetElementPtrInst *GEP, Type *VecElemTy,
+                               const DataLayout &DL,
+                               AllocaInst *Alloca = nullptr) {
   // TODO: Extracting a "multiple of X" from a GEP might be a useful generic
   // helper.
   unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
   SmallMapVector<Value *, APInt, 4> VarOffsets;
   APInt ConstOffset(BW, 0);
-  if (GEP->getPointerOperand()->stripPointerCasts() != Alloca ||
-      !GEP->collectOffset(DL, BW, VarOffsets, ConstOffset))
-    return nullptr;
+
+  bool CanCollect = GEP->collectOffset(DL, BW, VarOffsets, ConstOffset);
+
+  if (Alloca)
+    if (GEP->getPointerOperand()->stripPointerCasts() != Alloca || !CanCollect)
+      return nullptr;
 
   unsigned VecElemSize = DL.getTypeAllocSize(VecElemTy);
   if (VarOffsets.size() > 1)
@@ -459,6 +422,36 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
   return ConstantInt::get(GEP->getContext(), Quot);
 }
 
+// Function to check if a Value is an operand of a GetElementPtrInst.
+static bool isValueInGEP(GetElementPtrInst *GEP, Value *ValueToCheck) {
+  if (!GEP || !ValueToCheck)
+    return false;
+
+  for (unsigned i = 0; i < GEP->getNumOperands(); ++i)
+    if (GEP->getOperand(i) == ValueToCheck)
+      return true;
+
+  return false;
+}
+
+static Value *
+calculateVectorIndex(Value *Ptr, std::map<GetElementPtrInst *, Value *> &GEPIdx,
+                     Type *VecElemTy, const DataLayout &DL) {
+  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts());
+  if (!GEP)
+    return ConstantInt::getNullValue(Type::getInt32Ty(Ptr->getContext()));
+
+  // Update the cached index if the value is changed.
+  if (!isValueInGEP(GEP, GEPIdx[GEP])) {
+    Value *Index = GEPToVectorIndex(GEP, VecElemTy, DL);
+    GEPIdx[GEP] = Index;
+  }
+
+  auto I = GEPIdx.find(GEP);
+  assert(I != GEPIdx.end() && "Must have entry for GEP!");
+  return I->second;
+}
+
 /// Promotes a single user of the alloca to a vector form.
 ///
 /// \param Inst           Instruction to be promoted.
@@ -525,7 +518,7 @@ static Value *promoteAllocaUserToVector(
     }
 
     Value *Index = calculateVectorIndex(
-        cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx, DL);
+        cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx, VecEltTy, DL);
 
     // We're loading the full vector.
     Type *AccessTy = Inst->getType();
@@ -581,8 +574,8 @@ static Value *promoteAllocaUserToVector(
     // to know the current value. If this is a store of a single element, we
     // need to know the value.
     StoreInst *SI = cast<StoreInst>(Inst);
-    Value *Index =
-        calculateVectorIndex(SI->getPointerOperand(), GEPVectorIdx, DL);
+    Value *Index = calculateVectorIndex(SI->getPointerOperand(), GEPVectorIdx,
+                                        VecEltTy, DL);
     Value *Val = SI->getValueOperand();
 
     // We're storing the full vector, we can handle this without knowing CurVal.
@@ -845,7 +838,7 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
     if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
       // If we can't compute a vector index from this GEP, then we can't
       // promote this alloca to vector.
-      Value *Index = GEPToVectorIndex(GEP, &Alloca, VecEltTy, *DL);
+      Value *Index = GEPToVectorIndex(GEP, VecEltTy, *DL, &Alloca);
       if (!Index)
         return RejectUser(Inst, "cannot compute vector index for GEP");
 
@@ -881,7 +874,7 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
           return nullptr;
 
         return dyn_cast<ConstantInt>(
-            calculateVectorIndex(Ptr, GEPVectorIdx, *DL));
+            calculateVectorIndex(Ptr, GEPVectorIdx, VecEltTy, *DL));
       };
 
       unsigned OpNum = U->getOperandNo();
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
index 9db416041a5bc..7ec3cab24d9a4 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
@@ -127,14 +127,14 @@ define amdgpu_vs void @promote_load_from_store_aggr() #0 {
 %gl_PV = type { <4 x i32>, i32, [1 x i32], [1 x i32] }
 @pv1 = external addrspace(1) global %gl_PV
 
-; This should not crash on variable offset that can be
-; optimized out (variable foo4 in the test)
-define amdgpu_vs void @promote_load_from_store_aggr_varoff() local_unnamed_addr {
+; This should not crash on an aliased variable offset that can be
+; optimized out (variable %aliasToG1 in the test)
+define amdgpu_vs void @promote_load_from_store_aggr_varoff(<4 x i32> %input) {
 ; CHECK-LABEL: @promote_load_from_store_aggr_varoff(
 ; CHECK-NEXT:    [[FOO3_UNPACK2:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <3 x i32> undef, i32 [[FOO3_UNPACK2]], i32 2
 ; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <3 x i32> [[TMP1]], i32 [[FOO3_UNPACK2]]
-; CHECK-NEXT:    [[FOO12:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 3
+; CHECK-NEXT:    [[FOO12:%.*]] = insertelement <4 x i32> %input, i32 [[TMP2]], i64 3
 ; CHECK-NEXT:    store <4 x i32> [[FOO12]], ptr addrspace(1) @pv1, align 16
 ; CHECK-NEXT:    ret void
 ;
@@ -142,10 +142,10 @@ define amdgpu_vs void @promote_load_from_store_aggr_varoff() local_unnamed_addr
   %G1 = getelementptr inbounds i8, ptr addrspace(5) %f1, i32 8
   %foo3.unpack2 = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
   store i32 %foo3.unpack2, ptr addrspace(5) %G1, align 4
-  %foo4 = load i32, ptr addrspace(5) %G1, align 4
-  %foo5 = getelementptr [3 x i32], ptr addrspace(5) %f1, i32 0, i32 %foo4
+  %aliasToG1 = load i32, ptr addrspace(5) %G1, align 4
+  %foo5 = getelementptr [3 x i32], ptr addrspace(5) %f1, i32 0, i32 %aliasToG1
   %foo6 = load i32, ptr addrspace(5) %foo5, align 4
-  %foo12 = insertelement <4 x i32> poison, i32 %foo6, i64 3
+  %foo12 = insertelement <4 x i32> %input, i32 %foo6, i64 3
   store <4 x i32> %foo12, ptr addrspace(1) @pv1, align 16
   ret void
 }

From 13a43bdad408fb6c1d112889ec7248977e1986a7 Mon Sep 17 00:00:00 2001
From: Sumanth Gundapaneni <sumanth.gundapaneni at amd.com>
Date: Mon, 10 Feb 2025 14:29:19 -0600
Subject: [PATCH 3/5] [AMDGPU] Update PromoteAlloca to handle changed GEP
 Indexes.

The PromoteAlloca pass was using stale cached GEP indices in some cases,
leading to an assertion failure. This commit fixes the issue by using a
ValueHandle (WeakTrackingVH) to track changes to the cached index values.

Issue found by fuzzer.
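
For background on why a value handle helps (an illustrative sketch, not code
from the patch; demoWeakTrackingVH is a made-up name): WeakTrackingVH follows
replaceAllUsesWith, so a cached index keeps pointing at whatever value
replaced it, and only becomes null if the value is deleted outright, which
the check below treats as a bug in the pass.

  // Illustrative sketch: a WeakTrackingVH cached in GEPVectorIdx survives
  // the folder replacing the index value, because the handle follows RAUW.
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/ValueHandle.h"
  #include <cassert>
  using namespace llvm;

  static void demoWeakTrackingVH(Instruction *Idx) {
    // Assumes Idx is an integer-typed instruction inside a function.
    WeakTrackingVH Cached = Idx;             // cache the index value
    Value *Simplified = ConstantInt::get(Idx->getType(), 2);
    Idx->replaceAllUsesWith(Simplified);     // folder simplifies the index
    Idx->eraseFromParent();                  // and deletes the original
    // The handle now refers to the replacement, not the erased instruction.
    assert(Cached == Simplified && "WeakTrackingVH tracked the RAUW");
  }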
---
 .../lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 75 +++++++------------
 .../AMDGPU/promote-alloca-array-aggregate.ll  |  6 +-
 2 files changed, 31 insertions(+), 50 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index c1198a4f81a4f..88df7a36e9b06 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -385,20 +385,33 @@ static bool isSupportedMemset(MemSetInst *I, AllocaInst *AI,
          match(I->getOperand(2), m_SpecificInt(Size)) && !I->isVolatile();
 }
 
-static Value *GEPToVectorIndex(GetElementPtrInst *GEP, Type *VecElemTy,
-                               const DataLayout &DL,
-                               AllocaInst *Alloca = nullptr) {
+static Value *calculateVectorIndex(
+    Value *Ptr, const std::map<GetElementPtrInst *, WeakTrackingVH> &GEPIdx) {
+  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts());
+  if (!GEP)
+    return ConstantInt::getNullValue(Type::getInt32Ty(Ptr->getContext()));
+
+  auto I = GEPIdx.find(GEP);
+  assert(I != GEPIdx.end() && "Must have entry for GEP!");
+
+  if (Value *IndexValue = I->second)
+    return IndexValue;
+
+  llvm_unreachable(
+      "Index value is not present in Cached GEPs! This indicates a "
+      "bug in the pass that populates GEPIdx.");
+}
+
+static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
+                               Type *VecElemTy, const DataLayout &DL) {
   // TODO: Extracting a "multiple of X" from a GEP might be a useful generic
   // helper.
   unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
   SmallMapVector<Value *, APInt, 4> VarOffsets;
   APInt ConstOffset(BW, 0);
-
-  bool CanCollect = GEP->collectOffset(DL, BW, VarOffsets, ConstOffset);
-
-  if (Alloca)
-    if (GEP->getPointerOperand()->stripPointerCasts() != Alloca || !CanCollect)
-      return nullptr;
+  if (GEP->getPointerOperand()->stripPointerCasts() != Alloca ||
+      !GEP->collectOffset(DL, BW, VarOffsets, ConstOffset))
+    return nullptr;
 
   unsigned VecElemSize = DL.getTypeAllocSize(VecElemTy);
   if (VarOffsets.size() > 1)
@@ -422,36 +435,6 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, Type *VecElemTy,
   return ConstantInt::get(GEP->getContext(), Quot);
 }
 
-// Function to check if a Value is an operand of a GetElementPtrInst.
-static bool isValueInGEP(GetElementPtrInst *GEP, Value *ValueToCheck) {
-  if (!GEP || !ValueToCheck)
-    return false;
-
-  for (unsigned i = 0; i < GEP->getNumOperands(); ++i)
-    if (GEP->getOperand(i) == ValueToCheck)
-      return true;
-
-  return false;
-}
-
-static Value *
-calculateVectorIndex(Value *Ptr, std::map<GetElementPtrInst *, Value *> &GEPIdx,
-                     Type *VecElemTy, const DataLayout &DL) {
-  auto *GEP = dyn_cast<GetElementPtrInst>(Ptr->stripPointerCasts());
-  if (!GEP)
-    return ConstantInt::getNullValue(Type::getInt32Ty(Ptr->getContext()));
-
-  // Update the cached index if the value is changed.
-  if (!isValueInGEP(GEP, GEPIdx[GEP])) {
-    Value *Index = GEPToVectorIndex(GEP, VecElemTy, DL);
-    GEPIdx[GEP] = Index;
-  }
-
-  auto I = GEPIdx.find(GEP);
-  assert(I != GEPIdx.end() && "Must have entry for GEP!");
-  return I->second;
-}
-
 /// Promotes a single user of the alloca to a vector form.
 ///
 /// \param Inst           Instruction to be promoted.
@@ -471,7 +454,7 @@ static Value *promoteAllocaUserToVector(
     Instruction *Inst, const DataLayout &DL, FixedVectorType *VectorTy,
     unsigned VecStoreSize, unsigned ElementSize,
     DenseMap<MemTransferInst *, MemTransferInfo> &TransferInfo,
-    std::map<GetElementPtrInst *, Value *> &GEPVectorIdx, Value *CurVal,
+    std::map<GetElementPtrInst *, WeakTrackingVH> &GEPVectorIdx, Value *CurVal,
     SmallVectorImpl<LoadInst *> &DeferredLoads) {
   // Note: we use InstSimplifyFolder because it can leverage the DataLayout
   // to do more folding, especially in the case of vector splats.
@@ -518,7 +501,7 @@ static Value *promoteAllocaUserToVector(
     }
 
     Value *Index = calculateVectorIndex(
-        cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx, VecEltTy, DL);
+        cast<LoadInst>(Inst)->getPointerOperand(), GEPVectorIdx);
 
     // We're loading the full vector.
     Type *AccessTy = Inst->getType();
@@ -574,8 +557,7 @@ static Value *promoteAllocaUserToVector(
     // to know the current value. If this is a store of a single element, we
     // need to know the value.
     StoreInst *SI = cast<StoreInst>(Inst);
-    Value *Index = calculateVectorIndex(SI->getPointerOperand(), GEPVectorIdx,
-                                        VecEltTy, DL);
+    Value *Index = calculateVectorIndex(SI->getPointerOperand(), GEPVectorIdx);
     Value *Val = SI->getValueOperand();
 
     // We're storing the full vector, we can handle this without knowing CurVal.
@@ -780,7 +762,7 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
     return false;
   }
 
-  std::map<GetElementPtrInst *, Value *> GEPVectorIdx;
+  std::map<GetElementPtrInst *, WeakTrackingVH> GEPVectorIdx;
   SmallVector<Instruction *> WorkList;
   SmallVector<Instruction *> UsersToRemove;
   SmallVector<Instruction *> DeferredInsts;
@@ -838,7 +820,7 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
     if (auto *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
       // If we can't compute a vector index from this GEP, then we can't
       // promote this alloca to vector.
-      Value *Index = GEPToVectorIndex(GEP, VecEltTy, *DL, &Alloca);
+      Value *Index = GEPToVectorIndex(GEP, &Alloca, VecEltTy, *DL);
       if (!Index)
         return RejectUser(Inst, "cannot compute vector index for GEP");
 
@@ -872,8 +854,7 @@ bool AMDGPUPromoteAllocaImpl::tryPromoteAllocaToVector(AllocaInst &Alloca) {
         if (Ptr != &Alloca && !GEPVectorIdx.count(GEP))
           return nullptr;
 
-        return dyn_cast<ConstantInt>(
-            calculateVectorIndex(Ptr, GEPVectorIdx, VecEltTy, *DL));
+        return dyn_cast<ConstantInt>(calculateVectorIndex(Ptr, GEPVectorIdx));
       };
 
       unsigned OpNum = U->getOperandNo();
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
index 7ec3cab24d9a4..885766d572345 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-array-aggregate.ll
@@ -128,7 +128,7 @@ define amdgpu_vs void @promote_load_from_store_aggr() #0 {
 @pv1 = external addrspace(1) global %gl_PV
 
 ; This should not crash on an aliased variable offset that can be
-; optimized out (variable %aliasToG1 in the test)
+; optimized out (variable %aliasTofoo3 in the test)
 define amdgpu_vs void @promote_load_from_store_aggr_varoff(<4 x i32> %input) {
 ; CHECK-LABEL: @promote_load_from_store_aggr_varoff(
 ; CHECK-NEXT:    [[FOO3_UNPACK2:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
@@ -142,8 +142,8 @@ define amdgpu_vs void @promote_load_from_store_aggr_varoff(<4 x i32> %input) {
   %G1 = getelementptr inbounds i8, ptr addrspace(5) %f1, i32 8
   %foo3.unpack2 = load i32, ptr addrspace(1) getelementptr inbounds (i8, ptr addrspace(1) @block4, i64 8), align 4
   store i32 %foo3.unpack2, ptr addrspace(5) %G1, align 4
-  %aliasToG1 = load i32, ptr addrspace(5) %G1, align 4
-  %foo5 = getelementptr [3 x i32], ptr addrspace(5) %f1, i32 0, i32 %aliasToG1
+  %aliasTofoo3 = load i32, ptr addrspace(5) %G1, align 4
+  %foo5 = getelementptr [3 x i32], ptr addrspace(5) %f1, i32 0, i32 %aliasTofoo3
   %foo6 = load i32, ptr addrspace(5) %foo5, align 4
   %foo12 = insertelement <4 x i32> %input, i32 %foo6, i64 3
   store <4 x i32> %foo12, ptr addrspace(1) @pv1, align 16

From bf8a22d0596b9e7392a98f953877f8bc29d93b9b Mon Sep 17 00:00:00 2001
From: Sumanth Gundapaneni <sumanth.gundapaneni at amd.com>
Date: Mon, 24 Feb 2025 09:21:08 -0600
Subject: [PATCH 4/5] Update llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
 llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 88df7a36e9b06..643dc8d6e3830 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -394,12 +394,9 @@ static Value *calculateVectorIndex(
   auto I = GEPIdx.find(GEP);
   assert(I != GEPIdx.end() && "Must have entry for GEP!");
 
-  if (Value *IndexValue = I->second)
+    Value *IndexValue = I->second;
+    assert(IndexValue && "index value missing from GEP index map");
     return IndexValue;
-
-  llvm_unreachable(
-      "Index value is not present in Cached GEPs! This indicates a "
-      "bug in the pass that populates GEPIdx.");
 }
 
 static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,

From 3e8e15f5a351ca5d380e884ea808ca206aef950a Mon Sep 17 00:00:00 2001
From: Matt Arsenault <arsenm2 at gmail.com>
Date: Mon, 24 Feb 2025 23:06:31 +0700
Subject: [PATCH 5/5] clang-format fix

---
 llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 643dc8d6e3830..28016b5936ccf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -394,9 +394,9 @@ static Value *calculateVectorIndex(
   auto I = GEPIdx.find(GEP);
   assert(I != GEPIdx.end() && "Must have entry for GEP!");
 
-    Value *IndexValue = I->second;
-    assert(IndexValue && "index value missing from GEP index map");
-    return IndexValue;
+  Value *IndexValue = I->second;
+  assert(IndexValue && "index value missing from GEP index map");
+  return IndexValue;
 }
 
 static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,


