[llvm] [AMDGPU] Promote nestedGEP allocas to vectors (PR #141199)

Jay Foad via llvm-commits llvm-commits at lists.llvm.org
Wed May 28 01:30:23 PDT 2025


================
@@ -437,9 +437,53 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
   unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
   SmallMapVector<Value *, APInt, 4> VarOffsets;
   APInt ConstOffset(BW, 0);
-  if (GEP->getPointerOperand()->stripPointerCasts() != Alloca ||
-      !GEP->collectOffset(DL, BW, VarOffsets, ConstOffset))
-    return nullptr;
+
+  // Walk backwards through nested GEPs to collect both constant and variable
+  // offsets, so that nested vector GEP chains can be lowered in one step.
+  //
+  // Given this IR fragment as input:
+  //
+  //   %0 = alloca [10 x <2 x i32>], align 8, addrspace(5)
+  //   %1 = getelementptr [10 x <2 x i32>], ptr addrspace(5) %0, i32 0, i32 %j
+  //   %2 = getelementptr i8, ptr addrspace(5) %1, i32 4
+  //   %3 = load  i32, ptr addrspace(5) %2, align 4
+  //
+  // Combine both GEP operations in a single pass, producing:
+  //   BasePtr      = %0
+  //   ConstOffset  = 4
+  //   VarOffsets   = { %j -> element_size(<2 x i32>) }
+  //
+  // That lets us emit a single buffer_load directly into a VGPR, without ever
+  // allocating scratch memory for the intermediate pointer.
+  Value *CurPtr = GEP;
+  SmallMapVector<Value *, APInt, 4> LocalVarsOffsets;
+  while (auto *CurGEP = dyn_cast<GetElementPtrInst>(CurPtr)) {
+    if (!CurGEP->collectOffset(DL, BW, LocalVarsOffsets, ConstOffset))
+      return nullptr;
+
+    // Merge any variable-index contributions into the accumulated VarOffsets
+    // map.
+    // Only a single variable index is allowed in the entire GEP chain.
+    // If VarOffsets already holds a different variable, abort.
+    for (auto &[Var, Offset] : LocalVarsOffsets) {
+      // If VarOffsets already records a different variable, abort.
+      if (!VarOffsets.empty() && !VarOffsets.contains(Var))
+        return nullptr;
+
+      // Try to insert Var with its offset; if that variable is already in
+      // VarOffsets, add the new offset to the existing one.
+      auto [Existing, Inserted] = VarOffsets.try_emplace(Var, Offset);
+      if (!Inserted)
+        Existing->second += Offset;
+    }
+
+    LocalVarsOffsets.clear();
+    // Move to the next outer pointer
+    CurPtr = CurGEP->getPointerOperand();
+  }
+
+  // Only proceed if this GEP stems from the same alloca.
+  assert(CurPtr == Alloca && "GEP not based on alloca");
 
   unsigned VecElemSize = DL.getTypeAllocSize(VecElemTy);
   if (VarOffsets.size() > 1)
----------------
jayfoad wrote:

How can this condition ever be true?

https://github.com/llvm/llvm-project/pull/141199


More information about the llvm-commits mailing list