[llvm] [AMDGPU] Support i8/i16 GEP indices when promoting allocas to vectors (PR #175489)

Tue Jan 20 09:36:28 PST 2026

================
@@ -568,15 +572,31 @@ computeGEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
   const auto &VarOffset = VarOffsets.front();
   APInt OffsetQuot;
   APInt::sdivrem(VarOffset.second, VecElemSize, OffsetQuot, Rem);
-  if (Rem != 0 || OffsetQuot.isZero())
+  uint64_t DivForVarIndex = 0;
+  if (Rem != 0) {
+    uint64_t Scale = VarOffset.second.getZExtValue();
+    if (Scale == 0 || (VecElemSize % Scale) != 0)
+      return {};
+
+    DivForVarIndex = VecElemSize / Scale;
+    if (!isPowerOf2_64(DivForVarIndex))
+      return {};
+
+    KnownBits KB = computeKnownBits(VarOffset.first, DL);
+    if (KB.countMinTrailingZeros() < Log2_64(DivForVarIndex))
+      return {};
+  } else if (OffsetQuot.isZero()) {
     return {};
+  }
 
   Result.VarIndex = VarOffset.first;
   auto *OffsetType = dyn_cast<IntegerType>(Result.VarIndex->getType());
   if (!OffsetType)
     return {};
 
-  if (!OffsetQuot.isOne())
+  if (Rem != 0)
+    Result.VarShift = ConstantInt::get(Ctx, APInt(BW, Log2_64(DivForVarIndex)));
+  else if (!OffsetQuot.isOne())
     Result.VarMul = ConstantInt::get(Ctx, OffsetQuot.sextOrTrunc(BW));
----------------
arsenm wrote:

Instead of treating the `VarShift` and `VarMul` paths as separate cases, can you keep this logic in terms of a single constant and then apply the simplification to a shift or multiply when building the instruction?

https://github.com/llvm/llvm-project/pull/175489