[llvm] [AMDGPU] Fix `AMDGPUPromoteAlloca` handling certain loads incorrectly (PR #123173)

Thu Jan 16 01:33:34 PST 2025

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: Chinmay Deshpande (chinmaydd)

<details>
<summary>Changes</summary>

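`AMDGPUPromoteAlloca` incorrectly handled a load from an alloca when the loaded value was then used as a GEP index into that same alloca. When such a load is replaced with an `extractelement`, the entries in the `GEPVectorIdx` map that refer to the old load are now updated to the new value.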

This aims to fix SWDEV-493625, SWDEV-504918, SWDEV-508818.

---
Full diff: https://github.com/llvm/llvm-project/pull/123173.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp (+11) 
- (added) llvm/test/CodeGen/AMDGPU/promote-alloca-update-vector-idx.ll (+16) 


``````````diff

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index e27ef71c1c0883..d8dcdc6afd18c7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -397,6 +397,14 @@ calculateVectorIndex(Value *Ptr,
   return I->second;
 }
 
+static void updateVectorIndex(Value *OldIdx, Value *NewIdx,
+                              std::map<GetElementPtrInst *, Value *> &GEPIdx) {
+  for (auto &[GEP, Idx] : GEPIdx) {
+    if (Idx == OldIdx)
+      GEPIdx[GEP] = NewIdx;
+  }
+}
+
 static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
                                Type *VecElemTy, const DataLayout &DL) {
   // TODO: Extracting a "multiple of X" from a GEP might be a useful generic
@@ -544,6 +552,9 @@ static Value *promoteAllocaUserToVector(
       ExtractElement = Builder.CreateBitOrPointerCast(ExtractElement, AccessTy);
 
     Inst->replaceAllUsesWith(ExtractElement);
+    // If the loaded value is used as an index into a GEP, update all its uses
+    // in the GEPVectorIdx map.
+    updateVectorIndex(Inst, ExtractElement, GEPVectorIdx);
     return nullptr;
   }
   case Instruction::Store: {
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-update-vector-idx.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-update-vector-idx.ll
new file mode 100644
index 00000000000000..4fef7d19413815
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-update-vector-idx.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s
+
+define void @vector_alloca_with_loaded_value_as_index(<2 x i64> %arg) {
+; CHECK-LABEL: define void @vector_alloca_with_loaded_value_as_index(
+; CHECK-SAME: <2 x i64> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[ARG]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[ARG]], i64 1
+; CHECK-NEXT:    ret void
+;
+  %alloca = alloca <2 x i64>, align 16
+  %idx = load i64, ptr %alloca, align 4
+  %gep = getelementptr <1 x double>, ptr %alloca, i64 %idx
+  store <2 x i64> %arg, ptr %gep, align 16
+  ret void
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/123173