[llvm] [AMDGPU] Fix `AMDGPUPromoteAlloca` handling certain loads incorrectly (PR #123173)

Thu Jan 16 01:32:52 PST 2025

https://github.com/chinmaydd created https://github.com/llvm/llvm-project/pull/123173

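`AMDGPUPromoteAlloca` incorrectly handled a load from the alloca when the loaded value was itself used as a GEP index into the same alloca. After the load was replaced with an `extractelement`, the cached GEP vector-index map could still refer to the old load; this patch updates those entries.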

This aims to fix SWDEV-493625, SWDEV-504918, SWDEV-508818.

Change-Id: I91059749dc80a960555b44f67043233e4102d271

>From 6858552f3d2e7cb72caea7c7877963dcaeeeba4b Mon Sep 17 00:00:00 2001
From: Chinmay Deshpande <ChinmayDiwakar.Deshpande at amd.com>
Date: Thu, 16 Jan 2025 04:21:26 -0500
Subject: [PATCH] [AMDGPU] Fix `AMDGPUPromoteAlloca` handling certain loads
 incorrectly

`AMDGPUPromoteAlloca` was incorrectly handling loads from the alloca
which were used as a gep index into the same alloca.

Change-Id: I91059749dc80a960555b44f67043233e4102d271
---
 llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp   | 11 +++++++++++
 .../AMDGPU/promote-alloca-update-vector-idx.ll   | 16 ++++++++++++++++
 2 files changed, 27 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/promote-alloca-update-vector-idx.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index e27ef71c1c0883..d8dcdc6afd18c7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -397,6 +397,14 @@ calculateVectorIndex(Value *Ptr,
   return I->second;
 }
 
+// Repoint every cached GEP vector index equal to OldIdx at NewIdx.
+static void updateVectorIndex(Value *OldIdx, Value *NewIdx,
+                              std::map<GetElementPtrInst *, Value *> &GEPIdx) {
+  for (auto &Entry : GEPIdx)
+    if (Entry.second == OldIdx)
+      Entry.second = NewIdx; // Write through the reference; no second lookup.
+}
+
 static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
                                Type *VecElemTy, const DataLayout &DL) {
   // TODO: Extracting a "multiple of X" from a GEP might be a useful generic
@@ -544,6 +552,9 @@ static Value *promoteAllocaUserToVector(
       ExtractElement = Builder.CreateBitOrPointerCast(ExtractElement, AccessTy);
 
     Inst->replaceAllUsesWith(ExtractElement);
+    // If the loaded value is used as an index into a GEP, update all its uses
+    // in the GEPVectorIdx map.
+    updateVectorIndex(Inst, ExtractElement, GEPVectorIdx);
     return nullptr;
   }
   case Instruction::Store: {
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-update-vector-idx.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-update-vector-idx.ll
new file mode 100644
index 00000000000000..4fef7d19413815
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-update-vector-idx.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s
+
+define void @vector_alloca_with_loaded_value_as_index(<2 x i64> %arg) {
+; CHECK-LABEL: define void @vector_alloca_with_loaded_value_as_index(
+; CHECK-SAME: <2 x i64> [[ARG:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i64> [[ARG]], i64 0
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x i64> [[ARG]], i64 1
+; CHECK-NEXT:    ret void
+;
+  %alloca = alloca <2 x i64>, align 16
+  %idx = load i64, ptr %alloca, align 4 ; value is loaded from the alloca itself
+  %gep = getelementptr <1 x double>, ptr %alloca, i64 %idx ; loaded value indexes the same alloca
+  store <2 x i64> %arg, ptr %gep, align 16
+  ret void
+}