[llvm] AMDGPU: Make VarIndex a WeakVH in AMDGPUPromoteAlloca (PR #188662)

Wed Mar 25 18:52:47 PDT 2026

https://github.com/ruiling created https://github.com/llvm/llvm-project/pull/188662

The VarIndex might be derived from another alloca (for example, loaded from it) which may have been promoted earlier. The value will be replaced in this case. WeakVH correctly handles this.

>From dba2d720dfa9ebf78f1aa1261fc823ab84daf525 Mon Sep 17 00:00:00 2001
From: Ruiling Song <ruiling.song at amd.com>
Date: Thu, 26 Mar 2026 09:02:23 +0800
Subject: [PATCH] AMDGPU: Make VarIndex a WeakVH in AMDGPUPromoteAlloca

The VarIndex might be derived from another alloca (for example, loaded from it) which may have been promoted earlier.
The value will be replaced in this case. WeakVH correctly handles this.
---
 .../lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp |  2 +-
 ...promote-alloca-proper-value-replacement.ll | 29 +++++++++++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/promote-alloca-proper-value-replacement.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index ee9ba9f798443..c8ff65f06f2d4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -90,7 +90,7 @@ static cl::opt<unsigned>
 // VarIndex is A, VarMul is stride, VarShift is shift and ConstIndex is B. All
 // parts are optional.
 struct GEPToVectorIndex {
-  Value *VarIndex = nullptr;         // defaults to 0
+  WeakVH VarIndex = nullptr;         // defaults to 0
   ConstantInt *VarMul = nullptr;     // defaults to 1
   ConstantInt *VarShift = nullptr;   // defaults to 0
   ConstantInt *ConstIndex = nullptr; // defaults to 0
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-proper-value-replacement.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-proper-value-replacement.ll
new file mode 100644
index 0000000000000..08e9904c185f9
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-proper-value-replacement.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-promote-alloca < %s | FileCheck %s
+
+define void @alloca_value_cross_reference() {
+; CHECK-LABEL: define void @alloca_value_cross_reference() {
+; CHECK-NEXT:  [[_ENTRY:.*:]]
+; CHECK-NEXT:    [[HIT_ORDERED:%.*]] = freeze <4 x float> poison
+; CHECK-NEXT:    [[HIT_INDEX:%.*]] = freeze <4 x i32> poison
+; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> [[HIT_INDEX]], i32 0, i32 0
+; CHECK-NEXT:    br [[DOTLR_PH5:label %.*]]
+; CHECK:       [[_LR_PH5:.*:]]
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> [[HIT_ORDERED]], float 0.000000e+00, i32 0
+; CHECK-NEXT:    ret void
+;
+.entry:
+  %hit_ordered = alloca [4 x float], align 4, addrspace(5)
+  %hit_index = alloca [4 x i32], align 4, addrspace(5)
+  store i32 0, ptr addrspace(5) %hit_index, align 4
+  br label %.lr.ph5
+
+  ; The separate block is needed to avoid constant-folding on
+  ; the load from %hit_index.
+.lr.ph5:
+  %i = load i32, ptr addrspace(5) %hit_index, align 4
+  %p = getelementptr float, ptr addrspace(5) %hit_ordered, i32 %i
+  store float 0.000000e+00, ptr addrspace(5) %p, align 4
+  ret void
+}