[llvm] [AMDGPU][PromoteAlloca] Correctly handle a variable vector index (PR #83597)

Fri Mar 1 09:57:24 PST 2024

https://github.com/bcahoon updated https://github.com/llvm/llvm-project/pull/83597

>From bc3871f70d552718ee7157f79c3fdb01842e7803 Mon Sep 17 00:00:00 2001
From: Brendon Cahoon <brendon.cahoon at amd.com>
Date: Fri, 1 Mar 2024 11:16:05 -0600
Subject: [PATCH 1/2] [AMDGPU][PromoteAlloca] Correctly handle a variable
 vector index

The promote alloca to vector transformation assumes that the
vector index is a constant value. If it is not a constant, then
either an assert occurs or the transformation generates an
incorrect index.
---
 .../lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp |  5 +--
 .../promote-alloca-non-constant-index.ll      | 35 +++++++++++++++++++
 2 files changed, 38 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/promote-alloca-non-constant-index.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index c1b244f50d93f8..ebd48f4082369f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -401,7 +401,8 @@ static Value *promoteAllocaUserToVector(
     // We're loading the full vector.
     Type *AccessTy = Inst->getType();
     TypeSize AccessSize = DL.getTypeStoreSize(AccessTy);
-    if (AccessSize == VecStoreSize && cast<Constant>(Index)->isZeroValue()) {
+    if (AccessSize == VecStoreSize && isa<Constant>(Index) &&
+        cast<Constant>(Index)->isZeroValue()) {
       if (AccessTy->isPtrOrPtrVectorTy())
         CurVal = CreateTempPtrIntCast(CurVal, AccessTy);
       else if (CurVal->getType()->isPtrOrPtrVectorTy())
@@ -456,7 +457,7 @@ static Value *promoteAllocaUserToVector(
     // We're storing the full vector, we can handle this without knowing CurVal.
     Type *AccessTy = Val->getType();
     TypeSize AccessSize = DL.getTypeStoreSize(AccessTy);
-    if (AccessSize == VecStoreSize && cast<Constant>(Index)->isZeroValue()) {
+    if (AccessSize == VecStoreSize && isa<Constant>(Index) && cast<Constant>(Index)->isZeroValue()) {
       if (AccessTy->isPtrOrPtrVectorTy())
         Val = CreateTempPtrIntCast(Val, AccessTy);
       else if (VectorTy->isPtrOrPtrVectorTy())
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-non-constant-index.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-non-constant-index.ll
new file mode 100644
index 00000000000000..0ea92f186d77ab
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-non-constant-index.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s
+
+; Check that promoting an alloca to a vector form works correctly when a variable
+; vector index is used.
+
+define amdgpu_kernel void @non_constant_index(i32 %0) {
+; CHECK-LABEL: define amdgpu_kernel void @non_constant_index(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    br label [[TMP2:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    br label [[TMP2]]
+; CHECK:       3:
+; CHECK-NEXT:    br label [[TMP4:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[PROMOTEALLOCA:%.*]] = phi <2 x float> [ [[TMP7:%.*]], [[TMP4]] ], [ undef, [[TMP3:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x float> [[PROMOTEALLOCA]], float 0.000000e+00, i32 [[TMP0]]
+; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP7]] = insertelement <2 x float> [[TMP5]], float 0.000000e+00, i32 [[TMP6]]
+; CHECK-NEXT:    br label [[TMP4]]
+;
+  %2 = alloca [2 x float], align 4, addrspace(5)
+  br label %3
+
+3:
+  br label %3
+
+4:
+  br label %5
+
+5:
+  %6 = getelementptr float, ptr addrspace(5) %2, i32 %0
+  store <2 x float> zeroinitializer, ptr addrspace(5) %6, align 8
+  br label %5
+}

>From 46c20b136e94cb2c7b3597c3125662c5aca24296 Mon Sep 17 00:00:00 2001
From: Brendon Cahoon <brendon.cahoon at amd.com>
Date: Fri, 1 Mar 2024 11:55:51 -0600
Subject: [PATCH 2/2] Fix formatting

---
 llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index ebd48f4082369f..c87c34731fabe3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -457,7 +457,8 @@ static Value *promoteAllocaUserToVector(
     // We're storing the full vector, we can handle this without knowing CurVal.
     Type *AccessTy = Val->getType();
     TypeSize AccessSize = DL.getTypeStoreSize(AccessTy);
-    if (AccessSize == VecStoreSize && isa<Constant>(Index) && cast<Constant>(Index)->isZeroValue()) {
+    if (AccessSize == VecStoreSize && isa<Constant>(Index) &&
+        cast<Constant>(Index)->isZeroValue()) {
       if (AccessTy->isPtrOrPtrVectorTy())
         Val = CreateTempPtrIntCast(Val, AccessTy);
       else if (VectorTy->isPtrOrPtrVectorTy())