[llvm] [AMDGPU][PromoteAlloca] Correctly handle a variable vector index (PR #83597)

Fri Mar 1 09:23:57 PST 2024

llvmbot wrote:




@llvm/pr-subscribers-backend-amdgpu

Author: None (bcahoon)

<details>
<summary>Changes</summary>

The promote alloca to vector transformation assumes that the
vector index is a constant value. If it is not a constant, then
either an assert occurs or the transformation generates an
incorrect index.


---
Full diff: https://github.com/llvm/llvm-project/pull/83597.diff


2 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp (+3-2) 
- (added) llvm/test/CodeGen/AMDGPU/promote-alloca-non-constant-index.ll (+35) 


``````````diff

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index c1b244f50d93f8..ebd48f4082369f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -401,7 +401,8 @@ static Value *promoteAllocaUserToVector(
     // We're loading the full vector.
     Type *AccessTy = Inst->getType();
     TypeSize AccessSize = DL.getTypeStoreSize(AccessTy);
-    if (AccessSize == VecStoreSize && cast<Constant>(Index)->isZeroValue()) {
+    if (AccessSize == VecStoreSize && isa<Constant>(Index) &&
+        cast<Constant>(Index)->isZeroValue()) {
       if (AccessTy->isPtrOrPtrVectorTy())
         CurVal = CreateTempPtrIntCast(CurVal, AccessTy);
       else if (CurVal->getType()->isPtrOrPtrVectorTy())
@@ -456,7 +457,7 @@ static Value *promoteAllocaUserToVector(
     // We're storing the full vector, we can handle this without knowing CurVal.
     Type *AccessTy = Val->getType();
     TypeSize AccessSize = DL.getTypeStoreSize(AccessTy);
-    if (AccessSize == VecStoreSize && cast<Constant>(Index)->isZeroValue()) {
+    if (AccessSize == VecStoreSize && isa<Constant>(Index) && cast<Constant>(Index)->isZeroValue()) {
       if (AccessTy->isPtrOrPtrVectorTy())
         Val = CreateTempPtrIntCast(Val, AccessTy);
       else if (VectorTy->isPtrOrPtrVectorTy())
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-non-constant-index.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-non-constant-index.ll
new file mode 100644
index 00000000000000..0ea92f186d77ab
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-non-constant-index.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s
+
+; Check that promoting an alloca to a vector form works correctly when a variable
+; vector index is used.
+
+define amdgpu_kernel void @non_constant_index(i32 %0) {
+; CHECK-LABEL: define amdgpu_kernel void @non_constant_index(
+; CHECK-SAME: i32 [[TMP0:%.*]]) {
+; CHECK-NEXT:    br label [[TMP2:%.*]]
+; CHECK:       2:
+; CHECK-NEXT:    br label [[TMP2]]
+; CHECK:       3:
+; CHECK-NEXT:    br label [[TMP4:%.*]]
+; CHECK:       4:
+; CHECK-NEXT:    [[PROMOTEALLOCA:%.*]] = phi <2 x float> [ [[TMP7:%.*]], [[TMP4]] ], [ undef, [[TMP3:%.*]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x float> [[PROMOTEALLOCA]], float 0.000000e+00, i32 [[TMP0]]
+; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP7]] = insertelement <2 x float> [[TMP5]], float 0.000000e+00, i32 [[TMP6]]
+; CHECK-NEXT:    br label [[TMP4]]
+;
+  %2 = alloca [2 x float], align 4, addrspace(5)
+  br label %3
+
+3:
+  br label %3
+
+4:
+  br label %5
+
+5:
+  %6 = getelementptr float, ptr addrspace(5) %2, i32 %0
+  store <2 x float> zeroinitializer, ptr addrspace(5) %6, align 8
+  br label %5
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/83597