[llvm] 4cf8b29 - [AMDGPU][PromoteAlloca] Correctly handle a variable vector index (#83597)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 5 06:18:23 PST 2024
Author: bcahoon
Date: 2024-03-05T08:18:17-06:00
New Revision: 4cf8b298cf1837e75243a299ddefd59e6ed80e1b
URL: https://github.com/llvm/llvm-project/commit/4cf8b298cf1837e75243a299ddefd59e6ed80e1b
DIFF: https://github.com/llvm/llvm-project/commit/4cf8b298cf1837e75243a299ddefd59e6ed80e1b.diff
LOG: [AMDGPU][PromoteAlloca] Correctly handle a variable vector index (#83597)
The promote alloca to vector transformation assumes that the
vector index is a constant value. If it is not a constant, then
either an assertion fails or the transformation generates an
incorrect index.
Added:
llvm/test/CodeGen/AMDGPU/promote-alloca-non-constant-index.ll
Modified:
llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index c1b244f50d93f8..b1b15e9915aea3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -401,14 +401,16 @@ static Value *promoteAllocaUserToVector(
// We're loading the full vector.
Type *AccessTy = Inst->getType();
TypeSize AccessSize = DL.getTypeStoreSize(AccessTy);
- if (AccessSize == VecStoreSize && cast<Constant>(Index)->isZeroValue()) {
- if (AccessTy->isPtrOrPtrVectorTy())
- CurVal = CreateTempPtrIntCast(CurVal, AccessTy);
- else if (CurVal->getType()->isPtrOrPtrVectorTy())
- CurVal = CreateTempPtrIntCast(CurVal, CurVal->getType());
- Value *NewVal = Builder.CreateBitOrPointerCast(CurVal, AccessTy);
- Inst->replaceAllUsesWith(NewVal);
- return nullptr;
+ if (Constant *CI = dyn_cast<Constant>(Index)) {
+ if (CI->isZeroValue() && AccessSize == VecStoreSize) {
+ if (AccessTy->isPtrOrPtrVectorTy())
+ CurVal = CreateTempPtrIntCast(CurVal, AccessTy);
+ else if (CurVal->getType()->isPtrOrPtrVectorTy())
+ CurVal = CreateTempPtrIntCast(CurVal, CurVal->getType());
+ Value *NewVal = Builder.CreateBitOrPointerCast(CurVal, AccessTy);
+ Inst->replaceAllUsesWith(NewVal);
+ return nullptr;
+ }
}
// Loading a subvector.
@@ -456,12 +458,14 @@ static Value *promoteAllocaUserToVector(
// We're storing the full vector, we can handle this without knowing CurVal.
Type *AccessTy = Val->getType();
TypeSize AccessSize = DL.getTypeStoreSize(AccessTy);
- if (AccessSize == VecStoreSize && cast<Constant>(Index)->isZeroValue()) {
- if (AccessTy->isPtrOrPtrVectorTy())
- Val = CreateTempPtrIntCast(Val, AccessTy);
- else if (VectorTy->isPtrOrPtrVectorTy())
- Val = CreateTempPtrIntCast(Val, VectorTy);
- return Builder.CreateBitOrPointerCast(Val, VectorTy);
+ if (Constant *CI = dyn_cast<Constant>(Index)) {
+ if (CI->isZeroValue() && AccessSize == VecStoreSize) {
+ if (AccessTy->isPtrOrPtrVectorTy())
+ Val = CreateTempPtrIntCast(Val, AccessTy);
+ else if (VectorTy->isPtrOrPtrVectorTy())
+ Val = CreateTempPtrIntCast(Val, VectorTy);
+ return Builder.CreateBitOrPointerCast(Val, VectorTy);
+ }
}
// Storing a subvector.
diff --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-non-constant-index.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-non-constant-index.ll
new file mode 100644
index 00000000000000..272a9ebe536c9c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-non-constant-index.ll
@@ -0,0 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -passes=amdgpu-promote-alloca < %s | FileCheck %s
+
+; Check that promoting an alloca to a vector form works correctly when a variable
+; vector index is used.
+
+define amdgpu_kernel void @non_constant_index(i32 %arg) {
+; CHECK-LABEL: define amdgpu_kernel void @non_constant_index(
+; CHECK-SAME: i32 [[ARG:%.*]]) {
+; CHECK-NEXT: bb:
+; CHECK-NEXT: br label [[BB1:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: br label [[BB1]]
+; CHECK: bb2:
+; CHECK-NEXT: br label [[BB3:%.*]]
+; CHECK: bb3:
+; CHECK-NEXT: [[PROMOTEALLOCA:%.*]] = phi <2 x float> [ [[TMP2:%.*]], [[BB3]] ], [ undef, [[BB2:%.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x float> [[PROMOTEALLOCA]], float 0.000000e+00, i32 [[ARG]]
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[ARG]], 1
+; CHECK-NEXT: [[TMP2]] = insertelement <2 x float> [[TMP0]], float 0.000000e+00, i32 [[TMP1]]
+; CHECK-NEXT: br label [[BB3]]
+;
+bb:
+ %i = alloca [2 x float], align 4, addrspace(5)
+ br label %bb1
+
+bb1:
+ br label %bb1
+
+bb2:
+ br label %bb3
+
+bb3:
+ %i4 = getelementptr float, ptr addrspace(5) %i, i32 %arg
+ store <2 x float> zeroinitializer, ptr addrspace(5) %i4, align 8
+ br label %bb3
+}
More information about the llvm-commits
mailing list