[PATCH] D79738: [AMDGPU] Fix promote alloca which is already vector
Stanislav Mekhanoshin via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon May 11 12:56:08 PDT 2020
rampitec created this revision.
rampitec added reviewers: arsenm, foad, piotr.
Herald added subscribers: kerbowa, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, jvesely, kzhuravl.
Herald added a project: LLVM.
Do not touch loads and stores whose type is already a vector.
Previously the pass was unable to see these loads and stores
because they were hidden behind bitcasts.
https://reviews.llvm.org/D79738
Files:
llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll
Index: llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll
+++ llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll
@@ -345,6 +345,50 @@
ret void
}
+; OPT-LABEL: @bitcast_vector_to_vector(
+; OPT-NOT: alloca
+; OPT: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> addrspace(1)* %out, align 16
+
+; GCN-LABEL: {{^}}bitcast_vector_to_vector:
+; GCN: v_mov_b32_e32 v0, 1
+; GCN: v_mov_b32_e32 v1, 2
+; GCN: v_mov_b32_e32 v2, 3
+; GCN: v_mov_b32_e32 v3, 4
+
+; GCN: ScratchSize: 0
+
+define amdgpu_kernel void @bitcast_vector_to_vector(<4 x i32> addrspace(1)* %out) {
+.entry:
+ %alloca = alloca <4 x float>, align 16, addrspace(5)
+ %cast = bitcast <4 x float> addrspace(5)* %alloca to <4 x i32> addrspace(5)*
+ store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> addrspace(5)* %cast
+ %load = load <4 x i32>, <4 x i32> addrspace(5)* %cast, align 16
+ store <4 x i32> %load, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
+; OPT-LABEL: @vector_bitcast_from_alloca_array(
+; OPT-NOT: alloca
+; OPT: store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> addrspace(1)* %out, align 16
+
+; GCN-LABEL: {{^}}vector_bitcast_from_alloca_array:
+; GCN: v_mov_b32_e32 v0, 1
+; GCN: v_mov_b32_e32 v1, 2
+; GCN: v_mov_b32_e32 v2, 3
+; GCN: v_mov_b32_e32 v3, 4
+
+; GCN: ScratchSize: 0
+
+define amdgpu_kernel void @vector_bitcast_from_alloca_array(<4 x i32> addrspace(1)* %out) {
+.entry:
+ %alloca = alloca [4 x float], align 16, addrspace(5)
+ %cast = bitcast [4 x float] addrspace(5)* %alloca to <4 x i32> addrspace(5)*
+ store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> addrspace(5)* %cast
+ %load = load <4 x i32>, <4 x i32> addrspace(5)* %cast, align 16
+ store <4 x i32> %load, <4 x i32> addrspace(1)* %out
+ ret void
+}
+
declare void @llvm.lifetime.start.p5i8(i64 immarg, i8 addrspace(5)* nocapture)
declare void @llvm.lifetime.end.p5i8(i64 immarg, i8 addrspace(5)* nocapture)
Index: llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -468,7 +468,7 @@
IRBuilder<> Builder(Inst);
switch (Inst->getOpcode()) {
case Instruction::Load: {
- if (Inst->getType() == AllocaTy)
+ if (Inst->getType() == AllocaTy || Inst->getType()->isVectorTy())
break;
Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
@@ -486,7 +486,8 @@
}
case Instruction::Store: {
StoreInst *SI = cast<StoreInst>(Inst);
- if (SI->getValueOperand()->getType() == AllocaTy)
+ if (SI->getValueOperand()->getType() == AllocaTy ||
+ SI->getValueOperand()->getType()->isVectorTy())
break;
Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D79738.263245.patch
Type: text/x-patch
Size: 2967 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200511/4e831aaa/attachment.bin>
More information about the llvm-commits
mailing list