[PATCH] D79738: [AMDGPU] Fix promote alloca which is already vector

Mon May 11 12:56:08 PDT 2020

rampitec created this revision.
rampitec added reviewers: arsenm, foad, piotr.
Herald added subscribers: kerbowa, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, jvesely, kzhuravl.
Herald added a project: LLVM.

Just do not touch loads and stores which are already of vector type.
Previously the pass was unable to see these loads and stores
because they were hidden behind bitcasts.


https://reviews.llvm.org/D79738

Files:
  llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
  llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll


Index: llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll
===================================================================

--- llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll
+++ llvm/test/CodeGen/AMDGPU/vector-alloca-bitcast.ll
@@ -345,6 +345,50 @@
   ret void
 }
 
+; OPT-LABEL: @bitcast_vector_to_vector(
+; OPT-NOT:   alloca
+; OPT:       store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> addrspace(1)* %out, align 16
+
+; GCN-LABEL: {{^}}bitcast_vector_to_vector:
+; GCN: v_mov_b32_e32 v0, 1
+; GCN: v_mov_b32_e32 v1, 2
+; GCN: v_mov_b32_e32 v2, 3
+; GCN: v_mov_b32_e32 v3, 4
+
+; GCN: ScratchSize: 0
+
+define amdgpu_kernel void @bitcast_vector_to_vector(<4 x i32> addrspace(1)* %out)  {
+.entry:
+  %alloca = alloca <4 x float>, align 16, addrspace(5)
+  %cast = bitcast <4 x float> addrspace(5)* %alloca to <4 x i32> addrspace(5)*
+  store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> addrspace(5)* %cast
+  %load = load <4 x i32>, <4 x i32> addrspace(5)* %cast, align 16
+  store <4 x i32> %load, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
+; OPT-LABEL: @vector_bitcast_from_alloca_array(
+; OPT-NOT:   alloca
+; OPT:       store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> addrspace(1)* %out, align 16
+
+; GCN-LABEL: {{^}}vector_bitcast_from_alloca_array:
+; GCN: v_mov_b32_e32 v0, 1
+; GCN: v_mov_b32_e32 v1, 2
+; GCN: v_mov_b32_e32 v2, 3
+; GCN: v_mov_b32_e32 v3, 4
+
+; GCN: ScratchSize: 0
+
+define amdgpu_kernel void @vector_bitcast_from_alloca_array(<4 x i32> addrspace(1)* %out)  {
+.entry:
+  %alloca = alloca [4 x float], align 16, addrspace(5)
+  %cast = bitcast [4 x float] addrspace(5)* %alloca to <4 x i32> addrspace(5)*
+  store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> addrspace(5)* %cast
+  %load = load <4 x i32>, <4 x i32> addrspace(5)* %cast, align 16
+  store <4 x i32> %load, <4 x i32> addrspace(1)* %out
+  ret void
+}
+
 declare void @llvm.lifetime.start.p5i8(i64 immarg, i8 addrspace(5)* nocapture)
 
 declare void @llvm.lifetime.end.p5i8(i64 immarg, i8 addrspace(5)* nocapture)
Index: llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -468,7 +468,7 @@
     IRBuilder<> Builder(Inst);
     switch (Inst->getOpcode()) {
     case Instruction::Load: {
-      if (Inst->getType() == AllocaTy)
+      if (Inst->getType() == AllocaTy || Inst->getType()->isVectorTy())
         break;
 
       Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
@@ -486,7 +486,8 @@
     }
     case Instruction::Store: {
       StoreInst *SI = cast<StoreInst>(Inst);
-      if (SI->getValueOperand()->getType() == AllocaTy)
+      if (SI->getValueOperand()->getType() == AllocaTy ||
+          SI->getValueOperand()->getType()->isVectorTy())
         break;
 
       Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D79738.263245.patch
Type: text/x-patch
Size: 2967 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200511/4e831aaa/attachment.bin>