[llvm] 512e806 - [AMDGPU] Bail alloca vectorization if GEP not found

Tue May 26 14:02:51 PDT 2020

Author: Stanislav Mekhanoshin
Date: 2020-05-26T13:59:49-07:00
New Revision: 512e806a33e80058a409d205a378a6e6fc2ef39d

URL: https://github.com/llvm/llvm-project/commit/512e806a33e80058a409d205a378a6e6fc2ef39d
DIFF: https://github.com/llvm/llvm-project/commit/512e806a33e80058a409d205a378a6e6fc2ef39d.diff

LOG: [AMDGPU] Bail alloca vectorization if GEP not found

Differential Revision: https://reviews.llvm.org/D80587

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
    llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 03e927b3cdc4..036f5440dc75 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -339,7 +339,9 @@ static Value *stripBitcasts(Value *V) {
 static Value *
 calculateVectorIndex(Value *Ptr,
                      const std::map<GetElementPtrInst *, Value *> &GEPIdx) {
-  GetElementPtrInst *GEP = cast<GetElementPtrInst>(stripBitcasts(Ptr));
+  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(stripBitcasts(Ptr));
+  if (!GEP)
+    return nullptr;
 
   auto I = GEPIdx.find(GEP);
   return I == GEPIdx.end() ? nullptr : I->second;
@@ -496,10 +498,12 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL) {
       if (Inst->getType() == AllocaTy || Inst->getType()->isVectorTy())
         break;
 
-      Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
       Value *Ptr = cast<LoadInst>(Inst)->getPointerOperand();
       Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
+      if (!Index)
+        break;
 
+      Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
       Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
       Value *VecValue = Builder.CreateLoad(VectorTy, BitCast);
       Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index);
@@ -515,9 +519,12 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL) {
           SI->getValueOperand()->getType()->isVectorTy())
         break;
 
-      Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
       Value *Ptr = SI->getPointerOperand();
       Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx);
+      if (!Index)
+        break;
+
+      Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS);
       Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
       Value *VecValue = Builder.CreateLoad(VectorTy, BitCast);
       Value *Elt = SI->getValueOperand();

diff  --git a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll
index 15da72db4abb..da52bcee3637 100644
--- a/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll
+++ b/llvm/test/CodeGen/AMDGPU/promote-alloca-vector-to-vector.ll
@@ -189,5 +189,23 @@ entry:
   ret void
 }
 
+; GCN-LABEL: {{^}}ptr_alloca_bitcast:
+; OPT-LABEL: define i64 @ptr_alloca_bitcast
+
+; GCN-NOT: buffer_
+; GCN: v_mov_b32_e32 v1, 0
+
+; OPT: %private_iptr = alloca <2 x i32>, align 8, addrspace(5)
+; OPT: %cast = bitcast <2 x i32> addrspace(5)* %private_iptr to i64 addrspace(5)*
+; OPT: %tmp1 = load i64, i64 addrspace(5)* %cast, align 8
+
+define i64 @ptr_alloca_bitcast() {
+entry:
+  %private_iptr = alloca <2 x i32>, align 8, addrspace(5)
+  %cast = bitcast <2 x i32> addrspace(5)* %private_iptr to i64 addrspace(5)*
+  %tmp1 = load i64, i64 addrspace(5)* %cast, align 8
+  ret i64 %tmp1
+}
+
 declare i32 @llvm.amdgcn.workitem.id.x()
 declare i32 @llvm.amdgcn.workitem.id.y()