[llvm] 2862379 - InstCombine: Gracefully handle more allocas in the wrong address space

Fri Dec 24 06:21:37 PST 2021

Author: Matt Arsenault
Date: 2021-12-24T08:59:26-05:00
New Revision: 286237962ad35bc4756947c32310838ad4c76451

URL: https://github.com/llvm/llvm-project/commit/286237962ad35bc4756947c32310838ad4c76451
DIFF: https://github.com/llvm/llvm-project/commit/286237962ad35bc4756947c32310838ad4c76451.diff

LOG: InstCombine: Gracefully handle more allocas in the wrong address space

Officially, allocas are currently required to always use the
datalayout's alloca address space. This may change in the future, and
it's cleaner to propagate the existing alloca's addrspace anyway.

This is a triple fix. Initially, the change in simplifyAllocaArraySize
would drop the address space but still produce output. Fixing this then
hit an assertion in the cast combine.

This patch also makes the changes from
a33e12801279a947c74fdee2655b24480941fb39 that handled this situation
dead, so eliminate them. InstCombine should not take it upon itself to
introduce addrspacecasts, and should preserve the original address
space instead.

Added: 
    

Modified: 
    llvm/include/llvm/IR/Instructions.h
    llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
    llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
    llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
index 046e9b5e809e5..ccf17628e265b 100644

--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -105,6 +105,11 @@ class AllocaInst : public UnaryInstruction {
     return cast<PointerType>(Instruction::getType());
   }
 
+  /// Return the address space for the allocation.
+  unsigned getAddressSpace() const {
+    return getType()->getAddressSpace();
+  }
+
   /// Get allocation size in bits. Returns None if size can't be determined,
   /// e.g. in case of a VLA.
   Optional<TypeSize> getAllocationSizeInBits(const DataLayout &DL) const;

diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 18eb245779bf9..8df4a4529f472 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -157,7 +157,7 @@ Instruction *InstCombinerImpl::PromoteCastOfAllocation(BitCastInst &CI,
     Amt = Builder.CreateAdd(Amt, Off);
   }
 
-  AllocaInst *New = Builder.CreateAlloca(CastElTy, Amt);
+  AllocaInst *New = Builder.CreateAlloca(CastElTy, AI.getAddressSpace(), Amt);
   New->setAlignment(AI.getAlign());
   New->takeName(&AI);
   New->setUsedWithInAlloca(AI.isUsedWithInAlloca());

diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index 286a947fc603a..0dbfdba353c4d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -183,7 +183,8 @@ static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC,
   if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
     if (C->getValue().getActiveBits() <= 64) {
       Type *NewTy = ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
-      AllocaInst *New = IC.Builder.CreateAlloca(NewTy, nullptr, AI.getName());
+      AllocaInst *New = IC.Builder.CreateAlloca(NewTy, AI.getAddressSpace(),
+                                                nullptr, AI.getName());
       New->setAlignment(AI.getAlign());
 
       // Scan to the end of the allocation instructions, to skip over a block of
@@ -199,21 +200,13 @@ static Instruction *simplifyAllocaArraySize(InstCombinerImpl &IC,
       Type *IdxTy = IC.getDataLayout().getIntPtrType(AI.getType());
       Value *NullIdx = Constant::getNullValue(IdxTy);
       Value *Idx[2] = {NullIdx, NullIdx};
-      Instruction *NewI = GetElementPtrInst::CreateInBounds(
+      Instruction *GEP = GetElementPtrInst::CreateInBounds(
           NewTy, New, Idx, New->getName() + ".sub");
-      IC.InsertNewInstBefore(NewI, *It);
-
-      // Gracefully handle allocas in other address spaces.
-      if (AI.getType()->getPointerAddressSpace() !=
-          NewI->getType()->getPointerAddressSpace()) {
-        NewI =
-            CastInst::CreatePointerBitCastOrAddrSpaceCast(NewI, AI.getType());
-        IC.InsertNewInstBefore(NewI, *It);
-      }
+      IC.InsertNewInstBefore(GEP, *It);
 
       // Now make everything use the getelementptr instead of the original
       // allocation.
-      return IC.replaceInstUsesWith(AI, NewI);
+      return IC.replaceInstUsesWith(AI, GEP);
     }
   }
 

diff  --git a/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll b/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll
index c1c45b8e5ab34..c8987e11a7239 100644
--- a/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll
+++ b/llvm/test/Transforms/InstCombine/alloca-in-non-alloca-as.ll
@@ -11,11 +11,9 @@ declare void @use(i8*, i32**)
 define weak amdgpu_kernel void @__omp_offloading_802_ea0109_main_l8(i32* %a) {
 ; CHECK-LABEL: @__omp_offloading_802_ea0109_main_l8(
 ; CHECK-NEXT:  .master:
-; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32*, align 1, addrspace(5)
-; CHECK-NEXT:    [[DOTSUB:%.*]] = bitcast i32* addrspace(5)* [[TMP0]] to i8 addrspace(5)*
-; CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast i8 addrspace(5)* [[DOTSUB]] to i8*
-; CHECK-NEXT:    [[A_ON_STACK:%.*]] = addrspacecast i32* addrspace(5)* [[TMP0]] to i32**
-; CHECK-NEXT:    call void @use(i8* [[TMP1]], i32** [[A_ON_STACK]])
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32*, align 1
+; CHECK-NEXT:    [[DOTSUB:%.*]] = bitcast i32** [[TMP0]] to i8*
+; CHECK-NEXT:    call void @use(i8* [[DOTSUB]], i32** [[TMP0]])
 ; CHECK-NEXT:    ret void
 ;
 .master:
@@ -25,3 +23,22 @@ define weak amdgpu_kernel void @__omp_offloading_802_ea0109_main_l8(i32* %a) {
   call void @use(i8* %0, i32** %a_on_stack)
   ret void
 }
+
+%struct.widget = type { [8 x i8] }
+
+define void @spam(i64* %arg1) {
+; CHECK-LABEL: @spam(
+; CHECK-NEXT:  bb:
+; CHECK-NEXT:    [[ALLOCA1:%.*]] = alloca [0 x [30 x %struct.widget]], align 16
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [0 x [30 x %struct.widget]], [0 x [30 x %struct.widget]]* [[ALLOCA1]], i64 0, i64 0, i64 0
+; CHECK-NEXT:    call void @zot(%struct.widget* [[GEP]])
+; CHECK-NEXT:    ret void
+;
+bb:
+  %alloca = alloca [30 x %struct.widget], i32 0, align 16
+  %gep = getelementptr inbounds [30 x %struct.widget], [30 x %struct.widget]* %alloca, i64 0, i64 0
+  call void @zot(%struct.widget* %gep)
+  ret void
+}
+
+declare hidden void @zot(%struct.widget*)