[PATCH] InlineFunction doesn't update InlineFunctionInfo with allocas created for byval arguments

Julien Lerouge jlerouge at apple.com
Mon Apr 14 11:42:33 PDT 2014




On Fri, Apr 11, 2014 at 12:41:54PM -0700, Manman Ren wrote:
> Hi Julien,
> 
> The updated patch looks good overall.
> Since there is refactoring going on, can you separate to two patches, one
> with refactoring but no functionality change, to make review easier?
> 
> Thanks,
> Manman
> 

Hello,

Attached inline_001.diff that splits the byval argument initialization,
and inline_002.diff that adds the marker for the byval induced allocas
and test case.

Let me know if there is anything else.

Thanks again,
Julien
-------------- next part --------------
commit 32a19ccc0c89f9137c9d75fae512cb371c356eb0
Author: Julien Lerouge <jlerouge at apple.com>
Date:   Mon Apr 14 10:53:17 2014 -0700

    Split byval argument initialization so the memcpy(s) are injected at the
    beginning of the first new block after inlining.

diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 86def3e..b2262dc 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -322,6 +322,36 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
   CallerNode->removeCallEdgeFor(CS);
 }
 
+static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
+                                    BasicBlock *InsertBlock,
+                                    InlineFunctionInfo &IFI) {
+  LLVMContext &Context = Src->getContext();
+  Type *VoidPtrTy = Type::getInt8PtrTy(Context);
+  Type *AggTy = cast<PointerType>(Src->getType())->getElementType();
+  Type *Tys[3] = { VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context) };
+  Function *MemCpyFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
+  IRBuilder<> builder(InsertBlock->begin());
+  Value *DstCast = builder.CreateBitCast(Dst, VoidPtrTy, "tmp");
+  Value *SrcCast = builder.CreateBitCast(Src, VoidPtrTy, "tmp");
+
+  Value *Size;
+  if (IFI.DL == 0)
+    Size = ConstantExpr::getSizeOf(AggTy);
+  else
+    Size = ConstantInt::get(Type::getInt64Ty(Context),
+                            IFI.DL->getTypeStoreSize(AggTy));
+
+  // Always generate a memcpy of alignment 1 here because we don't know
+  // the alignment of the src pointer.  Other optimizations can infer
+  // better alignment.
+  Value *CallArgs[] = {
+    DstCast, SrcCast, Size,
+    ConstantInt::get(Type::getInt32Ty(Context), 1),
+    ConstantInt::getFalse(Context) // isVolatile
+  };
+  builder.CreateCall(MemCpyFn, CallArgs);
+}
+
 /// HandleByValArgument - When inlining a call site that has a byval argument,
 /// we have to make the implicit memcpy explicit by adding it.
 static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
@@ -349,11 +379,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
     // Otherwise, we have to make a memcpy to get a safe alignment.  This is bad
     // for code quality, but rarely happens and is required for correctness.
   }
-  
-  LLVMContext &Context = Arg->getContext();
 
-  Type *VoidPtrTy = Type::getInt8PtrTy(Context);
-  
   // Create the alloca.  If we have DataLayout, use nice alignment.
   unsigned Align = 1;
   if (IFI.DL)
@@ -368,30 +394,6 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
   
   Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(), 
                                     &*Caller->begin()->begin());
-  // Emit a memcpy.
-  Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
-  Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
-                                                 Intrinsic::memcpy, 
-                                                 Tys);
-  Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
-  Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
-  
-  Value *Size;
-  if (IFI.DL == 0)
-    Size = ConstantExpr::getSizeOf(AggTy);
-  else
-    Size = ConstantInt::get(Type::getInt64Ty(Context),
-                            IFI.DL->getTypeStoreSize(AggTy));
-  
-  // Always generate a memcpy of alignment 1 here because we don't know
-  // the alignment of the src pointer.  Other optimizations can infer
-  // better alignment.
-  Value *CallArgs[] = {
-    DestCast, SrcCast, Size,
-    ConstantInt::get(Type::getInt32Ty(Context), 1),
-    ConstantInt::getFalse(Context) // isVolatile
-  };
-  IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs);
   
   // Uses of the argument in the function should use our new alloca
   // instead.
@@ -562,6 +564,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
 
   { // Scope to destroy VMap after cloning.
     ValueToValueMapTy VMap;
+    // Keep a list of pair (dst, src) to emit byval initializations.
+    SmallVector<std::pair<Value*, Value*>, 4> ByValInit;
 
     assert(CalledFunc->arg_size() == CS.arg_size() &&
            "No varargs calls can be inlined!");
@@ -585,7 +589,11 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
         // Calls that we inline may use the new alloca, so we need to clear
         // their 'tail' flags if HandleByValArgument introduced a new alloca and
         // the callee has calls.
-        MustClearTailCallFlags |= ActualArg != *AI;
+        if (ActualArg != *AI) {
+          MustClearTailCallFlags = true;
+          ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI));
+        }
+
       }
 
       VMap[I] = ActualArg;
@@ -602,6 +610,11 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
     // Remember the first block that is newly cloned over.
     FirstNewBlock = LastBlock; ++FirstNewBlock;
 
+    // Inject byval arguments initialization.
+    for (std::pair<Value*, Value*> &Init : ByValInit)
+      HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),
+                              FirstNewBlock, IFI);
+
     // Update the callgraph if requested.
     if (IFI.CG)
       UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
-------------- next part --------------
commit e876da8e1e3e36d145536abf033e9a22ae49f8cd
Author: Julien Lerouge <jlerouge at apple.com>
Date:   Mon Apr 14 11:27:45 2014 -0700

    Add lifetime markers for allocas created to hold byval arguments.

diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index b2262dc..5692d91 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -394,6 +394,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
   
   Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(), 
                                     &*Caller->begin()->begin());
+  IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));
   
   // Uses of the argument in the function should use our new alloca
   // instead.
diff --git a/test/Transforms/Inline/2010-05-31-ByvalTailcall.ll b/test/Transforms/Inline/2010-05-31-ByvalTailcall.ll
index b37b9f2..07ea5fc 100644
--- a/test/Transforms/Inline/2010-05-31-ByvalTailcall.ll
+++ b/test/Transforms/Inline/2010-05-31-ByvalTailcall.ll
@@ -18,7 +18,8 @@ define void @bar(i32* byval %x) {
 
 define void @foo(i32* %x) {
 ; CHECK-LABEL: define void @foo(
-; CHECK: store i32 %1, i32* %x
+; CHECK: llvm.lifetime.start
+; CHECK: store i32 %2, i32* %x
   call void @bar(i32* byval %x)
   ret void
 }
diff --git a/test/Transforms/Inline/byval_lifetime.ll b/test/Transforms/Inline/byval_lifetime.ll
new file mode 100644
index 0000000..e8dff2a
--- /dev/null
+++ b/test/Transforms/Inline/byval_lifetime.ll
@@ -0,0 +1,26 @@
+; RUN: opt -S -inline < %s | FileCheck %s
+; END.
+
+; By inlining foo, an alloca is created in main to hold the byval argument, so
+; a lifetime marker should be generated as well by default.
+
+%struct.foo = type { i32, [16 x i32] }
+
+ at gFoo = global %struct.foo zeroinitializer, align 8
+
+define i32 @foo(%struct.foo* byval align 8 %f, i32 %a) {
+entry:
+  %a1 = getelementptr inbounds %struct.foo* %f, i32 0, i32 1
+  %arrayidx = getelementptr inbounds [16 x i32]* %a1, i32 0, i32 %a
+  %tmp2 = load i32* %arrayidx, align 1
+  ret i32 %tmp2
+}
+
+define i32 @main(i32 %argc, i8** %argv) {
+; CHECK-LABEL: @main
+; CHECK: llvm.lifetime.start
+; CHECK: memcpy
+entry:
+  %call = call i32 @foo(%struct.foo* byval align 8 @gFoo, i32 %argc)
+  ret i32 %call
+}


More information about the llvm-commits mailing list