[PATCH] InlineFunction doesn't update InlineFunctionInfo with allocas created for byval arguments
Julien Lerouge
jlerouge at apple.com
Mon Apr 14 11:42:33 PDT 2014
On Fri, Apr 11, 2014 at 12:41:54PM -0700, Manman Ren wrote:
> Hi Julien,
>
> The updated patch looks good overall.
> Since there is refactoring going on, can you separate to two patches, one
> with refactoring but no functionality change, to make review easier?
>
> Thanks,
> Manman
>
Hello,
Attached inline_001.diff that splits the byval argument initialization,
and inline_002.diff that adds the marker for the byval induced allocas
and test case.
Let me know if there is anything else.
Thanks again,
Julien
-------------- next part --------------
commit 32a19ccc0c89f9137c9d75fae512cb371c356eb0
Author: Julien Lerouge <jlerouge at apple.com>
Date: Mon Apr 14 10:53:17 2014 -0700
Split byval argument initialization so the memcpy(s) are injected at the
beginning of the first new block after inlining.
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 86def3e..b2262dc 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -322,6 +322,36 @@ static void UpdateCallGraphAfterInlining(CallSite CS,
CallerNode->removeCallEdgeFor(CS);
}
+static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,
+ BasicBlock *InsertBlock,
+ InlineFunctionInfo &IFI) {
+ LLVMContext &Context = Src->getContext();
+ Type *VoidPtrTy = Type::getInt8PtrTy(Context);
+ Type *AggTy = cast<PointerType>(Src->getType())->getElementType();
+ Type *Tys[3] = { VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context) };
+ Function *MemCpyFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
+ IRBuilder<> builder(InsertBlock->begin());
+ Value *DstCast = builder.CreateBitCast(Dst, VoidPtrTy, "tmp");
+ Value *SrcCast = builder.CreateBitCast(Src, VoidPtrTy, "tmp");
+
+ Value *Size;
+ if (IFI.DL == 0)
+ Size = ConstantExpr::getSizeOf(AggTy);
+ else
+ Size = ConstantInt::get(Type::getInt64Ty(Context),
+ IFI.DL->getTypeStoreSize(AggTy));
+
+ // Always generate a memcpy of alignment 1 here because we don't know
+ // the alignment of the src pointer. Other optimizations can infer
+ // better alignment.
+ Value *CallArgs[] = {
+ DstCast, SrcCast, Size,
+ ConstantInt::get(Type::getInt32Ty(Context), 1),
+ ConstantInt::getFalse(Context) // isVolatile
+ };
+ builder.CreateCall(MemCpyFn, CallArgs);
+}
+
/// HandleByValArgument - When inlining a call site that has a byval argument,
/// we have to make the implicit memcpy explicit by adding it.
static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
@@ -349,11 +379,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
// Otherwise, we have to make a memcpy to get a safe alignment. This is bad
// for code quality, but rarely happens and is required for correctness.
}
-
- LLVMContext &Context = Arg->getContext();
- Type *VoidPtrTy = Type::getInt8PtrTy(Context);
-
// Create the alloca. If we have DataLayout, use nice alignment.
unsigned Align = 1;
if (IFI.DL)
@@ -368,30 +394,6 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(),
&*Caller->begin()->begin());
- // Emit a memcpy.
- Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
- Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
- Intrinsic::memcpy,
- Tys);
- Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
- Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
-
- Value *Size;
- if (IFI.DL == 0)
- Size = ConstantExpr::getSizeOf(AggTy);
- else
- Size = ConstantInt::get(Type::getInt64Ty(Context),
- IFI.DL->getTypeStoreSize(AggTy));
-
- // Always generate a memcpy of alignment 1 here because we don't know
- // the alignment of the src pointer. Other optimizations can infer
- // better alignment.
- Value *CallArgs[] = {
- DestCast, SrcCast, Size,
- ConstantInt::get(Type::getInt32Ty(Context), 1),
- ConstantInt::getFalse(Context) // isVolatile
- };
- IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs);
// Uses of the argument in the function should use our new alloca
// instead.
@@ -562,6 +564,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
{ // Scope to destroy VMap after cloning.
ValueToValueMapTy VMap;
+ // Keep a list of pair (dst, src) to emit byval initializations.
+ SmallVector<std::pair<Value*, Value*>, 4> ByValInit;
assert(CalledFunc->arg_size() == CS.arg_size() &&
"No varargs calls can be inlined!");
@@ -585,7 +589,11 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Calls that we inline may use the new alloca, so we need to clear
// their 'tail' flags if HandleByValArgument introduced a new alloca and
// the callee has calls.
- MustClearTailCallFlags |= ActualArg != *AI;
+ if (ActualArg != *AI) {
+ MustClearTailCallFlags = true;
+ ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI));
+ }
+
}
VMap[I] = ActualArg;
@@ -602,6 +610,11 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Remember the first block that is newly cloned over.
FirstNewBlock = LastBlock; ++FirstNewBlock;
+ // Inject byval arguments initialization.
+ for (std::pair<Value*, Value*> &Init : ByValInit)
+ HandleByValArgumentInit(Init.first, Init.second, Caller->getParent(),
+ FirstNewBlock, IFI);
+
// Update the callgraph if requested.
if (IFI.CG)
UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
-------------- next part --------------
commit e876da8e1e3e36d145536abf033e9a22ae49f8cd
Author: Julien Lerouge <jlerouge at apple.com>
Date: Mon Apr 14 11:27:45 2014 -0700
Add lifetime markers for allocas created to hold byval arguments.
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index b2262dc..5692d91 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -394,6 +394,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(),
&*Caller->begin()->begin());
+ IFI.StaticAllocas.push_back(cast<AllocaInst>(NewAlloca));
// Uses of the argument in the function should use our new alloca
// instead.
diff --git a/test/Transforms/Inline/2010-05-31-ByvalTailcall.ll b/test/Transforms/Inline/2010-05-31-ByvalTailcall.ll
index b37b9f2..07ea5fc 100644
--- a/test/Transforms/Inline/2010-05-31-ByvalTailcall.ll
+++ b/test/Transforms/Inline/2010-05-31-ByvalTailcall.ll
@@ -18,7 +18,8 @@ define void @bar(i32* byval %x) {
define void @foo(i32* %x) {
; CHECK-LABEL: define void @foo(
-; CHECK: store i32 %1, i32* %x
+; CHECK: llvm.lifetime.start
+; CHECK: store i32 %2, i32* %x
call void @bar(i32* byval %x)
ret void
}
diff --git a/test/Transforms/Inline/byval_lifetime.ll b/test/Transforms/Inline/byval_lifetime.ll
new file mode 100644
index 0000000..e8dff2a
--- /dev/null
+++ b/test/Transforms/Inline/byval_lifetime.ll
@@ -0,0 +1,26 @@
+; RUN: opt -S -inline < %s | FileCheck %s
+; END.
+
+; By inlining foo, an alloca is created in main to hold the byval argument, so
+; a lifetime marker should be generated as well by default.
+
+%struct.foo = type { i32, [16 x i32] }
+
+ at gFoo = global %struct.foo zeroinitializer, align 8
+
+define i32 @foo(%struct.foo* byval align 8 %f, i32 %a) {
+entry:
+ %a1 = getelementptr inbounds %struct.foo* %f, i32 0, i32 1
+ %arrayidx = getelementptr inbounds [16 x i32]* %a1, i32 0, i32 %a
+ %tmp2 = load i32* %arrayidx, align 1
+ ret i32 %tmp2
+}
+
+define i32 @main(i32 %argc, i8** %argv) {
+; CHECK-LABEL: @main
+; CHECK: llvm.lifetime.start
+; CHECK: memcpy
+entry:
+ %call = call i32 @foo(%struct.foo* byval align 8 @gFoo, i32 %argc)
+ ret i32 %call
+}
More information about the llvm-commits
mailing list