[llvm-branch-commits] [llvm] 1e23802 - [IROutliner] Merging identical output blocks for extracted functions.
Andrew Litteken via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Dec 28 19:21:49 PST 2020
Author: Andrew Litteken
Date: 2020-12-28T21:01:48-06:00
New Revision: 1e23802507d18ef8cb5a063325ff442ac7f527be
URL: https://github.com/llvm/llvm-project/commit/1e23802507d18ef8cb5a063325ff442ac7f527be
DIFF: https://github.com/llvm/llvm-project/commit/1e23802507d18ef8cb5a063325ff442ac7f527be.diff
LOG: [IROutliner] Merging identical output blocks for extracted functions.
Many of the sets of output stores will be the same. When a block is
created, we check if there is an output block with the same set of store
instructions. If there is, we map the output block of the region back
to the block, so that the extra argument controlling the switch
statement can be set to the appropriate block value.
Tests:
- llvm/test/Transforms/IROutliner/outlining-same-output-blocks.ll
Reviewers: jroelofs, paquette
Differential Revision: https://reviews.llvm.org/D87298
Added:
Modified:
llvm/lib/Transforms/IPO/IROutliner.cpp
llvm/test/Transforms/IROutliner/extraction.ll
llvm/test/Transforms/IROutliner/illegal-assumes.ll
llvm/test/Transforms/IROutliner/illegal-memcpy.ll
llvm/test/Transforms/IROutliner/illegal-memmove.ll
llvm/test/Transforms/IROutliner/illegal-vaarg.ll
llvm/test/Transforms/IROutliner/outlining-remapped-outputs.ll
llvm/test/Transforms/IROutliner/outlining-same-output-blocks.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp
index ec6bfaef26ec..4c0e09911ab9 100644
--- a/llvm/lib/Transforms/IPO/IROutliner.cpp
+++ b/llvm/lib/Transforms/IPO/IROutliner.cpp
@@ -53,6 +53,11 @@ struct OutlinableGroup {
/// The return block for the overall function.
BasicBlock *EndBB = nullptr;
+ /// A set containing the different GVN store sets needed. Each array contains
+ /// a sorted list of the different values that need to be stored into output
+ /// registers.
+ DenseSet<ArrayRef<unsigned>> OutputGVNCombinations;
+
/// Flag for whether the \ref ArgumentTypes have been defined after the
/// extraction of the first region.
bool InputTypesSet = false;
@@ -67,6 +72,13 @@ struct OutlinableGroup {
/// \param [in,out] NotSame contains the global value numbers where the
/// constant is not always the same, and must be passed in as an argument.
void findSameConstants(DenseSet<unsigned> &NotSame);
+
+ /// For the regions, look at each set of GVN stores needed and account for
+ /// each combination. Add an argument to the argument types if there is
+ /// more than one combination.
+ ///
+ /// \param [in] M - The module we are outlining from.
+ void collectGVNStoreSets(Module &M);
};
/// Move the contents of \p SourceBB to before the last instruction of \p
@@ -266,6 +278,17 @@ void OutlinableGroup::findSameConstants(DenseSet<unsigned> &NotSame) {
collectRegionsConstants(*Region, GVNToConstant, NotSame);
}
+void OutlinableGroup::collectGVNStoreSets(Module &M) {
+ for (OutlinableRegion *OS : Regions)
+ OutputGVNCombinations.insert(OS->GVNStores);
+
+ // We are adding an extracted argument to decide between which output path
+ // to use in the basic block. It is used in a switch statement and only
+ // needs to be an integer.
+ if (OutputGVNCombinations.size() > 1)
+ ArgumentTypes.push_back(Type::getInt32Ty(M.getContext()));
+}
+
Function *IROutliner::createFunction(Module &M, OutlinableGroup &Group,
unsigned FunctionNameSuffix) {
assert(!Group.OutlinedFunction && "Function is already defined!");
@@ -655,7 +678,7 @@ CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
for (unsigned AggArgIdx = 0; AggArgIdx < AggFunc->arg_size(); AggArgIdx++) {
if (AggArgIdx == AggFunc->arg_size() - 1 &&
- Group.ArgumentTypes.size() > Group.NumAggregateInputs) {
+ Group.OutputGVNCombinations.size() > 1) {
// If we are on the last argument, and we need to differentiate between
// output blocks, add an integer to the argument list to determine
// what block to take
@@ -703,9 +726,9 @@ CallInst *replaceCalledFunction(Module &M, OutlinableRegion &Region) {
Call);
// It is possible that the call to the outlined function is either the first
- // instruction in the new block, the last instruction, or both. If either of
- // these is the case, we need to make sure that we replace the instruction in
- // the IRInstructionData struct with the new call.
+ // instruction is in the new block, the last instruction, or both. If either
+ // of these is the case, we need to make sure that we replace the instruction
+ // in the IRInstructionData struct with the new call.
CallInst *OldCall = Region.Call;
if (Region.NewFront->Inst == OldCall)
Region.NewFront->Inst = Call;
@@ -831,8 +854,51 @@ collectRelevantInstructions(Function &F,
return RelevantInstructions;
}
+/// It is possible that there is a basic block that already performs the same
+/// stores. This returns a duplicate block, if it exists
+///
+/// \param OutputBB [in] the block we are looking for a duplicate of.
+/// \param OutputStoreBBs [in] The existing output blocks.
+/// \returns an optional value with the number output block if there is a match.
+Optional<unsigned>
+findDuplicateOutputBlock(BasicBlock *OutputBB,
+ ArrayRef<BasicBlock *> OutputStoreBBs) {
+
+ bool WrongInst = false;
+ bool WrongSize = false;
+ unsigned MatchingNum = 0;
+ for (BasicBlock *CompBB : OutputStoreBBs) {
+ WrongInst = false;
+ if (CompBB->size() - 1 != OutputBB->size()) {
+ WrongSize = true;
+ MatchingNum++;
+ continue;
+ }
+
+ WrongSize = false;
+ BasicBlock::iterator NIt = OutputBB->begin();
+ for (Instruction &I : *CompBB) {
+ if (isa<BranchInst>(&I))
+ continue;
+
+ if (!I.isIdenticalTo(&(*NIt))) {
+ WrongInst = true;
+ break;
+ }
+
+ NIt++;
+ }
+ if (!WrongInst && !WrongSize)
+ return MatchingNum;
+
+ MatchingNum++;
+ }
+
+ return None;
+}
+
/// For the outlined section, move needed the StoreInsts for the output
-/// registers into their own block. Then, determine if there is a duplicate
+/// registers into their own block. Then, determine if there is a duplicate
/// output block already created.
///
/// \param [in] OG - The OutlinableGroup of regions to be outlined.
@@ -856,9 +922,9 @@ alignOutputBlockWithAggFunc(OutlinableGroup &OG, OutlinableRegion &Region,
// be contained in a store, we replace the uses of the value with the value
// from the overall function, so that the store is storing the correct
// value from the overall function.
-
DenseSet<BasicBlock *> ExcludeBBs(OutputStoreBBs.begin(),
OutputStoreBBs.end());
+ ExcludeBBs.insert(OutputBB);
std::vector<Instruction *> ExtractedFunctionInsts =
collectRelevantInstructions(*(Region.ExtractedFunction), ExcludeBBs);
std::vector<Instruction *> OverallFunctionInsts =
@@ -890,6 +956,38 @@ alignOutputBlockWithAggFunc(OutlinableGroup &OG, OutlinableRegion &Region,
}
assert(ValuesToFind.size() == 0 && "Not all store values were handled!");
+
+ // If the size of the block is 0, then there are no stores, and we do not
+ // need to save this block.
+ if (OutputBB->size() == 0) {
+ Region.OutputBlockNum = -1;
+ OutputBB->eraseFromParent();
+ return;
+ }
+
+ // Determine is there is a duplicate block.
+ Optional<unsigned> MatchingBB =
+ findDuplicateOutputBlock(OutputBB, OutputStoreBBs);
+
+ // If there is, we remove the new output block. If it does not,
+ // we add it to our list of output blocks.
+ if (MatchingBB.hasValue()) {
+ LLVM_DEBUG(dbgs() << "Set output block for region in function"
+ << Region.ExtractedFunction << " to "
+ << MatchingBB.getValue());
+
+ Region.OutputBlockNum = MatchingBB.getValue();
+ OutputBB->eraseFromParent();
+ return;
+ }
+
+ Region.OutputBlockNum = OutputStoreBBs.size();
+
+ LLVM_DEBUG(dbgs() << "Create output block for region in"
+ << Region.ExtractedFunction << " to "
+ << *OutputBB);
+ OutputStoreBBs.push_back(OutputBB);
+ BranchInst::Create(EndBB, OutputBB);
}
/// Create the switch statement for outlined function to differentiate between
@@ -904,27 +1002,46 @@ alignOutputBlockWithAggFunc(OutlinableGroup &OG, OutlinableRegion &Region,
/// \param [in,out] OutputStoreBBs - The existing output blocks.
void createSwitchStatement(Module &M, OutlinableGroup &OG, BasicBlock *EndBB,
ArrayRef<BasicBlock *> OutputStoreBBs) {
- Function *AggFunc = OG.OutlinedFunction;
- // Create a final block
- BasicBlock *ReturnBlock =
- BasicBlock::Create(M.getContext(), "final_block", AggFunc);
- Instruction *Term = EndBB->getTerminator();
- Term->moveBefore(*ReturnBlock, ReturnBlock->end());
- // Put the switch statement in the old end basic block for the function with
- // a fall through to the new return block
- LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
- << OutputStoreBBs.size() << "\n");
- SwitchInst *SwitchI =
- SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1), ReturnBlock,
- OutputStoreBBs.size(), EndBB);
-
- unsigned Idx = 0;
- for (BasicBlock *BB : OutputStoreBBs) {
- SwitchI->addCase(ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx),
- BB);
- Term = BB->getTerminator();
- Term->setSuccessor(0, ReturnBlock);
- Idx++;
+ // We only need the switch statement if there is more than one store
+ // combination.
+ if (OG.OutputGVNCombinations.size() > 1) {
+ Function *AggFunc = OG.OutlinedFunction;
+ // Create a final block
+ BasicBlock *ReturnBlock =
+ BasicBlock::Create(M.getContext(), "final_block", AggFunc);
+ Instruction *Term = EndBB->getTerminator();
+ Term->moveBefore(*ReturnBlock, ReturnBlock->end());
+ // Put the switch statement in the old end basic block for the function with
+ // a fall through to the new return block
+ LLVM_DEBUG(dbgs() << "Create switch statement in " << *AggFunc << " for "
+ << OutputStoreBBs.size() << "\n");
+ SwitchInst *SwitchI =
+ SwitchInst::Create(AggFunc->getArg(AggFunc->arg_size() - 1),
+ ReturnBlock, OutputStoreBBs.size(), EndBB);
+
+ unsigned Idx = 0;
+ for (BasicBlock *BB : OutputStoreBBs) {
+ SwitchI->addCase(ConstantInt::get(Type::getInt32Ty(M.getContext()), Idx),
+ BB);
+ Term = BB->getTerminator();
+ Term->setSuccessor(0, ReturnBlock);
+ Idx++;
+ }
+ return;
+ }
+
+ // If there needs to be stores, move them from the output block to the end
+ // block to save on branching instructions.
+ if (OutputStoreBBs.size() == 1) {
+ LLVM_DEBUG(dbgs() << "Move store instructions to the end block in "
+ << *OG.OutlinedFunction << "\n");
+ BasicBlock *OutputBlock = OutputStoreBBs[0];
+ Instruction *Term = OutputBlock->getTerminator();
+ Term->eraseFromParent();
+ Term = EndBB->getTerminator();
+ moveBBContents(*OutputBlock, *EndBB);
+ Term->moveBefore(*EndBB, EndBB->end());
+ OutputBlock->eraseFromParent();
}
return;
@@ -968,11 +1085,16 @@ static void fillOverallFunction(Module &M, OutlinableGroup &CurrentGroup,
replaceArgumentUses(*CurrentOS, NewBB);
replaceConstants(*CurrentOS);
- if (CurrentGroup.ArgumentTypes.size() > CurrentGroup.NumAggregateInputs) {
+ // If the new basic block has no new stores, we can erase it from the module.
+ // It it does, we create a branch instruction to the last basic block from the
+ // new one.
+ if (NewBB->size() == 0) {
+ CurrentOS->OutputBlockNum = -1;
+ NewBB->eraseFromParent();
+ } else {
BranchInst::Create(CurrentGroup.EndBB, NewBB);
OutputStoreBBs.push_back(NewBB);
- } else
- NewBB->eraseFromParent();
+ }
// Replace the call to the extracted function with the outlined function.
CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
@@ -1002,23 +1124,16 @@ void IROutliner::deduplicateExtractedSections(
CurrentGroup.OutlinedFunction);
replaceArgumentUses(*CurrentOS, NewBB);
- if (CurrentGroup.ArgumentTypes.size() > CurrentGroup.NumAggregateInputs) {
- BranchInst::Create(CurrentGroup.EndBB, NewBB);
- CurrentOS->OutputBlockNum = OutputStoreBBs.size();
- OutputStoreBBs.push_back(NewBB);
- alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBB,
- CurrentGroup.EndBB, OutputMappings,
- OutputStoreBBs);
- } else
- NewBB->eraseFromParent();
+ alignOutputBlockWithAggFunc(CurrentGroup, *CurrentOS, NewBB,
+ CurrentGroup.EndBB, OutputMappings,
+ OutputStoreBBs);
CurrentOS->Call = replaceCalledFunction(M, *CurrentOS);
FuncsToRemove.push_back(CurrentOS->ExtractedFunction);
}
// Create a switch statement to handle the different output schemes.
- if (CurrentGroup.ArgumentTypes.size() > CurrentGroup.NumAggregateInputs)
- createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBB, OutputStoreBBs);
+ createSwitchStatement(M, CurrentGroup, CurrentGroup.EndBB, OutputStoreBBs);
OutlinedFunctionNum++;
}
@@ -1231,11 +1346,7 @@ unsigned IROutliner::doOutline(Module &M) {
if (CurrentGroup.Regions.empty())
continue;
- // We are adding an extracted argument to decide between which output path
- // to use in the basic block. It is used in a switch statement and only
- // needs to be an integer.
- if (CurrentGroup.ArgumentTypes.size() > CurrentGroup.NumAggregateInputs)
- CurrentGroup.ArgumentTypes.push_back(Type::getInt32Ty(M.getContext()));
+ CurrentGroup.collectGVNStoreSets(M);
// Create functions out of all the sections, and mark them as outlined.
OutlinedRegions.clear();
diff --git a/llvm/test/Transforms/IROutliner/extraction.ll b/llvm/test/Transforms/IROutliner/extraction.ll
index 22d7aa54228b..889b7a2d3a84 100644
--- a/llvm/test/Transforms/IROutliner/extraction.ll
+++ b/llvm/test/Transforms/IROutliner/extraction.ll
@@ -63,7 +63,7 @@ define void @extract_outs1() #0 {
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]], i32 0)
+; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
@@ -105,7 +105,7 @@ define void @extract_outs2() #0 {
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]], i32 1)
+; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
diff --git a/llvm/test/Transforms/IROutliner/illegal-assumes.ll b/llvm/test/Transforms/IROutliner/illegal-assumes.ll
index e36d852a91e0..1da29c12338e 100644
--- a/llvm/test/Transforms/IROutliner/illegal-assumes.ll
+++ b/llvm/test/Transforms/IROutliner/illegal-assumes.ll
@@ -14,7 +14,7 @@ define void @outline_assumes() {
; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4
; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i1* [[DL_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
-; CHECK-NEXT: call void @outlined_ir_func_3(i1 true, i1* [[D]], i1* [[DL_LOC]], i32 0)
+; CHECK-NEXT: call void @outlined_ir_func_3(i1 true, i1* [[D]], i1* [[DL_LOC]])
; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, i1* [[DL_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: [[SPLIT_INST:%.*]] = sub i1 [[DL_RELOAD]], [[DL_RELOAD]]
@@ -51,7 +51,7 @@ define void @outline_assumes2() {
; CHECK-NEXT: [[D:%.*]] = alloca i1, align 4
; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i1* [[DL_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
-; CHECK-NEXT: call void @outlined_ir_func_3(i1 false, i1* [[D]], i1* [[DL_LOC]], i32 1)
+; CHECK-NEXT: call void @outlined_ir_func_3(i1 false, i1* [[D]], i1* [[DL_LOC]])
; CHECK-NEXT: [[DL_RELOAD:%.*]] = load i1, i1* [[DL_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[C]])
diff --git a/llvm/test/Transforms/IROutliner/illegal-memcpy.ll b/llvm/test/Transforms/IROutliner/illegal-memcpy.ll
index e59de1e5c99b..3b55672a6a8a 100644
--- a/llvm/test/Transforms/IROutliner/illegal-memcpy.ll
+++ b/llvm/test/Transforms/IROutliner/illegal-memcpy.ll
@@ -14,14 +14,14 @@ define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) {
; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]])
-; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]], i32 0)
+; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]])
; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1
; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]])
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false)
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]], i32 0)
+; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]])
; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
; CHECK-NEXT: ret i8 [[RET_RELOAD]]
@@ -43,14 +43,14 @@ define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) {
; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]])
-; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]], i32 1)
+; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]])
; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1
; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]])
; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false)
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]], i32 1)
+; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]])
; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
; CHECK-NEXT: ret i8 [[RET_RELOAD]]
diff --git a/llvm/test/Transforms/IROutliner/illegal-memmove.ll b/llvm/test/Transforms/IROutliner/illegal-memmove.ll
index aa2863b24f30..f5d002f8b1e2 100644
--- a/llvm/test/Transforms/IROutliner/illegal-memmove.ll
+++ b/llvm/test/Transforms/IROutliner/illegal-memmove.ll
@@ -14,14 +14,14 @@ define i8 @function1(i8* noalias %s, i8* noalias %d, i64 %len) {
; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]])
-; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]], i32 0)
+; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]])
; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1
; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]])
; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false)
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]], i32 0)
+; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]])
; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
; CHECK-NEXT: ret i8 [[RET_RELOAD]]
@@ -43,14 +43,14 @@ define i8 @function2(i8* noalias %s, i8* noalias %d, i64 %len) {
; CHECK-NEXT: [[RET_LOC:%.*]] = alloca i8, align 1
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[A_LOC]])
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[B_LOC]])
-; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]], i32 1)
+; CHECK-NEXT: call void @outlined_ir_func_1(i8* [[S:%.*]], i8* [[D:%.*]], i8* [[A_LOC]], i8* [[B_LOC]])
; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_LOC]], align 1
; CHECK-NEXT: [[B_RELOAD:%.*]] = load i8, i8* [[B_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[A_LOC]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[B_LOC]])
; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64(i8* [[D]], i8* [[S]], i64 [[LEN:%.*]], i1 false)
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[RET_LOC]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]], i32 1)
+; CHECK-NEXT: call void @outlined_ir_func_0(i8 [[A_RELOAD]], i8 [[B_RELOAD]], i8* [[S]], i8* [[RET_LOC]])
; CHECK-NEXT: [[RET_RELOAD:%.*]] = load i8, i8* [[RET_LOC]], align 1
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[RET_LOC]])
; CHECK-NEXT: ret i8 [[RET_RELOAD]]
diff --git a/llvm/test/Transforms/IROutliner/illegal-vaarg.ll b/llvm/test/Transforms/IROutliner/illegal-vaarg.ll
index 100239a61f84..378f0cd25869 100644
--- a/llvm/test/Transforms/IROutliner/illegal-vaarg.ll
+++ b/llvm/test/Transforms/IROutliner/illegal-vaarg.ll
@@ -18,7 +18,7 @@ define i32 @func1(i32 %a, double %b, i8* %v, ...) nounwind {
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i8** [[AP1_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[A:%.*]], i32* [[A_ADDR]], double [[B:%.*]], double* [[B_ADDR]], i8** [[AP]], i8** [[AP1_LOC]], i32 0)
+; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[A:%.*]], i32* [[A_ADDR]], double [[B:%.*]], double* [[B_ADDR]], i8** [[AP]], i8** [[AP1_LOC]])
; CHECK-NEXT: [[AP1_RELOAD:%.*]] = load i8*, i8** [[AP1_LOC]], align 8
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1_RELOAD]])
@@ -56,7 +56,7 @@ define i32 @func2(i32 %a, double %b, i8* %v, ...) nounwind {
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i8** [[AP1_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[A:%.*]], i32* [[A_ADDR]], double [[B:%.*]], double* [[B_ADDR]], i8** [[AP]], i8** [[AP1_LOC]], i32 1)
+; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[A:%.*]], i32* [[A_ADDR]], double [[B:%.*]], double* [[B_ADDR]], i8** [[AP]], i8** [[AP1_LOC]])
; CHECK-NEXT: [[AP1_RELOAD:%.*]] = load i8*, i8** [[AP1_LOC]], align 8
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: call void @llvm.va_start(i8* [[AP1_RELOAD]])
diff --git a/llvm/test/Transforms/IROutliner/outlining-remapped-outputs.ll b/llvm/test/Transforms/IROutliner/outlining-remapped-outputs.ll
index b9ab33721a1f..7d491a2b3099 100644
--- a/llvm/test/Transforms/IROutliner/outlining-remapped-outputs.ll
+++ b/llvm/test/Transforms/IROutliner/outlining-remapped-outputs.ll
@@ -21,7 +21,7 @@ define void @outline_outputs1() #0 {
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i32 2, i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]], i32 0)
+; CHECK-NEXT: call void @outlined_ir_func_0(i32 2, i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
@@ -34,7 +34,7 @@ define void @outline_outputs1() #0 {
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST4]])
; CHECK-NEXT: [[LT_CAST5:%.*]] = bitcast i32* [[DOTLOC2]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST5]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[ADD_RELOAD]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[OUTPUT2]], i32* [[ADD2_LOC]], i32* [[DOTLOC2]], i32 1)
+; CHECK-NEXT: call void @outlined_ir_func_0(i32 [[ADD_RELOAD]], i32* [[OUTPUT]], i32* [[RESULT]], i32* [[OUTPUT2]], i32* [[ADD2_LOC]], i32* [[DOTLOC2]])
; CHECK-NEXT: [[ADD2_RELOAD:%.*]] = load i32, i32* [[ADD2_LOC]], align 4
; CHECK-NEXT: [[DOTRELOAD3:%.*]] = load i32, i32* [[DOTLOC2]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST4]])
diff --git a/llvm/test/Transforms/IROutliner/outlining-same-output-blocks.ll b/llvm/test/Transforms/IROutliner/outlining-same-output-blocks.ll
index f4ddfcefdf7e..fb2ce92c82e0 100644
--- a/llvm/test/Transforms/IROutliner/outlining-same-output-blocks.ll
+++ b/llvm/test/Transforms/IROutliner/outlining-same-output-blocks.ll
@@ -18,7 +18,7 @@ define void @outline_outputs1() #0 {
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]], i32 0)
+; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
@@ -58,7 +58,7 @@ define void @outline_outputs2() #0 {
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
-; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]], i32 1)
+; CHECK-NEXT: call void @outlined_ir_func_0(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
@@ -83,25 +83,16 @@ entry:
ret void
}
-; CHECK: define internal void @outlined_ir_func_0(i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]], i32* [[ARG3:%.*]], i32* [[ARG4:%.*]], i32 [[ARG5:%.*]]) #1 {
+; CHECK: define internal void @outlined_ir_func_0(i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]], i32* [[ARG3:%.*]], i32* [[ARG4:%.*]]) #1 {
; CHECK: entry_after_outline.exitStub:
-; CHECK-NEXT: switch i32 [[ARG5]], label [[BLOCK:%.*]] [
-; CHECK-NEXT: i32 0, label %[[BLOCK_0:.*]]
-; CHECK-NEXT: i32 1, label %[[BLOCK_1:.*]]
+; CHECK-NEXT: store i32 [[ADD:%.*]], i32* [[ARG3]], align 4
+; CHECK-NEXT: store i32 [[TMP2:%.*]], i32* [[ARG4]], align 4
; CHECK: entry_to_outline:
; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4
; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARG0]], align 4
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARG1]], align 4
-; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[ADD]] = add i32 [[TMP0]], [[TMP1]]
; CHECK-NEXT: store i32 [[ADD]], i32* [[ARG2]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARG2]], align 4
-
-; CHECK: [[BLOCK_0]]:
-; CHECK-NEXT: store i32 [[ADD]], i32* [[ARG3]], align 4
-; CHECK-NEXT: store i32 [[TMP2]], i32* [[ARG4]], align 4
-
-; CHECK: [[BLOCK_1]]:
-; CHECK-NEXT: store i32 [[ADD]], i32* [[ARG3]], align 4
-; CHECK-NEXT: store i32 [[TMP2]], i32* [[ARG4]], align 4
+; CHECK-NEXT: [[TMP2]] = load i32, i32* [[ARG2]], align 4
More information about the llvm-branch-commits
mailing list