[llvm] 8045bf9 - [FuncSpec] Support function specialization across multiple arguments.
Alexandros Lamprineas via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 28 04:08:40 PDT 2022
Author: Alexandros Lamprineas
Date: 2022-03-28T12:01:53+01:00
New Revision: 8045bf9d0dc5be3a8b8d075fdfe23828f4b7d70e
URL: https://github.com/llvm/llvm-project/commit/8045bf9d0dc5be3a8b8d075fdfe23828f4b7d70e
DIFF: https://github.com/llvm/llvm-project/commit/8045bf9d0dc5be3a8b8d075fdfe23828f4b7d70e.diff
LOG: [FuncSpec] Support function specialization across multiple arguments.
The current implementation of Function Specialization does not allow
specializing more than one argument per function call, a limitation
this patch lifts.
My main challenge was to choose the most suitable ADT for storing the
specializations. We need an associative container for binding all the
actual arguments of a specialization to the function call. We also
need a consistent iteration order across executions. Lastly, we want
to be able to sort the entries by Gain and reject the least profitable
ones.
MapVector almost fits the bill, but not quite: erasing elements is
expensive, and using stable_sort messes up the indices into the
underlying vector. I am therefore operating on the underlying vector
directly after calculating the Gain.
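For illustration, here is a minimal standalone sketch (not part of the
patch) of that container usage: populate a SmallMapVector keyed by call
site, drop unprofitable entries with remove_if, then take the underlying
vector so it can be sorted and truncated safely. The CallSiteId/Info
names below are placeholders, not the actual types used by the pass.

  // Illustration only; placeholder key/value types, not the pass's own.
  #include "llvm/ADT/MapVector.h"
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"
  #include <utility>

  struct Info { int Gain; };

  llvm::SmallVector<std::pair<int, Info>, 8> collectProfitable() {
    // Associative container with deterministic iteration order.
    llvm::SmallMapVector<int, Info, 8> Specs;
    Specs.insert({/*CallSiteId=*/1, Info{5}});
    Specs.insert({/*CallSiteId=*/2, Info{-3}});

    // Erasing from a MapVector is expensive, so filter once with remove_if.
    Specs.remove_if([](const auto &Entry) { return Entry.second.Gain <= 0; });

    // stable_sort would desynchronise the map's key-to-index cache, so hand
    // back the underlying vector and sort that instead.
    auto WorkList = Specs.takeVector();
    llvm::stable_sort(WorkList, [](const auto &L, const auto &R) {
      return L.second.Gain > R.second.Gain;
    });
    return WorkList;
  }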
Differential Revision: https://reviews.llvm.org/D119880
Added:
llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
Modified:
llvm/include/llvm/Transforms/Utils/SCCPSolver.h
llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
llvm/lib/Transforms/Utils/SCCPSolver.cpp
llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
index fb94b1dc20b81..17bd072598ee5 100644
--- a/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
+++ b/llvm/include/llvm/Transforms/Utils/SCCPSolver.h
@@ -151,13 +151,14 @@ class SCCPSolver {
/// Return a reference to the set of argument tracked functions.
SmallPtrSetImpl<Function *> &getArgumentTrackedFunctions();
- /// Mark the constant argument of a new function specialization. \p F points
- /// to the cloned function and \p Arg represents the constant argument as a
- /// pair of {formal,actual} values (the formal argument is associated with the
- /// original function definition). All other arguments of the specialization
- /// inherit the lattice state of their corresponding values in the original
- /// function.
- void markArgInFuncSpecialization(Function *F, const ArgInfo &Arg);
+ /// Mark the constant arguments of a new function specialization. \p F points
+ /// to the cloned function and \p Args contains a list of constant arguments
+ /// represented as pairs of {formal,actual} values (the formal argument is
+ /// associated with the original function definition). All other arguments of
+ /// the specialization inherit the lattice state of their corresponding values
+ /// in the original function.
+ void markArgInFuncSpecialization(Function *F,
+ const SmallVectorImpl<ArgInfo> &Args);
/// Mark all of the blocks in function \p F non-executable. Clients can used
/// this method to erase a function from the module (e.g., if it has been
diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
index 8faca670112ba..c9775e097a45d 100644
--- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp
@@ -99,8 +99,13 @@ static cl::opt<bool> SpecializeOnAddresses(
"func-specialization-on-address", cl::init(false), cl::Hidden,
cl::desc("Enable function specialization on the address of global values"));
-// TODO: This needs checking to see the impact on compile-times, which is why
-// this is off by default for now.
+// Disabled by default as it can significantly increase compilation times.
+// Running nikic's compile time tracker on x86 with instruction count as the
+// metric shows 3-4% regression for SPASS while being neutral for all other
+// benchmarks of the llvm test suite.
+//
+// https://llvm-compile-time-tracker.com
+// https://github.com/nikic/llvm-compile-time-tracker
static cl::opt<bool> EnableSpecializationForLiteralConstant(
"function-specialization-for-literal-constant", cl::init(false), cl::Hidden,
cl::desc("Enable specialization of functions that take a literal constant "
@@ -110,17 +115,17 @@ namespace {
// Bookkeeping struct to pass data from the analysis and profitability phase
// to the actual transform helper functions.
struct SpecializationInfo {
- ArgInfo Arg; // Stores the {formal,actual} argument pair.
- InstructionCost Gain; // Profitability: Gain = Bonus - Cost.
-
- SpecializationInfo(Argument *A, Constant *C, InstructionCost G)
- : Arg(A, C), Gain(G){};
+ SmallVector<ArgInfo, 8> Args; // Stores the {formal,actual} argument pairs.
+ InstructionCost Gain; // Profitability: Gain = Bonus - Cost.
};
} // Anonymous namespace
using FuncList = SmallVectorImpl<Function *>;
-using ConstList = SmallVector<Constant *>;
-using SpecializationList = SmallVector<SpecializationInfo>;
+using CallArgBinding = std::pair<CallBase *, Constant *>;
+using CallSpecBinding = std::pair<CallBase *, SpecializationInfo>;
+// We are using MapVector because it guarantees deterministic iteration
+// order across executions.
+using SpecializationMap = SmallMapVector<CallBase *, SpecializationInfo, 8>;
// Helper to check if \p LV is either a constant or a constant
// range with a single element. This should cover exactly the same cases as the
@@ -307,17 +312,15 @@ class FunctionSpecializer {
LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization cost for "
<< F->getName() << " is " << Cost << "\n");
- SpecializationList Specializations;
- calculateGains(F, Cost, Specializations);
- if (Specializations.empty()) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: no possible constants found\n");
+ SmallVector<CallSpecBinding, 8> Specializations;
+ if (!calculateGains(F, Cost, Specializations)) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: No possible constants found\n");
continue;
}
- for (SpecializationInfo &S : Specializations) {
- specializeFunction(F, S, WorkList);
- Changed = true;
- }
+ Changed = true;
+ for (auto &Entry : Specializations)
+ specializeFunction(F, Entry.second, WorkList);
}
updateSpecializedFuncs(Candidates, WorkList);
@@ -392,21 +395,22 @@ class FunctionSpecializer {
return Clone;
}
- /// This function decides whether it's worthwhile to specialize function \p F
- /// based on the known constant values its arguments can take on, i.e. it
- /// calculates a gain and returns a list of actual arguments that are deemed
- /// profitable to specialize. Specialization is performed on the first
- /// interesting argument. Specializations based on additional arguments will
- /// be evaluated on following iterations of the main IPSCCP solve loop.
- void calculateGains(Function *F, InstructionCost Cost,
- SpecializationList &WorkList) {
+ /// This function decides whether it's worthwhile to specialize function
+ /// \p F based on the known constant values its arguments can take on. It
+ /// only discovers potential specialization opportunities without actually
+ /// applying them.
+ ///
+ /// \returns true if any specializations have been found.
+ bool calculateGains(Function *F, InstructionCost Cost,
+ SmallVectorImpl<CallSpecBinding> &WorkList) {
+ SpecializationMap Specializations;
// Determine if we should specialize the function based on the values the
// argument can take on. If specialization is not profitable, we continue
// on to the next argument.
for (Argument &FormalArg : F->args()) {
// Determine if this argument is interesting. If we know the argument can
// take on any constant values, they are collected in Constants.
- ConstList ActualArgs;
+ SmallVector<CallArgBinding, 8> ActualArgs;
if (!isArgumentInteresting(&FormalArg, ActualArgs)) {
LLVM_DEBUG(dbgs() << "FnSpecialization: Argument "
<< FormalArg.getNameOrAsOperand()
@@ -414,50 +418,56 @@ class FunctionSpecializer {
continue;
}
- for (auto *ActualArg : ActualArgs) {
- InstructionCost Gain =
- ForceFunctionSpecialization
- ? 1
- : getSpecializationBonus(&FormalArg, ActualArg) - Cost;
+ for (const auto &Entry : ActualArgs) {
+ CallBase *Call = Entry.first;
+ Constant *ActualArg = Entry.second;
- if (Gain <= 0)
- continue;
- WorkList.push_back({&FormalArg, ActualArg, Gain});
- }
+ auto I = Specializations.insert({Call, SpecializationInfo()});
+ SpecializationInfo &S = I.first->second;
- if (WorkList.empty())
- continue;
-
- // Sort the candidates in descending order.
- llvm::stable_sort(WorkList, [](const SpecializationInfo &L,
- const SpecializationInfo &R) {
- return L.Gain > R.Gain;
- });
-
- // Truncate the worklist to 'MaxClonesThreshold' candidates if
- // necessary.
- if (WorkList.size() > MaxClonesThreshold) {
- LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed "
- << "the maximum number of clones threshold.\n"
- << "FnSpecialization: Truncating worklist to "
- << MaxClonesThreshold << " candidates.\n");
- WorkList.erase(WorkList.begin() + MaxClonesThreshold, WorkList.end());
+ if (I.second)
+ S.Gain = ForceFunctionSpecialization ? 1 : 0 - Cost;
+ if (!ForceFunctionSpecialization)
+ S.Gain += getSpecializationBonus(&FormalArg, ActualArg);
+ S.Args.push_back({&FormalArg, ActualArg});
}
+ }
+
+ // Remove unprofitable specializations.
+ Specializations.remove_if(
+ [](const auto &Entry) { return Entry.second.Gain <= 0; });
+
+ // Clear the MapVector and return the underlying vector.
+ WorkList = Specializations.takeVector();
+
+ // Sort the candidates in descending order.
+ llvm::stable_sort(WorkList, [](const auto &L, const auto &R) {
+ return L.second.Gain > R.second.Gain;
+ });
+
+ // Truncate the worklist to 'MaxClonesThreshold' candidates if necessary.
+ if (WorkList.size() > MaxClonesThreshold) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Number of candidates exceed "
+ << "the maximum number of clones threshold.\n"
+ << "FnSpecialization: Truncating worklist to "
+ << MaxClonesThreshold << " candidates.\n");
+ WorkList.erase(WorkList.begin() + MaxClonesThreshold, WorkList.end());
+ }
- LLVM_DEBUG(dbgs() << "FnSpecialization: Specializations for function "
- << F->getName() << "\n";
- for (SpecializationInfo &S
- : WorkList) {
+ LLVM_DEBUG(dbgs() << "FnSpecialization: Specializations for function "
+ << F->getName() << "\n";
+ for (const auto &Entry
+ : WorkList) {
+ dbgs() << "FnSpecialization: Gain = " << Entry.second.Gain
+ << "\n";
+ for (const ArgInfo &Arg : Entry.second.Args)
dbgs() << "FnSpecialization: FormalArg = "
- << S.Arg.Formal->getNameOrAsOperand()
+ << Arg.Formal->getNameOrAsOperand()
<< ", ActualArg = "
- << S.Arg.Actual->getNameOrAsOperand()
- << ", Gain = " << S.Gain << "\n";
- });
+ << Arg.Actual->getNameOrAsOperand() << "\n";
+ });
- // FIXME: Only one argument per function.
- break;
- }
+ return !WorkList.empty();
}
bool isCandidateFunction(Function *F) {
@@ -490,12 +500,12 @@ class FunctionSpecializer {
Function *Clone = cloneCandidateFunction(F, Mappings);
// Rewrite calls to the function so that they call the clone instead.
- rewriteCallSites(Clone, S.Arg, Mappings);
+ rewriteCallSites(Clone, S.Args, Mappings);
// Initialize the lattice state of the arguments of the function clone,
// marking the argument on which we specialized the function constant
// with the given value.
- Solver.markArgInFuncSpecialization(Clone, S.Arg);
+ Solver.markArgInFuncSpecialization(Clone, S.Args);
// Mark all the specialized functions
WorkList.push_back(Clone);
@@ -641,7 +651,8 @@ class FunctionSpecializer {
///
/// \returns true if the function should be specialized on the given
/// argument.
- bool isArgumentInteresting(Argument *A, ConstList &Constants) {
+ bool isArgumentInteresting(Argument *A,
+ SmallVectorImpl<CallArgBinding> &Constants) {
// For now, don't attempt to specialize functions based on the values of
// composite types.
if (!A->getType()->isSingleValueType() || A->user_empty())
@@ -681,7 +692,8 @@ class FunctionSpecializer {
/// Collect in \p Constants all the constant values that argument \p A can
/// take on.
- void getPossibleConstants(Argument *A, ConstList &Constants) {
+ void getPossibleConstants(Argument *A,
+ SmallVectorImpl<CallArgBinding> &Constants) {
Function *F = A->getParent();
// Iterate over all the call sites of the argument's parent function.
@@ -723,23 +735,24 @@ class FunctionSpecializer {
if (isa<Constant>(V) && (Solver.getLatticeValueFor(V).isConstant() ||
EnableSpecializationForLiteralConstant))
- Constants.push_back(cast<Constant>(V));
+ Constants.push_back({&CS, cast<Constant>(V)});
}
}
/// Rewrite calls to function \p F to call function \p Clone instead.
///
/// This function modifies calls to function \p F as long as the actual
- /// argument matches the one in \p Arg. Note that for recursive calls we
- /// need to compare against the cloned formal argument.
+ /// arguments match those in \p Args. Note that for recursive calls we
+ /// need to compare against the cloned formal arguments.
///
/// Callsites that have been marked with the MinSize function attribute won't
/// be specialized and rewritten.
- void rewriteCallSites(Function *Clone, const ArgInfo &Arg,
+ void rewriteCallSites(Function *Clone, const SmallVectorImpl<ArgInfo> &Args,
ValueToValueMapTy &Mappings) {
- Function *F = Arg.Formal->getParent();
- unsigned ArgNo = Arg.Formal->getArgNo();
- SmallVector<CallBase *, 4> CallSitesToRewrite;
+ assert(!Args.empty() && "Specialization without arguments");
+ Function *F = Args[0].Formal->getParent();
+
+ SmallVector<CallBase *, 8> CallSitesToRewrite;
for (auto *U : F->users()) {
if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
continue;
@@ -758,9 +771,16 @@ class FunctionSpecializer {
<< "\n");
if (/* recursive call */
(CS->getFunction() == Clone &&
- CS->getArgOperand(ArgNo) == Mappings[Arg.Formal]) ||
+ all_of(Args,
+ [CS, &Mappings](const ArgInfo &Arg) {
+ unsigned ArgNo = Arg.Formal->getArgNo();
+ return CS->getArgOperand(ArgNo) == Mappings[Arg.Formal];
+ })) ||
/* normal call */
- CS->getArgOperand(ArgNo) == Arg.Actual) {
+ all_of(Args, [CS](const ArgInfo &Arg) {
+ unsigned ArgNo = Arg.Formal->getArgNo();
+ return CS->getArgOperand(ArgNo) == Arg.Actual;
+ })) {
CS->setCalledFunction(Clone);
Solver.markOverdefined(CS);
}
@@ -891,7 +911,7 @@ bool llvm::runFunctionSpecialization(
// Initially resolve the constants in all the argument tracked functions.
RunSCCPSolver(FuncDecls);
- SmallVector<Function *, 2> WorkList;
+ SmallVector<Function *, 8> WorkList;
unsigned I = 0;
while (FuncSpecializationMaxIters != I++ &&
FS.specializeFunctions(FuncDecls, WorkList)) {
diff --git a/llvm/lib/Transforms/Utils/SCCPSolver.cpp b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
index 88dd5e6031ecf..607928c835fb5 100644
--- a/llvm/lib/Transforms/Utils/SCCPSolver.cpp
+++ b/llvm/lib/Transforms/Utils/SCCPSolver.cpp
@@ -450,7 +450,8 @@ class SCCPInstVisitor : public InstVisitor<SCCPInstVisitor> {
return TrackingIncomingArguments;
}
- void markArgInFuncSpecialization(Function *F, const ArgInfo &Arg);
+ void markArgInFuncSpecialization(Function *F,
+ const SmallVectorImpl<ArgInfo> &Args);
void markFunctionUnreachable(Function *F) {
for (auto &BB : *F)
@@ -524,21 +525,24 @@ Constant *SCCPInstVisitor::getConstant(const ValueLatticeElement &LV) const {
return nullptr;
}
-void SCCPInstVisitor::markArgInFuncSpecialization(Function *F,
- const ArgInfo &Arg) {
- assert(F->arg_size() == Arg.Formal->getParent()->arg_size() &&
+void SCCPInstVisitor::markArgInFuncSpecialization(
+ Function *F, const SmallVectorImpl<ArgInfo> &Args) {
+ assert(!Args.empty() && "Specialization without arguments");
+ assert(F->arg_size() == Args[0].Formal->getParent()->arg_size() &&
"Functions should have the same number of arguments");
+ auto Iter = Args.begin();
Argument *NewArg = F->arg_begin();
- Argument *OldArg = Arg.Formal->getParent()->arg_begin();
+ Argument *OldArg = Args[0].Formal->getParent()->arg_begin();
for (auto End = F->arg_end(); NewArg != End; ++NewArg, ++OldArg) {
LLVM_DEBUG(dbgs() << "SCCP: Marking argument "
<< NewArg->getNameOrAsOperand() << "\n");
- if (OldArg == Arg.Formal) {
+ if (OldArg == Iter->Formal) {
// Mark the argument constants in the new function.
- markConstant(NewArg, Arg.Actual);
+ markConstant(NewArg, Iter->Actual);
+ ++Iter;
} else if (ValueState.count(OldArg)) {
// For the remaining arguments in the new function, copy the lattice state
// over from the old function.
@@ -1717,8 +1721,9 @@ SmallPtrSetImpl<Function *> &SCCPSolver::getArgumentTrackedFunctions() {
return Visitor->getArgumentTrackedFunctions();
}
-void SCCPSolver::markArgInFuncSpecialization(Function *F, const ArgInfo &Arg) {
- Visitor->markArgInFuncSpecialization(F, Arg);
+void SCCPSolver::markArgInFuncSpecialization(
+ Function *F, const SmallVectorImpl<ArgInfo> &Args) {
+ Visitor->markArgInFuncSpecialization(F, Args);
}
void SCCPSolver::markFunctionUnreachable(Function *F) {
diff --git a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll
index 35ad27e2cc9a1..787e6e6c87d53 100644
--- a/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll
+++ b/llvm/test/Transforms/FunctionSpecialization/function-specialization4.ll
@@ -46,7 +46,7 @@ entry:
; CHECK-NEXT: entry:
; CHECK-NEXT: %0 = load i32, i32* @A, align 4
; CHECK-NEXT: %add = add nsw i32 %x, %0
-; CHECK-NEXT: %1 = load i32, i32* %c, align 4
+; CHECK-NEXT: %1 = load i32, i32* @C, align 4
; CHECK-NEXT: %add1 = add nsw i32 %add, %1
; CHECK-NEXT: ret i32 %add1
; CHECK-NEXT: }
@@ -55,7 +55,7 @@ entry:
; CHECK-NEXT: entry:
; CHECK-NEXT: %0 = load i32, i32* @B, align 4
; CHECK-NEXT: %add = add nsw i32 %x, %0
-; CHECK-NEXT: %1 = load i32, i32* %c, align 4
+; CHECK-NEXT: %1 = load i32, i32* @D, align 4
; CHECK-NEXT: %add1 = add nsw i32 %add, %1
; CHECK-NEXT: ret i32 %add1
; CHECK-NEXT: }
diff --git a/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
new file mode 100644
index 0000000000000..b4f28fd2b2446
--- /dev/null
+++ b/llvm/test/Transforms/FunctionSpecialization/specialize-multiple-arguments.ll
@@ -0,0 +1,185 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -function-specialization -func-specialization-max-clones=0 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=NONE
+; RUN: opt -function-specialization -func-specialization-max-clones=1 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=ONE
+; RUN: opt -function-specialization -func-specialization-max-clones=2 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=TWO
+; RUN: opt -function-specialization -func-specialization-max-clones=3 -func-specialization-size-threshold=14 -S < %s | FileCheck %s --check-prefix=THREE
+
+; Make sure that we iterate correctly after sorting the specializations:
+; FnSpecialization: Specializations for function compute
+; FnSpecialization: Gain = 608
+; FnSpecialization: FormalArg = binop1, ActualArg = power
+; FnSpecialization: FormalArg = binop2, ActualArg = mul
+; FnSpecialization: Gain = 982
+; FnSpecialization: FormalArg = binop1, ActualArg = plus
+; FnSpecialization: FormalArg = binop2, ActualArg = minus
+; FnSpecialization: Gain = 795
+; FnSpecialization: FormalArg = binop1, ActualArg = minus
+; FnSpecialization: FormalArg = binop2, ActualArg = power
+
+define i64 @main(i64 %x, i64 %y, i1 %flag) {
+; NONE-LABEL: @main(
+; NONE-NEXT: entry:
+; NONE-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
+; NONE: plus:
+; NONE-NEXT: [[TMP0:%.*]] = call i64 @compute(i64 [[X:%.*]], i64 [[Y:%.*]], i64 (i64, i64)* @power, i64 (i64, i64)* @mul)
+; NONE-NEXT: br label [[MERGE:%.*]]
+; NONE: minus:
+; NONE-NEXT: [[TMP1:%.*]] = call i64 @compute(i64 [[X]], i64 [[Y]], i64 (i64, i64)* @plus, i64 (i64, i64)* @minus)
+; NONE-NEXT: br label [[MERGE]]
+; NONE: merge:
+; NONE-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ]
+; NONE-NEXT: [[TMP3:%.*]] = call i64 @compute(i64 [[TMP2]], i64 42, i64 (i64, i64)* @minus, i64 (i64, i64)* @power)
+; NONE-NEXT: ret i64 [[TMP3]]
+;
+; ONE-LABEL: @main(
+; ONE-NEXT: entry:
+; ONE-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
+; ONE: plus:
+; ONE-NEXT: [[TMP0:%.*]] = call i64 @compute(i64 [[X:%.*]], i64 [[Y:%.*]], i64 (i64, i64)* @power, i64 (i64, i64)* @mul)
+; ONE-NEXT: br label [[MERGE:%.*]]
+; ONE: minus:
+; ONE-NEXT: [[TMP1:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], i64 (i64, i64)* @plus, i64 (i64, i64)* @minus)
+; ONE-NEXT: br label [[MERGE]]
+; ONE: merge:
+; ONE-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ]
+; ONE-NEXT: [[TMP3:%.*]] = call i64 @compute(i64 [[TMP2]], i64 42, i64 (i64, i64)* @minus, i64 (i64, i64)* @power)
+; ONE-NEXT: ret i64 [[TMP3]]
+;
+; TWO-LABEL: @main(
+; TWO-NEXT: entry:
+; TWO-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
+; TWO: plus:
+; TWO-NEXT: [[TMP0:%.*]] = call i64 @compute(i64 [[X:%.*]], i64 [[Y:%.*]], i64 (i64, i64)* @power, i64 (i64, i64)* @mul)
+; TWO-NEXT: br label [[MERGE:%.*]]
+; TWO: minus:
+; TWO-NEXT: [[TMP1:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], i64 (i64, i64)* @plus, i64 (i64, i64)* @minus)
+; TWO-NEXT: br label [[MERGE]]
+; TWO: merge:
+; TWO-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ]
+; TWO-NEXT: [[TMP3:%.*]] = call i64 @compute.2(i64 [[TMP2]], i64 42, i64 (i64, i64)* @minus, i64 (i64, i64)* @power)
+; TWO-NEXT: ret i64 [[TMP3]]
+;
+; THREE-LABEL: @main(
+; THREE-NEXT: entry:
+; THREE-NEXT: br i1 [[FLAG:%.*]], label [[PLUS:%.*]], label [[MINUS:%.*]]
+; THREE: plus:
+; THREE-NEXT: [[TMP0:%.*]] = call i64 @compute.3(i64 [[X:%.*]], i64 [[Y:%.*]], i64 (i64, i64)* @power, i64 (i64, i64)* @mul)
+; THREE-NEXT: br label [[MERGE:%.*]]
+; THREE: minus:
+; THREE-NEXT: [[TMP1:%.*]] = call i64 @compute.1(i64 [[X]], i64 [[Y]], i64 (i64, i64)* @plus, i64 (i64, i64)* @minus)
+; THREE-NEXT: br label [[MERGE]]
+; THREE: merge:
+; THREE-NEXT: [[TMP2:%.*]] = phi i64 [ [[TMP0]], [[PLUS]] ], [ [[TMP1]], [[MINUS]] ]
+; THREE-NEXT: [[TMP3:%.*]] = call i64 @compute.2(i64 [[TMP2]], i64 42, i64 (i64, i64)* @minus, i64 (i64, i64)* @power)
+; THREE-NEXT: ret i64 [[TMP3]]
+;
+entry:
+ br i1 %flag, label %plus, label %minus
+
+plus:
+ %tmp0 = call i64 @compute(i64 %x, i64 %y, i64 (i64, i64)* @power, i64 (i64, i64)* @mul)
+ br label %merge
+
+minus:
+ %tmp1 = call i64 @compute(i64 %x, i64 %y, i64 (i64, i64)* @plus, i64 (i64, i64)* @minus)
+ br label %merge
+
+merge:
+ %tmp2 = phi i64 [ %tmp0, %plus ], [ %tmp1, %minus]
+ %tmp3 = call i64 @compute(i64 %tmp2, i64 42, i64 (i64, i64)* @minus, i64 (i64, i64)* @power)
+ ret i64 %tmp3
+}
+
+; THREE-NOT: define internal i64 @compute
+;
+; THREE-LABEL: define internal i64 @compute.1(i64 %x, i64 %y, i64 (i64, i64)* %binop1, i64 (i64, i64)* %binop2) {
+; THREE-NEXT: entry:
+; THREE-NEXT: [[TMP0:%.+]] = call i64 @plus(i64 %x, i64 %y)
+; THREE-NEXT: [[TMP1:%.+]] = call i64 @minus(i64 %x, i64 %y)
+; THREE-NEXT: [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]]
+; THREE-NEXT: [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x
+; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], %y
+; THREE-NEXT: [[TMP5:%.+]] = mul i64 [[TMP4]], 2
+; THREE-NEXT: ret i64 [[TMP5]]
+; THREE-NEXT: }
+;
+; THREE-LABEL: define internal i64 @compute.2(i64 %x, i64 %y, i64 (i64, i64)* %binop1, i64 (i64, i64)* %binop2) {
+; THREE-NEXT: entry:
+; THREE-NEXT: [[TMP0:%.+]] = call i64 @minus(i64 %x, i64 %y)
+; THREE-NEXT: [[TMP1:%.+]] = call i64 @power(i64 %x, i64 %y)
+; THREE-NEXT: [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]]
+; THREE-NEXT: [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x
+; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], %y
+; THREE-NEXT: [[TMP5:%.+]] = mul i64 [[TMP4]], 2
+; THREE-NEXT: ret i64 [[TMP5]]
+; THREE-NEXT: }
+;
+; THREE-LABEL: define internal i64 @compute.3(i64 %x, i64 %y, i64 (i64, i64)* %binop1, i64 (i64, i64)* %binop2) {
+; THREE-NEXT: entry:
+; THREE-NEXT: [[TMP0:%.+]] = call i64 @power(i64 %x, i64 %y)
+; THREE-NEXT: [[TMP1:%.+]] = call i64 @mul(i64 %x, i64 %y)
+; THREE-NEXT: [[TMP2:%.+]] = add i64 [[TMP0]], [[TMP1]]
+; THREE-NEXT: [[TMP3:%.+]] = sdiv i64 [[TMP2]], %x
+; THREE-NEXT: [[TMP4:%.+]] = sub i64 [[TMP3]], %y
+; THREE-NEXT: [[TMP5:%.+]] = mul i64 [[TMP4]], 2
+; THREE-NEXT: ret i64 [[TMP5]]
+; THREE-NEXT: }
+;
+define internal i64 @compute(i64 %x, i64 %y, i64 (i64, i64)* %binop1, i64 (i64, i64)* %binop2) {
+entry:
+ %tmp0 = call i64 %binop1(i64 %x, i64 %y)
+ %tmp1 = call i64 %binop2(i64 %x, i64 %y)
+ %add = add i64 %tmp0, %tmp1
+ %div = sdiv i64 %add, %x
+ %sub = sub i64 %div, %y
+ %mul = mul i64 %sub, 2
+ ret i64 %mul
+}
+
+define internal i64 @plus(i64 %x, i64 %y) {
+entry:
+ %tmp0 = add i64 %x, %y
+ ret i64 %tmp0
+}
+
+define internal i64 @minus(i64 %x, i64 %y) {
+entry:
+ %tmp0 = sub i64 %x, %y
+ ret i64 %tmp0
+}
+
+define internal i64 @mul(i64 %x, i64 %n) {
+entry:
+ %cmp6 = icmp sgt i64 %n, 1
+ br i1 %cmp6, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %x.addr.0.lcssa = phi i64 [ %x, %entry ], [ %add, %for.body ]
+ ret i64 %x.addr.0.lcssa
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ]
+ %x.addr.07 = phi i64 [ %add, %for.body ], [ %x, %entry ]
+ %add = shl nsw i64 %x.addr.07, 1
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+
+define internal i64 @power(i64 %x, i64 %n) {
+entry:
+ %cmp6 = icmp sgt i64 %n, 1
+ br i1 %cmp6, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %x.addr.0.lcssa = phi i64 [ %x, %entry ], [ %mul, %for.body ]
+ ret i64 %x.addr.0.lcssa
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 1, %entry ]
+ %x.addr.07 = phi i64 [ %mul, %for.body ], [ %x, %entry ]
+ %mul = mul nsw i64 %x.addr.07, %x.addr.07
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}