[polly] r304528 - [CodeGen] Extend Performance Counter to track per-scop information.
Siddharth Bhat via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 2 01:01:22 PDT 2017
Author: bollu
Date: Fri Jun 2 03:01:22 2017
New Revision: 304528
URL: http://llvm.org/viewvc/llvm-project?rev=304528&view=rev
Log:
[CodeGen] Extend Performance Counter to track per-scop information.
Previously, we would generate one performance counter for all scops.
Now, we generate the old information as well as a per-scop
performance counter that provides finer-grained information.
This patch needed a way to generate a unique name for a `Scop`.
The start region, end region, and function name combined provide a
unique `Scop` name. So, `Scop` has a new public API to provide its start
and end region names.
Differential Revision: https://reviews.llvm.org/D33723
Added:
polly/trunk/test/Isl/CodeGen/perf_monitoring_per_scop.ll
Modified:
polly/trunk/include/polly/CodeGen/PerfMonitor.h
polly/trunk/include/polly/ScopInfo.h
polly/trunk/lib/Analysis/ScopInfo.cpp
polly/trunk/lib/CodeGen/CodeGeneration.cpp
polly/trunk/lib/CodeGen/PerfMonitor.cpp
polly/trunk/test/Isl/CodeGen/perf_monitoring.ll
Modified: polly/trunk/include/polly/CodeGen/PerfMonitor.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/CodeGen/PerfMonitor.h?rev=304528&r1=304527&r2=304528&view=diff
==============================================================================
--- polly/trunk/include/polly/CodeGen/PerfMonitor.h (original)
+++ polly/trunk/include/polly/CodeGen/PerfMonitor.h Fri Jun 2 03:01:22 2017
@@ -25,8 +25,10 @@ class PerfMonitor {
public:
/// Create a new performance monitor.
///
+ /// @param S The scop for which to generate fine-grained performance
+ /// monitoring information.
/// @param M The module for which to generate the performance monitor.
- PerfMonitor(llvm::Module *M);
+ PerfMonitor(const Scop &S, llvm::Module *M);
/// Initialize the performance monitor.
///
@@ -48,12 +50,18 @@ private:
llvm::Module *M;
PollyIRBuilder Builder;
+ // The scop to profile against.
+ const Scop &S;
+
/// Indicates if performance profiling is supported on this architecture.
bool Supported;
/// The cycle counter at the beginning of the program execution.
llvm::Value *CyclesTotalStartPtr;
+ /// The total number of cycles spent in the current scop S.
+ llvm::Value *CyclesInCurrentScopPtr;
+
/// The total number of cycles spent within scops.
llvm::Value *CyclesInScopsPtr;
@@ -89,6 +97,12 @@ private:
/// into the module (or obtain references to them if they already exist).
void addGlobalVariables();
+ /// Add per-scop tracking to module.
+ ///
+ /// Insert the global variable which is used to track the number of cycles
+ /// this scop runs.
+ void addScopCounter();
+
/// Get a reference to the intrinsic "i64 @llvm.x86.rdtscp(i8*)".
///
/// The rdtscp function returns the current value of the processor's
@@ -126,6 +140,12 @@ private:
/// This function finalizes the performance measurements and prints the
/// results to stdout. It is expected to be registered with 'atexit()'.
llvm::Function *insertFinalReporting();
+
+ /// Append Scop reporting data to "__polly_perf_final_reporting".
+ ///
+ /// This function appends the current scop (S)'s information to the final
+ /// printing function.
+ void AppendScopReporting();
};
} // namespace polly
Modified: polly/trunk/include/polly/ScopInfo.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/ScopInfo.h?rev=304528&r1=304527&r2=304528&view=diff
==============================================================================
--- polly/trunk/include/polly/ScopInfo.h (original)
+++ polly/trunk/include/polly/ScopInfo.h Fri Jun 2 03:01:22 2017
@@ -2329,6 +2329,14 @@ public:
/// Check if the SCoP has been optimized by the scheduler.
bool isOptimized() const { return IsOptimized; }
+ /// Get the name of the entry and exit blocks of this Scop.
+ ///
+ /// These along with the function name can uniquely identify a Scop.
+ ///
+ /// @return std::pair whose first element is the entry name & second element
+ /// is the exit name.
+ std::pair<std::string, std::string> getEntryExitStr() const;
+
/// Get the name of this Scop.
std::string getNameStr() const;
Modified: polly/trunk/lib/Analysis/ScopInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/Analysis/ScopInfo.cpp?rev=304528&r1=304527&r2=304528&view=diff
==============================================================================
--- polly/trunk/lib/Analysis/ScopInfo.cpp (original)
+++ polly/trunk/lib/Analysis/ScopInfo.cpp Fri Jun 2 03:01:22 2017
@@ -4126,6 +4126,12 @@ std::string Scop::getInvalidContextStr()
std::string Scop::getNameStr() const {
std::string ExitName, EntryName;
+ std::tie(EntryName, ExitName) = getEntryExitStr();
+ return EntryName + "---" + ExitName;
+}
+
+std::pair<std::string, std::string> Scop::getEntryExitStr() const {
+ std::string ExitName, EntryName;
raw_string_ostream ExitStr(ExitName);
raw_string_ostream EntryStr(EntryName);
@@ -4138,7 +4144,7 @@ std::string Scop::getNameStr() const {
} else
ExitName = "FunctionExit";
- return EntryName + "---" + ExitName;
+ return std::make_pair(EntryName, ExitName);
}
__isl_give isl_set *Scop::getContext() const { return isl_set_copy(Context); }
Modified: polly/trunk/lib/CodeGen/CodeGeneration.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/CodeGeneration.cpp?rev=304528&r1=304527&r2=304528&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/CodeGeneration.cpp (original)
+++ polly/trunk/lib/CodeGen/CodeGeneration.cpp Fri Jun 2 03:01:22 2017
@@ -184,7 +184,7 @@ static bool CodeGen(Scop &S, IslAstInfo
IslNodeBuilder NodeBuilder(Builder, Annotator, DL, LI, SE, DT, S, StartBlock);
if (PerfMonitoring) {
- PerfMonitor P(EnteringBB->getParent()->getParent());
+ PerfMonitor P(S, EnteringBB->getParent()->getParent());
P.initialize();
P.insertRegionStart(SplitBlock->getTerminator());
Modified: polly/trunk/lib/CodeGen/PerfMonitor.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PerfMonitor.cpp?rev=304528&r1=304527&r2=304528&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PerfMonitor.cpp (original)
+++ polly/trunk/lib/CodeGen/PerfMonitor.cpp Fri Jun 2 03:01:22 2017
@@ -11,8 +11,10 @@
#include "polly/CodeGen/PerfMonitor.h"
#include "polly/CodeGen/RuntimeDebugBuilder.h"
+#include "polly/ScopInfo.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Intrinsics.h"
+#include <sstream>
using namespace llvm;
using namespace polly;
@@ -60,51 +62,73 @@ Function *PerfMonitor::getRDTSCP() {
return Intrinsic::getDeclaration(M, Intrinsic::x86_rdtscp);
}
-PerfMonitor::PerfMonitor(Module *M) : M(M), Builder(M->getContext()) {
+PerfMonitor::PerfMonitor(const Scop &S, Module *M)
+ : M(M), Builder(M->getContext()), S(S) {
if (Triple(M->getTargetTriple()).getArch() == llvm::Triple::x86_64)
Supported = true;
else
Supported = false;
}
-void PerfMonitor::addGlobalVariables() {
- auto TryRegisterGlobal = [=](const char *Name, Constant *InitialValue,
- Value **Location) {
- *Location = M->getGlobalVariable(Name);
-
- if (!*Location)
- *Location = new GlobalVariable(
- *M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage,
- InitialValue, Name, nullptr, GlobalVariable::InitialExecTLSModel);
- };
+static void TryRegisterGlobal(Module *M, const char *Name,
+ Constant *InitialValue, Value **Location) {
+ *Location = M->getGlobalVariable(Name);
+
+ if (!*Location)
+ *Location = new GlobalVariable(
+ *M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage,
+ InitialValue, Name, nullptr, GlobalVariable::InitialExecTLSModel);
+};
+
+// Generate a unique name that is usable as a LLVM name for a scop to name its
+// performance counter.
+static std::string GetScopUniqueVarname(const Scop &S) {
+ std::stringstream Name;
+ std::string EntryString, ExitString;
+ std::tie(EntryString, ExitString) = S.getEntryExitStr();
+
+ Name << "__polly_perf_cycles_in_" << std::string(S.getFunction().getName())
+ << "_from__" << EntryString << "__to__" << ExitString;
+ return Name.str();
+}
- TryRegisterGlobal("__polly_perf_cycles_total_start", Builder.getInt64(0),
+void PerfMonitor::addScopCounter() {
+ const std::string varname = GetScopUniqueVarname(S);
+ TryRegisterGlobal(M, varname.c_str(), Builder.getInt64(0),
+ &CyclesInCurrentScopPtr);
+}
+
+void PerfMonitor::addGlobalVariables() {
+ TryRegisterGlobal(M, "__polly_perf_cycles_total_start", Builder.getInt64(0),
&CyclesTotalStartPtr);
- TryRegisterGlobal("__polly_perf_initialized", Builder.getInt1(0),
+ TryRegisterGlobal(M, "__polly_perf_initialized", Builder.getInt1(0),
&AlreadyInitializedPtr);
- TryRegisterGlobal("__polly_perf_cycles_in_scops", Builder.getInt64(0),
+ TryRegisterGlobal(M, "__polly_perf_cycles_in_scops", Builder.getInt64(0),
&CyclesInScopsPtr);
- TryRegisterGlobal("__polly_perf_cycles_in_scop_start", Builder.getInt64(0),
+ TryRegisterGlobal(M, "__polly_perf_cycles_in_scop_start", Builder.getInt64(0),
&CyclesInScopStartPtr);
- TryRegisterGlobal("__polly_perf_write_loation", Builder.getInt32(0),
+ TryRegisterGlobal(M, "__polly_perf_write_loation", Builder.getInt32(0),
&RDTSCPWriteLocation);
}
static const char *InitFunctionName = "__polly_perf_init";
static const char *FinalReportingFunctionName = "__polly_perf_final";
+static BasicBlock *FinalStartBB = nullptr;
+static ReturnInst *ReturnFromFinal = nullptr;
+
Function *PerfMonitor::insertFinalReporting() {
// Create new function.
GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false);
Function *ExitFn =
Function::Create(Ty, Linkage, FinalReportingFunctionName, M);
- BasicBlock *Start = BasicBlock::Create(M->getContext(), "start", ExitFn);
- Builder.SetInsertPoint(Start);
+ FinalStartBB = BasicBlock::Create(M->getContext(), "start", ExitFn);
+ Builder.SetInsertPoint(FinalStartBB);
if (!Supported) {
RuntimeDebugBuilder::createCPUPrinter(
@@ -128,23 +152,42 @@ Function *PerfMonitor::insertFinalReport
RuntimeDebugBuilder::createCPUPrinter(Builder, "Total: ", CyclesTotal, "\n");
RuntimeDebugBuilder::createCPUPrinter(Builder, "Scops: ", CyclesInScops,
"\n");
-
- // Finalize function.
- Builder.CreateRetVoid();
+ ReturnFromFinal = Builder.CreateRetVoid();
return ExitFn;
}
+void PerfMonitor::AppendScopReporting() {
+ Builder.SetInsertPoint(FinalStartBB);
+ ReturnFromFinal->eraseFromParent();
+
+ Value *CyclesInCurrentScop =
+ Builder.CreateLoad(this->CyclesInCurrentScopPtr, true);
+ std::string EntryName, ExitName;
+ std::tie(EntryName, ExitName) = S.getEntryExitStr();
+
+ RuntimeDebugBuilder::createCPUPrinter(
+ Builder, "Scop(", S.getFunction().getName(), " |from: ", EntryName,
+ " |to: ", ExitName, "): ", CyclesInCurrentScop, "\n");
+
+ ReturnFromFinal = Builder.CreateRetVoid();
+}
+
+static Function *FinalReporting = nullptr;
+
void PerfMonitor::initialize() {
addGlobalVariables();
+ addScopCounter();
- Function *F = M->getFunction(InitFunctionName);
- if (F)
- return;
+ // Ensure that we only add the final reporting function once.
+ // On later invocations, append to the reporting function.
+ if (!FinalReporting) {
+ FinalReporting = insertFinalReporting();
+
+ Function *InitFn = insertInitFunction(FinalReporting);
+ addToGlobalConstructors(InitFn);
+ }
- // initialize
- Function *FinalReporting = insertFinalReporting();
- Function *InitFn = insertInitFunction(FinalReporting);
- addToGlobalConstructors(InitFn);
+ AppendScopReporting();
}
Function *PerfMonitor::insertInitFunction(Function *FinalReporting) {
@@ -223,4 +266,8 @@ void PerfMonitor::insertRegionEnd(Instru
Value *CyclesInScops = Builder.CreateLoad(CyclesInScopsPtr, true);
CyclesInScops = Builder.CreateAdd(CyclesInScops, CyclesInScop);
Builder.CreateStore(CyclesInScops, CyclesInScopsPtr, true);
+
+ Value *CyclesInCurrentScop = Builder.CreateLoad(CyclesInCurrentScopPtr, true);
+ CyclesInCurrentScop = Builder.CreateAdd(CyclesInCurrentScop, CyclesInScop);
+ Builder.CreateStore(CyclesInCurrentScop, CyclesInCurrentScopPtr, true);
}
Modified: polly/trunk/test/Isl/CodeGen/perf_monitoring.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/perf_monitoring.ll?rev=304528&r1=304527&r2=304528&view=diff
==============================================================================
--- polly/trunk/test/Isl/CodeGen/perf_monitoring.ll (original)
+++ polly/trunk/test/Isl/CodeGen/perf_monitoring.ll Fri Jun 2 03:01:22 2017
@@ -49,7 +49,6 @@ return:
; CHECK-NEXT: %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops
; CHECK-NEXT: %9 = add i64 %8, %7
; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops
-; CHECK-NEXT: br label %return
; CHECK: define weak_odr void @__polly_perf_final() {
@@ -66,8 +65,6 @@ return:
; CHECK-NEXT: %9 = call i32 @fflush(i8* null)
; CHECK-NEXT: %10 = call i32 (...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @9, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @7, i32 0, i32 0), i64 %3, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @8, i32 0, i32 0))
; CHECK-NEXT: %11 = call i32 @fflush(i8* null)
-; CHECK-NEXT: ret void
-; CHECK-NEXT: }
; CHECK: define weak_odr void @__polly_perf_init() {
Added: polly/trunk/test/Isl/CodeGen/perf_monitoring_per_scop.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/perf_monitoring_per_scop.ll?rev=304528&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/perf_monitoring_per_scop.ll (added)
+++ polly/trunk/test/Isl/CodeGen/perf_monitoring_per_scop.ll Fri Jun 2 03:01:22 2017
@@ -0,0 +1,100 @@
+; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \
+; RUN: -S < %s | FileCheck %s
+
+; void f(long A[], long N) {
+; long i;
+; if (true)
+; for (i = 0; i < N; ++i)
+; A[i] = i;
+; }
+; void g(long A[], long N) {
+; long i;
+; if (true)
+; for (i = 0; i < N; ++i)
+; A[i] = i;
+; }
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f(i64* %A, i64 %N) nounwind {
+entry:
+ fence seq_cst
+ br label %next
+
+next:
+ br i1 true, label %for.i, label %return
+
+for.i:
+ %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
+ %scevgep = getelementptr i64, i64* %A, i64 %indvar
+ store i64 %indvar, i64* %scevgep
+ %indvar.next = add nsw i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %N
+ br i1 %exitcond, label %return, label %for.i
+
+return:
+ fence seq_cst
+ ret void
+}
+
+
+define void @g(i64* %A, i64 %N) nounwind {
+entry:
+ fence seq_cst
+ br label %next
+
+next:
+ br i1 true, label %for.i, label %return
+
+for.i:
+ %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
+ %scevgep = getelementptr i64, i64* %A, i64 %indvar
+ store i64 %indvar, i64* %scevgep
+ %indvar.next = add nsw i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %N
+ br i1 %exitcond, label %return, label %for.i
+
+return:
+ fence seq_cst
+ ret void
+}
+
+; Declaration of globals
+; CHECK: @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old" = weak thread_local(initialexec) constant i64 0
+; CHECK: @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old" = weak thread_local(initialexec) constant i64 0
+
+; Bumping up counter in f
+; CHECK: polly.merge_new_and_old: ; preds = %polly.exiting, %return.region_exiting
+; CHECK-NEXT: %5 = load volatile i64, i64* @__polly_perf_cycles_in_scop_start
+; CHECK-NEXT: %6 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))
+; CHECK-NEXT: %7 = sub i64 %6, %5
+; CHECK-NEXT: %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops
+; CHECK-NEXT: %9 = add i64 %8, %7
+; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops
+; CHECK-NEXT: %10 = load volatile i64, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old"
+; CHECK-NEXT: %11 = add i64 %10, %7
+; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old"
+; CHECK-NEXT: br label %return
+
+; Bumping up counter in g
+; CHECK: polly.merge_new_and_old: ; preds = %polly.exiting, %return.region_exiting
+; CHECK-NEXT: %5 = load volatile i64, i64* @__polly_perf_cycles_in_scop_start
+; CHECK-NEXT: %6 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))
+; CHECK-NEXT: %7 = sub i64 %6, %5
+; CHECK-NEXT: %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops
+; CHECK-NEXT: %9 = add i64 %8, %7
+; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops
+; CHECK-NEXT: %10 = load volatile i64, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old"
+; CHECK-NEXT: %11 = add i64 %10, %7
+; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old"
+; CHECK-NEXT: br label %return
+
+; Final reporting prints
+; CHECK: %12 = load volatile i64, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old"
+; CHECK-NEXT: %13 = call i32 (...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @18, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @10, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @11, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([9 x i8], [9 x i8] addrspace(4)* @12, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @13, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(4)* @14, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(4)* @15, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(4)* @16, i32 0, i32 0), i64 %12, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @17, i32 0, i32 0))
+; CHECK-NEXT: %14 = call i32 @fflush(i8* null)
+; CHECK-NEXT: %15 = load volatile i64, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old"
+; CHECK-NEXT: %16 = call i32 (...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @27, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @19, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @20, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([9 x i8], [9 x i8] addrspace(4)* @21, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @22, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([7 x i8], [7 x i8] addrspace(4)* @23, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(4)* @24, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(4)* @25, i32 0, i32 0), i64 %15, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @26, i32 0, i32 0))
+; CHECK-NEXT: %17 = call i32 @fflush(i8* null)
+; CHECK-NEXT: ret void
More information about the llvm-commits
mailing list