<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Mon, Apr 3, 2017 at 7:55 AM, Tobias Grosser via llvm-commits <span dir="ltr"><<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">Author: grosser<br>
Date: Mon Apr 3 09:55:37 2017<br>
New Revision: 299359<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=299359&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project?rev=299359&view=rev</a><br>
Log:<br>
[CodeGen] Add Performance Monitor<br>
<br>
Add support for -polly-codegen-perf-<wbr>monitoring. When performance monitoring<br>
is enabled, we emit performance monitoring code during code generation that<br>
prints after program exit statistics about the total number of cycles executed<br>
as well as the number of cycles spent in scops. This gives an estimate on how<br>
useful polyhedral optimizations might be for a given program.<br>
<br>
Example output:<br>
<br>
Polly runtime information<br>
-------------------------<br>
Total: 783110081637<br>
Scops: 663718949365<br>
<br>
In the future, we might also add functionality to measure how much time is spent<br>
in optimized scops and how many cycles are spent in the fallback code.<br>
<br>
Reviewers: bollu,sebpop<br>
<br>
Tags: #polly<br>
<br>
Differential Revision: <a href="https://reviews.llvm.org/D31599" rel="noreferrer" target="_blank">https://reviews.llvm.org/<wbr>D31599</a><br>
<br>
Added:<br>
polly/trunk/include/polly/<wbr>CodeGen/PerfMonitor.h<br>
polly/trunk/lib/CodeGen/<wbr>PerfMonitor.cpp<br>
polly/trunk/test/Isl/CodeGen/<wbr>perf_monitoring.ll<br>
Modified:<br>
polly/trunk/lib/CMakeLists.txt<br>
polly/trunk/lib/CodeGen/<wbr>CodeGeneration.cpp<br>
<br>
Added: polly/trunk/include/polly/<wbr>CodeGen/PerfMonitor.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/CodeGen/PerfMonitor.h?rev=299359&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/polly/trunk/include/<wbr>polly/CodeGen/PerfMonitor.h?<wbr>rev=299359&view=auto</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- polly/trunk/include/polly/<wbr>CodeGen/PerfMonitor.h (added)<br>
+++ polly/trunk/include/polly/<wbr>CodeGen/PerfMonitor.h Mon Apr 3 09:55:37 2017<br>
@@ -0,0 +1,132 @@<br>
+//===--- PerfMonitor.h --- Monitor time spent in scops --------------------===//<br>
+//<br>
+// The LLVM Compiler Infrastructure<br>
+//<br>
+// This file is distributed under the University of Illinois Open Source<br>
+// License. See LICENSE.TXT for details.<br>
+//<br>
+//===------------------------<wbr>------------------------------<wbr>----------------===//<br>
+<br>
+#ifndef PERF_MONITOR_H<br>
+#define PERF_MONITOR_H<br>
+<br>
+#include "polly/CodeGen/IRBuilder.h"<br>
+<br>
+namespace llvm {<br>
+class Function;<br>
+class Module;<br>
+class Value;<br>
+class Instruction;<br>
+} // namespace llvm<br>
+<br>
+namespace polly {<br>
+<br>
+class PerfMonitor {<br>
+public:<br>
+ /// Create a new performance monitor.<br>
+ ///<br>
+ /// @param M The module for which to generate the performance monitor.<br>
+ PerfMonitor(llvm::Module *M);<br>
+<br>
+ /// Initialize the performance monitor.<br>
+ ///<br>
+ /// Ensure that all global variables, functions, and callbacks needed to<br>
+ /// manage the performance monitor are initialized and registered.<br>
+ void initialize();<br>
+<br>
+ /// Mark the beginning of a timing region.<br>
+ ///<br>
+ /// @param InsertBefore The instruction before which the timing region starts.<br>
+ void insertRegionStart(llvm::<wbr>Instruction *InserBefore);<br>
+<br>
+ /// Mark the end of a timing region.<br>
+ ///<br>
+ /// @param InsertBefore The instruction before which the timing region ends.<br>
+ void insertRegionEnd(llvm::<wbr>Instruction *InsertBefore);<br>
+<br>
+private:<br>
+ llvm::Module *M;<br>
+ PollyIRBuilder Builder;<br>
+<br>
+ /// Indicates if performance profiling is supported on this architecture.<br>
+ bool Supported;<br>
+<br>
+ /// The cycle counter at the beginning of the program execution.<br>
+ llvm::Value *CyclesTotalStartPtr;<br>
+<br>
+ /// The total number of cycles spent within scops.<br>
+ llvm::Value *CyclesInScopsPtr;<br>
+<br>
+ /// The value of the cycle counter at the beginning of the last scop.<br>
+ llvm::Value *CyclesInScopStartPtr;<br>
+<br>
+ /// A memory location which serves as argument of the RDTSCP function.<br>
+ ///<br>
+ /// The value written to this location is currently not used.<br>
+ llvm::Value *RDTSCPWriteLocation;<br>
+<br>
+ /// A global variable, that keeps track if the performance monitor<br>
+ /// initialization has already been run.<br>
+ llvm::Value *AlreadyInitializedPtr;<br>
+<br>
+ llvm::Function *insertInitFunction(llvm::<wbr>Function *FinalReporting);<br>
+<br>
+ /// Add Function @p Fn to the list of global constructors.<br>
+ ///<br>
+ /// If no global constructors are available in this current module, insert<br>
+ /// a new list of global constructors containing @p Fn as only global<br>
+ /// constructor. Otherwise, append @p Fn to the list of global constructors.<br>
+ ///<br>
+ /// All functions listed as global constructors are executed before the<br>
+ /// main() function is called.<br>
+ ///<br>
+ /// @param Fn Function to add to global constructors<br>
+ void addToGlobalConstructors(llvm::<wbr>Function *Fn);<br>
+<br>
+ /// Add global variables to module.<br>
+ ///<br>
+ /// Insert a set of global variables that are used to track performance,<br>
+ /// into the module (or obtain references to them if they already exist).<br>
+ void addGlobalVariables();<br>
+<br>
+ /// Get a reference to the intrinsic "i64 @llvm.x86.rdtscp(i8*)".<br>
+ ///<br>
+ /// The rdtscp function returns the current value of the processor's<br>
+ /// time-stamp counter as well as the current CPU identifier. On modern x86<br>
+ /// systems, the returned value is independent of the dynamic clock frequency<br>
+ /// and consistent across multiple cores. It can consequently be used to get<br>
+ /// accurate and low-overhead timing information. Even though the counter is<br>
+ /// wrapping, it can be reliably used even for measuring longer time<br>
+ /// intervals, as on a 1 GHz processor the 64-bit counter only wraps roughly every 584 years.<br>
+ ///<br>
+ /// The RDTSCP instruction is "pseudo" serializing:<br>
+ ///<br>
+ /// "The RDTSCP instruction waits until all previous instructions have been<br>
+ /// executed before reading the counter. However, subsequent instructions may<br>
+ /// begin execution before the read operation is performed."<br>
+ ///<br>
+ /// To ensure that no later instructions are scheduled before the RDTSCP<br>
+ /// instruction it is often recommended to schedule a cpuid call after the<br>
+ /// RDTSCP instruction. We do not do this yet, trading some imprecision in<br>
+ /// our timing for a reduced overhead in our timing.<br>
+ ///<br>
+ /// @returns A reference to the declaration of @llvm.x86.rdtscp.<br>
+ llvm::Function *getRDTSCP();<br>
+<br>
+ /// Get a reference to "int atexit(void (*function)(void))" function.<br>
+ ///<br>
+ /// This function allows to register function pointers that must be executed<br>
+ /// when the program is terminated.<br>
+ ///<br>
+ /// @returns A reference to @atexit().<br>
+ llvm::Function *getAtExit();<br>
+<br>
+ /// Create function "__polly_perf_final_reporting"<wbr>.<br>
+ ///<br>
+ /// This function finalizes the performance measurements and prints the<br>
+ /// results to stdout. It is expected to be registered with 'atexit()'.<br>
+ llvm::Function *insertFinalReporting();<br>
+};<br>
+} // namespace polly<br>
+<br>
+#endif<br>
<br>
Modified: polly/trunk/lib/CMakeLists.txt<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CMakeLists.txt?rev=299359&r1=299358&r2=299359&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/polly/trunk/lib/<wbr>CMakeLists.txt?rev=299359&r1=<wbr>299358&r2=299359&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- polly/trunk/lib/CMakeLists.txt (original)<br>
+++ polly/trunk/lib/CMakeLists.txt Mon Apr 3 09:55:37 2017<br>
@@ -43,6 +43,7 @@ add_polly_library(Polly<br>
CodeGen/Utils.cpp<br>
CodeGen/RuntimeDebugBuilder.<wbr>cpp<br>
CodeGen/CodegenCleanup.cpp<br>
+ CodeGen/PerfMonitor.cpp<br>
${GPGPU_CODEGEN_FILES}<br>
Exchange/JSONExporter.cpp<br>
Support/GICHelper.cpp<br>
<br>
Modified: polly/trunk/lib/CodeGen/<wbr>CodeGeneration.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/CodeGeneration.cpp?rev=299359&r1=299358&r2=299359&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/polly/trunk/lib/<wbr>CodeGen/CodeGeneration.cpp?<wbr>rev=299359&r1=299358&r2=<wbr>299359&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- polly/trunk/lib/CodeGen/<wbr>CodeGeneration.cpp (original)<br>
+++ polly/trunk/lib/CodeGen/<wbr>CodeGeneration.cpp Mon Apr 3 09:55:37 2017<br>
@@ -21,6 +21,7 @@<br>
<br>
#include "polly/CodeGen/IslAst.h"<br>
#include "polly/CodeGen/IslNodeBuilder.<wbr>h"<br>
+#include "polly/CodeGen/PerfMonitor.h"<br>
#include "polly/CodeGen/Utils.h"<br>
#include "polly/DependenceInfo.h"<br>
#include "polly/LinkAllPasses.h"<br>
@@ -45,6 +46,11 @@ static cl::opt<bool> Verify("polly-codeg<br>
cl::Hidden, cl::init(true), cl::ZeroOrMore,<br>
cl::cat(PollyCategory));<br>
<br>
+static cl::opt<bool><br>
+ PerfMonitoring("polly-codegen-<wbr>perf-monitoring",<br>
+ cl::desc("Add run-time performance monitoring"), cl::Hidden,<br>
+ cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));<br>
+<br>
namespace {<br>
class CodeGeneration : public ScopPass {<br>
public:<br>
@@ -145,6 +151,18 @@ public:<br>
IslNodeBuilder NodeBuilder(Builder, Annotator, this, *DL, *LI, *SE, *DT, S,<br>
StartBlock);<br>
<br>
+ if (PerfMonitoring) {<br>
+ PerfMonitor P(EnteringBB->getParent()-><wbr>getParent());<br>
+ P.initialize();<br>
+ P.insertRegionStart(<wbr>SplitBlock->getTerminator());<br>
+<br>
+ BasicBlock *MergeBlock = SplitBlock->getTerminator()<br>
+ ->getSuccessor(0)<br>
+ ->getUniqueSuccessor()<br>
+ ->getUniqueSuccessor();<br>
+ P.insertRegionEnd(MergeBlock-><wbr>getTerminator());<br>
+ }<br>
+<br>
// First generate code for the hoisted invariant loads and transitively the<br>
// parameters they reference. Afterwards, for the remaining parameters that<br>
// might reference the hoisted loads. Finally, build the runtime check<br>
<br>
Added: polly/trunk/lib/CodeGen/<wbr>PerfMonitor.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PerfMonitor.cpp?rev=299359&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/polly/trunk/lib/<wbr>CodeGen/PerfMonitor.cpp?rev=<wbr>299359&view=auto</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- polly/trunk/lib/CodeGen/<wbr>PerfMonitor.cpp (added)<br>
+++ polly/trunk/lib/CodeGen/<wbr>PerfMonitor.cpp Mon Apr 3 09:55:37 2017<br>
@@ -0,0 +1,235 @@<br>
+//===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======//<br>
+//<br>
+// The LLVM Compiler Infrastructure<br>
+//<br>
+// This file is distributed under the University of Illinois Open Source<br>
+// License. See LICENSE.TXT for details.<br>
+//<br>
+//===------------------------<wbr>------------------------------<wbr>----------------===//<br>
+//<br>
+//===------------------------<wbr>------------------------------<wbr>----------------===//<br>
+<br>
+#include "polly/CodeGen/PerfMonitor.h"<br>
+#include "polly/CodeGen/<wbr>RuntimeDebugBuilder.h"<br>
+#include "llvm/ADT/Triple.h"<br>
+<br>
+using namespace llvm;<br>
+using namespace polly;<br>
+<br>
+Function *PerfMonitor::getAtExit() {<br>
+ const char *Name = "atexit";<br>
+ Function *F = M->getFunction(Name);<br>
+<br>
+ if (!F) {<br>
+ GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;<br>
+ FunctionType *Ty = FunctionType::get(Builder.<wbr>getInt32Ty(),<br>
+ {Builder.getInt8PtrTy()}, false);<br>
+ F = Function::Create(Ty, Linkage, Name, M);<br>
+ }<br>
+<br>
+ return F;<br>
+}<br>
+<br>
+void PerfMonitor::<wbr>addToGlobalConstructors(<wbr>Function *Fn) {<br>
+ const char *Name = "llvm.global_ctors";<br>
+ GlobalVariable *GV = M->getGlobalVariable(Name);<br>
+ std::vector<Constant *> V;<br>
+<br>
+ if (GV) {<br>
+ Constant *Array = GV->getInitializer();<br>
+ for (Value *X : Array->operand_values())<br>
+ V.push_back(cast<Constant>(X))<wbr>;<br>
+ GV->eraseFromParent();<br>
+ }<br>
+<br>
+ StructType *ST = StructType::get(Builder.<wbr>getInt32Ty(), Fn->getType(),<br>
+ Builder.getInt8PtrTy(), nullptr);<br>
+<br>
+ V.push_back(ConstantStruct::<wbr>get(<br>
+ ST, Builder.getInt32(10), Fn,<br>
+ ConstantPointerNull::get(<wbr>Builder.getInt8PtrTy()), nullptr));<br>
+ ArrayType *Ty = ArrayType::get(ST, V.size());<br>
+<br>
+ GV = new GlobalVariable(*M, Ty, true, GlobalValue::AppendingLinkage,<br>
+ ConstantArray::get(Ty, V), Name, nullptr,<br>
+ GlobalVariable::<wbr>NotThreadLocal);<br>
+}<br>
+<br>
+Function *PerfMonitor::getRDTSCP() {<br>
+ const char *Name = "llvm.x86.rdtscp";<br>
+ Function *F = M->getFunction(Name);<br></blockquote><div>For intrinsics, we should rather use "auto *F = Intrinsic::getDeclaration(M, Intrinsic::x86_rdtscp);"</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+<br>
+ if (!F) {<br>
+ GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;<br>
+ FunctionType *Ty = FunctionType::get(Builder.<wbr>getInt64Ty(),<br>
+ {Builder.getInt8PtrTy()}, false);<br>
+ F = Function::Create(Ty, Linkage, Name, M);<br>
+ }<br>
+<br>
+ return F;<br>
+}<br>
+<br>
+PerfMonitor::PerfMonitor(<wbr>Module *M) : M(M), Builder(M->getContext()) {<br>
+ if (Triple(M->getTargetTriple()).<wbr>getArch() == llvm::Triple::x86_64)<br>
+ Supported = true;<br>
+ else<br>
+ Supported = false;<br>
+}<br>
+<br>
+void PerfMonitor::<wbr>addGlobalVariables() {<br>
+ auto TryRegisterGlobal = [=](const char *Name, Constant *InitialValue,<br>
+ Value **Location) {<br>
+ *Location = M->getGlobalVariable(Name);<br>
+<br>
+ if (!*Location)<br>
+ *Location = new GlobalVariable(<br>
+ *M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage,<br>
+ InitialValue, Name, nullptr, GlobalVariable::<wbr>InitialExecTLSModel);<br>
+ };<br>
+<br>
+ TryRegisterGlobal("__polly_<wbr>perf_cycles_total_start", Builder.getInt64(0),<br>
+ &CyclesTotalStartPtr);<br>
+<br>
+ TryRegisterGlobal("__polly_<wbr>perf_initialized", Builder.getInt1(0),<br>
+ &AlreadyInitializedPtr);<br>
+<br>
+ TryRegisterGlobal("__polly_<wbr>perf_cycles_in_scops", Builder.getInt64(0),<br>
+ &CyclesInScopsPtr);<br>
+<br>
+ TryRegisterGlobal("__polly_<wbr>perf_cycles_in_scop_start", Builder.getInt64(0),<br>
+ &CyclesInScopStartPtr);<br>
+<br>
+ TryRegisterGlobal("__polly_<wbr>perf_write_loation", Builder.getInt32(0),<br>
+ &RDTSCPWriteLocation);<br>
+}<br>
+<br>
+static const char *InitFunctionName = "__polly_perf_init";<br>
+static const char *FinalReportingFunctionName = "__polly_perf_final";<br>
+<br>
+Function *PerfMonitor::<wbr>insertFinalReporting() {<br>
+ // Create new function.<br>
+ GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;<br>
+ FunctionType *Ty = FunctionType::get(Builder.<wbr>getVoidTy(), {}, false);<br>
+ Function *ExitFn =<br>
+ Function::Create(Ty, Linkage, FinalReportingFunctionName, M);<br>
+ BasicBlock *Start = BasicBlock::Create(M-><wbr>getContext(), "start", ExitFn);<br>
+ Builder.SetInsertPoint(Start);<br>
+<br>
+ if (!Supported) {<br>
+ RuntimeDebugBuilder::<wbr>createCPUPrinter(<br>
+ Builder, "Polly runtime information generation not supported\n");<br>
+ Builder.CreateRetVoid();<br>
+ return ExitFn;<br>
+ }<br>
+<br>
+ // Measure current cycles and compute final timings.<br>
+ Function *RDTSCPFn = getRDTSCP();<br>
+ Value *CurrentCycles = Builder.CreateCall(<br>
+ RDTSCPFn,<br>
+ Builder.CreatePointerCast(<wbr>RDTSCPWriteLocation, Builder.getInt8PtrTy()));<br>
+ Value *CyclesStart = Builder.CreateLoad(<wbr>CyclesTotalStartPtr, true);<br>
+ Value *CyclesTotal = Builder.CreateSub(<wbr>CurrentCycles, CyclesStart);<br>
+ Value *CyclesInScops = Builder.CreateLoad(<wbr>CyclesInScopsPtr, true);<br>
+<br>
+ // Print the runtime information.<br>
+ RuntimeDebugBuilder::<wbr>createCPUPrinter(Builder, "Polly runtime information\n");<br>
+ RuntimeDebugBuilder::<wbr>createCPUPrinter(Builder, "-------------------------\n")<wbr>;<br>
+ RuntimeDebugBuilder::<wbr>createCPUPrinter(Builder, "Total: ", CyclesTotal, "\n");<br>
+ RuntimeDebugBuilder::<wbr>createCPUPrinter(Builder, "Scops: ", CyclesInScops,<br>
+ "\n");<br>
+<br>
+ // Finalize function.<br>
+ Builder.CreateRetVoid();<br>
+ return ExitFn;<br>
+}<br>
+<br>
+void PerfMonitor::initialize() {<br>
+ addGlobalVariables();<br>
+<br>
+ Function *F = M->getFunction(<wbr>InitFunctionName);<br>
+ if (F)<br>
+ return;<br>
+<br>
+ // initialize<br>
+ Function *FinalReporting = insertFinalReporting();<br>
+ Function *InitFn = insertInitFunction(<wbr>FinalReporting);<br>
+ addToGlobalConstructors(<wbr>InitFn);<br>
+}<br>
+<br>
+Function *PerfMonitor::<wbr>insertInitFunction(Function *FinalReporting) {<br>
+ // Insert function definition and BBs.<br>
+ GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;<br>
+ FunctionType *Ty = FunctionType::get(Builder.<wbr>getVoidTy(), {}, false);<br>
+ Function *InitFn = Function::Create(Ty, Linkage, InitFunctionName, M);<br>
+ BasicBlock *Start = BasicBlock::Create(M-><wbr>getContext(), "start", InitFn);<br>
+ BasicBlock *EarlyReturn =<br>
+ BasicBlock::Create(M-><wbr>getContext(), "earlyreturn", InitFn);<br>
+ BasicBlock *InitBB = BasicBlock::Create(M-><wbr>getContext(), "initbb", InitFn);<br>
+<br>
+ Builder.SetInsertPoint(Start);<br>
+<br>
+ // Check if this function was already run. If yes, return.<br>
+ //<br>
+ // In case profiling has been enabled in multiple translation units, the<br>
+ // initializer function will be added to the global constructors list of<br>
+ // each translation unit. When merging translation units, the global<br>
+ // constructor lists are just appended, such that the initializer will appear<br>
+ // multiple times. To avoid initializations being run multiple times (and<br>
+ // especially to avoid that atExitFn is called more than once), we bail<br>
+ // out if the initializer is run more than once.<br>
+ Value *HasRunBefore = Builder.CreateLoad(<wbr>AlreadyInitializedPtr);<br>
+ Builder.CreateCondBr(<wbr>HasRunBefore, EarlyReturn, InitBB);<br>
+ Builder.SetInsertPoint(<wbr>EarlyReturn);<br>
+ Builder.CreateRetVoid();<br>
+<br>
+ // Keep track that this function has been run once.<br>
+ Builder.SetInsertPoint(InitBB)<wbr>;<br>
+ Value *True = Builder.getInt1(true);<br>
+ Builder.CreateStore(True, AlreadyInitializedPtr);<br>
+<br>
+ // Register the final reporting function with atexit().<br>
+ Value *FinalReportingPtr =<br>
+ Builder.CreatePointerCast(<wbr>FinalReporting, Builder.getInt8PtrTy());<br>
+ Function *AtExitFn = getAtExit();<br>
+ Builder.CreateCall(AtExitFn, {FinalReportingPtr});<br>
+<br>
+ if (Supported) {<br>
+ // Read the current cycle counter and store the result for later.<br>
+ Function *RDTSCPFn = getRDTSCP();<br>
+ Value *CurrentCycles = Builder.CreateCall(<br>
+ RDTSCPFn,<br>
+ Builder.CreatePointerCast(<wbr>RDTSCPWriteLocation, Builder.getInt8PtrTy()));<br>
+ Builder.CreateStore(<wbr>CurrentCycles, CyclesTotalStartPtr, true);<br>
+ }<br>
+ Builder.CreateRetVoid();<br>
+<br>
+ return InitFn;<br>
+}<br>
+<br>
+void PerfMonitor::<wbr>insertRegionStart(Instruction *InsertBefore) {<br>
+ if (!Supported)<br>
+ return;<br>
+<br>
+ Builder.SetInsertPoint(<wbr>InsertBefore);<br>
+ Function *RDTSCPFn = getRDTSCP();<br>
+ Value *CurrentCycles = Builder.CreateCall(<br>
+ RDTSCPFn,<br>
+ Builder.CreatePointerCast(<wbr>RDTSCPWriteLocation, Builder.getInt8PtrTy()));<br>
+ Builder.CreateStore(<wbr>CurrentCycles, CyclesInScopStartPtr, true);<br>
+}<br>
+<br>
+void PerfMonitor::insertRegionEnd(<wbr>Instruction *InsertBefore) {<br>
+ if (!Supported)<br>
+ return;<br>
+<br>
+ Builder.SetInsertPoint(<wbr>InsertBefore);<br>
+ Function *RDTSCPFn = getRDTSCP();<br>
+ LoadInst *CyclesStart = Builder.CreateLoad(<wbr>CyclesInScopStartPtr, true);<br>
+ Value *CurrentCycles = Builder.CreateCall(<br>
+ RDTSCPFn,<br>
+ Builder.CreatePointerCast(<wbr>RDTSCPWriteLocation, Builder.getInt8PtrTy()));<br>
+ Value *CyclesInScop = Builder.CreateSub(<wbr>CurrentCycles, CyclesStart);<br>
+ Value *CyclesInScops = Builder.CreateLoad(<wbr>CyclesInScopsPtr, true);<br>
+ CyclesInScops = Builder.CreateAdd(<wbr>CyclesInScops, CyclesInScop);<br>
+ Builder.CreateStore(<wbr>CyclesInScops, CyclesInScopsPtr, true);<br>
+}<br>
<br>
Added: polly/trunk/test/Isl/CodeGen/<wbr>perf_monitoring.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/perf_monitoring.ll?rev=299359&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/polly/trunk/test/Isl/<wbr>CodeGen/perf_monitoring.ll?<wbr>rev=299359&view=auto</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- polly/trunk/test/Isl/CodeGen/<wbr>perf_monitoring.ll (added)<br>
+++ polly/trunk/test/Isl/CodeGen/<wbr>perf_monitoring.ll Mon Apr 3 09:55:37 2017<br>
@@ -0,0 +1,87 @@<br>
+; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \<br>
+; RUN: -S < %s | FileCheck %s<br>
+<br>
+; void f(long A[], long N) {<br>
+; long i;<br>
+; if (true)<br>
+; for (i = 0; i < N; ++i)<br>
+; A[i] = i;<br>
+; }<br>
+<br>
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-<wbr>i16:16:16-i32:32:32-i64:64:64-<wbr>f32:32:32-f64:64:64-v64:64:64-<wbr>v128:128:128-a0:0:64-s0:64:64-<wbr>f80:128:128"<br>
+target triple = "x86_64-unknown-linux-gnu"<br>
+<br>
+define void @f(i64* %A, i64 %N) nounwind {<br>
+entry:<br>
+ fence seq_cst<br>
+ br label %next<br>
+<br>
+next:<br>
+ br i1 true, label %for.i, label %return<br>
+<br>
+for.i:<br>
+ %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]<br>
+ %scevgep = getelementptr i64, i64* %A, i64 %indvar<br>
+ store i64 %indvar, i64* %scevgep<br>
+ %indvar.next = add nsw i64 %indvar, 1<br>
+ %exitcond = icmp eq i64 %indvar.next, %N<br>
+ br i1 %exitcond, label %return, label %for.i<br>
+<br>
+return:<br>
+ fence seq_cst<br>
+ ret void<br>
+}<br>
+<br>
+; CHECK: @__polly_perf_cycles_total_<wbr>start = weak thread_local(initialexec) constant i64 0<br>
+; CHECK-NEXT: @__polly_perf_initialized = weak thread_local(initialexec) constant i1 false<br>
+; CHECK-NEXT: @__polly_perf_cycles_in_scops = weak thread_local(initialexec) constant i64 0<br>
+; CHECK-NEXT: @__polly_perf_cycles_in_scop_<wbr>start = weak thread_local(initialexec) constant i64 0<br>
+; CHECK-NEXT: @__polly_perf_write_loation = weak thread_local(initialexec) constant i32 0<br>
+<br>
+; CHECK: polly.split_new_and_old: ; preds = %entry<br>
+; CHECK-NEXT: %0 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))<br>
+; CHECK-NEXT: store volatile i64 %0, i64* @__polly_perf_cycles_in_scop_<wbr>start<br>
+<br>
+; CHECK: polly.merge_new_and_old: ; preds = %polly.exiting, %return.region_exiting<br>
+; CHECK-NEXT: %5 = load volatile i64, i64* @__polly_perf_cycles_in_scop_<wbr>start<br>
+; CHECK-NEXT: %6 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))<br>
+; CHECK-NEXT: %7 = sub i64 %6, %5<br>
+; CHECK-NEXT: %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops<br>
+; CHECK-NEXT: %9 = add i64 %8, %7<br>
+; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops<br>
+; CHECK-NEXT: br label %return<br>
+<br>
+<br>
+; CHECK: define weak_odr void @__polly_perf_final() {<br>
+; CHECK-NEXT: start:<br>
+; CHECK-NEXT: %0 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))<br>
+; CHECK-NEXT: %1 = load volatile i64, i64* @__polly_perf_cycles_total_<wbr>start<br>
+; CHECK-NEXT: %2 = sub i64 %0, %1<br>
+; CHECK-NEXT: %3 = load volatile i64, i64* @__polly_perf_cycles_in_scops<br>
+; CHECK-NEXT: %4 = call i32 (...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @1, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([27 x i8], [27 x i8] addrspace(4)* @0, i32 0, i32 0))<br>
+; CHECK-NEXT: %5 = call i32 @fflush(i8* null)<br>
+; CHECK-NEXT: %6 = call i32 (...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @3, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([27 x i8], [27 x i8] addrspace(4)* @2, i32 0, i32 0))<br>
+; CHECK-NEXT: %7 = call i32 @fflush(i8* null)<br>
+; CHECK-NEXT: %8 = call i32 (...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @6, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @4, i32 0, i32 0), i64 %2, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @5, i32 0, i32 0))<br>
+; CHECK-NEXT: %9 = call i32 @fflush(i8* null)<br>
+; CHECK-NEXT: %10 = call i32 (...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @9, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @7, i32 0, i32 0), i64 %3, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @8, i32 0, i32 0))<br>
+; CHECK-NEXT: %11 = call i32 @fflush(i8* null)<br>
+; CHECK-NEXT: ret void<br>
+; CHECK-NEXT: }<br>
+<br>
+<br>
+; CHECK: define weak_odr void @__polly_perf_init() {<br>
+; CHECK-NEXT: start:<br>
+; CHECK-NEXT: %0 = load i1, i1* @__polly_perf_initialized<br>
+; CHECK-NEXT: br i1 %0, label %earlyreturn, label %initbb<br>
+<br>
+; CHECK: earlyreturn: ; preds = %start<br>
+; CHECK-NEXT: ret void<br>
+<br>
+; CHECK: initbb: ; preds = %start<br>
+; CHECK-NEXT: store i1 true, i1* @__polly_perf_initialized<br>
+; CHECK-NEXT: %1 = call i32 @atexit(i8* bitcast (void ()* @__polly_perf_final to i8*))<br>
+; CHECK-NEXT: %2 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))<br>
+; CHECK-NEXT: store volatile i64 %2, i64* @__polly_perf_cycles_total_<wbr>start<br>
+; CHECK-NEXT: ret void<br>
+; CHECK-NEXT: }<br>
<br>
<br>
______________________________<wbr>_________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br></div></div>