<div dir="ltr"><br><div class="gmail_extra"><br><div class="gmail_quote">On Mon, Apr 3, 2017 at 7:55 AM, Tobias Grosser via llvm-commits <span dir="ltr"><<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">Author: grosser<br>
Date: Mon Apr  3 09:55:37 2017<br>
New Revision: 299359<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=299359&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project?rev=299359&view=rev</a><br>
Log:<br>
[CodeGen] Add Performance Monitor<br>
<br>
Add support for -polly-codegen-perf-<wbr>monitoring. When performance monitoring<br>
is enabled, we emit performance monitoring code during code generation that<br>
prints after program exit statistics about the total number of cycles executed<br>
as well as the number of cycles spent in scops. This gives an estimate on how<br>
useful polyhedral optimizations might be for a given program.<br>
<br>
Example output:<br>
<br>
  Polly runtime information<br>
  -------------------------<br>
  Total: 783110081637<br>
  Scops: 663718949365<br>
<br>
In the future, we might also add functionality to measure how much time is spent<br>
in optimized scops and how many cycles are spent in the fallback code.<br>
<br>
Reviewers: bollu,sebpop<br>
<br>
Tags: #polly<br>
<br>
Differential Revision: <a href="https://reviews.llvm.org/D31599" rel="noreferrer" target="_blank">https://reviews.llvm.org/<wbr>D31599</a><br>
<br>
Added:<br>
    polly/trunk/include/polly/<wbr>CodeGen/PerfMonitor.h<br>
    polly/trunk/lib/CodeGen/<wbr>PerfMonitor.cpp<br>
    polly/trunk/test/Isl/CodeGen/<wbr>perf_monitoring.ll<br>
Modified:<br>
    polly/trunk/lib/CMakeLists.txt<br>
    polly/trunk/lib/CodeGen/<wbr>CodeGeneration.cpp<br>
<br>
Added: polly/trunk/include/polly/<wbr>CodeGen/PerfMonitor.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/CodeGen/PerfMonitor.h?rev=299359&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/polly/trunk/include/<wbr>polly/CodeGen/PerfMonitor.h?<wbr>rev=299359&view=auto</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- polly/trunk/include/polly/<wbr>CodeGen/PerfMonitor.h (added)<br>
+++ polly/trunk/include/polly/<wbr>CodeGen/PerfMonitor.h Mon Apr  3 09:55:37 2017<br>
@@ -0,0 +1,132 @@<br>
+//===--- PerfMonitor.h --- Monitor time spent in scops --------------------===//<br>
+//<br>
+//                     The LLVM Compiler Infrastructure<br>
+//<br>
+// This file is distributed under the University of Illinois Open Source<br>
+// License. See LICENSE.TXT for details.<br>
+//<br>
+//===------------------------<wbr>------------------------------<wbr>----------------===//<br>
+<br>
+#ifndef PERF_MONITOR_H<br>
+#define PERF_MONITOR_H<br>
+<br>
+#include "polly/CodeGen/IRBuilder.h"<br>
+<br>
+namespace llvm {<br>
+class Function;<br>
+class Module;<br>
+class Value;<br>
+class Instruction;<br>
+} // namespace llvm<br>
+<br>
+namespace polly {<br>
+<br>
+class PerfMonitor {<br>
+public:<br>
+  /// Create a new performance monitor.<br>
+  ///<br>
+  /// @param M The module for which to generate the performance monitor.<br>
+  PerfMonitor(llvm::Module *M);<br>
+<br>
+  /// Initialize the performance monitor.<br>
+  ///<br>
+  /// Ensure that all global variables, functions, and callbacks needed to<br>
+  /// manage the performance monitor are initialized and registered.<br>
+  void initialize();<br>
+<br>
+  /// Mark the beginning of a timing region.<br>
+  ///<br>
+  /// @param InsertBefore The instruction before which the timing region starts.<br>
+  void insertRegionStart(llvm::<wbr>Instruction *InserBefore);<br>
+<br>
+  /// Mark the end of a timing region.<br>
+  ///<br>
+  /// @param InsertBefore The instruction before which the timing region starts.<br>
+  void insertRegionEnd(llvm::<wbr>Instruction *InsertBefore);<br>
+<br>
+private:<br>
+  llvm::Module *M;<br>
+  PollyIRBuilder Builder;<br>
+<br>
+  /// Indicates if performance profiling is supported on this architecture.<br>
+  bool Supported;<br>
+<br>
+  /// The cycle counter at the beginning of the program execution.<br>
+  llvm::Value *CyclesTotalStartPtr;<br>
+<br>
+  /// The total number of cycles spent within scops.<br>
+  llvm::Value *CyclesInScopsPtr;<br>
+<br>
+  /// The value of the cycle counter at the beginning of the last scop.<br>
+  llvm::Value *CyclesInScopStartPtr;<br>
+<br>
+  /// A memory location which serves as argument of the RDTSCP function.<br>
+  ///<br>
+  /// The value written to this location is currently not used.<br>
+  llvm::Value *RDTSCPWriteLocation;<br>
+<br>
+  /// A global variable, that keeps track if the performance monitor<br>
+  /// initialization has already been run.<br>
+  llvm::Value *AlreadyInitializedPtr;<br>
+<br>
+  llvm::Function *insertInitFunction(llvm::<wbr>Function *FinalReporting);<br>
+<br>
+  /// Add Function @p to list of global constructors<br>
+  ///<br>
+  /// If no global constructors are available in this current module, insert<br>
+  /// a new list of global constructors containing @p Fn as only global<br>
+  /// constructor. Otherwise, append @p Fn to the list of global constructors.<br>
+  ///<br>
+  /// All functions listed as global constructors are executed before the<br>
+  /// main() function is called.<br>
+  ///<br>
+  /// @param Fn Function to add to global constructors<br>
+  void addToGlobalConstructors(llvm::<wbr>Function *Fn);<br>
+<br>
+  /// Add global variables to module.<br>
+  ///<br>
+  /// Insert a set of global variables that are used to track performance,<br>
+  /// into the module (or obtain references to them if they already exist).<br>
+  void addGlobalVariables();<br>
+<br>
+  /// Get a reference to the intrinsic "i64 @llvm.x86.rdtscp(i8*)".<br>
+  ///<br>
+  /// The rdtscp function returns the current value of the processor's<br>
+  /// time-stamp counter as well as the current CPU identifier. On modern x86<br>
+  /// systems, the returned value is independent of the dynamic clock frequency<br>
+  /// and consistent across multiple cores. It can consequently be used to get<br>
+  /// accurate and low-overhead timing information. Even though the counter is<br>
+  /// wrapping, it can be reliably used even for measuring longer time<br>
+  /// intervals, as on a 1 GHz processor the counter only wraps every 545 years.<br>
+  ///<br>
+  /// The RDTSCP instruction is "pseudo" serializing:<br>
+  ///<br>
+  /// "“The RDTSCP instruction waits until all previous instructions have been<br>
+  /// executed before reading the counter. However, subsequent instructions may<br>
+  /// begin execution before the read operation is performed.”<br>
+  ///<br>
+  /// To ensure that no later instructions are scheduled before the RDTSCP<br>
+  /// instruction it is often recommended to schedule a cpuid call after the<br>
+  /// RDTSCP instruction. We do not do this yet, trading some imprecision in<br>
+  /// our timing for a reduced overhead in our timing.<br>
+  ///<br>
+  /// @returns A reference to the declaration of @llvm.x86.rdtscp.<br>
+  llvm::Function *getRDTSCP();<br>
+<br>
+  /// Get a reference to "int atexit(void (*function)(void))" function.<br>
+  ///<br>
+  /// This function allows to register function pointers that must be executed<br>
+  /// when the program is terminated.<br>
+  ///<br>
+  /// @returns A reference to @atexit().<br>
+  llvm::Function *getAtExit();<br>
+<br>
+  /// Create function "__polly_perf_final_reporting"<wbr>.<br>
+  ///<br>
+  /// This function finalizes the performance measurements and prints the<br>
+  /// results to stdout. It is expected to be registered with 'atexit()'.<br>
+  llvm::Function *insertFinalReporting();<br>
+};<br>
+} // namespace polly<br>
+<br>
+#endif<br>
<br>
Modified: polly/trunk/lib/CMakeLists.txt<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CMakeLists.txt?rev=299359&r1=299358&r2=299359&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/polly/trunk/lib/<wbr>CMakeLists.txt?rev=299359&r1=<wbr>299358&r2=299359&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- polly/trunk/lib/CMakeLists.txt (original)<br>
+++ polly/trunk/lib/CMakeLists.txt Mon Apr  3 09:55:37 2017<br>
@@ -43,6 +43,7 @@ add_polly_library(Polly<br>
   CodeGen/Utils.cpp<br>
   CodeGen/RuntimeDebugBuilder.<wbr>cpp<br>
   CodeGen/CodegenCleanup.cpp<br>
+  CodeGen/PerfMonitor.cpp<br>
   ${GPGPU_CODEGEN_FILES}<br>
   Exchange/JSONExporter.cpp<br>
   Support/GICHelper.cpp<br>
<br>
Modified: polly/trunk/lib/CodeGen/<wbr>CodeGeneration.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/CodeGeneration.cpp?rev=299359&r1=299358&r2=299359&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/polly/trunk/lib/<wbr>CodeGen/CodeGeneration.cpp?<wbr>rev=299359&r1=299358&r2=<wbr>299359&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- polly/trunk/lib/CodeGen/<wbr>CodeGeneration.cpp (original)<br>
+++ polly/trunk/lib/CodeGen/<wbr>CodeGeneration.cpp Mon Apr  3 09:55:37 2017<br>
@@ -21,6 +21,7 @@<br>
<br>
 #include "polly/CodeGen/IslAst.h"<br>
 #include "polly/CodeGen/IslNodeBuilder.<wbr>h"<br>
+#include "polly/CodeGen/PerfMonitor.h"<br>
 #include "polly/CodeGen/Utils.h"<br>
 #include "polly/DependenceInfo.h"<br>
 #include "polly/LinkAllPasses.h"<br>
@@ -45,6 +46,11 @@ static cl::opt<bool> Verify("polly-codeg<br>
                             cl::Hidden, cl::init(true), cl::ZeroOrMore,<br>
                             cl::cat(PollyCategory));<br>
<br>
+static cl::opt<bool><br>
+    PerfMonitoring("polly-codegen-<wbr>perf-monitoring",<br>
+                   cl::desc("Add run-time performance monitoring"), cl::Hidden,<br>
+                   cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory));<br>
+<br>
 namespace {<br>
 class CodeGeneration : public ScopPass {<br>
 public:<br>
@@ -145,6 +151,18 @@ public:<br>
     IslNodeBuilder NodeBuilder(Builder, Annotator, this, *DL, *LI, *SE, *DT, S,<br>
                                StartBlock);<br>
<br>
+    if (PerfMonitoring) {<br>
+      PerfMonitor P(EnteringBB->getParent()-><wbr>getParent());<br>
+      P.initialize();<br>
+      P.insertRegionStart(<wbr>SplitBlock->getTerminator());<br>
+<br>
+      BasicBlock *MergeBlock = SplitBlock->getTerminator()<br>
+                                   ->getSuccessor(0)<br>
+                                   ->getUniqueSuccessor()<br>
+                                   ->getUniqueSuccessor();<br>
+      P.insertRegionEnd(MergeBlock-><wbr>getTerminator());<br>
+    }<br>
+<br>
     // First generate code for the hoisted invariant loads and transitively the<br>
     // parameters they reference. Afterwards, for the remaining parameters that<br>
     // might reference the hoisted loads. Finally, build the runtime check<br>
<br>
Added: polly/trunk/lib/CodeGen/<wbr>PerfMonitor.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PerfMonitor.cpp?rev=299359&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/polly/trunk/lib/<wbr>CodeGen/PerfMonitor.cpp?rev=<wbr>299359&view=auto</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- polly/trunk/lib/CodeGen/<wbr>PerfMonitor.cpp (added)<br>
+++ polly/trunk/lib/CodeGen/<wbr>PerfMonitor.cpp Mon Apr  3 09:55:37 2017<br>
@@ -0,0 +1,235 @@<br>
+//===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======//<br>
+//<br>
+//                     The LLVM Compiler Infrastructure<br>
+//<br>
+// This file is distributed under the University of Illinois Open Source<br>
+// License. See LICENSE.TXT for details.<br>
+//<br>
+//===------------------------<wbr>------------------------------<wbr>----------------===//<br>
+//<br>
+//===------------------------<wbr>------------------------------<wbr>----------------===//<br>
+<br>
+#include "polly/CodeGen/PerfMonitor.h"<br>
+#include "polly/CodeGen/<wbr>RuntimeDebugBuilder.h"<br>
+#include "llvm/ADT/Triple.h"<br>
+<br>
+using namespace llvm;<br>
+using namespace polly;<br>
+<br>
+Function *PerfMonitor::getAtExit() {<br>
+  const char *Name = "atexit";<br>
+  Function *F = M->getFunction(Name);<br>
+<br>
+  if (!F) {<br>
+    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;<br>
+    FunctionType *Ty = FunctionType::get(Builder.<wbr>getInt32Ty(),<br>
+                                         {Builder.getInt8PtrTy()}, false);<br>
+    F = Function::Create(Ty, Linkage, Name, M);<br>
+  }<br>
+<br>
+  return F;<br>
+}<br>
+<br>
+void PerfMonitor::<wbr>addToGlobalConstructors(<wbr>Function *Fn) {<br>
+  const char *Name = "llvm.global_ctors";<br>
+  GlobalVariable *GV = M->getGlobalVariable(Name);<br>
+  std::vector<Constant *> V;<br>
+<br>
+  if (GV) {<br>
+    Constant *Array = GV->getInitializer();<br>
+    for (Value *X : Array->operand_values())<br>
+      V.push_back(cast<Constant>(X))<wbr>;<br>
+    GV->eraseFromParent();<br>
+  }<br>
+<br>
+  StructType *ST = StructType::get(Builder.<wbr>getInt32Ty(), Fn->getType(),<br>
+                                   Builder.getInt8PtrTy(), nullptr);<br>
+<br>
+  V.push_back(ConstantStruct::<wbr>get(<br>
+      ST, Builder.getInt32(10), Fn,<br>
+      ConstantPointerNull::get(<wbr>Builder.getInt8PtrTy()), nullptr));<br>
+  ArrayType *Ty = ArrayType::get(ST, V.size());<br>
+<br>
+  GV = new GlobalVariable(*M, Ty, true, GlobalValue::AppendingLinkage,<br>
+                          ConstantArray::get(Ty, V), Name, nullptr,<br>
+                          GlobalVariable::<wbr>NotThreadLocal);<br>
+}<br>
+<br>
+Function *PerfMonitor::getRDTSCP() {<br>
+  const char *Name = "llvm.x86.rdtscp";<br>
+  Function *F = M->getFunction(Name);<br></blockquote><div>For intrinsics, it is better to use "auto *F = Intrinsic::getDeclaration(M, Intrinsic::x86_rdtscp);"</div><div> </div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">
+<br>
+  if (!F) {<br>
+    GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;<br>
+    FunctionType *Ty = FunctionType::get(Builder.<wbr>getInt64Ty(),<br>
+                                         {Builder.getInt8PtrTy()}, false);<br>
+    F = Function::Create(Ty, Linkage, Name, M);<br>
+  }<br>
+<br>
+  return F;<br>
+}<br>
+<br>
+PerfMonitor::PerfMonitor(<wbr>Module *M) : M(M), Builder(M->getContext()) {<br>
+  if (Triple(M->getTargetTriple()).<wbr>getArch() == llvm::Triple::x86_64)<br>
+    Supported = true;<br>
+  else<br>
+    Supported = false;<br>
+}<br>
+<br>
+void PerfMonitor::<wbr>addGlobalVariables() {<br>
+  auto TryRegisterGlobal = [=](const char *Name, Constant *InitialValue,<br>
+                               Value **Location) {<br>
+    *Location = M->getGlobalVariable(Name);<br>
+<br>
+    if (!*Location)<br>
+      *Location = new GlobalVariable(<br>
+          *M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage,<br>
+          InitialValue, Name, nullptr, GlobalVariable::<wbr>InitialExecTLSModel);<br>
+  };<br>
+<br>
+  TryRegisterGlobal("__polly_<wbr>perf_cycles_total_start", Builder.getInt64(0),<br>
+                    &CyclesTotalStartPtr);<br>
+<br>
+  TryRegisterGlobal("__polly_<wbr>perf_initialized", Builder.getInt1(0),<br>
+                    &AlreadyInitializedPtr);<br>
+<br>
+  TryRegisterGlobal("__polly_<wbr>perf_cycles_in_scops", Builder.getInt64(0),<br>
+                    &CyclesInScopsPtr);<br>
+<br>
+  TryRegisterGlobal("__polly_<wbr>perf_cycles_in_scop_start", Builder.getInt64(0),<br>
+                    &CyclesInScopStartPtr);<br>
+<br>
+  TryRegisterGlobal("__polly_<wbr>perf_write_loation", Builder.getInt32(0),<br>
+                    &RDTSCPWriteLocation);<br>
+}<br>
+<br>
+static const char *InitFunctionName = "__polly_perf_init";<br>
+static const char *FinalReportingFunctionName = "__polly_perf_final";<br>
+<br>
+Function *PerfMonitor::<wbr>insertFinalReporting() {<br>
+  // Create new function.<br>
+  GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;<br>
+  FunctionType *Ty = FunctionType::get(Builder.<wbr>getVoidTy(), {}, false);<br>
+  Function *ExitFn =<br>
+      Function::Create(Ty, Linkage, FinalReportingFunctionName, M);<br>
+  BasicBlock *Start = BasicBlock::Create(M-><wbr>getContext(), "start", ExitFn);<br>
+  Builder.SetInsertPoint(Start);<br>
+<br>
+  if (!Supported) {<br>
+    RuntimeDebugBuilder::<wbr>createCPUPrinter(<br>
+        Builder, "Polly runtime information generation not supported\n");<br>
+    Builder.CreateRetVoid();<br>
+    return ExitFn;<br>
+  }<br>
+<br>
+  // Measure current cycles and compute final timings.<br>
+  Function *RDTSCPFn = getRDTSCP();<br>
+  Value *CurrentCycles = Builder.CreateCall(<br>
+      RDTSCPFn,<br>
+      Builder.CreatePointerCast(<wbr>RDTSCPWriteLocation, Builder.getInt8PtrTy()));<br>
+  Value *CyclesStart = Builder.CreateLoad(<wbr>CyclesTotalStartPtr, true);<br>
+  Value *CyclesTotal = Builder.CreateSub(<wbr>CurrentCycles, CyclesStart);<br>
+  Value *CyclesInScops = Builder.CreateLoad(<wbr>CyclesInScopsPtr, true);<br>
+<br>
+  // Print the runtime information.<br>
+  RuntimeDebugBuilder::<wbr>createCPUPrinter(Builder, "Polly runtime information\n");<br>
+  RuntimeDebugBuilder::<wbr>createCPUPrinter(Builder, "-------------------------\n")<wbr>;<br>
+  RuntimeDebugBuilder::<wbr>createCPUPrinter(Builder, "Total: ", CyclesTotal, "\n");<br>
+  RuntimeDebugBuilder::<wbr>createCPUPrinter(Builder, "Scops: ", CyclesInScops,<br>
+                                        "\n");<br>
+<br>
+  // Finalize function.<br>
+  Builder.CreateRetVoid();<br>
+  return ExitFn;<br>
+}<br>
+<br>
+void PerfMonitor::initialize() {<br>
+  addGlobalVariables();<br>
+<br>
+  Function *F = M->getFunction(<wbr>InitFunctionName);<br>
+  if (F)<br>
+    return;<br>
+<br>
+  // initialize<br>
+  Function *FinalReporting = insertFinalReporting();<br>
+  Function *InitFn = insertInitFunction(<wbr>FinalReporting);<br>
+  addToGlobalConstructors(<wbr>InitFn);<br>
+}<br>
+<br>
+Function *PerfMonitor::<wbr>insertInitFunction(Function *FinalReporting) {<br>
+  // Insert function definition and BBs.<br>
+  GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;<br>
+  FunctionType *Ty = FunctionType::get(Builder.<wbr>getVoidTy(), {}, false);<br>
+  Function *InitFn = Function::Create(Ty, Linkage, InitFunctionName, M);<br>
+  BasicBlock *Start = BasicBlock::Create(M-><wbr>getContext(), "start", InitFn);<br>
+  BasicBlock *EarlyReturn =<br>
+      BasicBlock::Create(M-><wbr>getContext(), "earlyreturn", InitFn);<br>
+  BasicBlock *InitBB = BasicBlock::Create(M-><wbr>getContext(), "initbb", InitFn);<br>
+<br>
+  Builder.SetInsertPoint(Start);<br>
+<br>
+  // Check if this function was already run. If yes, return.<br>
+  //<br>
+  // In case profiling has been enabled in multiple translation units, the<br>
+  // initializer function will be added to the global constructors list of<br>
+  // each translation unit. When merging translation units, the global<br>
+  // constructor lists are just appended, such that the initializer will appear<br>
+  // multiple times. To avoid initializations being run multiple times (and<br>
+  // especially to avoid that atExitFn is called more than once), we bail<br>
+  // out if the intializer is run more than once.<br>
+  Value *HasRunBefore = Builder.CreateLoad(<wbr>AlreadyInitializedPtr);<br>
+  Builder.CreateCondBr(<wbr>HasRunBefore, EarlyReturn, InitBB);<br>
+  Builder.SetInsertPoint(<wbr>EarlyReturn);<br>
+  Builder.CreateRetVoid();<br>
+<br>
+  // Keep track that this function has been run once.<br>
+  Builder.SetInsertPoint(InitBB)<wbr>;<br>
+  Value *True = Builder.getInt1(true);<br>
+  Builder.CreateStore(True, AlreadyInitializedPtr);<br>
+<br>
+  // Register the final reporting function with atexit().<br>
+  Value *FinalReportingPtr =<br>
+      Builder.CreatePointerCast(<wbr>FinalReporting, Builder.getInt8PtrTy());<br>
+  Function *AtExitFn = getAtExit();<br>
+  Builder.CreateCall(AtExitFn, {FinalReportingPtr});<br>
+<br>
+  if (Supported) {<br>
+    // Read the currently cycle counter and store the result for later.<br>
+    Function *RDTSCPFn = getRDTSCP();<br>
+    Value *CurrentCycles = Builder.CreateCall(<br>
+        RDTSCPFn,<br>
+        Builder.CreatePointerCast(<wbr>RDTSCPWriteLocation, Builder.getInt8PtrTy()));<br>
+    Builder.CreateStore(<wbr>CurrentCycles, CyclesTotalStartPtr, true);<br>
+  }<br>
+  Builder.CreateRetVoid();<br>
+<br>
+  return InitFn;<br>
+}<br>
+<br>
+void PerfMonitor::<wbr>insertRegionStart(Instruction *InsertBefore) {<br>
+  if (!Supported)<br>
+    return;<br>
+<br>
+  Builder.SetInsertPoint(<wbr>InsertBefore);<br>
+  Function *RDTSCPFn = getRDTSCP();<br>
+  Value *CurrentCycles = Builder.CreateCall(<br>
+      RDTSCPFn,<br>
+      Builder.CreatePointerCast(<wbr>RDTSCPWriteLocation, Builder.getInt8PtrTy()));<br>
+  Builder.CreateStore(<wbr>CurrentCycles, CyclesInScopStartPtr, true);<br>
+}<br>
+<br>
+void PerfMonitor::insertRegionEnd(<wbr>Instruction *InsertBefore) {<br>
+  if (!Supported)<br>
+    return;<br>
+<br>
+  Builder.SetInsertPoint(<wbr>InsertBefore);<br>
+  Function *RDTSCPFn = getRDTSCP();<br>
+  LoadInst *CyclesStart = Builder.CreateLoad(<wbr>CyclesInScopStartPtr, true);<br>
+  Value *CurrentCycles = Builder.CreateCall(<br>
+      RDTSCPFn,<br>
+      Builder.CreatePointerCast(<wbr>RDTSCPWriteLocation, Builder.getInt8PtrTy()));<br>
+  Value *CyclesInScop = Builder.CreateSub(<wbr>CurrentCycles, CyclesStart);<br>
+  Value *CyclesInScops = Builder.CreateLoad(<wbr>CyclesInScopsPtr, true);<br>
+  CyclesInScops = Builder.CreateAdd(<wbr>CyclesInScops, CyclesInScop);<br>
+  Builder.CreateStore(<wbr>CyclesInScops, CyclesInScopsPtr, true);<br>
+}<br>
<br>
Added: polly/trunk/test/Isl/CodeGen/<wbr>perf_monitoring.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/perf_monitoring.ll?rev=299359&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/polly/trunk/test/Isl/<wbr>CodeGen/perf_monitoring.ll?<wbr>rev=299359&view=auto</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- polly/trunk/test/Isl/CodeGen/<wbr>perf_monitoring.ll (added)<br>
+++ polly/trunk/test/Isl/CodeGen/<wbr>perf_monitoring.ll Mon Apr  3 09:55:37 2017<br>
@@ -0,0 +1,87 @@<br>
+; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \<br>
+; RUN:   -S < %s | FileCheck %s<br>
+<br>
+; void f(long A[], long N) {<br>
+;   long i;<br>
+;   if (true)<br>
+;     for (i = 0; i < N; ++i)<br>
+;       A[i] = i;<br>
+; }<br>
+<br>
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-<wbr>i16:16:16-i32:32:32-i64:64:64-<wbr>f32:32:32-f64:64:64-v64:64:64-<wbr>v128:128:128-a0:0:64-s0:64:64-<wbr>f80:128:128"<br>
+target triple = "x86_64-unknown-linux-gnu"<br>
+<br>
+define void @f(i64* %A, i64 %N) nounwind {<br>
+entry:<br>
+  fence seq_cst<br>
+  br label %next<br>
+<br>
+next:<br>
+  br i1 true, label %for.i, label %return<br>
+<br>
+for.i:<br>
+  %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]<br>
+  %scevgep = getelementptr i64, i64* %A, i64 %indvar<br>
+  store i64 %indvar, i64* %scevgep<br>
+  %indvar.next = add nsw i64 %indvar, 1<br>
+  %exitcond = icmp eq i64 %indvar.next, %N<br>
+  br i1 %exitcond, label %return, label %for.i<br>
+<br>
+return:<br>
+  fence seq_cst<br>
+  ret void<br>
+}<br>
+<br>
+; CHECK:      @__polly_perf_cycles_total_<wbr>start = weak thread_local(initialexec) constant i64 0<br>
+; CHECK-NEXT: @__polly_perf_initialized = weak thread_local(initialexec) constant i1 false<br>
+; CHECK-NEXT: @__polly_perf_cycles_in_scops = weak thread_local(initialexec) constant i64 0<br>
+; CHECK-NEXT: @__polly_perf_cycles_in_scop_<wbr>start = weak thread_local(initialexec) constant i64 0<br>
+; CHECK-NEXT: @__polly_perf_write_loation = weak thread_local(initialexec) constant i32 0<br>
+<br>
+; CHECK:      polly.split_new_and_old:                          ; preds = %entry<br>
+; CHECK-NEXT:   %0 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))<br>
+; CHECK-NEXT:   store volatile i64 %0, i64* @__polly_perf_cycles_in_scop_<wbr>start<br>
+<br>
+; CHECK:      polly.merge_new_and_old:                          ; preds = %polly.exiting, %return.region_exiting<br>
+; CHECK-NEXT:   %5 = load volatile i64, i64* @__polly_perf_cycles_in_scop_<wbr>start<br>
+; CHECK-NEXT:   %6 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))<br>
+; CHECK-NEXT:   %7 = sub i64 %6, %5<br>
+; CHECK-NEXT:   %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops<br>
+; CHECK-NEXT:   %9 = add i64 %8, %7<br>
+; CHECK-NEXT:   store volatile i64 %9, i64* @__polly_perf_cycles_in_scops<br>
+; CHECK-NEXT:   br label %return<br>
+<br>
+<br>
+; CHECK:      define weak_odr void @__polly_perf_final() {<br>
+; CHECK-NEXT: start:<br>
+; CHECK-NEXT:   %0 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))<br>
+; CHECK-NEXT:   %1 = load volatile i64, i64* @__polly_perf_cycles_total_<wbr>start<br>
+; CHECK-NEXT:   %2 = sub i64 %0, %1<br>
+; CHECK-NEXT:   %3 = load volatile i64, i64* @__polly_perf_cycles_in_scops<br>
+; CHECK-NEXT:   %4 = call i32 (...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @1, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([27 x i8], [27 x i8] addrspace(4)* @0, i32 0, i32 0))<br>
+; CHECK-NEXT:   %5 = call i32 @fflush(i8* null)<br>
+; CHECK-NEXT:   %6 = call i32 (...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @3, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([27 x i8], [27 x i8] addrspace(4)* @2, i32 0, i32 0))<br>
+; CHECK-NEXT:   %7 = call i32 @fflush(i8* null)<br>
+; CHECK-NEXT:   %8 = call i32 (...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @6, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @4, i32 0, i32 0), i64 %2, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @5, i32 0, i32 0))<br>
+; CHECK-NEXT:   %9 = call i32 @fflush(i8* null)<br>
+; CHECK-NEXT:   %10 = call i32 (...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @9, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([8 x i8], [8 x i8] addrspace(4)* @7, i32 0, i32 0), i64 %3, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @8, i32 0, i32 0))<br>
+; CHECK-NEXT:   %11 = call i32 @fflush(i8* null)<br>
+; CHECK-NEXT:   ret void<br>
+; CHECK-NEXT: }<br>
+<br>
+<br>
+; CHECK:      define weak_odr void @__polly_perf_init() {<br>
+; CHECK-NEXT: start:<br>
+; CHECK-NEXT:   %0 = load i1, i1* @__polly_perf_initialized<br>
+; CHECK-NEXT:   br i1 %0, label %earlyreturn, label %initbb<br>
+<br>
+; CHECK:      earlyreturn:                                      ; preds = %start<br>
+; CHECK-NEXT:   ret void<br>
+<br>
+; CHECK:      initbb:                                           ; preds = %start<br>
+; CHECK-NEXT:   store i1 true, i1* @__polly_perf_initialized<br>
+; CHECK-NEXT:   %1 = call i32 @atexit(i8* bitcast (void ()* @__polly_perf_final to i8*))<br>
+; CHECK-NEXT:   %2 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))<br>
+; CHECK-NEXT:   store volatile i64 %2, i64* @__polly_perf_cycles_total_<wbr>start<br>
+; CHECK-NEXT:   ret void<br>
+; CHECK-NEXT: }<br>
<br>
<br>
______________________________<wbr>_________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br></div></div>