[polly] r304543 - [CodeGen] Track trip counts per-scop for performance measurement.

Siddharth Bhat via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 2 04:36:53 PDT 2017


Author: bollu
Date: Fri Jun  2 06:36:52 2017
New Revision: 304543

URL: http://llvm.org/viewvc/llvm-project?rev=304543&view=rev
Log:
[CodeGen] Track trip counts per-scop for performance measurement.

- Add a counter that is incremented once on exit from a scop.

- Test cases got split into two: one to test the cycles, and another one
to test trip counts.

- Sample output:
```name=sample-output.txt
scop function, entry block name, exit block name, total time, trip count
warmup, %entry.split, %polly.merge_new_and_old, 5180, 1
f, %entry.split, %polly.merge_new_and_old, 409944, 500
g, %entry.split, %polly.merge_new_and_old, 1226, 1
```

Differential Revision: https://reviews.llvm.org/D33822

Added:
    polly/trunk/test/Isl/CodeGen/perf_monitoring_cycles_per_scop.ll
    polly/trunk/test/Isl/CodeGen/perf_monitoring_trip_counts_per_scop.ll
Removed:
    polly/trunk/test/Isl/CodeGen/perf_monitoring_per_scop.ll
Modified:
    polly/trunk/include/polly/CodeGen/PerfMonitor.h
    polly/trunk/lib/CodeGen/PerfMonitor.cpp

Modified: polly/trunk/include/polly/CodeGen/PerfMonitor.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/CodeGen/PerfMonitor.h?rev=304543&r1=304542&r2=304543&view=diff
==============================================================================
--- polly/trunk/include/polly/CodeGen/PerfMonitor.h (original)
+++ polly/trunk/include/polly/CodeGen/PerfMonitor.h Fri Jun  2 06:36:52 2017
@@ -62,6 +62,9 @@ private:
   /// The total number of cycles spent in the current scop S.
   llvm::Value *CyclesInCurrentScopPtr;
 
+  /// The total number of times the current scop S is executed.
+  llvm::Value *TripCountForCurrentScopPtr;
+
   /// The total number of cycles spent within scops.
   llvm::Value *CyclesInScopsPtr;
 

Modified: polly/trunk/lib/CodeGen/PerfMonitor.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PerfMonitor.cpp?rev=304543&r1=304542&r2=304543&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PerfMonitor.cpp (original)
+++ polly/trunk/lib/CodeGen/PerfMonitor.cpp Fri Jun  2 06:36:52 2017
@@ -87,15 +87,18 @@ static std::string GetScopUniqueVarname(
   std::string EntryString, ExitString;
   std::tie(EntryString, ExitString) = S.getEntryExitStr();
 
-  Name << "__polly_perf_cycles_in_" << std::string(S.getFunction().getName())
+  Name << "__polly_perf_in_" << std::string(S.getFunction().getName())
        << "_from__" << EntryString << "__to__" << ExitString;
   return Name.str();
 }
 
 void PerfMonitor::addScopCounter() {
   const std::string varname = GetScopUniqueVarname(S);
-  TryRegisterGlobal(M, varname.c_str(), Builder.getInt64(0),
+  TryRegisterGlobal(M, (varname + "_cycles").c_str(), Builder.getInt64(0),
                     &CyclesInCurrentScopPtr);
+
+  TryRegisterGlobal(M, (varname + "_trip_count").c_str(), Builder.getInt64(0),
+                    &TripCountForCurrentScopPtr);
 }
 
 void PerfMonitor::addGlobalVariables() {
@@ -160,7 +163,7 @@ Function *PerfMonitor::insertFinalReport
 
   RuntimeDebugBuilder::createCPUPrinter(
       Builder, "scop function, "
-               "entry block name, exit block name, total time\n");
+               "entry block name, exit block name, total time, trip count\n");
   ReturnFromFinal = Builder.CreateRetVoid();
   return ExitFn;
 }
@@ -179,13 +182,17 @@ void PerfMonitor::AppendScopReporting()
 
   Value *CyclesInCurrentScop =
       Builder.CreateLoad(this->CyclesInCurrentScopPtr, true);
+
+  Value *TripCountForCurrentScop =
+      Builder.CreateLoad(this->TripCountForCurrentScopPtr, true);
+
   std::string EntryName, ExitName;
   std::tie(EntryName, ExitName) = S.getEntryExitStr();
 
   // print in CSV for easy parsing with other tools.
-  RuntimeDebugBuilder::createCPUPrinter(Builder, S.getFunction().getName(),
-                                        ", ", EntryName, ", ", ExitName, ", ",
-                                        CyclesInCurrentScop, "\n");
+  RuntimeDebugBuilder::createCPUPrinter(
+      Builder, S.getFunction().getName(), ", ", EntryName, ", ", ExitName, ", ",
+      CyclesInCurrentScop, ", ", TripCountForCurrentScop, "\n");
 
   ReturnFromFinal = Builder.CreateRetVoid();
 }
@@ -288,4 +295,11 @@ void PerfMonitor::insertRegionEnd(Instru
   Value *CyclesInCurrentScop = Builder.CreateLoad(CyclesInCurrentScopPtr, true);
   CyclesInCurrentScop = Builder.CreateAdd(CyclesInCurrentScop, CyclesInScop);
   Builder.CreateStore(CyclesInCurrentScop, CyclesInCurrentScopPtr, true);
+
+  Value *TripCountForCurrentScop =
+      Builder.CreateLoad(TripCountForCurrentScopPtr, true);
+  TripCountForCurrentScop =
+      Builder.CreateAdd(TripCountForCurrentScop, Builder.getInt64(1));
+  Builder.CreateStore(TripCountForCurrentScop, TripCountForCurrentScopPtr,
+                      true);
 }

Added: polly/trunk/test/Isl/CodeGen/perf_monitoring_cycles_per_scop.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/perf_monitoring_cycles_per_scop.ll?rev=304543&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/perf_monitoring_cycles_per_scop.ll (added)
+++ polly/trunk/test/Isl/CodeGen/perf_monitoring_cycles_per_scop.ll Fri Jun  2 06:36:52 2017
@@ -0,0 +1,75 @@
+; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \
+; RUN:   -S < %s | FileCheck %s
+
+; void f(long A[], long N) {
+;   long i;
+;   if (true)
+;     for (i = 0; i < N; ++i)
+;       A[i] = i;
+; }
+; void g(long A[], long N) {
+;   long i;
+;   if (true)
+;     for (i = 0; i < N; ++i)
+;       A[i] = i;
+; }
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f(i64* %A, i64 %N) nounwind {
+entry:
+  fence seq_cst
+  br label %next
+
+next:
+  br i1 true, label %for.i, label %return
+
+for.i:
+  %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
+  %scevgep = getelementptr i64, i64* %A, i64 %indvar
+  store i64 %indvar, i64* %scevgep
+  %indvar.next = add nsw i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %N
+  br i1 %exitcond, label %return, label %for.i
+
+return:
+  fence seq_cst
+  ret void
+}
+
+
+define void @g(i64* %A, i64 %N) nounwind {
+entry:
+  fence seq_cst
+  br label %next
+
+next:
+  br i1 true, label %for.i, label %return
+
+for.i:
+  %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
+  %scevgep = getelementptr i64, i64* %A, i64 %indvar
+  store i64 %indvar, i64* %scevgep
+  %indvar.next = add nsw i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %N
+  br i1 %exitcond, label %return, label %for.i
+
+return:
+  fence seq_cst
+  ret void
+}
+
+; Declaration of globals - Check for cycles declaration.
+; @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_cycles" = weak thread_local(initialexec) constant i64 0
+; @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles" = weak thread_local(initialexec) constant i64 0
+
+; Bumping up number of cycles in f
+; CHECK:      %10 = load volatile i64, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_cycles"
+; CHECK-NEXT: %11 = add i64 %10, %7
+; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_cycles"
+
+; Bumping up number of cycles in g
+; CHECK:      %10 = load volatile i64, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles"
+; CHECK-NEXT: %11 = add i64 %10, %7
+; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles"

Removed: polly/trunk/test/Isl/CodeGen/perf_monitoring_per_scop.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/perf_monitoring_per_scop.ll?rev=304542&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/perf_monitoring_per_scop.ll (original)
+++ polly/trunk/test/Isl/CodeGen/perf_monitoring_per_scop.ll (removed)
@@ -1,98 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \
-; RUN:   -S < %s | FileCheck %s
-
-; void f(long A[], long N) {
-;   long i;
-;   if (true)
-;     for (i = 0; i < N; ++i)
-;       A[i] = i;
-; }
-; void g(long A[], long N) {
-;   long i;
-;   if (true)
-;     for (i = 0; i < N; ++i)
-;       A[i] = i;
-; }
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define void @f(i64* %A, i64 %N) nounwind {
-entry:
-  fence seq_cst
-  br label %next
-
-next:
-  br i1 true, label %for.i, label %return
-
-for.i:
-  %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
-  %scevgep = getelementptr i64, i64* %A, i64 %indvar
-  store i64 %indvar, i64* %scevgep
-  %indvar.next = add nsw i64 %indvar, 1
-  %exitcond = icmp eq i64 %indvar.next, %N
-  br i1 %exitcond, label %return, label %for.i
-
-return:
-  fence seq_cst
-  ret void
-}
-
-
-define void @g(i64* %A, i64 %N) nounwind {
-entry:
-  fence seq_cst
-  br label %next
-
-next:
-  br i1 true, label %for.i, label %return
-
-for.i:
-  %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
-  %scevgep = getelementptr i64, i64* %A, i64 %indvar
-  store i64 %indvar, i64* %scevgep
-  %indvar.next = add nsw i64 %indvar, 1
-  %exitcond = icmp eq i64 %indvar.next, %N
-  br i1 %exitcond, label %return, label %for.i
-
-return:
-  fence seq_cst
-  ret void
-}
-
-; Declaration of globals
-; CHECK: @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old" = weak thread_local(initialexec) constant i64 0
-; CHECK: @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old" = weak thread_local(initialexec) constant i64 0
-
-; Bumping up counter in f
-; CHECK:      polly.merge_new_and_old:                          ; preds = %polly.exiting, %return.region_exiting
-; CHECK-NEXT:   %5 = load volatile i64, i64* @__polly_perf_cycles_in_scop_start
-; CHECK-NEXT:   %6 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))
-; CHECK-NEXT:   %7 = sub i64 %6, %5
-; CHECK-NEXT:   %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops
-; CHECK-NEXT:   %9 = add i64 %8, %7
-; CHECK-NEXT:   store volatile i64 %9, i64* @__polly_perf_cycles_in_scops
-; CHECK-NEXT:   %10 = load volatile i64, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old"
-; CHECK-NEXT:   %11 = add i64 %10, %7
-; CHECK-NEXT:   store volatile i64 %11, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old"
-; CHECK-NEXT:   br label %return
-
-; Bumping up counter in g
-; CHECK:       polly.merge_new_and_old:                          ; preds = %polly.exiting, %return.region_exiting
-; CHECK-NEXT:   %5 = load volatile i64, i64* @__polly_perf_cycles_in_scop_start
-; CHECK-NEXT:   %6 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))
-; CHECK-NEXT:   %7 = sub i64 %6, %5
-; CHECK-NEXT:   %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops
-; CHECK-NEXT:   %9 = add i64 %8, %7
-; CHECK-NEXT:   store volatile i64 %9, i64* @__polly_perf_cycles_in_scops
-; CHECK-NEXT:   %10 = load volatile i64, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old"
-; CHECK-NEXT:   %11 = add i64 %10, %7
-; CHECK-NEXT:   store volatile i64 %11, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old"
-; CHECK-NEXT:   br label %return
-
-; Final reporting prints
-; CHECK:        %20 = load volatile i64, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old"
-; CHECK-NEXT:   %21 = call i32 (...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @25, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @18, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @19, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @20, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @21, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(4)* @22, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @23, i32 0, i32 0), i64 %20, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @24, i32 0, i32 0))
-; CHECK-NEXT:   %22 = call i32 @fflush(i8* null)
-; CHECK-NEXT:   %23 = load volatile i64, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old"
-; CHECK-NEXT:   %24 = call i32 (...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @33, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @26, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @27, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @28, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @29, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(4)* @30, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @31, i32 0, i32 0), i64 %23, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @32, i32 0, i32 0))

Added: polly/trunk/test/Isl/CodeGen/perf_monitoring_trip_counts_per_scop.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/perf_monitoring_trip_counts_per_scop.ll?rev=304543&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/perf_monitoring_trip_counts_per_scop.ll (added)
+++ polly/trunk/test/Isl/CodeGen/perf_monitoring_trip_counts_per_scop.ll Fri Jun  2 06:36:52 2017
@@ -0,0 +1,75 @@
+; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \
+; RUN:   -S < %s | FileCheck %s
+
+; void f(long A[], long N) {
+;   long i;
+;   if (true)
+;     for (i = 0; i < N; ++i)
+;       A[i] = i;
+; }
+; void g(long A[], long N) {
+;   long i;
+;   if (true)
+;     for (i = 0; i < N; ++i)
+;       A[i] = i;
+; }
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f(i64* %A, i64 %N) nounwind {
+entry:
+  fence seq_cst
+  br label %next
+
+next:
+  br i1 true, label %for.i, label %return
+
+for.i:
+  %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
+  %scevgep = getelementptr i64, i64* %A, i64 %indvar
+  store i64 %indvar, i64* %scevgep
+  %indvar.next = add nsw i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %N
+  br i1 %exitcond, label %return, label %for.i
+
+return:
+  fence seq_cst
+  ret void
+}
+
+
+define void @g(i64* %A, i64 %N) nounwind {
+entry:
+  fence seq_cst
+  br label %next
+
+next:
+  br i1 true, label %for.i, label %return
+
+for.i:
+  %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
+  %scevgep = getelementptr i64, i64* %A, i64 %indvar
+  store i64 %indvar, i64* %scevgep
+  %indvar.next = add nsw i64 %indvar, 1
+  %exitcond = icmp eq i64 %indvar.next, %N
+  br i1 %exitcond, label %return, label %for.i
+
+return:
+  fence seq_cst
+  ret void
+}
+
+; Declaration of globals - Check for trip count declaration.
+; CHECK: @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_trip_count" = weak thread_local(initialexec) constant i64 0
+; CHECK: @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count" = weak thread_local(initialexec) constant i64 0
+
+; Bumping up the trip count in f
+; CHECK:        %12 = load volatile i64, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_trip_count"
+; CHECK-NEXT:   %13 = add i64 %12, 1
+; CHECK-NEXT:   store volatile i64 %13, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_trip_count"
+
+; Bumping up the trip count in g
+; CHECK:       %12 = load volatile i64, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count"
+; CHECK-NEXT:  %13 = add i64 %12, 1
+; CHECK-NEXT:  store volatile i64 %13, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count"




More information about the llvm-commits mailing list