[polly] r304543 - [CodeGen] Track trip counts per-scop for performance measurement.
Siddharth Bhat via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 2 04:36:53 PDT 2017
Author: bollu
Date: Fri Jun 2 06:36:52 2017
New Revision: 304543
URL: http://llvm.org/viewvc/llvm-project?rev=304543&view=rev
Log:
[CodeGen] Track trip counts per-scop for performance measurement.
- Add a counter that is incremented once on exit from a scop.
- Test cases got split into two: one to test the cycles, and another one
to test trip counts.
- Sample output:
```name=sample-output.txt
scop function, entry block name, exit block name, total time, trip count
warmup, %entry.split, %polly.merge_new_and_old, 5180, 1
f, %entry.split, %polly.merge_new_and_old, 409944, 500
g, %entry.split, %polly.merge_new_and_old, 1226, 1
```
Differential Revision: https://reviews.llvm.org/D33822
Added:
polly/trunk/test/Isl/CodeGen/perf_monitoring_cycles_per_scop.ll
polly/trunk/test/Isl/CodeGen/perf_monitoring_trip_counts_per_scop.ll
Removed:
polly/trunk/test/Isl/CodeGen/perf_monitoring_per_scop.ll
Modified:
polly/trunk/include/polly/CodeGen/PerfMonitor.h
polly/trunk/lib/CodeGen/PerfMonitor.cpp
Modified: polly/trunk/include/polly/CodeGen/PerfMonitor.h
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/include/polly/CodeGen/PerfMonitor.h?rev=304543&r1=304542&r2=304543&view=diff
==============================================================================
--- polly/trunk/include/polly/CodeGen/PerfMonitor.h (original)
+++ polly/trunk/include/polly/CodeGen/PerfMonitor.h Fri Jun 2 06:36:52 2017
@@ -62,6 +62,9 @@ private:
/// The total number of cycles spent in the current scop S.
llvm::Value *CyclesInCurrentScopPtr;
+ /// The total number of times the current scop S is executed.
+ llvm::Value *TripCountForCurrentScopPtr;
+
/// The total number of cycles spent within scops.
llvm::Value *CyclesInScopsPtr;
Modified: polly/trunk/lib/CodeGen/PerfMonitor.cpp
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/lib/CodeGen/PerfMonitor.cpp?rev=304543&r1=304542&r2=304543&view=diff
==============================================================================
--- polly/trunk/lib/CodeGen/PerfMonitor.cpp (original)
+++ polly/trunk/lib/CodeGen/PerfMonitor.cpp Fri Jun 2 06:36:52 2017
@@ -87,15 +87,18 @@ static std::string GetScopUniqueVarname(
std::string EntryString, ExitString;
std::tie(EntryString, ExitString) = S.getEntryExitStr();
- Name << "__polly_perf_cycles_in_" << std::string(S.getFunction().getName())
+ Name << "__polly_perf_in_" << std::string(S.getFunction().getName())
<< "_from__" << EntryString << "__to__" << ExitString;
return Name.str();
}
void PerfMonitor::addScopCounter() {
const std::string varname = GetScopUniqueVarname(S);
- TryRegisterGlobal(M, varname.c_str(), Builder.getInt64(0),
+ TryRegisterGlobal(M, (varname + "_cycles").c_str(), Builder.getInt64(0),
&CyclesInCurrentScopPtr);
+
+ TryRegisterGlobal(M, (varname + "_trip_count").c_str(), Builder.getInt64(0),
+ &TripCountForCurrentScopPtr);
}
void PerfMonitor::addGlobalVariables() {
@@ -160,7 +163,7 @@ Function *PerfMonitor::insertFinalReport
RuntimeDebugBuilder::createCPUPrinter(
Builder, "scop function, "
- "entry block name, exit block name, total time\n");
+ "entry block name, exit block name, total time, trip count\n");
ReturnFromFinal = Builder.CreateRetVoid();
return ExitFn;
}
@@ -179,13 +182,17 @@ void PerfMonitor::AppendScopReporting()
Value *CyclesInCurrentScop =
Builder.CreateLoad(this->CyclesInCurrentScopPtr, true);
+
+ Value *TripCountForCurrentScop =
+ Builder.CreateLoad(this->TripCountForCurrentScopPtr, true);
+
std::string EntryName, ExitName;
std::tie(EntryName, ExitName) = S.getEntryExitStr();
// print in CSV for easy parsing with other tools.
- RuntimeDebugBuilder::createCPUPrinter(Builder, S.getFunction().getName(),
- ", ", EntryName, ", ", ExitName, ", ",
- CyclesInCurrentScop, "\n");
+ RuntimeDebugBuilder::createCPUPrinter(
+ Builder, S.getFunction().getName(), ", ", EntryName, ", ", ExitName, ", ",
+ CyclesInCurrentScop, ", ", TripCountForCurrentScop, "\n");
ReturnFromFinal = Builder.CreateRetVoid();
}
@@ -288,4 +295,11 @@ void PerfMonitor::insertRegionEnd(Instru
Value *CyclesInCurrentScop = Builder.CreateLoad(CyclesInCurrentScopPtr, true);
CyclesInCurrentScop = Builder.CreateAdd(CyclesInCurrentScop, CyclesInScop);
Builder.CreateStore(CyclesInCurrentScop, CyclesInCurrentScopPtr, true);
+
+ Value *TripCountForCurrentScop =
+ Builder.CreateLoad(TripCountForCurrentScopPtr, true);
+ TripCountForCurrentScop =
+ Builder.CreateAdd(TripCountForCurrentScop, Builder.getInt64(1));
+ Builder.CreateStore(TripCountForCurrentScop, TripCountForCurrentScopPtr,
+ true);
}
Added: polly/trunk/test/Isl/CodeGen/perf_monitoring_cycles_per_scop.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/perf_monitoring_cycles_per_scop.ll?rev=304543&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/perf_monitoring_cycles_per_scop.ll (added)
+++ polly/trunk/test/Isl/CodeGen/perf_monitoring_cycles_per_scop.ll Fri Jun 2 06:36:52 2017
@@ -0,0 +1,75 @@
+; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \
+; RUN: -S < %s | FileCheck %s
+
+; void f(long A[], long N) {
+; long i;
+; if (true)
+; for (i = 0; i < N; ++i)
+; A[i] = i;
+; }
+; void g(long A[], long N) {
+; long i;
+; if (true)
+; for (i = 0; i < N; ++i)
+; A[i] = i;
+; }
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f(i64* %A, i64 %N) nounwind {
+entry:
+ fence seq_cst
+ br label %next
+
+next:
+ br i1 true, label %for.i, label %return
+
+for.i:
+ %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
+ %scevgep = getelementptr i64, i64* %A, i64 %indvar
+ store i64 %indvar, i64* %scevgep
+ %indvar.next = add nsw i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %N
+ br i1 %exitcond, label %return, label %for.i
+
+return:
+ fence seq_cst
+ ret void
+}
+
+
+define void @g(i64* %A, i64 %N) nounwind {
+entry:
+ fence seq_cst
+ br label %next
+
+next:
+ br i1 true, label %for.i, label %return
+
+for.i:
+ %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
+ %scevgep = getelementptr i64, i64* %A, i64 %indvar
+ store i64 %indvar, i64* %scevgep
+ %indvar.next = add nsw i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %N
+ br i1 %exitcond, label %return, label %for.i
+
+return:
+ fence seq_cst
+ ret void
+}
+
+; Declaration of globals - Check for cycles declaration.
+; @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_cycles" = weak thread_local(initialexec) constant i64 0
+; @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles" = weak thread_local(initialexec) constant i64 0
+
+; Bumping up number of cycles in f
+; CHECK: %10 = load volatile i64, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_cycles"
+; CHECK-NEXT: %11 = add i64 %10, %7
+; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_cycles"
+
+; Bumping up number of cycles in g
+; CHECK: %10 = load volatile i64, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles"
+; CHECK-NEXT: %11 = add i64 %10, %7
+; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_cycles"
Removed: polly/trunk/test/Isl/CodeGen/perf_monitoring_per_scop.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/perf_monitoring_per_scop.ll?rev=304542&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/perf_monitoring_per_scop.ll (original)
+++ polly/trunk/test/Isl/CodeGen/perf_monitoring_per_scop.ll (removed)
@@ -1,98 +0,0 @@
-; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \
-; RUN: -S < %s | FileCheck %s
-
-; void f(long A[], long N) {
-; long i;
-; if (true)
-; for (i = 0; i < N; ++i)
-; A[i] = i;
-; }
-; void g(long A[], long N) {
-; long i;
-; if (true)
-; for (i = 0; i < N; ++i)
-; A[i] = i;
-; }
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-unknown-linux-gnu"
-
-define void @f(i64* %A, i64 %N) nounwind {
-entry:
- fence seq_cst
- br label %next
-
-next:
- br i1 true, label %for.i, label %return
-
-for.i:
- %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
- %scevgep = getelementptr i64, i64* %A, i64 %indvar
- store i64 %indvar, i64* %scevgep
- %indvar.next = add nsw i64 %indvar, 1
- %exitcond = icmp eq i64 %indvar.next, %N
- br i1 %exitcond, label %return, label %for.i
-
-return:
- fence seq_cst
- ret void
-}
-
-
-define void @g(i64* %A, i64 %N) nounwind {
-entry:
- fence seq_cst
- br label %next
-
-next:
- br i1 true, label %for.i, label %return
-
-for.i:
- %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
- %scevgep = getelementptr i64, i64* %A, i64 %indvar
- store i64 %indvar, i64* %scevgep
- %indvar.next = add nsw i64 %indvar, 1
- %exitcond = icmp eq i64 %indvar.next, %N
- br i1 %exitcond, label %return, label %for.i
-
-return:
- fence seq_cst
- ret void
-}
-
-; Declaration of globals
-; CHECK: @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old" = weak thread_local(initialexec) constant i64 0
-; CHECK: @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old" = weak thread_local(initialexec) constant i64 0
-
-; Bumping up counter in f
-; CHECK: polly.merge_new_and_old: ; preds = %polly.exiting, %return.region_exiting
-; CHECK-NEXT: %5 = load volatile i64, i64* @__polly_perf_cycles_in_scop_start
-; CHECK-NEXT: %6 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))
-; CHECK-NEXT: %7 = sub i64 %6, %5
-; CHECK-NEXT: %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops
-; CHECK-NEXT: %9 = add i64 %8, %7
-; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops
-; CHECK-NEXT: %10 = load volatile i64, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old"
-; CHECK-NEXT: %11 = add i64 %10, %7
-; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old"
-; CHECK-NEXT: br label %return
-
-; Bumping up counter in g
-; CHECK: polly.merge_new_and_old: ; preds = %polly.exiting, %return.region_exiting
-; CHECK-NEXT: %5 = load volatile i64, i64* @__polly_perf_cycles_in_scop_start
-; CHECK-NEXT: %6 = call i64 @llvm.x86.rdtscp(i8* bitcast (i32* @__polly_perf_write_loation to i8*))
-; CHECK-NEXT: %7 = sub i64 %6, %5
-; CHECK-NEXT: %8 = load volatile i64, i64* @__polly_perf_cycles_in_scops
-; CHECK-NEXT: %9 = add i64 %8, %7
-; CHECK-NEXT: store volatile i64 %9, i64* @__polly_perf_cycles_in_scops
-; CHECK-NEXT: %10 = load volatile i64, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old"
-; CHECK-NEXT: %11 = add i64 %10, %7
-; CHECK-NEXT: store volatile i64 %11, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old"
-; CHECK-NEXT: br label %return
-
-; Final reporting prints
-; CHECK: %20 = load volatile i64, i64* @"__polly_perf_cycles_in_f_from__%next__to__%polly.merge_new_and_old"
-; CHECK-NEXT: %21 = call i32 (...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @25, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @18, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @19, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @20, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @21, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(4)* @22, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @23, i32 0, i32 0), i64 %20, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @24, i32 0, i32 0))
-; CHECK-NEXT: %22 = call i32 @fflush(i8* null)
-; CHECK-NEXT: %23 = load volatile i64, i64* @"__polly_perf_cycles_in_g_from__%next__to__%polly.merge_new_and_old"
-; CHECK-NEXT: %24 = call i32 (...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @33, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @26, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @27, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([6 x i8], [6 x i8] addrspace(4)* @28, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @29, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([25 x i8], [25 x i8] addrspace(4)* @30, i32 0, i32 0), i8 addrspace(4)* getelementptr inbounds ([3 x i8], [3 x i8] addrspace(4)* @31, i32 0, i32 0), i64 %23, i8 addrspace(4)* getelementptr inbounds ([2 x i8], [2 x i8] addrspace(4)* @32, i32 0, i32 0))
Added: polly/trunk/test/Isl/CodeGen/perf_monitoring_trip_counts_per_scop.ll
URL: http://llvm.org/viewvc/llvm-project/polly/trunk/test/Isl/CodeGen/perf_monitoring_trip_counts_per_scop.ll?rev=304543&view=auto
==============================================================================
--- polly/trunk/test/Isl/CodeGen/perf_monitoring_trip_counts_per_scop.ll (added)
+++ polly/trunk/test/Isl/CodeGen/perf_monitoring_trip_counts_per_scop.ll Fri Jun 2 06:36:52 2017
@@ -0,0 +1,75 @@
+; RUN: opt %loadPolly -polly-codegen -polly-codegen-perf-monitoring \
+; RUN: -S < %s | FileCheck %s
+
+; void f(long A[], long N) {
+; long i;
+; if (true)
+; for (i = 0; i < N; ++i)
+; A[i] = i;
+; }
+; void g(long A[], long N) {
+; long i;
+; if (true)
+; for (i = 0; i < N; ++i)
+; A[i] = i;
+; }
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f(i64* %A, i64 %N) nounwind {
+entry:
+ fence seq_cst
+ br label %next
+
+next:
+ br i1 true, label %for.i, label %return
+
+for.i:
+ %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
+ %scevgep = getelementptr i64, i64* %A, i64 %indvar
+ store i64 %indvar, i64* %scevgep
+ %indvar.next = add nsw i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %N
+ br i1 %exitcond, label %return, label %for.i
+
+return:
+ fence seq_cst
+ ret void
+}
+
+
+define void @g(i64* %A, i64 %N) nounwind {
+entry:
+ fence seq_cst
+ br label %next
+
+next:
+ br i1 true, label %for.i, label %return
+
+for.i:
+ %indvar = phi i64 [ 0, %next], [ %indvar.next, %for.i ]
+ %scevgep = getelementptr i64, i64* %A, i64 %indvar
+ store i64 %indvar, i64* %scevgep
+ %indvar.next = add nsw i64 %indvar, 1
+ %exitcond = icmp eq i64 %indvar.next, %N
+ br i1 %exitcond, label %return, label %for.i
+
+return:
+ fence seq_cst
+ ret void
+}
+
+; Declaration of globals - Check for trip count declaration.
+; CHECK: @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_trip_count" = weak thread_local(initialexec) constant i64 0
+; CHECK: @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count" = weak thread_local(initialexec) constant i64 0
+
+; Bumping up the trip count in f
+; CHECK: %12 = load volatile i64, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_trip_count"
+; CHECK-NEXT: %13 = add i64 %12, 1
+; CHECK-NEXT: store volatile i64 %13, i64* @"__polly_perf_in_f_from__%next__to__%polly.merge_new_and_old_trip_count"
+
+; Bumping up the trip count in g
+; CHECK: %12 = load volatile i64, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count"
+; CHECK-NEXT: %13 = add i64 %12, 1
+; CHECK-NEXT: store volatile i64 %13, i64* @"__polly_perf_in_g_from__%next__to__%polly.merge_new_and_old_trip_count"
More information about the llvm-commits
mailing list