[PATCH] D85276: [PGO][CUDA][HIP] Skip generating profile on the device stub and wrong-side functions.
Michael Liao via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Thu Aug 6 10:35:19 PDT 2020
hliao updated this revision to Diff 283654.
hliao added a comment.
Revise the comment.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D85276/new/
https://reviews.llvm.org/D85276
Files:
clang/lib/CodeGen/CodeGenPGO.cpp
clang/test/CodeGenCUDA/profile-coverage-mapping.cu
Index: clang/test/CodeGenCUDA/profile-coverage-mapping.cu
===================================================================
--- /dev/null
+++ clang/test/CodeGenCUDA/profile-coverage-mapping.cu
@@ -0,0 +1,20 @@
+// RUN: echo "GPU binary would be here" > %t
+// RUN: %clang_cc1 -fprofile-instrument=clang -triple x86_64-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=PGOGEN %s
+// RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -triple x86_64-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm -o - %s | FileCheck --check-prefix=COVMAP %s
+// RUN: %clang_cc1 -fprofile-instrument=clang -fcoverage-mapping -dump-coverage-mapping -triple x86_64-linux-gnu -target-sdk-version=8.0 -fcuda-include-gpubinary %t -emit-llvm-only -o - %s | FileCheck --check-prefix=MAPPING %s
+
+#include "Inputs/cuda.h"
+
+// PGOGEN-NOT: @__profn_{{.*kernel.*}} =
+// COVMAP-COUNT-2: section "__llvm_covfun", comdat
+// COVMAP-NOT: section "__llvm_covfun", comdat
+// MAPPING-NOT: {{.*dfn.*}}:
+// MAPPING-NOT: {{.*kernel.*}}:
+
+__device__ void dfn(int i) {}
+
+__global__ void kernel(int i) { dfn(i); }
+
+void host(void) {
+ kernel<<<1, 1>>>(1);
+}
Index: clang/lib/CodeGen/CodeGenPGO.cpp
===================================================================
--- clang/lib/CodeGen/CodeGenPGO.cpp
+++ clang/lib/CodeGen/CodeGenPGO.cpp
@@ -773,6 +773,11 @@
if (!D->hasBody())
return;
+ // Skip CUDA/HIP kernel launch stub functions.
+ if (CGM.getLangOpts().CUDA && !CGM.getLangOpts().CUDAIsDevice &&
+ D->hasAttr<CUDAGlobalAttr>())
+ return;
+
bool InstrumentRegions = CGM.getCodeGenOpts().hasProfileClangInstr();
llvm::IndexedInstrProfReader *PGOReader = CGM.getPGOReader();
if (!InstrumentRegions && !PGOReader)
@@ -831,6 +836,18 @@
if (!D->getBody())
return true;
+ // Skip host-only functions in the CUDA device compilation and device-only
+ // functions in the host compilation. This is only a rough filter based on
+ // the declared function attributes: functions that are effectively host-only
+ // or device-only may still have their coverage mapping generated.
+ if (CGM.getLangOpts().CUDA &&
+ ((CGM.getLangOpts().CUDAIsDevice && !D->hasAttr<CUDADeviceAttr>() &&
+ !D->hasAttr<CUDAGlobalAttr>()) ||
+ (!CGM.getLangOpts().CUDAIsDevice &&
+ (D->hasAttr<CUDAGlobalAttr>() ||
+ (!D->hasAttr<CUDAHostAttr>() && D->hasAttr<CUDADeviceAttr>())))))
+ return true;
+
// Don't map the functions in system headers.
const auto &SM = CGM.getContext().getSourceManager();
auto Loc = D->getBody()->getBeginLoc();
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D85276.283654.patch
Type: text/x-patch
Size: 2658 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20200806/51d1657a/attachment.bin>
More information about the cfe-commits mailing list