r271336 - [CUDA] Conservatively mark inline asm as convergent.
Justin Lebar via cfe-commits
cfe-commits at lists.llvm.org
Tue May 31 14:27:13 PDT 2016
Author: jlebar
Date: Tue May 31 16:27:13 2016
New Revision: 271336
URL: http://llvm.org/viewvc/llvm-project?rev=271336&view=rev
Log:
[CUDA] Conservatively mark inline asm as convergent.
Summary:
This is particularly important because a some convergent CUDA intrinsics
(e.g. __shfl_down) are implemented in terms of inline asm.
Reviewers: tra
Subscribers: cfe-commits
Differential Revision: http://reviews.llvm.org/D20836
Modified:
cfe/trunk/lib/CodeGen/CGStmt.cpp
cfe/trunk/test/CodeGenCUDA/convergent.cu
Modified: cfe/trunk/lib/CodeGen/CGStmt.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGStmt.cpp?rev=271336&r1=271335&r2=271336&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGStmt.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGStmt.cpp Tue May 31 16:27:13 2016
@@ -2054,6 +2054,14 @@ void CodeGenFunction::EmitAsmStmt(const
llvm::ConstantAsMetadata::get(Loc)));
}
+ if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
+ // Conservatively, mark all inline asm blocks in CUDA as convergent
+ // (meaning, they may call an intrinsically convergent op, such as bar.sync,
+ // and so can't have certain optimizations applied around them).
+ Result->addAttribute(llvm::AttributeSet::FunctionIndex,
+ llvm::Attribute::Convergent);
+ }
+
// Extract all of the register value results from the asm.
std::vector<llvm::Value*> RegResults;
if (ResultRegTypes.size() == 1) {
Modified: cfe/trunk/test/CodeGenCUDA/convergent.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenCUDA/convergent.cu?rev=271336&r1=271335&r2=271336&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenCUDA/convergent.cu (original)
+++ cfe/trunk/test/CodeGenCUDA/convergent.cu Tue May 31 16:27:13 2016
@@ -25,6 +25,11 @@ __host__ __device__ void baz();
__host__ __device__ void bar() {
// DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]]
baz();
+ // DEVICE: call i32 asm "trap;", "=l"() [[ASM_ATTR:#[0-9]+]]
+ int x;
+ asm ("trap;" : "=l"(x));
+ // DEVICE: call void asm sideeffect "trap;", ""() [[ASM_ATTR:#[0-9]+]]
+ asm volatile ("trap;");
}
// DEVICE: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
@@ -32,6 +37,7 @@ __host__ __device__ void bar() {
// DEVICE-SAME: convergent
// DEVICE-SAME: }
// DEVICE: attributes [[CALL_ATTR]] = { convergent }
+// DEVICE: attributes [[ASM_ATTR]] = { convergent
// HOST: declare void @_Z3bazv() [[BAZ_ATTR:#[0-9]+]]
// HOST: attributes [[BAZ_ATTR]] = {
More information about the cfe-commits
mailing list