r315094 - OpenCL: Assume functions are convergent
Matt Arsenault via cfe-commits
cfe-commits at lists.llvm.org
Fri Oct 6 12:34:41 PDT 2017
Author: arsenm
Date: Fri Oct 6 12:34:40 2017
New Revision: 315094
URL: http://llvm.org/viewvc/llvm-project?rev=315094&view=rev
Log:
OpenCL: Assume functions are convergent
This was done for CUDA functions in r261779, and for the same
reason it also needs to be done for OpenCL: an arbitrary function
could contain a barrier() call, which in turn requires the calling
function to be convergent.
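
To make the hazard concrete, here is a minimal OpenCL sketch (the
function names are hypothetical, not from the commit): the caller
cannot see whether the callee synchronizes, so it has to be assumed
convergent until proven otherwise.

    // sync_rows() is an ordinary-looking function, but it performs a
    // work-group barrier internally; callers cannot tell this from
    // the call site alone.
    void sync_rows(__local int *tile) {
      barrier(CLK_LOCAL_MEM_FENCE);
    }

    kernel void reduce(__global int *out, __local int *tile) {
      sync_rows(tile);
      // If sync_rows() were not marked convergent, the optimizer
      // could legally sink the call into the divergent branch below,
      // where only one work-item reaches the barrier, which would
      // deadlock the work-group.
      if (get_local_id(0) == 0)
        out[get_group_id(0)] = tile[0];
    }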
Modified:
cfe/trunk/include/clang/Basic/LangOptions.h
cfe/trunk/lib/CodeGen/CGCall.cpp
cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl
cfe/trunk/test/CodeGenOpenCL/convergent.cl
Modified: cfe/trunk/include/clang/Basic/LangOptions.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/LangOptions.h?rev=315094&r1=315093&r2=315094&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/LangOptions.h (original)
+++ cfe/trunk/include/clang/Basic/LangOptions.h Fri Oct 6 12:34:40 2017
@@ -197,6 +197,10 @@ public:
bool allowsNonTrivialObjCLifetimeQualifiers() const {
return ObjCAutoRefCount || ObjCWeak;
}
+
+ bool assumeFunctionsAreConvergent() const {
+ return (CUDA && CUDAIsDevice) || OpenCL;
+ }
};
/// \brief Floating point control options
Modified: cfe/trunk/lib/CodeGen/CGCall.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=315094&r1=315093&r2=315094&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGCall.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCall.cpp Fri Oct 6 12:34:40 2017
@@ -1750,13 +1750,16 @@ void CodeGenModule::ConstructDefaultFnAt
FuncAttrs.addAttribute("backchain");
}
- if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
- // Conservatively, mark all functions and calls in CUDA as convergent
- // (meaning, they may call an intrinsically convergent op, such as
- // __syncthreads(), and so can't have certain optimizations applied around
- // them). LLVM will remove this attribute where it safely can.
+ if (getLangOpts().assumeFunctionsAreConvergent()) {
+ // Conservatively, mark all functions and calls in CUDA and OpenCL as
+ // convergent (meaning, they may call an intrinsically convergent op, such
+ // as __syncthreads() / barrier(), and so can't have certain optimizations
+ // applied around them). LLVM will remove this attribute where it safely
+ // can.
FuncAttrs.addAttribute(llvm::Attribute::Convergent);
+ }
+ if (getLangOpts().CUDA && getLangOpts().CUDAIsDevice) {
// Exceptions aren't supported in CUDA device code.
FuncAttrs.addAttribute(llvm::Attribute::NoUnwind);
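
As a rough illustration of the net effect (hand-written, abbreviated
IR, not taken from the commit): even a trivial OpenCL function now
starts out convergent, and LLVM may strip the attribute later where
that is provably safe.

    // Compiled as OpenCL, even this trivial function now carries the
    // attribute in its default set. Illustrative IR, abbreviated:
    //
    //   define spir_func void @trivial() #0 {
    //     ret void
    //   }
    //   attributes #0 = { convergent ... }
    void trivial(void) {}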
Modified: cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl?rev=315094&r1=315093&r2=315094&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl Fri Oct 6 12:34:40 2017
@@ -151,28 +151,28 @@ kernel void reqd_work_group_size_32_2_1_
// CHECK-NOT: "amdgpu-num-sgpr"="0"
// CHECK-NOT: "amdgpu-num-vgpr"="0"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { noinline nounwind optnone "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { noinline nounwind optnone "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-num-sgpr"="32"
-// CHECK-DAG: attributes [[NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-num-vgpr"="64"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_64_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="64,64"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_16_128]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="16,128"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32"
+// CHECK-DAG: attributes [[NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-vgpr"="64"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
-// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = { noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = { convergent noinline nounwind optnone "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
Modified: cfe/trunk/test/CodeGenOpenCL/convergent.cl
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/convergent.cl?rev=315094&r1=315093&r2=315094&view=diff
==============================================================================
--- cfe/trunk/test/CodeGenOpenCL/convergent.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/convergent.cl Fri Oct 6 12:34:40 2017
@@ -1,9 +1,19 @@
-// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - | opt -instnamer -S | FileCheck %s
+// RUN: %clang_cc1 -triple spir-unknown-unknown -emit-llvm %s -o - | opt -instnamer -S | FileCheck -enable-var-scope %s
+
+// This is initially assumed convergent, but can be deduced to not require it.
+
+// CHECK-LABEL: define spir_func void @non_convfun() local_unnamed_addr #0
+// CHECK: ret void
+__attribute__((noinline))
+void non_convfun(void) {
+ volatile int* p;
+ *p = 0;
+}
void convfun(void) __attribute__((convergent));
-void non_convfun(void);
void nodupfun(void) __attribute__((noduplicate));
+// External functions should be assumed convergent.
void f(void);
void g(void);
@@ -17,19 +27,23 @@ void g(void);
// non_convfun();
// }
//
-// CHECK: define spir_func void @test_merge_if(i32 %[[a:.+]])
-// CHECK: %[[tobool:.+]] = icmp eq i32 %[[a]], 0
+// CHECK-LABEL: define spir_func void @test_merge_if(i32 %a) local_unnamed_addr #1 {
+// CHECK: %[[tobool:.+]] = icmp eq i32 %a, 0
// CHECK: br i1 %[[tobool]], label %[[if_end3_critedge:.+]], label %[[if_then:.+]]
+
// CHECK: [[if_then]]:
// CHECK: tail call spir_func void @f()
// CHECK: tail call spir_func void @non_convfun()
// CHECK: tail call spir_func void @g()
+
// CHECK: br label %[[if_end3:.+]]
+
// CHECK: [[if_end3_critedge]]:
// CHECK: tail call spir_func void @non_convfun()
// CHECK: br label %[[if_end3]]
+
// CHECK: [[if_end3]]:
-// CHECK-LABEL: ret void
+// CHECK: ret void
void test_merge_if(int a) {
if (a) {
@@ -41,13 +55,13 @@ void test_merge_if(int a) {
}
}
-// CHECK-DAG: declare spir_func void @f()
-// CHECK-DAG: declare spir_func void @non_convfun()
-// CHECK-DAG: declare spir_func void @g()
+// CHECK-DAG: declare spir_func void @f() local_unnamed_addr #2
+// CHECK-DAG: declare spir_func void @g() local_unnamed_addr #2
+
// Test two if's are not merged.
-// CHECK: define spir_func void @test_no_merge_if(i32 %[[a:.+]])
-// CHECK: %[[tobool:.+]] = icmp eq i32 %[[a]], 0
+// CHECK-LABEL: define spir_func void @test_no_merge_if(i32 %a) local_unnamed_addr #1
+// CHECK: %[[tobool:.+]] = icmp eq i32 %a, 0
// CHECK: br i1 %[[tobool]], label %[[if_end:.+]], label %[[if_then:.+]]
// CHECK: [[if_then]]:
// CHECK: tail call spir_func void @f()
@@ -56,7 +70,7 @@ void test_merge_if(int a) {
// CHECK: br label %[[if_end]]
// CHECK: [[if_end]]:
// CHECK: %[[tobool_pr:.+]] = phi i1 [ true, %[[if_then]] ], [ false, %{{.+}} ]
-// CHECK: tail call spir_func void @convfun() #[[attr5:.+]]
+// CHECK: tail call spir_func void @convfun() #[[attr4:.+]]
// CHECK: br i1 %[[tobool_pr]], label %[[if_then2:.+]], label %[[if_end3:.+]]
// CHECK: [[if_then2]]:
// CHECK: tail call spir_func void @g()
@@ -74,20 +88,20 @@ void test_no_merge_if(int a) {
}
}
-// CHECK: declare spir_func void @convfun(){{[^#]*}} #[[attr2:[0-9]+]]
+// CHECK: declare spir_func void @convfun(){{[^#]*}} #2
// Test loop is unrolled for convergent function.
-// CHECK-LABEL: define spir_func void @test_unroll()
-// CHECK: tail call spir_func void @convfun() #[[attr5:[0-9]+]]
-// CHECK: tail call spir_func void @convfun() #[[attr5]]
-// CHECK: tail call spir_func void @convfun() #[[attr5]]
-// CHECK: tail call spir_func void @convfun() #[[attr5]]
-// CHECK: tail call spir_func void @convfun() #[[attr5]]
-// CHECK: tail call spir_func void @convfun() #[[attr5]]
-// CHECK: tail call spir_func void @convfun() #[[attr5]]
-// CHECK: tail call spir_func void @convfun() #[[attr5]]
-// CHECK: tail call spir_func void @convfun() #[[attr5]]
-// CHECK: tail call spir_func void @convfun() #[[attr5]]
+// CHECK-LABEL: define spir_func void @test_unroll() local_unnamed_addr #1
+// CHECK: tail call spir_func void @convfun() #[[attr4:[0-9]+]]
+// CHECK: tail call spir_func void @convfun() #[[attr4]]
+// CHECK: tail call spir_func void @convfun() #[[attr4]]
+// CHECK: tail call spir_func void @convfun() #[[attr4]]
+// CHECK: tail call spir_func void @convfun() #[[attr4]]
+// CHECK: tail call spir_func void @convfun() #[[attr4]]
+// CHECK: tail call spir_func void @convfun() #[[attr4]]
+// CHECK: tail call spir_func void @convfun() #[[attr4]]
+// CHECK: tail call spir_func void @convfun() #[[attr4]]
+// CHECK: tail call spir_func void @convfun() #[[attr4]]
// CHECK-LABEL: ret void
void test_unroll() {
@@ -101,7 +115,7 @@ void test_unroll() {
// CHECK: [[for_cond_cleanup:.+]]:
// CHECK: ret void
// CHECK: [[for_body]]:
-// CHECK: tail call spir_func void @nodupfun() #[[attr6:[0-9]+]]
+// CHECK: tail call spir_func void @nodupfun() #[[attr5:[0-9]+]]
// CHECK-NOT: call spir_func void @nodupfun()
// CHECK: br i1 %{{.+}}, label %[[for_body]], label %[[for_cond_cleanup]]
@@ -112,7 +126,9 @@ void test_not_unroll() {
// CHECK: declare spir_func void @nodupfun(){{[^#]*}} #[[attr3:[0-9]+]]
-// CHECK-DAG: attributes #[[attr2]] = { {{[^}]*}}convergent{{[^}]*}} }
-// CHECK-DAG: attributes #[[attr3]] = { {{[^}]*}}noduplicate{{[^}]*}} }
-// CHECK-DAG: attributes #[[attr5]] = { {{[^}]*}}convergent{{[^}]*}} }
-// CHECK-DAG: attributes #[[attr6]] = { {{[^}]*}}noduplicate{{[^}]*}} }
+// CHECK: attributes #0 = { noinline norecurse nounwind "
+// CHECK: attributes #1 = { {{[^}]*}}convergent{{[^}]*}} }
+// CHECK: attributes #2 = { {{[^}]*}}convergent{{[^}]*}} }
+// CHECK: attributes #3 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }
+// CHECK: attributes #4 = { {{[^}]*}}convergent{{[^}]*}} }
+// CHECK: attributes #5 = { {{[^}]*}}convergent noduplicate{{[^}]*}} }