[clang] [clang][CUDA] Add 'noconvergent' function and statement attribute (PR #100637)

Matt Arsenault via cfe-commits cfe-commits at lists.llvm.org
Mon Jul 29 09:35:18 PDT 2024


================
@@ -10,36 +11,74 @@
 
 #include "Inputs/cuda.h"
 
-// DEVICE: Function Attrs:
-// DEVICE-SAME: convergent
-// DEVICE-NEXT: define{{.*}} void @_Z3foov
+// DEVICE-LABEL: define dso_local void @_Z3foov(
+// DEVICE-SAME: ) #[[ATTR0:[0-9]+]] {
+// DEVICE-NEXT:  [[ENTRY:.*:]]
+// DEVICE-NEXT:    ret void
+//
 __device__ void foo() {}
+// DEVICE-LABEL: define dso_local void @_Z3baxv(
+// DEVICE-SAME: ) #[[ATTR1:[0-9]+]] {
+// DEVICE-NEXT:  [[ENTRY:.*:]]
+// DEVICE-NEXT:    ret void
+//
+[[clang::noconvergent]] __device__ void bax() {}
 
-// HOST: Function Attrs:
-// HOST-NOT: convergent
-// HOST-NEXT: define{{.*}} void @_Z3barv
-// DEVICE: Function Attrs:
-// DEVICE-SAME: convergent
-// DEVICE-NEXT: define{{.*}} void @_Z3barv
 __host__ __device__ void baz();
+// DEVICE-LABEL: define dso_local void @_Z3barv(
+// DEVICE-SAME: ) #[[ATTR0]] {
+// DEVICE-NEXT:  [[ENTRY:.*:]]
+// DEVICE-NEXT:    [[X:%.*]] = alloca i32, align 4
+// DEVICE-NEXT:    call void @_Z3bazv() #[[ATTR3:[0-9]+]]
+// DEVICE-NEXT:    [[TMP0:%.*]] = call i32 asm "trap
+// DEVICE-NEXT:    store i32 [[TMP0]], ptr [[X]], align 4
+// DEVICE-NEXT:    call void asm sideeffect "trap", ""() #[[ATTR3]], !srcloc [[META4:![0-9]+]]
+// DEVICE-NEXT:    call void asm sideeffect "nop", ""() #[[ATTR5:[0-9]+]], !srcloc [[META5:![0-9]+]]
+// DEVICE-NEXT:    ret void
+//
+// HOST-LABEL: define dso_local void @_Z3barv(
+// HOST-SAME: ) #[[ATTR0:[0-9]+]] {
+// HOST-NEXT:  [[ENTRY:.*:]]
+// HOST-NEXT:    [[X:%.*]] = alloca i32, align 4
+// HOST-NEXT:    call void @_Z3bazv()
+// HOST-NEXT:    [[TMP0:%.*]] = call i32 asm "trap
+// HOST-NEXT:    store i32 [[TMP0]], ptr [[X]], align 4
+// HOST-NEXT:    call void asm sideeffect "trap", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR3:[0-9]+]], !srcloc [[META3:![0-9]+]]
+// HOST-NEXT:    call void asm sideeffect "nop", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR3]], !srcloc [[META4:![0-9]+]]
+// HOST-NEXT:    ret void
+//
 __host__ __device__ void bar() {
-  // DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]]
   baz();
-  // DEVICE: call i32 asm "trap;", "=l"() [[ASM_ATTR:#[0-9]+]]
   int x;
   asm ("trap;" : "=l"(x));
-  // DEVICE: call void asm sideeffect "trap;", ""() [[ASM_ATTR:#[0-9]+]]
-  asm volatile ("trap;");
+  asm volatile ("trap");
+  [[clang::noconvergent]] { asm volatile ("nop"); }
 }
 
----------------
arsenm wrote:

Can you also test using the `noconvergent` attribute on an asm function declaration?

https://github.com/llvm/llvm-project/pull/100637


More information about the cfe-commits mailing list