[clang] [clang][CUDA] Add 'noconvergent' function and statement attribute (PR #100637)
Matt Arsenault via cfe-commits
cfe-commits at lists.llvm.org
Mon Jul 29 09:35:18 PDT 2024
================
@@ -10,36 +11,74 @@
#include "Inputs/cuda.h"
-// DEVICE: Function Attrs:
-// DEVICE-SAME: convergent
-// DEVICE-NEXT: define{{.*}} void @_Z3foov
+// DEVICE-LABEL: define dso_local void @_Z3foov(
+// DEVICE-SAME: ) #[[ATTR0:[0-9]+]] {
+// DEVICE-NEXT: [[ENTRY:.*:]]
+// DEVICE-NEXT: ret void
+//
__device__ void foo() {}
+// DEVICE-LABEL: define dso_local void @_Z3baxv(
+// DEVICE-SAME: ) #[[ATTR1:[0-9]+]] {
+// DEVICE-NEXT: [[ENTRY:.*:]]
+// DEVICE-NEXT: ret void
+//
+[[clang::noconvergent]] __device__ void bax() {}
-// HOST: Function Attrs:
-// HOST-NOT: convergent
-// HOST-NEXT: define{{.*}} void @_Z3barv
-// DEVICE: Function Attrs:
-// DEVICE-SAME: convergent
-// DEVICE-NEXT: define{{.*}} void @_Z3barv
__host__ __device__ void baz();
+// DEVICE-LABEL: define dso_local void @_Z3barv(
+// DEVICE-SAME: ) #[[ATTR0]] {
+// DEVICE-NEXT: [[ENTRY:.*:]]
+// DEVICE-NEXT: [[X:%.*]] = alloca i32, align 4
+// DEVICE-NEXT: call void @_Z3bazv() #[[ATTR3:[0-9]+]]
+// DEVICE-NEXT: [[TMP0:%.*]] = call i32 asm "trap
+// DEVICE-NEXT: store i32 [[TMP0]], ptr [[X]], align 4
+// DEVICE-NEXT: call void asm sideeffect "trap", ""() #[[ATTR3]], !srcloc [[META4:![0-9]+]]
+// DEVICE-NEXT: call void asm sideeffect "nop", ""() #[[ATTR5:[0-9]+]], !srcloc [[META5:![0-9]+]]
+// DEVICE-NEXT: ret void
+//
+// HOST-LABEL: define dso_local void @_Z3barv(
+// HOST-SAME: ) #[[ATTR0:[0-9]+]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: [[X:%.*]] = alloca i32, align 4
+// HOST-NEXT: call void @_Z3bazv()
+// HOST-NEXT: [[TMP0:%.*]] = call i32 asm "trap
+// HOST-NEXT: store i32 [[TMP0]], ptr [[X]], align 4
+// HOST-NEXT: call void asm sideeffect "trap", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR3:[0-9]+]], !srcloc [[META3:![0-9]+]]
+// HOST-NEXT: call void asm sideeffect "nop", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR3]], !srcloc [[META4:![0-9]+]]
+// HOST-NEXT: ret void
+//
__host__ __device__ void bar() {
- // DEVICE: call void @_Z3bazv() [[CALL_ATTR:#[0-9]+]]
baz();
- // DEVICE: call i32 asm "trap;", "=l"() [[ASM_ATTR:#[0-9]+]]
int x;
asm ("trap;" : "=l"(x));
- // DEVICE: call void asm sideeffect "trap;", ""() [[ASM_ATTR:#[0-9]+]]
- asm volatile ("trap;");
+ asm volatile ("trap");
+ [[clang::noconvergent]] { asm volatile ("nop"); }
}
----------------
arsenm wrote:
Can you also test using it on an asm function declaration?
https://github.com/llvm/llvm-project/pull/100637
More information about the cfe-commits
mailing list