[clang] [clang][CUDA] Add 'noconvergent' function and statement attribute (PR #100637)

via cfe-commits cfe-commits at lists.llvm.org
Mon Jul 29 10:20:02 PDT 2024


https://github.com/darkbuck updated https://github.com/llvm/llvm-project/pull/100637

>From d9de73264bf4d555e7e09a2c2687eae72c1fa19e Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao at gmail.com>
Date: Thu, 25 Jul 2024 15:19:15 -0400
Subject: [PATCH 1/8] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
 =?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 clang/include/clang/Basic/Attr.td   |  3 ++-
 clang/lib/CodeGen/CGCall.cpp        |  5 +++++
 clang/lib/CodeGen/CGStmt.cpp        | 33 ++++++++++++++++++++---------
 clang/lib/CodeGen/CodeGenFunction.h |  3 +++
 clang/lib/Sema/SemaStmtAttr.cpp     | 16 ++++++++++++++
 clang/test/SemaOpenCL/convergent.cl |  4 ++--
 6 files changed, 51 insertions(+), 13 deletions(-)

diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
index 4825979a974d2..c3bcaa5d5f235 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -2042,7 +2042,8 @@ def NoDuplicate : InheritableAttr {
 
 def Convergent : InheritableAttr {
   let Spellings = [Clang<"convergent">];
-  let Subjects = SubjectList<[Function]>;
+  let Subjects = SubjectList<[Function, Stmt], WarnDiag,
+                             "functions and statements">;
   let Documentation = [ConvergentDocs];
   let SimpleHandler = 1;
 }
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 2f3dd5d01fa6c..d73feb4382acd 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5636,6 +5636,11 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
     Attrs =
         Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::AlwaysInline);
 
+  // Add call-site convergent attribute if exists.
+  if (InConvergentAttributedStmt)
+    Attrs =
+        Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::Convergent);
+
   // Apply some call-site-specific attributes.
   // TODO: work this into building the attribute set.
 
diff --git a/clang/lib/CodeGen/CGStmt.cpp b/clang/lib/CodeGen/CGStmt.cpp
index aa97f685ac7a9..99559dfe075fb 100644
--- a/clang/lib/CodeGen/CGStmt.cpp
+++ b/clang/lib/CodeGen/CGStmt.cpp
@@ -723,6 +723,7 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
   bool nomerge = false;
   bool noinline = false;
   bool alwaysinline = false;
+  bool convergent = false;
   const CallExpr *musttail = nullptr;
 
   for (const auto *A : S.getAttrs()) {
@@ -738,6 +739,9 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
     case attr::AlwaysInline:
       alwaysinline = true;
       break;
+    case attr::Convergent:
+      convergent = true;
+      break;
     case attr::MustTail: {
       const Stmt *Sub = S.getSubStmt();
       const ReturnStmt *R = cast<ReturnStmt>(Sub);
@@ -756,6 +760,7 @@ void CodeGenFunction::EmitAttributedStmt(const AttributedStmt &S) {
   SaveAndRestore save_nomerge(InNoMergeAttributedStmt, nomerge);
   SaveAndRestore save_noinline(InNoInlineAttributedStmt, noinline);
   SaveAndRestore save_alwaysinline(InAlwaysInlineAttributedStmt, alwaysinline);
+  SaveAndRestore save_convergent(InConvergentAttributedStmt, convergent);
   SaveAndRestore save_musttail(MustTailCall, musttail);
   EmitStmt(S.getSubStmt(), S.getAttrs());
 }
@@ -2465,7 +2470,8 @@ static llvm::MDNode *getAsmSrcLocInfo(const StringLiteral *Str,
 
 static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect,
                               bool HasUnwindClobber, bool ReadOnly,
-                              bool ReadNone, bool NoMerge, const AsmStmt &S,
+                              bool ReadNone, bool NoMerge, bool Convergent,
+                              const AsmStmt &S,
                               const std::vector<llvm::Type *> &ResultRegTypes,
                               const std::vector<llvm::Type *> &ArgElemTypes,
                               CodeGenFunction &CGF,
@@ -2475,6 +2481,10 @@ static void UpdateAsmCallInst(llvm::CallBase &Result, bool HasSideEffect,
 
   if (NoMerge)
     Result.addFnAttr(llvm::Attribute::NoMerge);
+
+  if (Convergent)
+    Result.addFnAttr(llvm::Attribute::Convergent);
+
   // Attach readnone and readonly attributes.
   if (!HasSideEffect) {
     if (ReadNone)
@@ -3037,9 +3047,10 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
   if (IsGCCAsmGoto) {
     CBR = Builder.CreateCallBr(IA, Fallthrough, Transfer, Args);
     EmitBlock(Fallthrough);
-    UpdateAsmCallInst(*CBR, HasSideEffect, false, ReadOnly, ReadNone,
-                      InNoMergeAttributedStmt, S, ResultRegTypes, ArgElemTypes,
-                      *this, RegResults);
+    UpdateAsmCallInst(*CBR, HasSideEffect, /*HasUnwindClobber=*/false, ReadOnly,
+                      ReadNone, InNoMergeAttributedStmt,
+                      InConvergentAttributedStmt, S, ResultRegTypes,
+                      ArgElemTypes, *this, RegResults);
     // Because we are emitting code top to bottom, we don't have enough
     // information at this point to know precisely whether we have a critical
     // edge. If we have outputs, split all indirect destinations.
@@ -3067,15 +3078,17 @@ void CodeGenFunction::EmitAsmStmt(const AsmStmt &S) {
     }
   } else if (HasUnwindClobber) {
     llvm::CallBase *Result = EmitCallOrInvoke(IA, Args, "");
-    UpdateAsmCallInst(*Result, HasSideEffect, true, ReadOnly, ReadNone,
-                      InNoMergeAttributedStmt, S, ResultRegTypes, ArgElemTypes,
-                      *this, RegResults);
+    UpdateAsmCallInst(*Result, HasSideEffect, /*HasUnwindClobber=*/true,
+                      ReadOnly, ReadNone, InNoMergeAttributedStmt,
+                      InConvergentAttributedStmt, S, ResultRegTypes,
+                      ArgElemTypes, *this, RegResults);
   } else {
     llvm::CallInst *Result =
         Builder.CreateCall(IA, Args, getBundlesForFunclet(IA));
-    UpdateAsmCallInst(*Result, HasSideEffect, false, ReadOnly, ReadNone,
-                      InNoMergeAttributedStmt, S, ResultRegTypes, ArgElemTypes,
-                      *this, RegResults);
+    UpdateAsmCallInst(*Result, HasSideEffect, /*HasUnwindClobber=*/false,
+                      ReadOnly, ReadNone, InNoMergeAttributedStmt,
+                      InConvergentAttributedStmt, S, ResultRegTypes,
+                      ArgElemTypes, *this, RegResults);
   }
 
   EmitAsmStores(*this, S, RegResults, ResultRegTypes, ResultTruncRegTypes,
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 67e3019565cd0..329120b70fd49 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -612,6 +612,9 @@ class CodeGenFunction : public CodeGenTypeCache {
   /// True if the current statement has always_inline attribute.
   bool InAlwaysInlineAttributedStmt = false;
 
+  /// True if the current statement has convergent attribute.
+  bool InConvergentAttributedStmt = false;
+
   // The CallExpr within the current statement that the musttail attribute
   // applies to.  nullptr if there is no 'musttail' on the current statement.
   const CallExpr *MustTailCall = nullptr;
diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp
index 7f452d177c16f..ff743d9f9df20 100644
--- a/clang/lib/Sema/SemaStmtAttr.cpp
+++ b/clang/lib/Sema/SemaStmtAttr.cpp
@@ -230,6 +230,20 @@ static Attr *handleNoMergeAttr(Sema &S, Stmt *St, const ParsedAttr &A,
   return ::new (S.Context) NoMergeAttr(S.Context, A);
 }
 
+static Attr *handleConvergentAttr(Sema &S, Stmt *St, const ParsedAttr &A,
+                                  SourceRange Range) {
+  NoMergeAttr NMA(S.Context, A);
+  CallExprFinder CEF(S, St);
+
+  if (!CEF.foundCallExpr() && !CEF.foundAsmStmt()) {
+    S.Diag(St->getBeginLoc(), diag::warn_attribute_ignored_no_calls_in_stmt)
+        << A;
+    return nullptr;
+  }
+
+  return ::new (S.Context) ConvergentAttr(S.Context, A);
+}
+
 template <typename OtherAttr, int DiagIdx>
 static bool CheckStmtInlineAttr(Sema &SemaRef, const Stmt *OrigSt,
                                 const Stmt *CurSt,
@@ -672,6 +686,8 @@ static Attr *ProcessStmtAttribute(Sema &S, Stmt *St, const ParsedAttr &A,
     return handleCodeAlignAttr(S, St, A);
   case ParsedAttr::AT_MSConstexpr:
     return handleMSConstexprAttr(S, St, A, Range);
+  case ParsedAttr::AT_Convergent:
+    return handleConvergentAttr(S, St, A, Range);
   default:
     // N.B., ClangAttrEmitter.cpp emits a diagnostic helper that ensures a
     // declaration attribute is not written on a statement, but this code is
diff --git a/clang/test/SemaOpenCL/convergent.cl b/clang/test/SemaOpenCL/convergent.cl
index 1b7fda41fc0c8..a00e65cea0176 100644
--- a/clang/test/SemaOpenCL/convergent.cl
+++ b/clang/test/SemaOpenCL/convergent.cl
@@ -4,9 +4,9 @@ void f1(void) __attribute__((convergent));
 
 void f2(void) __attribute__((convergent(1))); // expected-error {{'convergent' attribute takes no arguments}}
 
-void f3(int a __attribute__((convergent))); // expected-warning {{'convergent' attribute only applies to functions}}
+void f3(int a __attribute__((convergent))); // expected-warning {{'convergent' attribute only applies to functions and statements}}
 
 void f4(void) {
-  int var1 __attribute__((convergent)); // expected-warning {{'convergent' attribute only applies to functions}}
+  int var1 __attribute__((convergent)); // expected-warning {{'convergent' attribute only applies to functions and statements}}
 }
 

>From 72631243e58ce58daa2535675aa5a8893267e7a0 Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao at gmail.com>
Date: Thu, 25 Jul 2024 15:34:11 -0400
Subject: [PATCH 2/8] Fix clang-formatting

Created using spr 1.3.4
---
 clang/lib/CodeGen/CGCall.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index d73feb4382acd..d11fe5795c52a 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5638,8 +5638,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
 
   // Add call-site convergent attribute if exists.
   if (InConvergentAttributedStmt)
-    Attrs =
-        Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::Convergent);
+    Attrs = Attrs.addFnAttribute(getLLVMContext(), llvm::Attribute::Convergent);
 
   // Apply some call-site-specific attributes.
   // TODO: work this into building the attribute set.

>From ef8ebb9e00f6f281a07decd79021dbe48bf1e29f Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao at gmail.com>
Date: Thu, 25 Jul 2024 18:54:31 -0400
Subject: [PATCH 3/8] cleanup

Created using spr 1.3.4
---
 clang/test/CodeGen/convergent.cpp | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/clang/test/CodeGen/convergent.cpp b/clang/test/CodeGen/convergent.cpp
index ae782710f04fb..1d9c07d26bf71 100644
--- a/clang/test/CodeGen/convergent.cpp
+++ b/clang/test/CodeGen/convergent.cpp
@@ -16,14 +16,12 @@ class B : public A {
 
 bool bar();
 [[clang::convergent]] void f(bool, bool);
-//[[clang::convergent]] void (*fptr)(void);
 
 void foo(int i, A *ap, B *bp) {
   [[clang::convergent]] bar();
   [[clang::convergent]] (i = 4, bar());
   [[clang::convergent]] (void)(bar());
   f(bar(), bar());
-  //fptr();
   [[clang::convergent]] [] { bar(); bar(); }(); // convergent only applies to the anonymous function call
   [[clang::convergent]] for (bar(); bar(); bar()) {}
   [[clang::convergent]] { asm("nop"); }
@@ -68,8 +66,6 @@ void something_else_again() {
 // CHECK: call noundef zeroext i1 @_Z3barv(){{$}}
 // CHECK: call noundef zeroext i1 @_Z3barv(){{$}}
 // CHECK: call void @_Z1fbb({{.*}}) #[[ATTR0]]
-// XXX: %[[FPTR:.*]] = load ptr, ptr @fptr
-// XXX-NEXT: call void %[[FPTR]]() #[[ATTR0]]
 // CHECK: call void @"_ZZ3fooiP1AP1BENK3$_0clEv"{{.*}} #[[ATTR0]]
 // CHECK: call noundef zeroext i1 @_Z3barv() #[[ATTR0]]
 // CHECK-LABEL: for.cond:

>From fa48a1230bb0a32aba5ee9229db6ccdf15a6e51b Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao at gmail.com>
Date: Thu, 25 Jul 2024 19:13:21 -0400
Subject: [PATCH 4/8] update doc

Created using spr 1.3.4
---
 clang/include/clang/Basic/AttrDocs.td | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 99738812c8157..cda583a69fa77 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -1357,10 +1357,11 @@ of the condition.
 def ConvergentDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{
-The ``convergent`` attribute can be placed on a function declaration. It is
-translated into the LLVM ``convergent`` attribute, which indicates that the call
-instructions of a function with this attribute cannot be made control-dependent
-on any additional values.
+The ``convergent`` attribute can be placed on a function declaration or a
+statement containing call expressions. It is translated into the LLVM
+``convergent`` attribute, which indicates that the call instructions of a
+function with this attribute cannot be made control-dependent on any additional
+values.
 
 In languages designed for SPMD/SIMT programming model, e.g. OpenCL or CUDA,
 the call instructions of a function with this attribute must be executed by
@@ -1379,6 +1380,10 @@ Sample usage:
   // Setting it as a C++11 attribute is also valid in a C++ program.
   // void convfunc(void) [[clang::convergent]];
 
+  int f() {
+    [[clang::convergent]] foo(arg);
+    // The call to 'foo' has attribute 'convergent'.
+  }
   }];
 }
 

>From e6b536822c0d81bbcde5c3dacb8db3b9108b0a7b Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao at gmail.com>
Date: Thu, 25 Jul 2024 19:22:34 -0400
Subject: [PATCH 5/8] autogen codegen test checks

Created using spr 1.3.4
---
 clang/test/CodeGen/convergent.cpp | 130 +++++++++++++++++-------------
 1 file changed, 74 insertions(+), 56 deletions(-)

diff --git a/clang/test/CodeGen/convergent.cpp b/clang/test/CodeGen/convergent.cpp
index 1d9c07d26bf71..867cb145312ed 100644
--- a/clang/test/CodeGen/convergent.cpp
+++ b/clang/test/CodeGen/convergent.cpp
@@ -1,3 +1,4 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
 // RUN: %clang_cc1 -emit-llvm %s -triple x86_64-unknown-linux-gnu -o - | FileCheck %s
 
 class A {
@@ -17,6 +18,74 @@ class B : public A {
 bool bar();
 [[clang::convergent]] void f(bool, bool);
 
+// CHECK-LABEL: define dso_local void @_Z3fooiP1AP1B(
+// CHECK-SAME: i32 noundef [[I:%.*]], ptr noundef [[AP:%.*]], ptr noundef [[BP:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[I_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[AP_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[BP_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[REF_TMP:%.*]] = alloca [[CLASS_ANON:%.*]], align 1
+// CHECK-NEXT:    [[A:%.*]] = alloca [[CLASS_A:%.*]], align 8
+// CHECK-NEXT:    [[B:%.*]] = alloca [[CLASS_B:%.*]], align 8
+// CHECK-NEXT:    [[NEWA:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store i32 [[I]], ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    store ptr [[AP]], ptr [[AP_ADDR]], align 8
+// CHECK-NEXT:    store ptr [[BP]], ptr [[BP_ADDR]], align 8
+// CHECK-NEXT:    [[CALL:%.*]] = call noundef zeroext i1 @_Z3barv() #[[ATTR6:[0-9]+]]
+// CHECK-NEXT:    store i32 4, ptr [[I_ADDR]], align 4
+// CHECK-NEXT:    [[CALL1:%.*]] = call noundef zeroext i1 @_Z3barv() #[[ATTR6]]
+// CHECK-NEXT:    [[CALL2:%.*]] = call noundef zeroext i1 @_Z3barv() #[[ATTR6]]
+// CHECK-NEXT:    [[CALL3:%.*]] = call noundef zeroext i1 @_Z3barv()
+// CHECK-NEXT:    [[CALL4:%.*]] = call noundef zeroext i1 @_Z3barv()
+// CHECK-NEXT:    call void @_Z1fbb(i1 noundef zeroext [[CALL3]], i1 noundef zeroext [[CALL4]]) #[[ATTR6]]
+// CHECK-NEXT:    call void @"_ZZ3fooiP1AP1BENK3$_0clEv"(ptr noundef nonnull align 1 dereferenceable(1) [[REF_TMP]]) #[[ATTR6]]
+// CHECK-NEXT:    [[CALL5:%.*]] = call noundef zeroext i1 @_Z3barv() #[[ATTR6]]
+// CHECK-NEXT:    br label %[[FOR_COND:.*]]
+// CHECK:       [[FOR_COND]]:
+// CHECK-NEXT:    [[CALL6:%.*]] = call noundef zeroext i1 @_Z3barv() #[[ATTR6]]
+// CHECK-NEXT:    br i1 [[CALL6]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK:       [[FOR_BODY]]:
+// CHECK-NEXT:    br label %[[FOR_INC:.*]]
+// CHECK:       [[FOR_INC]]:
+// CHECK-NEXT:    [[CALL7:%.*]] = call noundef zeroext i1 @_Z3barv() #[[ATTR6]]
+// CHECK-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
+// CHECK:       [[FOR_END]]:
+// CHECK-NEXT:    call void asm sideeffect "nop", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR7:[0-9]+]], !srcloc [[META4:![0-9]+]]
+// CHECK-NEXT:    [[CALL8:%.*]] = call noundef zeroext i1 @_Z3barv()
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[AP_ADDR]], align 8
+// CHECK-NEXT:    [[VTABLE:%.*]] = load ptr, ptr [[TMP0]], align 8
+// CHECK-NEXT:    [[VFN:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE]], i64 2
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[VFN]], align 8
+// CHECK-NEXT:    call void [[TMP1]](ptr noundef nonnull align 8 dereferenceable(8) [[TMP0]]) #[[ATTR6]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[BP_ADDR]], align 8
+// CHECK-NEXT:    [[VTABLE9:%.*]] = load ptr, ptr [[TMP2]], align 8
+// CHECK-NEXT:    [[VFN10:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE9]], i64 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[VFN10]], align 8
+// CHECK-NEXT:    call void [[TMP3]](ptr noundef nonnull align 8 dereferenceable(8) [[TMP2]])
+// CHECK-NEXT:    call void @_ZN1AC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[A]]) #[[ATTR6]]
+// CHECK-NEXT:    call void @_ZN1A1fEv(ptr noundef nonnull align 8 dereferenceable(8) [[A]]) #[[ATTR6]]
+// CHECK-NEXT:    call void @_ZN1A1gEv(ptr noundef nonnull align 8 dereferenceable(8) [[A]]) #[[ATTR6]]
+// CHECK-NEXT:    call void @_ZN1A2f1Ev() #[[ATTR6]]
+// CHECK-NEXT:    call void @_ZN1BC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[B]])
+// CHECK-NEXT:    call void @_ZN1B1gEv(ptr noundef nonnull align 8 dereferenceable(8) [[B]])
+// CHECK-NEXT:    [[CALL11:%.*]] = call noalias noundef nonnull ptr @_Znwm(i64 noundef 8) #[[ATTR8:[0-9]+]]
+// CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[CALL11]], i8 0, i64 8, i1 false)
+// CHECK-NEXT:    call void @_ZN1BC1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[CALL11]])
+// CHECK-NEXT:    store ptr [[CALL11]], ptr [[NEWA]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[NEWA]], align 8
+// CHECK-NEXT:    [[ISNULL:%.*]] = icmp eq ptr [[TMP4]], null
+// CHECK-NEXT:    br i1 [[ISNULL]], label %[[DELETE_END:.*]], label %[[DELETE_NOTNULL:.*]]
+// CHECK:       [[DELETE_NOTNULL]]:
+// CHECK-NEXT:    [[VTABLE12:%.*]] = load ptr, ptr [[TMP4]], align 8
+// CHECK-NEXT:    [[VFN13:%.*]] = getelementptr inbounds ptr, ptr [[VTABLE12]], i64 1
+// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[VFN13]], align 8
+// CHECK-NEXT:    call void [[TMP5]](ptr noundef nonnull align 8 dereferenceable(8) [[TMP4]]) #[[ATTR7]]
+// CHECK-NEXT:    br label %[[DELETE_END]]
+// CHECK:       [[DELETE_END]]:
+// CHECK-NEXT:    call void @_ZN1BD1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[B]]) #[[ATTR9:[0-9]+]]
+// CHECK-NEXT:    call void @_ZN1AD1Ev(ptr noundef nonnull align 8 dereferenceable(8) [[A]]) #[[ATTR7]]
+// CHECK-NEXT:    ret void
+//
 void foo(int i, A *ap, B *bp) {
   [[clang::convergent]] bar();
   [[clang::convergent]] (i = 4, bar());
@@ -41,59 +110,8 @@ void foo(int i, A *ap, B *bp) {
   A *newA = new B();
   delete newA;
 }
-
-int g(int i);
-
-void something() {
-  g(1);
-}
-
-[[clang::convergent]] int g(int i);
-
-void something_else() {
-  g(1);
-}
-
-int g(int i) { return i; }
-
-void something_else_again() {
-  g(1);
-}
-
-// CHECK: call noundef zeroext i1 @_Z3barv() #[[ATTR0:[0-9]+]]
-// CHECK: call noundef zeroext i1 @_Z3barv() #[[ATTR0]]
-// CHECK: call noundef zeroext i1 @_Z3barv() #[[ATTR0]]
-// CHECK: call noundef zeroext i1 @_Z3barv(){{$}}
-// CHECK: call noundef zeroext i1 @_Z3barv(){{$}}
-// CHECK: call void @_Z1fbb({{.*}}) #[[ATTR0]]
-// CHECK: call void @"_ZZ3fooiP1AP1BENK3$_0clEv"{{.*}} #[[ATTR0]]
-// CHECK: call noundef zeroext i1 @_Z3barv() #[[ATTR0]]
-// CHECK-LABEL: for.cond:
-// CHECK: call noundef zeroext i1 @_Z3barv() #[[ATTR0]]
-// CHECK-LABEL: for.inc:
-// CHECK: call noundef zeroext i1 @_Z3barv() #[[ATTR0]]
-// CHECK: call void asm sideeffect "nop"{{.*}} #[[ATTR1:[0-9]+]]
-// CHECK: call noundef zeroext i1 @_Z3barv(){{$}}
-// CHECK: load ptr, ptr
-// CHECK: load ptr, ptr
-// CHECK: %[[AG:.*]] = load ptr, ptr
-// CHECK-NEXT: call void %[[AG]](ptr {{.*}}) #[[ATTR0]]
-// CHECK: load ptr, ptr
-// CHECK: load ptr, ptr
-// CHECK: %[[BG:.*]] = load ptr, ptr
-// CHECK-NEXT: call void %[[BG]](ptr noundef{{.*}}
-// CHECK: call void @_ZN1AC1Ev({{.*}}) #[[ATTR0]]
-// CHECK: call void @_ZN1A1fEv({{.*}}) #[[ATTR0]]
-// CHECK: call void @_ZN1A1gEv({{.*}}) #[[ATTR0]]
-// CHECK: call void @_ZN1A2f1Ev() #[[ATTR0]]
-// CHECK: call void @_ZN1BC1Ev({{.*}}){{$}}
-// CHECK: call void @_ZN1B1gEv({{.*}}){{$}}
-// CHECK: call void @_ZN1BC1Ev({{.*}}){{$}}
-// CHECK: load ptr, ptr
-// CHECK: load ptr, ptr
-// CHECK: %[[AG:.*]] = load ptr, ptr
-// CHECK-NEXT: call void %[[AG]](ptr {{.*}}) #[[ATTR1]]
-// CHECK: call void  @_ZN1AD1Ev(ptr {{.*}}) #[[ATTR1]]
-
-// CHECK-DAG: attributes #[[ATTR0]] = {{{.*}}convergent{{.*}}}
-// CHECK-DAG: attributes #[[ATTR1]] = {{{.*}}convergent{{.*}}}
+//.
+// CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
+// CHECK: [[META3]] = !{!"llvm.loop.mustprogress"}
+// CHECK: [[META4]] = !{i64 5689}
+//.

>From 9bee8623e62edb3e4190abcad443039c2b42ec84 Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao at gmail.com>
Date: Thu, 25 Jul 2024 19:44:24 -0400
Subject: [PATCH 6/8] hack to generate attribute def checks

Created using spr 1.3.4
---
 clang/test/CodeGen/convergent.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/clang/test/CodeGen/convergent.cpp b/clang/test/CodeGen/convergent.cpp
index 867cb145312ed..9eb9155e79f39 100644
--- a/clang/test/CodeGen/convergent.cpp
+++ b/clang/test/CodeGen/convergent.cpp
@@ -111,6 +111,13 @@ void foo(int i, A *ap, B *bp) {
   delete newA;
 }
 //.
+// CHECK: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+// CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+// CHECK: attributes #[[ATTR6]] = { convergent }
+// CHECK: attributes #[[ATTR7]] = { convergent nounwind }
+// CHECK: attributes #[[ATTR8]] = { builtin allocsize(0) }
+// CHECK: attributes #[[ATTR9]] = { nounwind }
+//.
 // CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
 // CHECK: [[META3]] = !{!"llvm.loop.mustprogress"}
 // CHECK: [[META4]] = !{i64 5689}

>From b9223f5be05bf0ccae0b0447bf7a81f484af5e75 Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao at gmail.com>
Date: Thu, 25 Jul 2024 22:41:30 -0400
Subject: [PATCH 7/8] update test, srcloc num is changed

Created using spr 1.3.4
---
 clang/test/CodeGen/convergent.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/test/CodeGen/convergent.cpp b/clang/test/CodeGen/convergent.cpp
index 4979380400774..abc6856fc4026 100644
--- a/clang/test/CodeGen/convergent.cpp
+++ b/clang/test/CodeGen/convergent.cpp
@@ -129,5 +129,5 @@ void foo(int i, A *ap, B *bp) {
 // CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
 // CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
 // CHECK: [[META3]] = !{!"llvm.loop.mustprogress"}
-// CHECK: [[META4]] = !{i64 5689}
+// CHECK: [[META4]] = !{i64 5791}
 //.

>From ee4cf7119f5c96801601d37087209374e200f084 Mon Sep 17 00:00:00 2001
From: Michael Liao <michael.hliao at gmail.com>
Date: Mon, 29 Jul 2024 13:19:46 -0400
Subject: [PATCH 8/8] add new tests and refine doc

Created using spr 1.3.4
---
 clang/include/clang/Basic/AttrDocs.td | 19 ++++++-----
 clang/test/CodeGenCUDA/convergent.cu  | 47 ++++++++++++++++++---------
 2 files changed, 42 insertions(+), 24 deletions(-)

diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
index 7db5d37ccc070..c743b68f70dfb 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -1385,14 +1385,17 @@ Sample usage:
 def NoConvergentDocs : Documentation {
   let Category = DocCatFunction;
   let Content = [{
-The ``noconvergent`` attribute removes the LLVM ``convergent`` attribute if
-present. If a statement is marked ``noconvergent`` and contains calls,
-``convergent`` attributes on those calls are removed as well.
-
-In languages following SPMD/SIMT programming model, e.g. CUDA, mark function
-declarations and calls with ``convergent`` by default for the correctness. This
-``noconvergent`` attribute could be used to remove that ``convergent``
-attribute when it's safe.
+This attribute prevents a function from being treated as convergent, which
+means that optimizations can only move calls to that function to
+control-equivalent blocks. If a statement is marked as ``noconvergent`` and
+contains calls, it also prevents those calls from being treated as convergent.
+In other words, those calls are not restricted to only being moved to
+control-equivalent blocks.
+
+In languages following SPMD/SIMT programming model, e.g., CUDA/HIP, function
+declarations and calls are treated as convergent by default for correctness.
+This ``noconvergent`` attribute is helpful for developers to prevent them from
+being treated as convergent when it's safe.
 
 .. code-block:: c
 
diff --git a/clang/test/CodeGenCUDA/convergent.cu b/clang/test/CodeGenCUDA/convergent.cu
index 2c036c7d5470d..b187f3a8a32d6 100644
--- a/clang/test/CodeGenCUDA/convergent.cu
+++ b/clang/test/CodeGenCUDA/convergent.cu
@@ -25,15 +25,23 @@ __device__ void foo() {}
 [[clang::noconvergent]] __device__ void bax() {}
 
 __host__ __device__ void baz();
+
+__host__ __device__ float aliasf0(int) asm("something");
+__host__ __device__ [[clang::noconvergent]] float aliasf1(int) asm("somethingelse");
+
 // DEVICE-LABEL: define dso_local void @_Z3barv(
 // DEVICE-SAME: ) #[[ATTR0]] {
 // DEVICE-NEXT:  [[ENTRY:.*:]]
 // DEVICE-NEXT:    [[X:%.*]] = alloca i32, align 4
-// DEVICE-NEXT:    call void @_Z3bazv() #[[ATTR3:[0-9]+]]
-// DEVICE-NEXT:    [[TMP0:%.*]] = call i32 asm "trap
+// DEVICE-NEXT:    call void @_Z3bazv() #[[ATTR4:[0-9]+]]
+// DEVICE-NEXT:    [[TMP0:%.*]] = call i32 asm "trap", "=l"() #[[ATTR5:[0-9]+]], !srcloc [[META3:![0-9]+]]
 // DEVICE-NEXT:    store i32 [[TMP0]], ptr [[X]], align 4
-// DEVICE-NEXT:    call void asm sideeffect "trap", ""() #[[ATTR3]], !srcloc [[META4:![0-9]+]]
-// DEVICE-NEXT:    call void asm sideeffect "nop", ""() #[[ATTR5:[0-9]+]], !srcloc [[META5:![0-9]+]]
+// DEVICE-NEXT:    call void asm sideeffect "trap", ""() #[[ATTR4]], !srcloc [[META4:![0-9]+]]
+// DEVICE-NEXT:    call void asm sideeffect "nop", ""() #[[ATTR6:[0-9]+]], !srcloc [[META5:![0-9]+]]
+// DEVICE-NEXT:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
+// DEVICE-NEXT:    [[CALL:%.*]] = call contract noundef float @something(i32 noundef [[TMP1]]) #[[ATTR4]]
+// DEVICE-NEXT:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+// DEVICE-NEXT:    [[CALL1:%.*]] = call contract noundef float @somethingelse(i32 noundef [[TMP2]]) #[[ATTR6]]
 // DEVICE-NEXT:    ret void
 //
 // HOST-LABEL: define dso_local void @_Z3barv(
@@ -41,18 +49,24 @@ __host__ __device__ void baz();
 // HOST-NEXT:  [[ENTRY:.*:]]
 // HOST-NEXT:    [[X:%.*]] = alloca i32, align 4
 // HOST-NEXT:    call void @_Z3bazv()
-// HOST-NEXT:    [[TMP0:%.*]] = call i32 asm "trap
+// HOST-NEXT:    [[TMP0:%.*]] = call i32 asm "trap", "=l,~{dirflag},~{fpsr},~{flags}"() #[[ATTR2:[0-9]+]], !srcloc [[META2:![0-9]+]]
 // HOST-NEXT:    store i32 [[TMP0]], ptr [[X]], align 4
 // HOST-NEXT:    call void asm sideeffect "trap", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR3:[0-9]+]], !srcloc [[META3:![0-9]+]]
 // HOST-NEXT:    call void asm sideeffect "nop", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR3]], !srcloc [[META4:![0-9]+]]
+// HOST-NEXT:    [[TMP1:%.*]] = load i32, ptr [[X]], align 4
+// HOST-NEXT:    [[CALL:%.*]] = call contract noundef float @something(i32 noundef [[TMP1]])
+// HOST-NEXT:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+// HOST-NEXT:    [[CALL1:%.*]] = call contract noundef float @somethingelse(i32 noundef [[TMP2]])
 // HOST-NEXT:    ret void
 //
 __host__ __device__ void bar() {
   baz();
   int x;
-  asm ("trap;" : "=l"(x));
+  asm ("trap" : "=l"(x));
   asm volatile ("trap");
   [[clang::noconvergent]] { asm volatile ("nop"); }
+  aliasf0(x);
+  aliasf1(x);
 }
 
 
@@ -60,25 +74,26 @@ __host__ __device__ void bar() {
 // DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32" }
 // DEVICE: attributes #[[ATTR1]] = { mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32" }
 // DEVICE: attributes #[[ATTR2:[0-9]+]] = { convergent nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32" }
-// DEVICE: attributes #[[ATTR3]] = { convergent nounwind }
-// DEVICE: attributes #[[ATTR4:[0-9]+]] = { convergent nounwind memory(none) }
-// DEVICE: attributes #[[ATTR5]] = { nounwind }
+// DEVICE: attributes #[[ATTR3:[0-9]+]] = { nounwind "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+ptx32" }
+// DEVICE: attributes #[[ATTR4]] = { convergent nounwind }
+// DEVICE: attributes #[[ATTR5]] = { convergent nounwind memory(none) }
+// DEVICE: attributes #[[ATTR6]] = { nounwind }
 //.
 // HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
 // HOST: attributes #[[ATTR1:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
-// HOST: attributes #[[ATTR2:[0-9]+]] = { nounwind memory(none) }
+// HOST: attributes #[[ATTR2]] = { nounwind memory(none) }
 // HOST: attributes #[[ATTR3]] = { nounwind }
 //.
 // DEVICE: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 // DEVICE: [[META1:![0-9]+]] = !{i32 4, !"nvvm-reflect-ftz", i32 0}
 // DEVICE: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-// DEVICE: [[META3:![0-9]+]] = !{i64 2184}
-// DEVICE: [[META4]] = !{i64 2220}
-// DEVICE: [[META5]] = !{i64 2271}
+// DEVICE: [[META3]] = !{i64 3120}
+// DEVICE: [[META4]] = !{i64 3155}
+// DEVICE: [[META5]] = !{i64 3206}
 //.
 // HOST: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 // HOST: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-// HOST: [[META2:![0-9]+]] = !{i64 2184}
-// HOST: [[META3]] = !{i64 2220}
-// HOST: [[META4]] = !{i64 2271}
+// HOST: [[META2]] = !{i64 3120}
+// HOST: [[META3]] = !{i64 3155}
+// HOST: [[META4]] = !{i64 3206}
 //.



More information about the cfe-commits mailing list