[clang] b89bb77 - Reapply "[HLSL] set alwaysinline on HLSL functions (#106588)"

Thurston Dang via cfe-commits cfe-commits at lists.llvm.org
Tue Sep 17 15:55:34 PDT 2024


Author: Thurston Dang
Date: 2024-09-17T22:54:52Z
New Revision: b89bb7775d155fc787ab3170f3fa38449069ecb3

URL: https://github.com/llvm/llvm-project/commit/b89bb7775d155fc787ab3170f3fa38449069ecb3
DIFF: https://github.com/llvm/llvm-project/commit/b89bb7775d155fc787ab3170f3fa38449069ecb3.diff

LOG: Reapply "[HLSL] set alwaysinline on HLSL functions (#106588)"

This reverts commit 4a63f4d301c0e044073e1b1f8f110015ec1778a1.

It was reverted because of a buildbot breakage, but the fix-forward has
landed (https://github.com/llvm/llvm-project/pull/109023).

Added: 
    clang/test/CodeGenHLSL/inline-constructors.hlsl
    clang/test/CodeGenHLSL/inline-functions.hlsl

Modified: 
    clang/lib/CodeGen/CGHLSLRuntime.cpp
    clang/lib/CodeGen/CodeGenModule.cpp
    clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
    clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
    clang/test/CodeGenHLSL/GlobalDestructors.hlsl
    clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl
    clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index b6e6555e63fca1..bec0a29e34fcb5 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -338,6 +338,7 @@ void clang::CodeGen::CGHLSLRuntime::setHLSLEntryAttributes(
                 NumThreadsAttr->getZ());
     Fn->addFnAttr(NumThreadsKindStr, NumThreadsStr);
   }
+  Fn->addFnAttr(llvm::Attribute::NoInline);
 }
 
 static Value *buildVectorInput(IRBuilder<> &B, Function *F, llvm::Type *Ty) {

diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index ba2d6588900a11..17b82b205063d4 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2473,11 +2473,14 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
     B.addAttribute(llvm::Attribute::StackProtectReq);
 
   if (!D) {
+    // Non-entry HLSL functions must always be inlined.
+    if (getLangOpts().HLSL && !F->hasFnAttribute(llvm::Attribute::NoInline))
+      B.addAttribute(llvm::Attribute::AlwaysInline);
     // If we don't have a declaration to control inlining, the function isn't
     // explicitly marked as alwaysinline for semantic reasons, and inlining is
     // disabled, mark the function as noinline.
-    if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline) &&
-        CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining)
+    else if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline) &&
+             CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining)
       B.addAttribute(llvm::Attribute::NoInline);
 
     F->addFnAttrs(B);
@@ -2504,9 +2507,13 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
   ShouldAddOptNone &= !D->hasAttr<MinSizeAttr>();
   ShouldAddOptNone &= !D->hasAttr<AlwaysInlineAttr>();
 
-  // Add optnone, but do so only if the function isn't always_inline.
-  if ((ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) &&
-      !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
+  // Non-entry HLSL functions must always be inlined.
+  if (getLangOpts().HLSL && !F->hasFnAttribute(llvm::Attribute::NoInline) &&
+      !D->hasAttr<NoInlineAttr>()) {
+    B.addAttribute(llvm::Attribute::AlwaysInline);
+  } else if ((ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) &&
+             !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
+    // Add optnone, but do so only if the function isn't always_inline.
     B.addAttribute(llvm::Attribute::OptimizeNone);
 
     // OptimizeNone implies noinline; we should not be inlining such functions.
@@ -2526,7 +2533,8 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
     B.addAttribute(llvm::Attribute::NoInline);
   } else if (D->hasAttr<NoDuplicateAttr>()) {
     B.addAttribute(llvm::Attribute::NoDuplicate);
-  } else if (D->hasAttr<NoInlineAttr>() && !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
+  } else if (D->hasAttr<NoInlineAttr>() &&
+             !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
     // Add noinline if the function isn't always_inline.
     B.addAttribute(llvm::Attribute::NoInline);
   } else if (D->hasAttr<AlwaysInlineAttr>() &&

diff  --git a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
index f954c9d2f029f2..b39311ad67cd62 100644
--- a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE
 
 int i;
 
@@ -7,7 +8,7 @@ __attribute__((constructor)) void call_me_first(void) {
 }
 
 __attribute__((constructor)) void then_call_me(void) {
-  i = 12;
+  i = 13;
 }
 
 __attribute__((destructor)) void call_me_last(void) {
@@ -21,11 +22,21 @@ void main(unsigned GI : SV_GroupIndex) {}
 // CHECK-NOT:@llvm.global_ctors
 // CHECK-NOT:@llvm.global_dtors
 
-//CHECK: define void @main()
-//CHECK-NEXT: entry:
-//CHECK-NEXT:   call void @"?call_me_first@@YAXXZ"()
-//CHECK-NEXT:   call void @"?then_call_me@@YAXXZ"()
-//CHECK-NEXT:   %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
-//CHECK-NEXT:   call void @"?main@@YAXI@Z"(i32 %0)
-//CHECK-NEXT:   call void @"?call_me_last@@YAXXZ"(
-//CHECK-NEXT:   ret void
+// CHECK: define void @main()
+// CHECK-NEXT: entry:
+// Verify function constructors are emitted
+// NOINLINE-NEXT:   call void @"?call_me_first@@YAXXZ"()
+// NOINLINE-NEXT:   call void @"?then_call_me@@YAXXZ"()
+// NOINLINE-NEXT:   %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
+// NOINLINE-NEXT:   call void @"?main@@YAXI@Z"(i32 %0)
+// NOINLINE-NEXT:   call void @"?call_me_last@@YAXXZ"(
+// NOINLINE-NEXT:   ret void
+
+// Verify constructor calls are inlined when AlwaysInline is run
+// INLINE-NEXT:   alloca
+// INLINE-NEXT:   store i32 12
+// INLINE-NEXT:   store i32 13
+// INLINE-NEXT:   %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
+// INLINE-NEXT:   store i32 %
+// INLINE-NEXT:   store i32 0
+// INLINE:   ret void

diff  --git a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
index 2c5c4e19c3296d..78f6475462bc47 100644
--- a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE
 
 // Make sure global variable for ctors exist for lib profile.
 // CHECK:@llvm.global_ctors
@@ -11,7 +12,11 @@ void FirstEntry() {}
 
 // CHECK: define void @FirstEntry()
 // CHECK-NEXT: entry:
-// CHECK-NEXT:   call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
+// NOINLINE-NEXT:   call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
+// NOINLINE-NEXT:   call void @"?FirstEntry@@YAXXZ"()
+// Verify inlining leaves only calls to "llvm." intrinsics
+// INLINE-NOT:   call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK: ret void
 
 [shader("compute")]
 [numthreads(1,1,1)]
@@ -19,5 +24,15 @@ void SecondEntry() {}
 
 // CHECK: define void @SecondEntry()
 // CHECK-NEXT: entry:
-// CHECK-NEXT:   call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
-// CHECK-NEXT:   call void @"?SecondEntry@@YAXXZ"()
+// NOINLINE-NEXT:   call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
+// NOINLINE-NEXT:   call void @"?SecondEntry@@YAXXZ"()
+// Verify inlining leaves only calls to "llvm." intrinsics
+// INLINE-NOT:   call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK: ret void
+
+
+// Verify the constructor is alwaysinline
+// NOINLINE: ; Function Attrs: {{.*}}alwaysinline
+// NOINLINE-NEXT: define internal void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() [[IntAttr:\#[0-9]+]]
+
+// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline

diff  --git a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
index 24c3c039fc6192..ea28354222f885 100644
--- a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
@@ -1,10 +1,18 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,CHECK
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,NOINLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,NOINLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
 
-// Make sure global variable for dtors exist for lib profile.
+// Tests that constructors and destructors are appropriately generated for globals
+// and that their calls are inlined when AlwaysInline is run
+// but global variables are retained for the library profiles
+
+// Make sure global variable for ctors/dtors exist for lib profile.
+// LIB:@llvm.global_ctors
 // LIB:@llvm.global_dtors
-// Make sure global variable for dtors removed for compute profile.
-// CS-NOT:llvm.global_dtors
+// Make sure global variable for ctors/dtors removed for compute profile.
+// CS-NOT:@llvm.global_ctors
+// CS-NOT:@llvm.global_dtors
 
 struct Tail {
   Tail() {
@@ -46,22 +54,25 @@ void main(unsigned GI : SV_GroupIndex) {
   Wag();
 }
 
-// Make sure global variable for ctors/dtors removed.
-// CHECK-NOT:@llvm.global_ctors
-// CHECK-NOT:@llvm.global_dtors
-//CHECK:      define void @main()
-//CHECK-NEXT: entry:
-//CHECK-NEXT:   call void @_GLOBAL__sub_I_GlobalDestructors.hlsl()
-//CHECK-NEXT:   %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
-//CHECK-NEXT:   call void @"?main@@YAXI@Z"(i32 %0)
-//CHECK-NEXT:   call void @_GLOBAL__D_a()
-//CHECK-NEXT:   ret void
+// CHECK:      define void @main()
+// CHECK-NEXT: entry:
+// Verify destructor is emitted
+// NOINLINE-NEXT:   call void @_GLOBAL__sub_I_GlobalDestructors.hlsl()
+// NOINLINE-NEXT:   %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
+// NOINLINE-NEXT:   call void @"?main@@YAXI@Z"(i32 %0)
+// NOINLINE-NEXT:   call void @_GLOBAL__D_a()
+// NOINLINE-NEXT:   ret void
+// Verify inlining leaves only calls to "llvm." intrinsics
+// INLINE-NOT:   call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// INLINE:   ret void
 
 // This is really just a sanity check I needed for myself to verify that
 // function scope static variables also get destroyed properly.
 
-//CHECK: define internal void @_GLOBAL__D_a()
-//CHECK-NEXT: entry:
-//CHECK-NEXT:   call void @"??1Tail@@QAA@XZ"(ptr @"?T@?1??Wag@@YAXXZ@4UTail@@A")
-//CHECK-NEXT:   call void @"??1Pupper@@QAA@XZ"(ptr @"?GlobalPup@@3UPupper@@A")
-//CHECK-NEXT:   ret void
+// NOINLINE: define internal void @_GLOBAL__D_a() [[IntAttr:\#[0-9]+]]
+// NOINLINE-NEXT: entry:
+// NOINLINE-NEXT:   call void @"??1Tail@@QAA@XZ"(ptr @"?T@?1??Wag@@YAXXZ@4UTail@@A")
+// NOINLINE-NEXT:   call void @"??1Pupper@@QAA@XZ"(ptr @"?GlobalPup@@3UPupper@@A")
+// NOINLINE-NEXT:   ret void
+
+// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline

diff  --git a/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl b/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl
index baddfcf2cf1d52..174f4c3eaaad26 100644
--- a/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl
@@ -1,3 +1,4 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple spirv-vulkan-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=CHECK-SPIRV
 
 RWBuffer<float> Buf;

diff  --git a/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl b/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl
index da8a1e538ec5e7..2a350c1619bd6e 100644
--- a/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl
@@ -11,6 +11,7 @@ void main(unsigned GI : SV_GroupIndex) {
 // Even at -O0 the subscript operators get inlined. The -O0 IR is a bit messy
 // and confusing to follow so the match here is pretty weak.
 
-// CHECK: define internal void @"?main@@YAXI@Z"
-// CHECK-NOT: call
+// CHECK: define void @main()
+// Verify inlining leaves only calls to "llvm." intrinsics
+// CHECK-NOT:   call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
 // CHECK: ret void

diff  --git a/clang/test/CodeGenHLSL/inline-constructors.hlsl b/clang/test/CodeGenHLSL/inline-constructors.hlsl
new file mode 100644
index 00000000000000..995878a9c0f798
--- /dev/null
+++ b/clang/test/CodeGenHLSL/inline-constructors.hlsl
@@ -0,0 +1,76 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+
+// Tests that implicit constructor calls for user classes will always be inlined.
+
+struct Weed {
+  Weed() {Count += 1;}
+  [[maybe_unused]] void pull() {Count--;}
+  static int weedCount() { return Count; }
+private:
+  static int Count;
+
+} YardWeeds;
+
+int Weed::Count = 1; // It begins. . .
+
+struct Kitty {
+  unsigned burrsInFur;
+
+  Kitty() {
+    burrsInFur = 0;
+  }
+
+  void wanderInYard(int hours) {
+    burrsInFur = hours*Weed::weedCount()/8;
+  }
+
+  void lick() {
+    if(burrsInFur) {
+      burrsInFur--;
+      Weed w;
+    }
+  }
+
+} Nion;
+
+void NionsDay(int hours) {
+  static Kitty Nion;
+  Nion.wanderInYard(hours);
+  while(Nion.burrsInFur) Nion.lick();
+}
+
+// CHECK:      define void @main()
+// CHECK-NEXT: entry:
+// Verify constructor is emitted
+// NOINLINE-NEXT: call void @_GLOBAL__sub_I_inline_constructors.hlsl()
+// NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
+// NOINLINE-NEXT: call void @"?main@@YAXI@Z"(i32 %0)
+// Verify inlining leaves only calls to "llvm." intrinsics
+// INLINE-NOT:    call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK:         ret void
+[shader("compute")]
+[numthreads(1,1,1)]
+void main(unsigned GI : SV_GroupIndex) {
+  NionsDay(10);
+}
+
+
+// CHECK:      define void @rainyMain()
+// CHECK-NEXT: entry:
+// Verify constructor is emitted
+// NOINLINE-NEXT:   call void @_GLOBAL__sub_I_inline_constructors.hlsl()
+// NOINLINE-NEXT:   call void @"?rainyMain@@YAXXZ"()
+// Verify inlining leaves only calls to "llvm." intrinsics
+// INLINE-NOT:      call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK:           ret void
+[shader("compute")]
+[numthreads(1,1,1)]
+void rainyMain() {
+  NionsDay(1);
+}
+

diff  --git a/clang/test/CodeGenHLSL/inline-functions.hlsl b/clang/test/CodeGenHLSL/inline-functions.hlsl
new file mode 100644
index 00000000000000..7dd905e966e069
--- /dev/null
+++ b/clang/test/CodeGenHLSL/inline-functions.hlsl
@@ -0,0 +1,116 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+
+// Tests that user functions will always be inlined.
+// This includes exported functions and mangled entry point implementation functions.
+// The unmangled entry functions must not be alwaysinlined.
+
+#define MAX 100
+
+float nums[MAX];
+
+// Verify that all functions have the alwaysinline attribute
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define void @"?swap@@YAXY0GE@III@Z"(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[IntAttr:\#[0-9]+]]
+// NOINLINE: ret void
+// Swap the values of Buf at indices ix1 and ix2
+void swap(unsigned Buf[MAX], unsigned ix1, unsigned ix2) {
+  float tmp = Buf[ix1];
+  Buf[ix1] = Buf[ix2];
+  Buf[ix2] = tmp;
+}
+
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define void @"?BubbleSort@@YAXY0GE@II@Z"(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[IntAttr]]
+// NOINLINE: ret void
+// Inefficiently sort Buf in place
+void BubbleSort(unsigned Buf[MAX], unsigned size) {
+  bool swapped = true;
+  while (swapped) {
+    swapped = false;
+    for (unsigned i = 1; i < size; i++) {
+      if (Buf[i] < Buf[i-1]) {
+	swap(Buf, i, i-1);
+	swapped = true;
+      }
+    }
+  }
+}
+
+// Note ExtAttr is the inlined export set of attribs
+// CHECK: Function Attrs: alwaysinline
+// CHECK: define noundef i32 @"?RemoveDupes@@YAIY0GE@II@Z"(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) {{[a-z_ ]*}}[[ExtAttr:\#[0-9]+]]
+// CHECK: ret i32
+// Sort Buf and remove any duplicate values
+// returns the number of values left
+export
+unsigned RemoveDupes(unsigned Buf[MAX], unsigned size) {
+  BubbleSort(Buf, size);
+  unsigned insertPt = 0;
+  for (unsigned i = 1; i < size; i++) {
+    if (Buf[i] == Buf[i-1])
+      insertPt++;
+    else
+      Buf[insertPt] = Buf[i];
+  }
+  return insertPt;
+}
+
+
+RWBuffer<unsigned> Indices;
+
+// The mangled version of main only remains without inlining
+// because it has internal linkage from the start
+// Note main functions get the norecurse attrib, which IntAttr reflects
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define internal void @"?main@@YAXI@Z"(i32 noundef %GI) [[IntAttr]]
+// NOINLINE: ret void
+
+// The unmangled version is not inlined, EntryAttr reflects that
+// CHECK: Function Attrs: {{.*}}noinline
+// CHECK: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
+// Make sure function calls are inlined when AlwaysInline is run
+// This only leaves calls to llvm. intrinsics
+// INLINE-NOT:   call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK: ret void
+
+[numthreads(1,1,1)]
+[shader("compute")]
+void main(unsigned int GI : SV_GroupIndex) {
+  unsigned tmpIndices[MAX];
+  if (GI > MAX) return;
+  for (unsigned i = 1; i < GI; i++)
+    tmpIndices[i] = Indices[i];
+  RemoveDupes(tmpIndices, GI);
+  for (unsigned i = 1; i < GI; i++)
+    tmpIndices[i] = Indices[i];
+}
+
+// The mangled version of main only remains without inlining
+// because it has internal linkage from the start
+// Note main functions get the norecurse attrib, which IntAttr reflects
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define internal void @"?main10@@YAXXZ"() [[IntAttr]]
+// NOINLINE: ret void
+
+// The unmangled version is not inlined, EntryAttr reflects that
+// CHECK: Function Attrs: {{.*}}noinline
+// CHECK: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
+// Make sure function calls are inlined when AlwaysInline is run
+// This only leaves calls to llvm. intrinsics
+// INLINE-NOT:   call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK: ret void
+
+[numthreads(1,1,1)]
+[shader("compute")]
+void main10() {
+  main(10);
+}
+
+// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline
+// CHECK: attributes [[ExtAttr]] = {{.*}} alwaysinline
+// CHECK: attributes [[EntryAttr]] = {{.*}} noinline


        


More information about the cfe-commits mailing list