[clang] b89bb77 - Reapply "[HLSL] set alwaysinline on HLSL functions (#106588)"
Thurston Dang via cfe-commits
cfe-commits at lists.llvm.org
Tue Sep 17 15:55:34 PDT 2024
Author: Thurston Dang
Date: 2024-09-17T22:54:52Z
New Revision: b89bb7775d155fc787ab3170f3fa38449069ecb3
URL: https://github.com/llvm/llvm-project/commit/b89bb7775d155fc787ab3170f3fa38449069ecb3
DIFF: https://github.com/llvm/llvm-project/commit/b89bb7775d155fc787ab3170f3fa38449069ecb3.diff
LOG: Reapply "[HLSL] set alwaysinline on HLSL functions (#106588)"
This reverts commit 4a63f4d301c0e044073e1b1f8f110015ec1778a1.
It was reverted because of a buildbot breakage, but the fix-forward has
landed (https://github.com/llvm/llvm-project/pull/109023).
Added:
clang/test/CodeGenHLSL/inline-constructors.hlsl
clang/test/CodeGenHLSL/inline-functions.hlsl
Modified:
clang/lib/CodeGen/CGHLSLRuntime.cpp
clang/lib/CodeGen/CodeGenModule.cpp
clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
clang/test/CodeGenHLSL/GlobalDestructors.hlsl
clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl
clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index b6e6555e63fca1..bec0a29e34fcb5 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -338,6 +338,7 @@ void clang::CodeGen::CGHLSLRuntime::setHLSLEntryAttributes(
NumThreadsAttr->getZ());
Fn->addFnAttr(NumThreadsKindStr, NumThreadsStr);
}
+ Fn->addFnAttr(llvm::Attribute::NoInline);
}
static Value *buildVectorInput(IRBuilder<> &B, Function *F, llvm::Type *Ty) {
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index ba2d6588900a11..17b82b205063d4 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2473,11 +2473,14 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
B.addAttribute(llvm::Attribute::StackProtectReq);
if (!D) {
+ // Non-entry HLSL functions must always be inlined.
+ if (getLangOpts().HLSL && !F->hasFnAttribute(llvm::Attribute::NoInline))
+ B.addAttribute(llvm::Attribute::AlwaysInline);
// If we don't have a declaration to control inlining, the function isn't
// explicitly marked as alwaysinline for semantic reasons, and inlining is
// disabled, mark the function as noinline.
- if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline) &&
- CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining)
+ else if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline) &&
+ CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining)
B.addAttribute(llvm::Attribute::NoInline);
F->addFnAttrs(B);
@@ -2504,9 +2507,13 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
ShouldAddOptNone &= !D->hasAttr<MinSizeAttr>();
ShouldAddOptNone &= !D->hasAttr<AlwaysInlineAttr>();
- // Add optnone, but do so only if the function isn't always_inline.
- if ((ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) &&
- !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
+ // Non-entry HLSL functions must always be inlined.
+ if (getLangOpts().HLSL && !F->hasFnAttribute(llvm::Attribute::NoInline) &&
+ !D->hasAttr<NoInlineAttr>()) {
+ B.addAttribute(llvm::Attribute::AlwaysInline);
+ } else if ((ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) &&
+ !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
+ // Add optnone, but do so only if the function isn't always_inline.
B.addAttribute(llvm::Attribute::OptimizeNone);
// OptimizeNone implies noinline; we should not be inlining such functions.
@@ -2526,7 +2533,8 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
B.addAttribute(llvm::Attribute::NoInline);
} else if (D->hasAttr<NoDuplicateAttr>()) {
B.addAttribute(llvm::Attribute::NoDuplicate);
- } else if (D->hasAttr<NoInlineAttr>() && !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
+ } else if (D->hasAttr<NoInlineAttr>() &&
+ !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
// Add noinline if the function isn't always_inline.
B.addAttribute(llvm::Attribute::NoInline);
} else if (D->hasAttr<AlwaysInlineAttr>() &&
diff --git a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
index f954c9d2f029f2..b39311ad67cd62 100644
--- a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE
int i;
@@ -7,7 +8,7 @@ __attribute__((constructor)) void call_me_first(void) {
}
__attribute__((constructor)) void then_call_me(void) {
- i = 12;
+ i = 13;
}
__attribute__((destructor)) void call_me_last(void) {
@@ -21,11 +22,21 @@ void main(unsigned GI : SV_GroupIndex) {}
// CHECK-NOT:@llvm.global_ctors
// CHECK-NOT:@llvm.global_dtors
-//CHECK: define void @main()
-//CHECK-NEXT: entry:
-//CHECK-NEXT: call void @"?call_me_first@@YAXXZ"()
-//CHECK-NEXT: call void @"?then_call_me@@YAXXZ"()
-//CHECK-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
-//CHECK-NEXT: call void @"?main@@YAXI at Z"(i32 %0)
-//CHECK-NEXT: call void @"?call_me_last@@YAXXZ"(
-//CHECK-NEXT: ret void
+// CHECK: define void @main()
+// CHECK-NEXT: entry:
+// Verify function constructors are emitted
+// NOINLINE-NEXT: call void @"?call_me_first@@YAXXZ"()
+// NOINLINE-NEXT: call void @"?then_call_me@@YAXXZ"()
+// NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
+// NOINLINE-NEXT: call void @"?main@@YAXI at Z"(i32 %0)
+// NOINLINE-NEXT: call void @"?call_me_last@@YAXXZ"(
+// NOINLINE-NEXT: ret void
+
+// Verify constructor calls are inlined when AlwaysInline is run
+// INLINE-NEXT: alloca
+// INLINE-NEXT: store i32 12
+// INLINE-NEXT: store i32 13
+// INLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
+// INLINE-NEXT: store i32 %
+// INLINE-NEXT: store i32 0
+// INLINE: ret void
diff --git a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
index 2c5c4e19c3296d..78f6475462bc47 100644
--- a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE
// Make sure global variable for ctors exist for lib profile.
// CHECK:@llvm.global_ctors
@@ -11,7 +12,11 @@ void FirstEntry() {}
// CHECK: define void @FirstEntry()
// CHECK-NEXT: entry:
-// CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
+// NOINLINE-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
+// NOINLINE-NEXT: call void @"?FirstEntry@@YAXXZ"()
+// Verify inlining leaves only calls to "llvm." intrinsics
+// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK: ret void
[shader("compute")]
[numthreads(1,1,1)]
@@ -19,5 +24,15 @@ void SecondEntry() {}
// CHECK: define void @SecondEntry()
// CHECK-NEXT: entry:
-// CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
-// CHECK-NEXT: call void @"?SecondEntry@@YAXXZ"()
+// NOINLINE-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
+// NOINLINE-NEXT: call void @"?SecondEntry@@YAXXZ"()
+// Verify inlining leaves only calls to "llvm." intrinsics
+// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK: ret void
+
+
+// Verify the constructor is alwaysinline
+// NOINLINE: ; Function Attrs: {{.*}}alwaysinline
+// NOINLINE-NEXT: define internal void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() [[IntAttr:\#[0-9]+]]
+
+// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline
diff --git a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
index 24c3c039fc6192..ea28354222f885 100644
--- a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
@@ -1,10 +1,18 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,CHECK
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,NOINLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,NOINLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
-// Make sure global variable for dtors exist for lib profile.
+// Tests that constructors and destructors are appropriately generated for globals
+// and that their calls are inlined when AlwaysInline is run
+// but global variables are retained for the library profiles
+
+// Make sure global variable for ctors/dtors exist for lib profile.
+// LIB:@llvm.global_ctors
// LIB:@llvm.global_dtors
-// Make sure global variable for dtors removed for compute profile.
-// CS-NOT:llvm.global_dtors
+// Make sure global variable for ctors/dtors removed for compute profile.
+// CS-NOT:@llvm.global_ctors
+// CS-NOT:@llvm.global_dtors
struct Tail {
Tail() {
@@ -46,22 +54,25 @@ void main(unsigned GI : SV_GroupIndex) {
Wag();
}
-// Make sure global variable for ctors/dtors removed.
-// CHECK-NOT:@llvm.global_ctors
-// CHECK-NOT:@llvm.global_dtors
-//CHECK: define void @main()
-//CHECK-NEXT: entry:
-//CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalDestructors.hlsl()
-//CHECK-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
-//CHECK-NEXT: call void @"?main@@YAXI at Z"(i32 %0)
-//CHECK-NEXT: call void @_GLOBAL__D_a()
-//CHECK-NEXT: ret void
+// CHECK: define void @main()
+// CHECK-NEXT: entry:
+// Verify destructor is emitted
+// NOINLINE-NEXT: call void @_GLOBAL__sub_I_GlobalDestructors.hlsl()
+// NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
+// NOINLINE-NEXT: call void @"?main@@YAXI at Z"(i32 %0)
+// NOINLINE-NEXT: call void @_GLOBAL__D_a()
+// NOINLINE-NEXT: ret void
+// Verify inlining leaves only calls to "llvm." intrinsics
+// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// INLINE: ret void
// This is really just a sanity check I needed for myself to verify that
// function scope static variables also get destroyed properly.
-//CHECK: define internal void @_GLOBAL__D_a()
-//CHECK-NEXT: entry:
-//CHECK-NEXT: call void @"??1Tail@@QAA at XZ"(ptr @"?T@?1??Wag@@YAXXZ at 4UTail@@A")
-//CHECK-NEXT: call void @"??1Pupper@@QAA at XZ"(ptr @"?GlobalPup@@3UPupper@@A")
-//CHECK-NEXT: ret void
+// NOINLINE: define internal void @_GLOBAL__D_a() [[IntAttr:\#[0-9]+]]
+// NOINLINE-NEXT: entry:
+// NOINLINE-NEXT: call void @"??1Tail@@QAA at XZ"(ptr @"?T@?1??Wag@@YAXXZ at 4UTail@@A")
+// NOINLINE-NEXT: call void @"??1Pupper@@QAA at XZ"(ptr @"?GlobalPup@@3UPupper@@A")
+// NOINLINE-NEXT: ret void
+
+// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline
diff --git a/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl b/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl
index baddfcf2cf1d52..174f4c3eaaad26 100644
--- a/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl
@@ -1,3 +1,4 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple spirv-vulkan-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=CHECK-SPIRV
RWBuffer<float> Buf;
diff --git a/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl b/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl
index da8a1e538ec5e7..2a350c1619bd6e 100644
--- a/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl
@@ -11,6 +11,7 @@ void main(unsigned GI : SV_GroupIndex) {
// Even at -O0 the subscript operators get inlined. The -O0 IR is a bit messy
// and confusing to follow so the match here is pretty weak.
-// CHECK: define internal void @"?main@@YAXI at Z"
-// CHECK-NOT: call
+// CHECK: define void @main()
+// Verify inlining leaves only calls to "llvm." intrinsics
+// CHECK-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
// CHECK: ret void
diff --git a/clang/test/CodeGenHLSL/inline-constructors.hlsl b/clang/test/CodeGenHLSL/inline-constructors.hlsl
new file mode 100644
index 00000000000000..995878a9c0f798
--- /dev/null
+++ b/clang/test/CodeGenHLSL/inline-constructors.hlsl
@@ -0,0 +1,76 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+
+// Tests that implicit constructor calls for user classes will always be inlined.
+
+struct Weed {
+ Weed() {Count += 1;}
+ [[maybe_unused]] void pull() {Count--;}
+ static int weedCount() { return Count; }
+private:
+ static int Count;
+
+} YardWeeds;
+
+int Weed::Count = 1; // It begins. . .
+
+struct Kitty {
+ unsigned burrsInFur;
+
+ Kitty() {
+ burrsInFur = 0;
+ }
+
+ void wanderInYard(int hours) {
+ burrsInFur = hours*Weed::weedCount()/8;
+ }
+
+ void lick() {
+ if(burrsInFur) {
+ burrsInFur--;
+ Weed w;
+ }
+ }
+
+} Nion;
+
+void NionsDay(int hours) {
+ static Kitty Nion;
+ Nion.wanderInYard(hours);
+ while(Nion.burrsInFur) Nion.lick();
+}
+
+// CHECK: define void @main()
+// CHECK-NEXT: entry:
+// Verify constructor is emitted
+// NOINLINE-NEXT: call void @_GLOBAL__sub_I_inline_constructors.hlsl()
+// NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
+// NOINLINE-NEXT: call void @"?main@@YAXI at Z"(i32 %0)
+// Verify inlining leaves only calls to "llvm." intrinsics
+// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK: ret void
+[shader("compute")]
+[numthreads(1,1,1)]
+void main(unsigned GI : SV_GroupIndex) {
+ NionsDay(10);
+}
+
+
+// CHECK: define void @rainyMain()
+// CHECK-NEXT: entry:
+// Verify constructor is emitted
+// NOINLINE-NEXT: call void @_GLOBAL__sub_I_inline_constructors.hlsl()
+// NOINLINE-NEXT: call void @"?rainyMain@@YAXXZ"()
+// Verify inlining leaves only calls to "llvm." intrinsics
+// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK: ret void
+[shader("compute")]
+[numthreads(1,1,1)]
+void rainyMain() {
+ NionsDay(1);
+}
+
diff --git a/clang/test/CodeGenHLSL/inline-functions.hlsl b/clang/test/CodeGenHLSL/inline-functions.hlsl
new file mode 100644
index 00000000000000..7dd905e966e069
--- /dev/null
+++ b/clang/test/CodeGenHLSL/inline-functions.hlsl
@@ -0,0 +1,116 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+
+// Tests that user functions will always be inlined.
+// This includes exported functions and mangled entry point implementation functions.
+// The unmangled entry functions must not be alwaysinlined.
+
+#define MAX 100
+
+float nums[MAX];
+
+// Verify that all functions have the alwaysinline attribute
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define void @"?swap@@YAXY0GE at III@Z"(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[IntAttr:\#[0-9]+]]
+// NOINLINE: ret void
+// Swap the values of Buf at indices ix1 and ix2
+void swap(unsigned Buf[MAX], unsigned ix1, unsigned ix2) {
+ float tmp = Buf[ix1];
+ Buf[ix1] = Buf[ix2];
+ Buf[ix2] = tmp;
+}
+
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define void @"?BubbleSort@@YAXY0GE at II@Z"(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[IntAttr]]
+// NOINLINE: ret void
+// Inefficiently sort Buf in place
+void BubbleSort(unsigned Buf[MAX], unsigned size) {
+ bool swapped = true;
+ while (swapped) {
+ swapped = false;
+ for (unsigned i = 1; i < size; i++) {
+ if (Buf[i] < Buf[i-1]) {
+ swap(Buf, i, i-1);
+ swapped = true;
+ }
+ }
+ }
+}
+
+// Note ExtAttr is the inlined export set of attribs
+// CHECK: Function Attrs: alwaysinline
+// CHECK: define noundef i32 @"?RemoveDupes@@YAIY0GE at II@Z"(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) {{[a-z_ ]*}}[[ExtAttr:\#[0-9]+]]
+// CHECK: ret i32
+// Sort Buf and remove any duplicate values
+// returns the number of values left
+export
+unsigned RemoveDupes(unsigned Buf[MAX], unsigned size) {
+ BubbleSort(Buf, size);
+ unsigned insertPt = 0;
+ for (unsigned i = 1; i < size; i++) {
+ if (Buf[i] == Buf[i-1])
+ insertPt++;
+ else
+ Buf[insertPt] = Buf[i];
+ }
+ return insertPt;
+}
+
+
+RWBuffer<unsigned> Indices;
+
+// The mangled version of main only remains without inlining
+// because it has internal linkage from the start
+// Note main functions get the norecurse attrib, which IntAttr reflects
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define internal void @"?main@@YAXI at Z"(i32 noundef %GI) [[IntAttr]]
+// NOINLINE: ret void
+
+// The unmangled version is not inlined, EntryAttr reflects that
+// CHECK: Function Attrs: {{.*}}noinline
+// CHECK: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
+// Make sure function calls are inlined when AlwaysInline is run
+// This only leaves calls to llvm. intrinsics
+// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK: ret void
+
+[numthreads(1,1,1)]
+[shader("compute")]
+void main(unsigned int GI : SV_GroupIndex) {
+ unsigned tmpIndices[MAX];
+ if (GI > MAX) return;
+ for (unsigned i = 1; i < GI; i++)
+ tmpIndices[i] = Indices[i];
+ RemoveDupes(tmpIndices, GI);
+ for (unsigned i = 1; i < GI; i++)
+ tmpIndices[i] = Indices[i];
+}
+
+// The mangled version of main only remains without inlining
+// because it has internal linkage from the start
+// Note main functions get the norecurse attrib, which IntAttr reflects
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define internal void @"?main10@@YAXXZ"() [[IntAttr]]
+// NOINLINE: ret void
+
+// The unmangled version is not inlined, EntryAttr reflects that
+// CHECK: Function Attrs: {{.*}}noinline
+// CHECK: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
+// Make sure function calls are inlined when AlwaysInline is run
+// This only leaves calls to llvm. intrinsics
+// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK: ret void
+
+[numthreads(1,1,1)]
+[shader("compute")]
+void main10() {
+ main(10);
+}
+
+// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline
+// CHECK: attributes [[ExtAttr]] = {{.*}} alwaysinline
+// CHECK: attributes [[EntryAttr]] = {{.*}} noinline
More information about the cfe-commits
mailing list