[clang] a729e70 - [HLSL] set alwaysinline on HLSL functions (#106588)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Sep 17 10:09:46 PDT 2024
Author: Greg Roth
Date: 2024-09-17T10:09:42-07:00
New Revision: a729e706de3fc6ebee49ede3c50afb47f2e29191
URL: https://github.com/llvm/llvm-project/commit/a729e706de3fc6ebee49ede3c50afb47f2e29191
DIFF: https://github.com/llvm/llvm-project/commit/a729e706de3fc6ebee49ede3c50afb47f2e29191.diff
LOG: [HLSL] set alwaysinline on HLSL functions (#106588)
HLSL inlines all its functions by default. This uses the alwaysinline
attribute to make the alwaysinliner pass inline any function not
explicitly marked noinline by the user or autogeneration. The
alwaysinline marking takes place in `SetLLVMFunctionAttributesForDefinition`
where all other inlining interactions are determined.
The outermost entry function is marked noinline because there's no
reason to inline it. Any user calls to an entry function will instead call
the internal mangled version of the entry function.
Adds tests for function and constructor inlining and augments some
existing tests to verify correct inlining of implicitly created
functions as well.
Incidentally restores a RUN line that I believe was mistakenly removed as
part of #88918.
Fixes #89282
Added:
clang/test/CodeGenHLSL/inline-constructors.hlsl
clang/test/CodeGenHLSL/inline-functions.hlsl
Modified:
clang/lib/CodeGen/CGHLSLRuntime.cpp
clang/lib/CodeGen/CodeGenModule.cpp
clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
clang/test/CodeGenHLSL/GlobalDestructors.hlsl
clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl
clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index b6e6555e63fca1..bec0a29e34fcb5 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -338,6 +338,7 @@ void clang::CodeGen::CGHLSLRuntime::setHLSLEntryAttributes(
NumThreadsAttr->getZ());
Fn->addFnAttr(NumThreadsKindStr, NumThreadsStr);
}
+ Fn->addFnAttr(llvm::Attribute::NoInline);
}
static Value *buildVectorInput(IRBuilder<> &B, Function *F, llvm::Type *Ty) {
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index ba2d6588900a11..17b82b205063d4 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -2473,11 +2473,14 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
B.addAttribute(llvm::Attribute::StackProtectReq);
if (!D) {
+ // Non-entry HLSL functions must always be inlined.
+ if (getLangOpts().HLSL && !F->hasFnAttribute(llvm::Attribute::NoInline))
+ B.addAttribute(llvm::Attribute::AlwaysInline);
// If we don't have a declaration to control inlining, the function isn't
// explicitly marked as alwaysinline for semantic reasons, and inlining is
// disabled, mark the function as noinline.
- if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline) &&
- CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining)
+ else if (!F->hasFnAttribute(llvm::Attribute::AlwaysInline) &&
+ CodeGenOpts.getInlining() == CodeGenOptions::OnlyAlwaysInlining)
B.addAttribute(llvm::Attribute::NoInline);
F->addFnAttrs(B);
@@ -2504,9 +2507,13 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
ShouldAddOptNone &= !D->hasAttr<MinSizeAttr>();
ShouldAddOptNone &= !D->hasAttr<AlwaysInlineAttr>();
- // Add optnone, but do so only if the function isn't always_inline.
- if ((ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) &&
- !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
+ // Non-entry HLSL functions must always be inlined.
+ if (getLangOpts().HLSL && !F->hasFnAttribute(llvm::Attribute::NoInline) &&
+ !D->hasAttr<NoInlineAttr>()) {
+ B.addAttribute(llvm::Attribute::AlwaysInline);
+ } else if ((ShouldAddOptNone || D->hasAttr<OptimizeNoneAttr>()) &&
+ !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
+ // Add optnone, but do so only if the function isn't always_inline.
B.addAttribute(llvm::Attribute::OptimizeNone);
// OptimizeNone implies noinline; we should not be inlining such functions.
@@ -2526,7 +2533,8 @@ void CodeGenModule::SetLLVMFunctionAttributesForDefinition(const Decl *D,
B.addAttribute(llvm::Attribute::NoInline);
} else if (D->hasAttr<NoDuplicateAttr>()) {
B.addAttribute(llvm::Attribute::NoDuplicate);
- } else if (D->hasAttr<NoInlineAttr>() && !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
+ } else if (D->hasAttr<NoInlineAttr>() &&
+ !F->hasFnAttribute(llvm::Attribute::AlwaysInline)) {
// Add noinline if the function isn't always_inline.
B.addAttribute(llvm::Attribute::NoInline);
} else if (D->hasAttr<AlwaysInlineAttr>() &&
diff --git a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
index f954c9d2f029f2..b39311ad67cd62 100644
--- a/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalConstructorFunction.hlsl
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE
int i;
@@ -7,7 +8,7 @@ __attribute__((constructor)) void call_me_first(void) {
}
__attribute__((constructor)) void then_call_me(void) {
- i = 12;
+ i = 13;
}
__attribute__((destructor)) void call_me_last(void) {
@@ -21,11 +22,21 @@ void main(unsigned GI : SV_GroupIndex) {}
// CHECK-NOT:@llvm.global_ctors
// CHECK-NOT:@llvm.global_dtors
-//CHECK: define void @main()
-//CHECK-NEXT: entry:
-//CHECK-NEXT: call void @"?call_me_first@@YAXXZ"()
-//CHECK-NEXT: call void @"?then_call_me@@YAXXZ"()
-//CHECK-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
-//CHECK-NEXT: call void @"?main@@YAXI@Z"(i32 %0)
-//CHECK-NEXT: call void @"?call_me_last@@YAXXZ"(
-//CHECK-NEXT: ret void
+// CHECK: define void @main()
+// CHECK-NEXT: entry:
+// Verify function constructors are emitted
+// NOINLINE-NEXT: call void @"?call_me_first@@YAXXZ"()
+// NOINLINE-NEXT: call void @"?then_call_me@@YAXXZ"()
+// NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
+// NOINLINE-NEXT: call void @"?main@@YAXI@Z"(i32 %0)
+// NOINLINE-NEXT: call void @"?call_me_last@@YAXXZ"(
+// NOINLINE-NEXT: ret void
+
+// Verify constructor calls are inlined when AlwaysInline is run
+// INLINE-NEXT: alloca
+// INLINE-NEXT: store i32 12
+// INLINE-NEXT: store i32 13
+// INLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
+// INLINE-NEXT: store i32 %
+// INLINE-NEXT: store i32 0
+// INLINE: ret void
diff --git a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
index 2c5c4e19c3296d..78f6475462bc47 100644
--- a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE
// Make sure global variable for ctors exist for lib profile.
// CHECK:@llvm.global_ctors
@@ -11,7 +12,11 @@ void FirstEntry() {}
// CHECK: define void @FirstEntry()
// CHECK-NEXT: entry:
-// CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
+// NOINLINE-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
+// NOINLINE-NEXT: call void @"?FirstEntry@@YAXXZ"()
+// Verify inlining leaves only calls to "llvm." intrinsics
+// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK: ret void
[shader("compute")]
[numthreads(1,1,1)]
@@ -19,5 +24,15 @@ void SecondEntry() {}
// CHECK: define void @SecondEntry()
// CHECK-NEXT: entry:
-// CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
-// CHECK-NEXT: call void @"?SecondEntry@@YAXXZ"()
+// NOINLINE-NEXT: call void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl()
+// NOINLINE-NEXT: call void @"?SecondEntry@@YAXXZ"()
+// Verify inlining leaves only calls to "llvm." intrinsics
+// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK: ret void
+
+
+// Verify the constructor is alwaysinline
+// NOINLINE: ; Function Attrs: {{.*}}alwaysinline
+// NOINLINE-NEXT: define internal void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() [[IntAttr:\#[0-9]+]]
+
+// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline
diff --git a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
index 24c3c039fc6192..ea28354222f885 100644
--- a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
@@ -1,10 +1,18 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,CHECK
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,NOINLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,NOINLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
-// Make sure global variable for dtors exist for lib profile.
+// Tests that constructors and destructors are appropriately generated for globals
+// and that their calls are inlined when AlwaysInline is run
+// but global variables are retained for the library profiles
+
+// Make sure global variable for ctors/dtors exist for lib profile.
+// LIB:@llvm.global_ctors
// LIB:@llvm.global_dtors
-// Make sure global variable for dtors removed for compute profile.
-// CS-NOT:llvm.global_dtors
+// Make sure global variable for ctors/dtors removed for compute profile.
+// CS-NOT:@llvm.global_ctors
+// CS-NOT:@llvm.global_dtors
struct Tail {
Tail() {
@@ -46,22 +54,25 @@ void main(unsigned GI : SV_GroupIndex) {
Wag();
}
-// Make sure global variable for ctors/dtors removed.
-// CHECK-NOT:@llvm.global_ctors
-// CHECK-NOT:@llvm.global_dtors
-//CHECK: define void @main()
-//CHECK-NEXT: entry:
-//CHECK-NEXT: call void @_GLOBAL__sub_I_GlobalDestructors.hlsl()
-//CHECK-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
-//CHECK-NEXT: call void @"?main@@YAXI@Z"(i32 %0)
-//CHECK-NEXT: call void @_GLOBAL__D_a()
-//CHECK-NEXT: ret void
+// CHECK: define void @main()
+// CHECK-NEXT: entry:
+// Verify destructor is emitted
+// NOINLINE-NEXT: call void @_GLOBAL__sub_I_GlobalDestructors.hlsl()
+// NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
+// NOINLINE-NEXT: call void @"?main@@YAXI@Z"(i32 %0)
+// NOINLINE-NEXT: call void @_GLOBAL__D_a()
+// NOINLINE-NEXT: ret void
+// Verify inlining leaves only calls to "llvm." intrinsics
+// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// INLINE: ret void
// This is really just a sanity check I needed for myself to verify that
// function scope static variables also get destroyed properly.
-//CHECK: define internal void @_GLOBAL__D_a()
-//CHECK-NEXT: entry:
-//CHECK-NEXT: call void @"??1Tail@@QAA@XZ"(ptr @"?T@?1??Wag@@YAXXZ@4UTail@@A")
-//CHECK-NEXT: call void @"??1Pupper@@QAA@XZ"(ptr @"?GlobalPup@@3UPupper@@A")
-//CHECK-NEXT: ret void
+// NOINLINE: define internal void @_GLOBAL__D_a() [[IntAttr:\#[0-9]+]]
+// NOINLINE-NEXT: entry:
+// NOINLINE-NEXT: call void @"??1Tail@@QAA@XZ"(ptr @"?T@?1??Wag@@YAXXZ@4UTail@@A")
+// NOINLINE-NEXT: call void @"??1Pupper@@QAA@XZ"(ptr @"?GlobalPup@@3UPupper@@A")
+// NOINLINE-NEXT: ret void
+
+// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline
diff --git a/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl b/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl
index baddfcf2cf1d52..174f4c3eaaad26 100644
--- a/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/RWBuffer-constructor.hlsl
@@ -1,3 +1,4 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s
// RUN: %clang_cc1 -triple spirv-vulkan-library -x hlsl -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s --check-prefix=CHECK-SPIRV
RWBuffer<float> Buf;
diff --git a/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl b/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl
index da8a1e538ec5e7..2a350c1619bd6e 100644
--- a/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl
+++ b/clang/test/CodeGenHLSL/builtins/RWBuffer-subscript.hlsl
@@ -11,6 +11,7 @@ void main(unsigned GI : SV_GroupIndex) {
// Even at -O0 the subscript operators get inlined. The -O0 IR is a bit messy
// and confusing to follow so the match here is pretty weak.
-// CHECK: define internal void @"?main@@YAXI@Z"
-// CHECK-NOT: call
+// CHECK: define void @main()
+// Verify inlining leaves only calls to "llvm." intrinsics
+// CHECK-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
// CHECK: ret void
diff --git a/clang/test/CodeGenHLSL/inline-constructors.hlsl b/clang/test/CodeGenHLSL/inline-constructors.hlsl
new file mode 100644
index 00000000000000..995878a9c0f798
--- /dev/null
+++ b/clang/test/CodeGenHLSL/inline-constructors.hlsl
@@ -0,0 +1,76 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+
+// Tests that implicit constructor calls for user classes will always be inlined.
+
+struct Weed {
+ Weed() {Count += 1;}
+ [[maybe_unused]] void pull() {Count--;}
+ static int weedCount() { return Count; }
+private:
+ static int Count;
+
+} YardWeeds;
+
+int Weed::Count = 1; // It begins. . .
+
+struct Kitty {
+ unsigned burrsInFur;
+
+ Kitty() {
+ burrsInFur = 0;
+ }
+
+ void wanderInYard(int hours) {
+ burrsInFur = hours*Weed::weedCount()/8;
+ }
+
+ void lick() {
+ if(burrsInFur) {
+ burrsInFur--;
+ Weed w;
+ }
+ }
+
+} Nion;
+
+void NionsDay(int hours) {
+ static Kitty Nion;
+ Nion.wanderInYard(hours);
+ while(Nion.burrsInFur) Nion.lick();
+}
+
+// CHECK: define void @main()
+// CHECK-NEXT: entry:
+// Verify constructor is emitted
+// NOINLINE-NEXT: call void @_GLOBAL__sub_I_inline_constructors.hlsl()
+// NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
+// NOINLINE-NEXT: call void @"?main@@YAXI@Z"(i32 %0)
+// Verify inlining leaves only calls to "llvm." intrinsics
+// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK: ret void
+[shader("compute")]
+[numthreads(1,1,1)]
+void main(unsigned GI : SV_GroupIndex) {
+ NionsDay(10);
+}
+
+
+// CHECK: define void @rainyMain()
+// CHECK-NEXT: entry:
+// Verify constructor is emitted
+// NOINLINE-NEXT: call void @_GLOBAL__sub_I_inline_constructors.hlsl()
+// NOINLINE-NEXT: call void @"?rainyMain@@YAXXZ"()
+// Verify inlining leaves only calls to "llvm." intrinsics
+// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK: ret void
+[shader("compute")]
+[numthreads(1,1,1)]
+void rainyMain() {
+ NionsDay(1);
+}
+
diff --git a/clang/test/CodeGenHLSL/inline-functions.hlsl b/clang/test/CodeGenHLSL/inline-functions.hlsl
new file mode 100644
index 00000000000000..7dd905e966e069
--- /dev/null
+++ b/clang/test/CodeGenHLSL/inline-functions.hlsl
@@ -0,0 +1,116 @@
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+
+// Tests that user functions will always be inlined.
+// This includes exported functions and mangled entry point implementation functions.
+// The unmangled entry functions must not be alwaysinlined.
+
+#define MAX 100
+
+float nums[MAX];
+
+// Verify that all functions have the alwaysinline attribute
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define void @"?swap@@YAXY0GE@III@Z"(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[IntAttr:\#[0-9]+]]
+// NOINLINE: ret void
+// Swap the values of Buf at indices ix1 and ix2
+void swap(unsigned Buf[MAX], unsigned ix1, unsigned ix2) {
+ float tmp = Buf[ix1];
+ Buf[ix1] = Buf[ix2];
+ Buf[ix2] = tmp;
+}
+
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define void @"?BubbleSort@@YAXY0GE@II@Z"(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[IntAttr]]
+// NOINLINE: ret void
+// Inefficiently sort Buf in place
+void BubbleSort(unsigned Buf[MAX], unsigned size) {
+ bool swapped = true;
+ while (swapped) {
+ swapped = false;
+ for (unsigned i = 1; i < size; i++) {
+ if (Buf[i] < Buf[i-1]) {
+ swap(Buf, i, i-1);
+ swapped = true;
+ }
+ }
+ }
+}
+
+// Note ExtAttr is the inlined export set of attribs
+// CHECK: Function Attrs: alwaysinline
+// CHECK: define noundef i32 @"?RemoveDupes@@YAIY0GE@II@Z"(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) {{[a-z_ ]*}}[[ExtAttr:\#[0-9]+]]
+// CHECK: ret i32
+// Sort Buf and remove any duplicate values
+// returns the number of values left
+export
+unsigned RemoveDupes(unsigned Buf[MAX], unsigned size) {
+ BubbleSort(Buf, size);
+ unsigned insertPt = 0;
+ for (unsigned i = 1; i < size; i++) {
+ if (Buf[i] == Buf[i-1])
+ insertPt++;
+ else
+ Buf[insertPt] = Buf[i];
+ }
+ return insertPt;
+}
+
+
+RWBuffer<unsigned> Indices;
+
+// The mangled version of main only remains without inlining
+// because it has internal linkage from the start
+// Note main functions get the norecurse attrib, which IntAttr reflects
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define internal void @"?main@@YAXI@Z"(i32 noundef %GI) [[IntAttr]]
+// NOINLINE: ret void
+
+// The unmangled version is not inlined, EntryAttr reflects that
+// CHECK: Function Attrs: {{.*}}noinline
+// CHECK: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
+// Make sure function calls are inlined when AlwaysInline is run
+// This only leaves calls to llvm. intrinsics
+// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK: ret void
+
+[numthreads(1,1,1)]
+[shader("compute")]
+void main(unsigned int GI : SV_GroupIndex) {
+ unsigned tmpIndices[MAX];
+ if (GI > MAX) return;
+ for (unsigned i = 1; i < GI; i++)
+ tmpIndices[i] = Indices[i];
+ RemoveDupes(tmpIndices, GI);
+ for (unsigned i = 1; i < GI; i++)
+ tmpIndices[i] = Indices[i];
+}
+
+// The mangled version of main only remains without inlining
+// because it has internal linkage from the start
+// Note main functions get the norecurse attrib, which IntAttr reflects
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define internal void @"?main10@@YAXXZ"() [[IntAttr]]
+// NOINLINE: ret void
+
+// The unmangled version is not inlined, EntryAttr reflects that
+// CHECK: Function Attrs: {{.*}}noinline
+// CHECK: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
+// Make sure function calls are inlined when AlwaysInline is run
+// This only leaves calls to llvm. intrinsics
+// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK: ret void
+
+[numthreads(1,1,1)]
+[shader("compute")]
+void main10() {
+ main(10);
+}
+
+// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline
+// CHECK: attributes [[ExtAttr]] = {{.*}} alwaysinline
+// CHECK: attributes [[EntryAttr]] = {{.*}} noinline
More information about the cfe-commits
mailing list