[clang] [HLSL] Appropriately set function attribute optnone (PR #125937)
S. Bharadwaj Yadavalli via cfe-commits
cfe-commits at lists.llvm.org
Tue Feb 11 07:32:04 PST 2025
https://github.com/bharadwajy updated https://github.com/llvm/llvm-project/pull/125937
>From 63a728dd7f5efff32a2f90608adfdc13d540f34f Mon Sep 17 00:00:00 2001
From: "S. Bharadwaj Yadavalli" <Bharadwaj.Yadavalli at microsoft.com>
Date: Tue, 4 Feb 2025 12:48:09 -0500
Subject: [PATCH 1/3] [HLSL] Set function optnone attribute appropriately
When optimization is disabled, set the optnone attribute
- for all module functions when targeting Library shaders
- only for the entry function when targeting non-Library shaders
Update tests in accordance with the change.
---
clang/lib/CodeGen/CGHLSLRuntime.cpp | 10 +
.../CodeGenHLSL/GlobalConstructorLib.hlsl | 8 +-
clang/test/CodeGenHLSL/GlobalDestructors.hlsl | 4 +-
.../test/CodeGenHLSL/inline-constructors.hlsl | 16 +-
clang/test/CodeGenHLSL/inline-functions.hlsl | 189 +++++++++++++-----
.../CodeGenHLSL/this-assignment-overload.hlsl | 6 +-
6 files changed, 174 insertions(+), 59 deletions(-)
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 2ce54cc3c52ef..24acc9a559be2 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -345,6 +345,9 @@ void clang::CodeGen::CGHLSLRuntime::setHLSLEntryAttributes(
WaveSizeAttr->getPreferred());
Fn->addFnAttr(WaveSizeKindStr, WaveSizeStr);
}
+ if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
+ Fn->addFnAttr(llvm::Attribute::OptimizeNone);
+ }
Fn->addFnAttr(llvm::Attribute::NoInline);
}
@@ -446,6 +449,13 @@ void CGHLSLRuntime::setHLSLFunctionAttributes(const FunctionDecl *FD,
const StringRef ExportAttrKindStr = "hlsl.export";
Fn->addFnAttr(ExportAttrKindStr);
}
+ llvm::Triple T(Fn->getParent()->getTargetTriple());
+ if (T.getEnvironment() == llvm::Triple::EnvironmentType::Library) {
+ if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
+ Fn->addFnAttr(llvm::Attribute::OptimizeNone);
+ Fn->addFnAttr(llvm::Attribute::NoInline);
+ }
+ }
}
static void gatherFunctions(SmallVectorImpl<Function *> &Fns, llvm::Module &M,
diff --git a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
index 09c44f6242c53..39d7c73e832a1 100644
--- a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O1 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE
// Make sure global variable for ctors exist for lib profile.
// CHECK:@llvm.global_ctors
@@ -31,12 +31,12 @@ void SecondEntry() {}
// CHECK: ret void
-// Verify the constructor is alwaysinline
-// NOINLINE: ; Function Attrs: {{.*}}alwaysinline
+// Verify the constructor is optnone
+// NOINLINE: ; Function Attrs: {{.*}} optnone
// NOINLINE-NEXT: define linkonce_odr void @_ZN4hlsl8RWBufferIfEC2Ev({{.*}} [[CtorAttr:\#[0-9]+]]
// NOINLINE: ; Function Attrs: {{.*}}alwaysinline
// NOINLINE-NEXT: define internal void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() [[InitAttr:\#[0-9]+]]
// NOINLINE-DAG: attributes [[InitAttr]] = {{.*}} alwaysinline
-// NOINLINE-DAG: attributes [[CtorAttr]] = {{.*}} alwaysinline
+// NOINLINE-DAG: attributes [[CtorAttr]] = {{.*}} optnone
diff --git a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
index f98318601134b..8961e1a7e59cd 100644
--- a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
@@ -1,7 +1,7 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,NOINLINE,CHECK
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,NOINLINE,CHECK
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -O1 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O1 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
// Tests that constructors and destructors are appropriately generated for globals
// and that their calls are inlined when AlwaysInline is run
diff --git a/clang/test/CodeGenHLSL/inline-constructors.hlsl b/clang/test/CodeGenHLSL/inline-constructors.hlsl
index b0d5a783fb372..298d7d4272678 100644
--- a/clang/test/CodeGenHLSL/inline-constructors.hlsl
+++ b/clang/test/CodeGenHLSL/inline-constructors.hlsl
@@ -1,9 +1,9 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,NOINLINE_LIB
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
// Tests that implicit constructor calls for user classes will always be inlined.
@@ -50,6 +50,10 @@ void NionsDay(int hours) {
// NOINLINE-NEXT: call void @_GLOBAL__sub_I_inline_constructors.hlsl()
// NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
// NOINLINE-NEXT: call void @_Z4mainj(i32 %0)
+// NOINLINE_LIB: call void @_ZN4WeedC1Ev
+// NOINLINE_LIB-NEXT: call void @_ZN5KittyC1Ev
+// NOINLINE_LIB-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
+// NOINLINE_LIB-NEXT: call void @_Z4mainj(i32 %0)
// Verify inlining leaves only calls to "llvm." intrinsics
// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
// CHECK: ret void
diff --git a/clang/test/CodeGenHLSL/inline-functions.hlsl b/clang/test/CodeGenHLSL/inline-functions.hlsl
index e78d04ec9594f..4a7217e3a89e3 100644
--- a/clang/test/CodeGenHLSL/inline-functions.hlsl
+++ b/clang/test/CodeGenHLSL/inline-functions.hlsl
@@ -1,22 +1,35 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE
-
-// Tests that user functions will always be inlined.
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,CHECK_LIB_OPTNONE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,CHECK_LIB_OPTNONE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,CHECK_OPT
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,CHECK_CS_OPTNONE_NOPASS
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,CHECK_CS_OPTNONE_PASS
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,CHECK_OPT
+
+// Tests inlining of user functions based on specified optimization options.
// This includes exported functions and mangled entry point implementation functions.
-// The unmangled entry functions must not be alwaysinlined.
#define MAX 100
float nums[MAX];
-// Verify that all functions have the alwaysinline attribute
-// NOINLINE: Function Attrs: alwaysinline
-// NOINLINE: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[IntAttr:\#[0-9]+]]
-// NOINLINE: ret void
+// Check optnone attribute for library target compilation
+// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
+// CHECK_LIB_OPTNONE: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[ExtAttr:\#[0-9]+]]
+
+// Check alwaysinline attribute for non-entry functions of compute target compilation
+// CHECK_CS_OPTNONE_NOPASS: Function Attrs: alwaysinline
+// CHECK_CS_OPTNONE_NOPASS: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[ExtAttr:\#[0-9]+]]
+
+// Check alwaysinline attribute for non-entry functions of compute target compilation
+// CHECK_CS_OPTNONE_PASS: Function Attrs: alwaysinline
+// CHECK_CS_OPTNONE_PASS: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[ExtAttr:\#[0-9]+]]
+
+// Check alwaysinline attribute for opt compilation to library target
+// CHECK_OPT: Function Attrs: alwaysinline
+// CHECK_OPT: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 captures(none) %Buf, i32 noundef %ix1, i32 noundef %ix2) {{[a-z_ ]*}} [[SwapOptAttr:\#[0-9]+]]
+
+// CHECK: ret void
+
// Swap the values of Buf at indices ix1 and ix2
void swap(unsigned Buf[MAX], unsigned ix1, unsigned ix2) {
float tmp = Buf[ix1];
@@ -24,9 +37,24 @@ void swap(unsigned Buf[MAX], unsigned ix1, unsigned ix2) {
Buf[ix2] = tmp;
}
-// NOINLINE: Function Attrs: alwaysinline
-// NOINLINE: define void @_Z10BubbleSortA100_jj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[IntAttr]]
-// NOINLINE: ret void
+// Check optnone attribute for library target compilation
+// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
+// CHECK_LIB_OPTNONE: define void @_Z10BubbleSortA100_jj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[ExtAttr]]
+
+// Check alwaysinline attribute for non-entry functions of compute target compilation
+// CHECK_CS_OPTNONE_NOPASS: Function Attrs: alwaysinline
+// CHECK_CS_OPTNONE_NOPASS: define void @_Z10BubbleSortA100_jj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[ExtAttr]]
+
+// Check alwaysinline attribute for non-entry functions of compute target compilation
+// CHECK_CS_OPTNONE_PASS: Function Attrs: alwaysinline
+// CHECK_CS_OPTNONE_PASS: define void @_Z10BubbleSortA100_jj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[ExtAttr]]
+
+// Check alwaysinline attribute for opt compilation to library target
+// CHECK_OPT: Function Attrs: alwaysinline
+// CHECK_OPT: define void @_Z10BubbleSortA100_jj(ptr noundef readonly byval([100 x i32]) align 4 captures(none) %Buf, i32 noundef %size) {{[a-z_ ]*}} [[BubOptAttr:\#[0-9]+]]
+
+// CHECK: ret void
+
// Inefficiently sort Buf in place
void BubbleSort(unsigned Buf[MAX], unsigned size) {
bool swapped = true;
@@ -41,12 +69,26 @@ void BubbleSort(unsigned Buf[MAX], unsigned size) {
}
}
-// Note ExtAttr is the inlined export set of attribs
-// CHECK: Function Attrs: alwaysinline
-// CHECK: define noundef i32 @_Z11RemoveDupesA100_jj(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 {{.*}}%Buf, i32 noundef %size) {{[a-z_ ]*}}[[ExtAttr:\#[0-9]+]]
-// CHECK: ret i32
+// Check optnone attribute for library target compilation of exported function
+// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
+// CHECK_LIB_OPTNONE: define noundef i32 @_Z11RemoveDupesA100_jj(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 {{.*}}%Buf, i32 noundef %size) [[ExportAttr:\#[0-9]+]]
// Sort Buf and remove any duplicate values
// returns the number of values left
+
+// Check alwaysinline attribute for exported function of compute target compilation
+// CHECK_CS_OPTNONE_NOPASS: Function Attrs: alwaysinline
+// CHECK_CS_OPTNONE_NOPASS: define noundef i32 @_Z11RemoveDupesA100_jj(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 {{.*}}%Buf, i32 noundef %size) [[ExportAttr:\#[0-9]+]]
+
+// Check alwaysinline attribute for exported function of compute target compilation
+// CHECK_CS_OPTNONE_PASS: Function Attrs: alwaysinline
+// CHECK_CS_OPTNONE_PASS: define noundef i32 @_Z11RemoveDupesA100_jj(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 {{.*}}%Buf, i32 noundef %size) [[ExportAttr:\#[0-9]+]]
+
+// Check alwaysinline attribute for exported function of library target compilation
+// CHECK_OPT: Function Attrs: alwaysinline
+// CHECK_OPT: define noundef i32 @_Z11RemoveDupesA100_jj(ptr noundef byval([100 x i32]) align 4 captures(none) %Buf, i32 noundef %size) {{[a-z_ ]*}} [[RemOptAttr:\#[0-9]+]]
+
+// CHECK: ret i32
+
export
unsigned RemoveDupes(unsigned Buf[MAX], unsigned size) {
BubbleSort(Buf, size);
@@ -63,19 +105,44 @@ unsigned RemoveDupes(unsigned Buf[MAX], unsigned size) {
RWBuffer<unsigned> Indices;
-// The mangled version of main only remains without inlining
-// because it has internal linkage from the start
-// Note main functions get the norecurse attrib, which IntAttr reflects
-// NOINLINE: Function Attrs: alwaysinline
-// NOINLINE: define internal void @_Z4mainj(i32 noundef %GI) [[IntAttr]]
-// NOINLINE: ret void
+// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
+// Internal function attributes are the same as those of source function's
+// CHECK_LIB_OPTNONE: define internal void @_Z4mainj(i32 noundef %GI) [[ExtAttr]]
+// CHECK_LIB_OPTNONE: ret void
+
+// CHECK_CS_OPTNONE_NOPASS: Function Attrs: alwaysinline
+// Internal function attributes are different from those of source function's
+// CHECK_CS_OPTNONE_NOPASS: define internal void @_Z4mainj(i32 noundef %GI) [[ExtAttr]]
+// CHECK_CS_OPTNONE_NOPASS: ret void
+
+// Check internal function @_Z4mainj is not generated when LLVM passes enabled
+// CHECK_CS_OPTNONE_PASS-NOT: define internal void @_Z4mainj
+
+// Check internal function @_Z4mainj is not generated as it should be inlined
+// for opt builds
+// CHECK_OPT-NOT: define internal void @_Z4mainj
// The unmangled version is not inlined, EntryAttr reflects that
-// CHECK: Function Attrs: {{.*}}noinline
-// CHECK: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
-// Make sure function calls are inlined when AlwaysInline is run
-// This only leaves calls to llvm. intrinsics
-// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK_LIB_OPTNONE: Function Attrs: {{.*}}noinline
+// CHECK_LIB_OPTNONE: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
+// Make sure internal function is not inlined when optimization is disabled
+// CHECK_LIB_OPTNONE: call void @_Z4mainj
+
+// CHECK_CS_OPTNONE_NOPASS: Function Attrs:{{.*}}optnone
+// CHECK_CS_OPTNONE_NOPASS: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
+// Make sure internal function is not inlined when optimization is disabled
+// CHECK_CS_OPTNONE_NOPASS: call void @_Z4mainj
+
+// CHECK_CS_OPTNONE_PASS: Function Attrs: {{.*}}noinline
+// CHECK_CS_OPTNONE_PASS: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
+// Make sure internal function is inlined when LLVM passes are enabled
+// CHECK_CS_OPTNONE_PASS: _Z4mainj.exit:
+
+// CHECK_OPT: Function Attrs: {{.*}}noinline
+// CHECK_OPT: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
+// Make sure internal function is inlined as optimization is enabled
+// CHECK_OPT: _Z4mainj.exit:
+
// CHECK: ret void
[numthreads(1,1,1)]
@@ -90,19 +157,41 @@ void main(unsigned int GI : SV_GroupIndex) {
tmpIndices[i] = Indices[i];
}
-// The mangled version of main only remains without inlining
-// because it has internal linkage from the start
-// Note main functions get the norecurse attrib, which IntAttr reflects
-// NOINLINE: Function Attrs: alwaysinline
-// NOINLINE: define internal void @_Z6main10v() [[IntAttr]]
-// NOINLINE: ret void
+// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
+// CHECK_LIB_OPTNONE: define internal void @_Z6main10v() [[ExtAttr]]
+// CHECK_LIB_OPTNONE: ret void
+
+// CHECK_CS_OPTNONE_NOPASS: Function Attrs:{{.*}}alwaysinline
+// CHECK_CS_OPTNONE_NOPASS: define internal void @_Z6main10v() [[ExtAttr]]
+// CHECK_CS_OPTNONE_NOPASS: ret void
+
+// Check internal function @_Z6main10v is not generated when LLVM passes are enabled
+// CHECK_CS_OPTNONE_PASS-NOT: define internal void @_Z6main10v
+
+// Check internal function @_Z6main10v is not generated as it should be inlined
+// CHECK_OPT-NOT: define internal void @_Z6main10v
// The unmangled version is not inlined, EntryAttr reflects that
-// CHECK: Function Attrs: {{.*}}noinline
-// CHECK: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
-// Make sure function calls are inlined when AlwaysInline is run
-// This only leaves calls to llvm. intrinsics
-// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
+// CHECK_LIB_OPTNONE: Function Attrs: {{.*}}noinline
+// CHECK_LIB_OPTNONE: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
+// Make sure internal function is not inlined when optimization is disabled
+// CHECK_LIB_OPTNONE: call void @_Z6main10v
+
+// CHECK_CS_OPTNONE_NOPASS: Function Attrs: {{.*}}noinline
+// CHECK_CS_OPTNONE_NOPASS: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
+// Make sure internal function is not inlined when optimization is disabled
+// CHECK_CS_OPTNONE_NOPASS: call void @_Z6main10v
+
+// CHECK_CS_OPTNONE_PASS: Function Attrs: {{.*}}noinline
+// CHECK_CS_OPTNONE_PASS: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
+// Check internal function is inlined as optimization is enabled when LLVM passes
+// are enabled
+// CHECK_CS_OPTNONE_PASS: _Z6main10v.exit:
+
+// CHECK_OPT: Function Attrs: {{.*}}noinline
+// CHECK_OPT: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
+// Make sure internal function is inlined as optimization is enabled
+// CHECK_OPT: _Z6main10v.exit:
// CHECK: ret void
[numthreads(1,1,1)]
@@ -111,6 +200,16 @@ void main10() {
main(10);
}
-// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline
-// CHECK: attributes [[ExtAttr]] = {{.*}} alwaysinline
-// CHECK: attributes [[EntryAttr]] = {{.*}} noinline
+// CHECK_LIB_OPTNONE: attributes [[ExtAttr]] = {{.*}} optnone
+// CHECK_LIB_OPTNONE: attributes [[ExportAttr]] = {{.*}} optnone
+
+// CHECK_CS_OPTNONE_NOPASS: attributes [[ExtAttr]] ={{.*}} alwaysinline
+// CHECK_CS_OPTNONE_NOPASS: attributes [[EntryAttr]] = {{.*}} noinline
+
+// CHECK_CS_OPTNONE_PASS: attributes [[ExtAttr]] ={{.*}} alwaysinline
+// CHECK_CS_OPTNONE_PASS: attributes [[EntryAttr]] = {{.*}} noinline
+
+// CHECK_OPT: attributes [[SwapOptAttr]] ={{.*}} alwaysinline
+// CHECK_OPT: attributes [[BubOptAttr]] ={{.*}} alwaysinline
+// CHECK_OPT: attributes [[RemOptAttr]] ={{.*}} alwaysinline
+// CHECK_OPT: attributes [[EntryAttr]] ={{.*}} noinline
diff --git a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
index 5a3bdc3d4d38e..5c0e2a71f94b8 100644
--- a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
+++ b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
@@ -25,7 +25,7 @@ void main() {
}
// This test makes a probably safe assumption that HLSL 202x includes operator overloading for assignment operators.
-// CHECK: define linkonce_odr noundef i32 @_ZN4Pair8getFirstEv(ptr noundef nonnull align 4 dereferenceable(8) %this) #0 align 2 {
+// CHECK: define linkonce_odr noundef i32 @_ZN4Pair8getFirstEv(ptr noundef nonnull align 4 dereferenceable(8) %this) [[Attr:\#[0-9]+]] align 2 {
// CHECK-NEXT:entry:
// CHECK-NEXT:%this.addr = alloca ptr, align 4
// CHECK-NEXT:%Another = alloca %struct.Pair, align 4
@@ -42,7 +42,7 @@ void main() {
// CHECK-NEXT:%0 = load i32, ptr %First2, align 4
// CHECK-NEXT:ret i32 %0
-// CHECK: define linkonce_odr noundef i32 @_ZN4Pair9getSecondEv(ptr noundef nonnull align 4 dereferenceable(8) %this) #0 align 2 {
+// CHECK: define linkonce_odr noundef i32 @_ZN4Pair9getSecondEv(ptr noundef nonnull align 4 dereferenceable(8) %this) [[Attr]] align 2 {
// CHECK-NEXT:entry:
// CHECK-NEXT:%this.addr = alloca ptr, align 4
// CHECK-NEXT:%agg.tmp = alloca %struct.Pair, align 4
@@ -53,3 +53,5 @@ void main() {
// CHECK-NEXT:%Second = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 1
// CHECK-NEXT:%0 = load i32, ptr %Second, align 4
// CHECK-NEXT:ret i32 %0
+
+// CHECK: attributes [[Attr]] = {{.*}}alwaysinline
>From 00897b8e279c2baffaa2c6db2524ec1a61de46e0 Mon Sep 17 00:00:00 2001
From: "S. Bharadwaj Yadavalli" <Bharadwaj.Yadavalli at microsoft.com>
Date: Fri, 7 Feb 2025 18:03:19 -0500
Subject: [PATCH 2/3] [HLSL] Delete the change that sets non-entry functions of
a library shader as optnone if optimization is disabled.
Update the corresponding tests accordingly.
---
clang/lib/CodeGen/CGHLSLRuntime.cpp | 7 -
.../CodeGenHLSL/GlobalConstructorLib.hlsl | 8 +-
clang/test/CodeGenHLSL/GlobalDestructors.hlsl | 4 +-
.../test/CodeGenHLSL/inline-constructors.hlsl | 16 +-
clang/test/CodeGenHLSL/inline-functions.hlsl | 191 +++++-------------
.../CodeGenHLSL/this-assignment-overload.hlsl | 6 +-
6 files changed, 61 insertions(+), 171 deletions(-)
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index 24acc9a559be2..f83916746bfaf 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -449,13 +449,6 @@ void CGHLSLRuntime::setHLSLFunctionAttributes(const FunctionDecl *FD,
const StringRef ExportAttrKindStr = "hlsl.export";
Fn->addFnAttr(ExportAttrKindStr);
}
- llvm::Triple T(Fn->getParent()->getTargetTriple());
- if (T.getEnvironment() == llvm::Triple::EnvironmentType::Library) {
- if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
- Fn->addFnAttr(llvm::Attribute::OptimizeNone);
- Fn->addFnAttr(llvm::Attribute::NoInline);
- }
- }
}
static void gatherFunctions(SmallVectorImpl<Function *> &Fns, llvm::Module &M,
diff --git a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
index 39d7c73e832a1..09c44f6242c53 100644
--- a/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalConstructorLib.hlsl
@@ -1,5 +1,5 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O1 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=CHECK,INLINE
// Make sure global variable for ctors exist for lib profile.
// CHECK:@llvm.global_ctors
@@ -31,12 +31,12 @@ void SecondEntry() {}
// CHECK: ret void
-// Verify the constructor is optnone
-// NOINLINE: ; Function Attrs: {{.*}} optnone
+// Verify the constructor is alwaysinline
+// NOINLINE: ; Function Attrs: {{.*}}alwaysinline
// NOINLINE-NEXT: define linkonce_odr void @_ZN4hlsl8RWBufferIfEC2Ev({{.*}} [[CtorAttr:\#[0-9]+]]
// NOINLINE: ; Function Attrs: {{.*}}alwaysinline
// NOINLINE-NEXT: define internal void @_GLOBAL__sub_I_GlobalConstructorLib.hlsl() [[InitAttr:\#[0-9]+]]
// NOINLINE-DAG: attributes [[InitAttr]] = {{.*}} alwaysinline
-// NOINLINE-DAG: attributes [[CtorAttr]] = {{.*}} optnone
+// NOINLINE-DAG: attributes [[CtorAttr]] = {{.*}} alwaysinline
diff --git a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
index 8961e1a7e59cd..f98318601134b 100644
--- a/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
+++ b/clang/test/CodeGenHLSL/GlobalDestructors.hlsl
@@ -1,7 +1,7 @@
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=CS,NOINLINE,CHECK
// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -disable-llvm-passes %s -o - | FileCheck %s --check-prefixes=LIB,NOINLINE,CHECK
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -O1 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O1 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -emit-llvm -O0 %s -o - | FileCheck %s --check-prefixes=INLINE,CHECK
// Tests that constructors and destructors are appropriately generated for globals
// and that their calls are inlined when AlwaysInline is run
diff --git a/clang/test/CodeGenHLSL/inline-constructors.hlsl b/clang/test/CodeGenHLSL/inline-constructors.hlsl
index 298d7d4272678..b0d5a783fb372 100644
--- a/clang/test/CodeGenHLSL/inline-constructors.hlsl
+++ b/clang/test/CodeGenHLSL/inline-constructors.hlsl
@@ -1,9 +1,9 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,NOINLINE_LIB
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -Wno-hlsl-extensions -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -disable-llvm-passes %s | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O0 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library -std=hlsl202x -emit-llvm -o - -O1 %s | FileCheck %s --check-prefixes=CHECK,INLINE
// Tests that implicit constructor calls for user classes will always be inlined.
@@ -50,10 +50,6 @@ void NionsDay(int hours) {
// NOINLINE-NEXT: call void @_GLOBAL__sub_I_inline_constructors.hlsl()
// NOINLINE-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
// NOINLINE-NEXT: call void @_Z4mainj(i32 %0)
-// NOINLINE_LIB: call void @_ZN4WeedC1Ev
-// NOINLINE_LIB-NEXT: call void @_ZN5KittyC1Ev
-// NOINLINE_LIB-NEXT: %0 = call i32 @llvm.dx.flattened.thread.id.in.group()
-// NOINLINE_LIB-NEXT: call void @_Z4mainj(i32 %0)
// Verify inlining leaves only calls to "llvm." intrinsics
// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
// CHECK: ret void
diff --git a/clang/test/CodeGenHLSL/inline-functions.hlsl b/clang/test/CodeGenHLSL/inline-functions.hlsl
index 4a7217e3a89e3..4748eeee7475f 100644
--- a/clang/test/CodeGenHLSL/inline-functions.hlsl
+++ b/clang/test/CodeGenHLSL/inline-functions.hlsl
@@ -1,35 +1,22 @@
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,CHECK_LIB_OPTNONE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,CHECK_LIB_OPTNONE
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,CHECK_OPT
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,CHECK_CS_OPTNONE_NOPASS
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,CHECK_CS_OPTNONE_PASS
-// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,CHECK_OPT
-
-// Tests inlining of user functions based on specified optimization options.
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE,OPT_ATTR
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE,OPT_ATTR
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.3-library %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE,NOOPT_ATTR
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -disable-llvm-passes -o - | FileCheck %s --check-prefixes=CHECK,NOINLINE
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O0 -o - | FileCheck %s --check-prefixes=CHECK,INLINE,OPT_ATTR
+// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-compute %s -emit-llvm -O1 -o - | FileCheck %s --check-prefixes=CHECK,INLINE,NOOPT_ATTR
+
+// Tests that user functions will always be inlined.
// This includes exported functions and mangled entry point implementation functions.
+// The unmangled entry functions must not be alwaysinlined.
#define MAX 100
float nums[MAX];
-// Check optnone attribute for library target compilation
-// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
-// CHECK_LIB_OPTNONE: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[ExtAttr:\#[0-9]+]]
-
-// Check alwaysinline attribute for non-entry functions of compute target compilation
-// CHECK_CS_OPTNONE_NOPASS: Function Attrs: alwaysinline
-// CHECK_CS_OPTNONE_NOPASS: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[ExtAttr:\#[0-9]+]]
-
-// Check alwaysinline attribute for non-entry functions of compute target compilation
-// CHECK_CS_OPTNONE_PASS: Function Attrs: alwaysinline
-// CHECK_CS_OPTNONE_PASS: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[ExtAttr:\#[0-9]+]]
-
-// Check alwaysinline attribute for opt compilation to library target
-// CHECK_OPT: Function Attrs: alwaysinline
-// CHECK_OPT: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 captures(none) %Buf, i32 noundef %ix1, i32 noundef %ix2) {{[a-z_ ]*}} [[SwapOptAttr:\#[0-9]+]]
-
-// CHECK: ret void
-
+// Verify that all functions have the alwaysinline attribute
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define void @_Z4swapA100_jjj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %ix1, i32 noundef %ix2) [[IntAttr:\#[0-9]+]]
+// NOINLINE: ret void
// Swap the values of Buf at indices ix1 and ix2
void swap(unsigned Buf[MAX], unsigned ix1, unsigned ix2) {
float tmp = Buf[ix1];
@@ -37,24 +24,9 @@ void swap(unsigned Buf[MAX], unsigned ix1, unsigned ix2) {
Buf[ix2] = tmp;
}
-// Check optnone attribute for library target compilation
-// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
-// CHECK_LIB_OPTNONE: define void @_Z10BubbleSortA100_jj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[ExtAttr]]
-
-// Check alwaysinline attribute for non-entry functions of compute target compilation
-// CHECK_CS_OPTNONE_NOPASS: Function Attrs: alwaysinline
-// CHECK_CS_OPTNONE_NOPASS: define void @_Z10BubbleSortA100_jj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[ExtAttr]]
-
-// Check alwaysinline attribute for non-entry functions of compute target compilation
-// CHECK_CS_OPTNONE_PASS: Function Attrs: alwaysinline
-// CHECK_CS_OPTNONE_PASS: define void @_Z10BubbleSortA100_jj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[ExtAttr]]
-
-// Check alwaysinline attribute for opt compilation to library target
-// CHECK_OPT: Function Attrs: alwaysinline
-// CHECK_OPT: define void @_Z10BubbleSortA100_jj(ptr noundef readonly byval([100 x i32]) align 4 captures(none) %Buf, i32 noundef %size) {{[a-z_ ]*}} [[BubOptAttr:\#[0-9]+]]
-
-// CHECK: ret void
-
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define void @_Z10BubbleSortA100_jj(ptr noundef byval([100 x i32]) align 4 %Buf, i32 noundef %size) [[IntAttr]]
+// NOINLINE: ret void
// Inefficiently sort Buf in place
void BubbleSort(unsigned Buf[MAX], unsigned size) {
bool swapped = true;
@@ -69,26 +41,12 @@ void BubbleSort(unsigned Buf[MAX], unsigned size) {
}
}
-// Check optnone attribute for library target compilation of exported function
-// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
-// CHECK_LIB_OPTNONE: define noundef i32 @_Z11RemoveDupesA100_jj(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 {{.*}}%Buf, i32 noundef %size) [[ExportAttr:\#[0-9]+]]
+// Note ExtAttr is the inlined export set of attribs
+// CHECK: Function Attrs: alwaysinline
+// CHECK: define noundef i32 @_Z11RemoveDupesA100_jj(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 {{.*}}%Buf, i32 noundef %size) {{[a-z_ ]*}}[[ExtAttr:\#[0-9]+]]
+// CHECK: ret i32
// Sort Buf and remove any duplicate values
// returns the number of values left
-
-// Check alwaysinline attribute for exported function of compute target compilation
-// CHECK_CS_OPTNONE_NOPASS: Function Attrs: alwaysinline
-// CHECK_CS_OPTNONE_NOPASS: define noundef i32 @_Z11RemoveDupesA100_jj(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 {{.*}}%Buf, i32 noundef %size) [[ExportAttr:\#[0-9]+]]
-
-// Check alwaysinline attribute for exported function of compute target compilation
-// CHECK_CS_OPTNONE_PASS: Function Attrs: alwaysinline
-// CHECK_CS_OPTNONE_PASS: define noundef i32 @_Z11RemoveDupesA100_jj(ptr {{[a-z_ ]*}}noundef byval([100 x i32]) align 4 {{.*}}%Buf, i32 noundef %size) [[ExportAttr:\#[0-9]+]]
-
-// Check alwaysinline attribute for exported function of library target compilation
-// CHECK_OPT: Function Attrs: alwaysinline
-// CHECK_OPT: define noundef i32 @_Z11RemoveDupesA100_jj(ptr noundef byval([100 x i32]) align 4 captures(none) %Buf, i32 noundef %size) {{[a-z_ ]*}} [[RemOptAttr:\#[0-9]+]]
-
-// CHECK: ret i32
-
export
unsigned RemoveDupes(unsigned Buf[MAX], unsigned size) {
BubbleSort(Buf, size);
@@ -105,44 +63,20 @@ unsigned RemoveDupes(unsigned Buf[MAX], unsigned size) {
RWBuffer<unsigned> Indices;
-// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
-// Internal function attributes are the same as those of source function's
-// CHECK_LIB_OPTNONE: define internal void @_Z4mainj(i32 noundef %GI) [[ExtAttr]]
-// CHECK_LIB_OPTNONE: ret void
-
-// CHECK_CS_OPTNONE_NOPASS: Function Attrs: alwaysinline
-// Internal function attributes are different from those of source function's
-// CHECK_CS_OPTNONE_NOPASS: define internal void @_Z4mainj(i32 noundef %GI) [[ExtAttr]]
-// CHECK_CS_OPTNONE_NOPASS: ret void
-
-// Check internal function @_Z4mainj is not generated when LLVM passes enabled
-// CHECK_CS_OPTNONE_PASS-NOT: define internal void @_Z4mainj
-
-// Check internal function @_Z4mainj is not generated as it should be inlined
-// for opt builds
-// CHECK_OPT-NOT: define internal void @_Z4mainj
+// The mangled version of main only remains without inlining
+// because it has internal linkage from the start
+// Note main functions get the norecurse attrib, which IntAttr reflects
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define internal void @_Z4mainj(i32 noundef %GI) [[IntAttr]]
+// NOINLINE: ret void
// The unmangled version is not inlined, EntryAttr reflects that
-// CHECK_LIB_OPTNONE: Function Attrs: {{.*}}noinline
-// CHECK_LIB_OPTNONE: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
-// Make sure internal function is not inlined when optimization is disabled
-// CHECK_LIB_OPTNONE: call void @_Z4mainj
-
-// CHECK_CS_OPTNONE_NOPASS: Function Attrs:{{.*}}optnone
-// CHECK_CS_OPTNONE_NOPASS: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
-// Make sure internal function is not inlined when optimization is disabled
-// CHECK_CS_OPTNONE_NOPASS: call void @_Z4mainj
-
-// CHECK_CS_OPTNONE_PASS: Function Attrs: {{.*}}noinline
-// CHECK_CS_OPTNONE_PASS: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
-// Make sure internal function is inlined when LLVM passes are enabled
-// CHECK_CS_OPTNONE_PASS: _Z4mainj.exit:
-
-// CHECK_OPT: Function Attrs: {{.*}}noinline
-// CHECK_OPT: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
-// Make sure internal function is inlined as optimization is enabled
-// CHECK_OPT: _Z4mainj.exit:
-
+// OPT_ATTR: Function Attrs: {{.*}}optnone
+// NOOPT_ATTR-NOT: Function Attrs: {{.*}}optnone
+// CHECK: define void @main() {{[a-z_ ]*}}[[EntryAttr:\#[0-9]+]]
+// Make sure function calls are inlined when AlwaysInline is run
+// This only leaves calls to llvm. intrinsics
+// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
// CHECK: ret void
[numthreads(1,1,1)]
@@ -157,41 +91,20 @@ void main(unsigned int GI : SV_GroupIndex) {
tmpIndices[i] = Indices[i];
}
-// CHECK_LIB_OPTNONE: Function Attrs:{{.*}}optnone
-// CHECK_LIB_OPTNONE: define internal void @_Z6main10v() [[ExtAttr]]
-// CHECK_LIB_OPTNONE: ret void
-
-// CHECK_CS_OPTNONE_NOPASS: Function Attrs:{{.*}}alwaysinline
-// CHECK_CS_OPTNONE_NOPASS: define internal void @_Z6main10v() [[ExtAttr]]
-// CHECK_CS_OPTNONE_NOPASS: ret void
-
-// Check internal function @_Z6main10v is not generated when LLVM passes are enabled
-// CHECK_CS_OPTNONE_PASS-NOT: define internal void @_Z6main10v
-
-// Check internal function @_Z6main10v is not generated as it should be inlined
-// CHECK_OPT-NOT: define internal void @_Z6main10v
+// The mangled version of main only remains without inlining
+// because it has internal linkage from the start
+// Note main functions get the norecurse attrib, which IntAttr reflects
+// NOINLINE: Function Attrs: alwaysinline
+// NOINLINE: define internal void @_Z6main10v() [[IntAttr]]
+// NOINLINE: ret void
// The unmangled version is not inlined, EntryAttr reflects that
-// CHECK_LIB_OPTNONE: Function Attrs: {{.*}}noinline
-// CHECK_LIB_OPTNONE: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
-// Make sure internal function is not inlined when optimization is disabled
-// CHECK_LIB_OPTNONE: call void @_Z6main10v
-
-// CHECK_CS_OPTNONE_NOPASS: Function Attrs: {{.*}}noinline
-// CHECK_CS_OPTNONE_NOPASS: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
-// Make sure internal function is not inlined when optimization is disabled
-// CHECK_CS_OPTNONE_NOPASS: call void @_Z6main10v
-
-// CHECK_CS_OPTNONE_PASS: Function Attrs: {{.*}}noinline
-// CHECK_CS_OPTNONE_PASS: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
-// Check internal function is inlined as optimization is enabled when LLVM passes
-// are enabled
-// CHECK_CS_OPTNONE_PASS: _Z6main10v.exit:
-
-// CHECK_OPT: Function Attrs: {{.*}}noinline
-// CHECK_OPT: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
-// Make sure internal function is inlined as optimization is enabled
-// CHECK_OPT: _Z6main10v.exit:
+// OPT_ATTR: Function Attrs: {{.*}}optnone
+// NOOPT_ATTR-NOT: Function Attrs: {{.*}}optnone
+// CHECK: define void @main10() {{[a-z_ ]*}}[[EntryAttr]]
+// Make sure function calls are inlined when AlwaysInline is run
+// This only leaves calls to llvm. intrinsics
+// INLINE-NOT: call {{[^@]*}} @{{[^l][^l][^v][^m][^\.]}}
// CHECK: ret void
[numthreads(1,1,1)]
@@ -200,16 +113,6 @@ void main10() {
main(10);
}
-// CHECK_LIB_OPTNONE: attributes [[ExtAttr]] = {{.*}} optnone
-// CHECK_LIB_OPTNONE: attributes [[ExportAttr]] = {{.*}} optnone
-
-// CHECK_CS_OPTNONE_NOPASS: attributes [[ExtAttr]] ={{.*}} alwaysinline
-// CHECK_CS_OPTNONE_NOPASS: attributes [[EntryAttr]] = {{.*}} noinline
-
-// CHECK_CS_OPTNONE_PASS: attributes [[ExtAttr]] ={{.*}} alwaysinline
-// CHECK_CS_OPTNONE_PASS: attributes [[EntryAttr]] = {{.*}} noinline
-
-// CHECK_OPT: attributes [[SwapOptAttr]] ={{.*}} alwaysinline
-// CHECK_OPT: attributes [[BubOptAttr]] ={{.*}} alwaysinline
-// CHECK_OPT: attributes [[RemOptAttr]] ={{.*}} alwaysinline
-// CHECK_OPT: attributes [[EntryAttr]] ={{.*}} noinline
+// NOINLINE: attributes [[IntAttr]] = {{.*}} alwaysinline
+// CHECK: attributes [[ExtAttr]] = {{.*}} alwaysinline
+// CHECK: attributes [[EntryAttr]] = {{.*}} noinline
diff --git a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
index 5c0e2a71f94b8..5a3bdc3d4d38e 100644
--- a/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
+++ b/clang/test/CodeGenHLSL/this-assignment-overload.hlsl
@@ -25,7 +25,7 @@ void main() {
}
// This test makes a probably safe assumption that HLSL 202x includes operator overloading for assignment operators.
-// CHECK: define linkonce_odr noundef i32 @_ZN4Pair8getFirstEv(ptr noundef nonnull align 4 dereferenceable(8) %this) [[Attr:\#[0-9]+]] align 2 {
+// CHECK: define linkonce_odr noundef i32 @_ZN4Pair8getFirstEv(ptr noundef nonnull align 4 dereferenceable(8) %this) #0 align 2 {
// CHECK-NEXT:entry:
// CHECK-NEXT:%this.addr = alloca ptr, align 4
// CHECK-NEXT:%Another = alloca %struct.Pair, align 4
@@ -42,7 +42,7 @@ void main() {
// CHECK-NEXT:%0 = load i32, ptr %First2, align 4
// CHECK-NEXT:ret i32 %0
-// CHECK: define linkonce_odr noundef i32 @_ZN4Pair9getSecondEv(ptr noundef nonnull align 4 dereferenceable(8) %this) [[Attr]] align 2 {
+// CHECK: define linkonce_odr noundef i32 @_ZN4Pair9getSecondEv(ptr noundef nonnull align 4 dereferenceable(8) %this) #0 align 2 {
// CHECK-NEXT:entry:
// CHECK-NEXT:%this.addr = alloca ptr, align 4
// CHECK-NEXT:%agg.tmp = alloca %struct.Pair, align 4
@@ -53,5 +53,3 @@ void main() {
// CHECK-NEXT:%Second = getelementptr inbounds nuw %struct.Pair, ptr %this1, i32 0, i32 1
// CHECK-NEXT:%0 = load i32, ptr %Second, align 4
// CHECK-NEXT:ret i32 %0
-
-// CHECK: attributes [[Attr]] = {{.*}}alwaysinline
>From 54b1e873d18a3efa6970b878ff3d1885f7b8929d Mon Sep 17 00:00:00 2001
From: "S. Bharadwaj Yadavalli" <Bharadwaj.Yadavalli at microsoft.com>
Date: Tue, 11 Feb 2025 10:26:36 -0500
Subject: [PATCH 3/3] Add comment
---
clang/lib/CodeGen/CGHLSLRuntime.cpp | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/clang/lib/CodeGen/CGHLSLRuntime.cpp b/clang/lib/CodeGen/CGHLSLRuntime.cpp
index f83916746bfaf..193a8d038a8ee 100644
--- a/clang/lib/CodeGen/CGHLSLRuntime.cpp
+++ b/clang/lib/CodeGen/CGHLSLRuntime.cpp
@@ -345,6 +345,11 @@ void clang::CodeGen::CGHLSLRuntime::setHLSLEntryAttributes(
WaveSizeAttr->getPreferred());
Fn->addFnAttr(WaveSizeKindStr, WaveSizeStr);
}
+ // HLSL entry functions are materialized for module functions with the
+ // HLSLShaderAttr attribute. SetLLVMFunctionAttributesForDefinition, called
+ // later in the compiler flow for such module functions, is not aware of the
+ // newly materialized entry functions and hence cannot set their attributes.
+ // So, set the attributes of the entry function here, as appropriate.
if (CGM.getCodeGenOpts().OptimizationLevel == 0) {
Fn->addFnAttr(llvm::Attribute::OptimizeNone);
}
More information about the cfe-commits
mailing list